In [14]:
import levit 
import levit_c 
import torch 
import tensorrt as trt 
from cuda import cudart
import numpy as np

import trt_inference 
import trt_convert_onnx 
import trt_build_engine
import sys 

sys.path.append("/home/wentaoy/BraggNN")



In [None]:
from model import BraggNN
my_model  = BraggNN(imgsz=11, fcsz=(16, 8, 4, 2))
my_model_fn = "../../BraggNN/models/fc16_8_4_2-sz11.pth" 
my_model.load_state_dict(torch.load(my_model_fn, map_location=torch.device('cpu')))
onnx_path = "./test_onnx.onnx"

input_tensor = torch.randn(16,1,11,11)
torch.onnx.export(my_model,               # model being run
                  input_tensor,                         # model input (or a tuple for multiple inputs)
                  onnx_path,   # where to save the model (can be a file or file-like object)
                  export_params=True,        # store the trained parameter weights inside the model file
                  opset_version=11,          # the ONNX version to export the model to
                  do_constant_folding=True, 
                  input_names = ['input'],   # the model's input names
                  output_names = ['output'], # the model's output names
                  dynamic_axes={'input' : {0 : 'batch_size'},    # variable length axes
                                'output' : {0 : 'batch_size'}})  # whether to execute constant folding for optimization)
trt_build_engine.setup_engine(onnx_path="./test_onnx.onnx",trtfile="./test_plan.plan",min_shape=(1,1,11,11),common_shape=(16,1,11,11),max_shape=(32,1,11,11))

In [None]:
my_engine = trt_build_engine.load_engine("./test_plan.plan")

In [11]:
import tensorrt as trt 
from cuda import cudart
import numpy as np

from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from PIL import Image
import trt_build_engine 
import os 
# from datasets import build_dataset

def inference(engine,batch_size,input_data):
    context = engine.create_execution_context()
    context.set_binding_shape(0,[batch_size, 1, 11,11])

    _, stream = cudart.cudaStreamCreate()
    inputHost = np.ascontiguousarray(input_data.reshape(-1))
    outputHost = np.empty(context.get_binding_shape(1),dtype = trt.nptype(engine.get_binding_dtype(1)))

    _, inputDevice = cudart.cudaMallocAsync(inputHost.nbytes,stream)
    _, outputDevice = cudart.cudaMallocAsync(outputHost.nbytes,stream)

    
    cudart.cudaMemcpyAsync(inputDevice,inputHost.ctypes.data,inputHost.nbytes,cudart.cudaMemcpyKind.cudaMemcpyHostToDevice,stream)
    
    context.execute_async_v2([int(inputDevice),int(outputDevice)],stream)

    cudart.cudaMemcpyAsync(outputHost.ctypes.data,outputDevice,outputHost.nbytes,cudart.cudaMemcpyKind.cudaMemcpyDeviceToHost,stream)
    cudart.cudaStreamSynchronize(stream)

    cudart.cudaStreamDestroy(stream)
    cudart.cudaFree(inputDevice)
    cudart.cudaFree(outputDevice)
    return outputHost

In [25]:
my_input = np.random.randn(16,1,11,11).astype(np.float32)
my_input_tensor = torch.from_numpy(my_input)

In [26]:
my_output = inference(my_engine,16,my_input.astype(np.float32))
my_out_tensor = my_model(my_input_tensor)

[05/27/2022-12:52:25] [TRT] [V] Using cublas as a tactic source
[05/27/2022-12:52:25] [TRT] [W] TensorRT was linked against cuBLAS/cuBLAS LT 11.8.0 but loaded cuBLAS/cuBLAS LT 11.5.1
[05/27/2022-12:52:25] [TRT] [I] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 1035, GPU 635 (MiB)
[05/27/2022-12:52:25] [TRT] [V] Total per-runner device persistent memory is 1536
[05/27/2022-12:52:25] [TRT] [V] Total per-runner host persistent memory is 14592
[05/27/2022-12:52:25] [TRT] [V] Allocated activation device memory of size 3162624
[05/27/2022-12:52:25] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +3, now: CPU 0, GPU 3 (MiB)


In [27]:
my_output

array([[2.946626 , 4.817519 ],
       [2.1243587, 3.0912056],
       [2.968986 , 0.954828 ],
       [1.755752 , 1.7829952],
       [1.5640985, 0.9984462],
       [1.5669798, 0.9349642],
       [1.9920723, 1.3910123],
       [2.2827024, 2.6263838],
       [3.0488167, 4.55049  ],
       [3.9996562, 6.26191  ],
       [2.700595 , 3.6245704],
       [3.8992593, 4.8770175],
       [2.0428529, 2.8271317],
       [1.6770633, 1.5390544],
       [1.0882812, 1.3045433],
       [3.2923229, 2.749569 ]], dtype=float32)

In [28]:
my_out_tensor

tensor([[2.9487, 4.8205],
        [2.1217, 3.0876],
        [2.9687, 0.9558],
        [1.7593, 1.7867],
        [1.5645, 0.9982],
        [1.5674, 0.9361],
        [1.9883, 1.3909],
        [2.2868, 2.6371],
        [3.0540, 4.5593],
        [4.0004, 6.2618],
        [2.7031, 3.6269],
        [3.9001, 4.8796],
        [2.0434, 2.8281],
        [1.6739, 1.5430],
        [1.0880, 1.3048],
        [3.2928, 2.7526]], grad_fn=<AddmmBackward0>)

In [None]:
engine_str,logger = trt_build_engine.setup_engine(max_batch_size = 512,
                 max_workspace_size_n = 4,
                 onnx_path = "./onnx_models/levit_128_onnx.onnx",
                 trtfile = "./trt_plans/model_128.plan",
                 min_shape = (2,3,224,224),
                 common_shape = (4,3,224,224),
                 max_shape = (16,3,224,224)
                 )

In [2]:
engine = trt_build_engine.load_engine("./trt_plans/model_128S.plan")

[05/26/2022-20:09:21] [TRT] [I] [MemUsageChange] Init CUDA: CPU +201, GPU +0, now: CPU 322, GPU 373 (MiB)
[05/26/2022-20:09:21] [TRT] [I] Loaded engine size: 31 MiB
[05/26/2022-20:09:22] [TRT] [V] Using cublas as a tactic source
[05/26/2022-20:09:22] [TRT] [W] TensorRT was linked against cuBLAS/cuBLAS LT 11.8.0 but loaded cuBLAS/cuBLAS LT 11.5.1
[05/26/2022-20:09:22] [TRT] [I] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +226, GPU +92, now: CPU 576, GPU 497 (MiB)
[05/26/2022-20:09:22] [TRT] [V] Deserialization required 398358 microseconds.
[05/26/2022-20:09:22] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +30, now: CPU 0, GPU 30 (MiB)


In [3]:
input_tensor = np.random.rand(2,3,224,224).astype(np.float32)

In [4]:
trt_inference.inference(engine,2,input_tensor)

[05/26/2022-20:09:29] [TRT] [V] Using cublas as a tactic source
[05/26/2022-20:09:29] [TRT] [W] TensorRT was linked against cuBLAS/cuBLAS LT 11.8.0 but loaded cuBLAS/cuBLAS LT 11.5.1
[05/26/2022-20:09:29] [TRT] [I] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +0, GPU +8, now: CPU 546, GPU 497 (MiB)
[05/26/2022-20:09:29] [TRT] [V] Total per-runner device persistent memory is 103424
[05/26/2022-20:09:29] [TRT] [V] Total per-runner host persistent memory is 45552
[05/26/2022-20:09:29] [TRT] [V] Allocated activation device memory of size 21375488
[05/26/2022-20:09:29] [TRT] [I] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +21, now: CPU 0, GPU 51 (MiB)


array([[-0.08842054, -0.85783243,  0.3030634 , ...,  0.73470837,
         1.1878492 ,  0.11229506],
       [ 0.25198007, -0.6299014 ,  0.4541248 , ...,  0.70646733,
         1.7449799 ,  0.28991875]], dtype=float32)

In [6]:
my_model = levit.LeViT_128S(pretrained=True,distillation=True)


In [4]:
import cv2 
from PIL import Image
output = cv2.imread("../../data/img/ILSVRC2012_val_00000001.JPEG").transpose(2,0,1).astype(np.float32)
# output = cv2.resize(output,(224,224)).astype(np.uint8)
input_tensor = torch.from_numpy(output)

In [89]:
input_tensor.size()

torch.Size([3, 375, 500])

In [73]:
input_tensor.size()

torch.Size([224, 224, 3])

In [None]:
my_model(input_tensor.unsqueeze(0))

In [57]:
input_tensor.size()

torch.Size([224, 224, 500])

In [42]:
np.shape(output.transpose(2,0,1))

(3, 224, 224)

In [34]:
output

array([[[189, 187, 177],
        [191, 189, 179],
        [193, 191, 181],
        ...,
        [201, 201, 189],
        [198, 198, 186],
        [197, 197, 185]],

       [[191, 189, 179],
        [192, 190, 180],
        [192, 190, 180],
        ...,
        [199, 198, 188],
        [197, 196, 186],
        [198, 197, 187]],

       [[189, 189, 177],
        [192, 191, 181],
        [191, 190, 180],
        ...,
        [197, 197, 187],
        [195, 195, 186],
        [193, 193, 183]],

       ...,

       [[156, 162, 162],
        [162, 169, 164],
        [174, 180, 175],
        ...,
        [172, 179, 175],
        [179, 187, 182],
        [171, 178, 174]],

       [[158, 165, 163],
        [157, 164, 159],
        [165, 171, 167],
        ...,
        [171, 182, 179],
        [171, 182, 180],
        [157, 164, 163]],

       [[156, 163, 159],
        [165, 170, 169],
        [168, 173, 171],
        ...,
        [151, 160, 157],
        [164, 175, 173],
        [168, 175, 173]]

In [25]:
output

array([[[188.65866, 186.65866, 176.65866],
        [191.42169, 189.42169, 179.42169],
        [192.79747, 190.79747, 180.79747],
        ...,
        [200.71622, 200.71622, 188.71622],
        [198.56233, 198.56233, 186.56233],
        [197.33705, 197.33705, 185.33705]],

       [[190.99312, 189.01544, 178.99312],
        [192.28294, 190.29411, 180.29411],
        [192.25223, 190.17278, 180.35402],
        ...,
        [199.07008, 198.07008, 188.07008],
        [197.13893, 196.13893, 186.13893],
        [197.97339, 196.97339, 186.97339]],

       [[188.90938, 188.90938, 176.90938],
        [191.82588, 190.9299 , 180.72188],
        [191.24033, 189.87054, 180.61012],
        ...,
        [196.68005, 197.05058, 187.10565],
        [195.21072, 195.58125, 185.58125],
        [192.875  , 193.24554, 183.24554]],

       ...,

       [[155.80281, 162.57066, 161.57066],
        [162.2822 , 169.01523, 164.3188 ],
        [173.997  , 180.05205, 175.05205],
        ...,
        [171.67929, 179.20

In [17]:
input_tensor.size()

torch.Size([224, 224, 3])

In [29]:
from PIL import Image
def read_img_numpy(img_path ="../../data/img/ILSVRC2012_val_00000001.JPEG" ):
    out = Image.open(img_path)
    out = out.resize((224,224))
    input_numpy = np.array(out)
    print(np.shape(out))

    try:
        input_numpy = input_numpy.transpose(2,0,1).squeeze().astype(np.float32)
    except:
        print(np.shape(out))
    return input_numpy

In [30]:
img = read_img_numpy()

In [53]:
from torch.utils.data import Dataset
import os 
class img_dataset(Dataset):
    def read_img_numpy(self,img_path):
        out = Image.open(img_path)
        out = out.convert("RGB")
        out = out.resize((224,224))
        input_numpy = np.array(out)
        try:
            input_numpy = input_numpy.transpose(2,0,1).squeeze().astype(np.float32)
        except:
            print("error")     
        return input_numpy
    def __init__(self,dir_path):
        self.dir_path = dir_path
        self.image_names = os.listdir(dir_path)
    def __len__(self):
        return len(self.image_names)
    
    def __getitem__(self,idx):
        name = self.image_names[idx]
        ret_numpy_img = self.read_img_numpy(self.dir_path + name)
        return ret_numpy_img

def evaluate(dataset,model):
    engine = 


In [54]:
my_dataset = img_dataset("../../data/img/")

In [65]:
out = my_dataset[14].transpose(1,2,0)

In [67]:
np.shape(out)

(224, 224, 3)

In [19]:
import os 
os.listdir("../../data/img")

['ILSVRC2012_val_00046108.JPEG',
 'ILSVRC2012_val_00044030.JPEG',
 'ILSVRC2012_val_00042898.JPEG',
 'ILSVRC2012_val_00041481.JPEG',
 'ILSVRC2012_val_00039493.JPEG',
 'ILSVRC2012_val_00037864.JPEG',
 'ILSVRC2012_val_00037578.JPEG',
 'ILSVRC2012_val_00033095.JPEG',
 'ILSVRC2012_val_00031091.JPEG',
 'ILSVRC2012_val_00030154.JPEG',
 'ILSVRC2012_val_00029819.JPEG',
 'ILSVRC2012_val_00029360.JPEG',
 'ILSVRC2012_val_00028777.JPEG',
 'ILSVRC2012_val_00028553.JPEG',
 'ILSVRC2012_val_00027575.JPEG',
 'ILSVRC2012_val_00025401.JPEG',
 'ILSVRC2012_val_00024855.JPEG',
 'ILSVRC2012_val_00023386.JPEG',
 'ILSVRC2012_val_00022933.JPEG',
 'ILSVRC2012_val_00021388.JPEG',
 'ILSVRC2012_val_00021057.JPEG',
 'ILSVRC2012_val_00020878.JPEG',
 'ILSVRC2012_val_00020583.JPEG',
 'ILSVRC2012_val_00018525.JPEG',
 'ILSVRC2012_val_00018454.JPEG',
 'ILSVRC2012_val_00017993.JPEG',
 'ILSVRC2012_val_00017876.JPEG',
 'ILSVRC2012_val_00017779.JPEG',
 'ILSVRC2012_val_00017000.JPEG',
 'ILSVRC2012_val_00015032.JPEG',
 'ILSVRC20

In [14]:
img = read_img_numpy()

In [15]:
np.shape(img)

(3, 224, 224)

In [102]:
img = Image.fromarray(input_numpy)

TypeError: Cannot handle this data type: (1, 1, 224), |u1

In [79]:
np.shape(out.transpose(2,0,1))

(3, 375, 500)