In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install --upgrade ultralytics

In [None]:
!pip install onnx
!pip install onnxruntime

In [None]:
!pip install mlflow

In [5]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.2.3


In [None]:
# ✅ Install Dependencies
#!pip install --upgrade ultralytics
!apt-get install -y libnvinfer8 libnvinfer-plugin8 python3-libnvinfer
!pip install nvidia-pyindex && pip install nvidia-tensorrt
!apt-get install -y tensorrt

In [7]:
import torch
import time
import onnx
import onnxruntime
import numpy as np
import cv2
import tensorrt as trt
import requests
from ultralytics import YOLO

# ✅ Check GPU & TensorRT
print("CUDA Available:", torch.cuda.is_available())
print("GPU:", torch.cuda.get_device_name(0))
print("TensorRT Version:", trt.__version__)

# ✅ Download YOLOv8 Model
yolo_model = YOLO('yolov8s.pt')  # Using YOLOv8-Small
yolo_model.export(format="onnx")  # Convert to ONNX

# ✅ Convert ONNX to TensorRT
onnx_model_path = "yolov8s.onnx"
trt_engine_path = "yolov8s.trt"

TRT_LOGGER = trt.Logger(trt.Logger.WARNING)

def build_engine(onnx_path, engine_path):
    with trt.Builder(TRT_LOGGER) as builder, \
         builder.create_network(1) as network, \
         trt.OnnxParser(network, TRT_LOGGER) as parser:

        with open(onnx_path, "rb") as model:
            parser.parse(model.read())

        config = builder.create_builder_config()
        config.set_flag(trt.BuilderFlag.FP16)  # Enable FP16 Optimization

        # 🔹 Optimization profile
        profile = builder.create_optimization_profile()
        profile.set_shape("input", (1, 3, 640, 640), (1, 3, 640, 640), (1, 3, 640, 640))  # Min, Opt, Max shapes
        config.add_optimization_profile(profile)

        engine = builder.build_serialized_network(network, config)

        with open(engine_path, "wb") as f:
            f.write(engine)

    return engine

print("⚡ Converting ONNX to TensorRT...")
engine = build_engine(onnx_model_path, trt_engine_path)
print("✅ TensorRT model saved at:", trt_engine_path)

# ✅ Load an Image for Inference (Fixed URL Handling)
image_url = "https://ultralytics.com/images/zidane.jpg"

# 🔹 Fetch image from URL properly
resp = requests.get(image_url, stream=True).raw
image_array = np.asarray(bytearray(resp.read()), dtype=np.uint8)
img = cv2.imdecode(image_array, cv2.IMREAD_COLOR)

# 🔹 Preprocess Image
img = cv2.resize(img, (640, 640))
img = img.astype(np.float32) / 255.0
img = np.transpose(img, (2, 0, 1))[None, :, :, :]

# ✅ PyTorch Inference (Baseline)
device = "cuda" if torch.cuda.is_available() else "cpu"
yolo_model.to(device)
img_tensor = torch.tensor(img).to(device)

start_time = time.time()
pred = yolo_model(img_tensor)
end_time = time.time()
pytorch_inference_time= end_time - start_time
print(f"PyTorch Inference Time: {end_time - start_time:.4f} seconds")



Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
CUDA Available: True
GPU: NVIDIA A100-SXM4-40GB
TensorRT Version: 10.8.0.43
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8s.pt to 'yolov8s.pt'...


100%|██████████| 21.5M/21.5M [00:00<00:00, 138MB/s] 


Ultralytics 8.3.80 🚀 Python-3.11.11 torch-2.5.1+cu124 CPU (Intel Xeon 2.20GHz)
YOLOv8s summary (fused): 72 layers, 11,156,544 parameters, 0 gradients, 28.6 GFLOPs

[34m[1mPyTorch:[0m starting from 'yolov8s.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 84, 8400) (21.5 MB)
[31m[1mrequirements:[0m Ultralytics requirements ['onnxslim', 'onnxruntime-gpu'] not found, attempting AutoUpdate...
Collecting onnxslim
  Downloading onnxslim-0.1.48-py3-none-any.whl.metadata (4.6 kB)
Collecting onnxruntime-gpu
  Downloading onnxruntime_gpu-1.20.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (4.5 kB)
Downloading onnxslim-0.1.48-py3-none-any.whl (142 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 142.9/142.9 kB 4.1 MB/s eta 0:00:00
Downloading onnxruntime_gpu-1.20.1-cp311-cp311-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (291.5 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 291.5/291.5 MB 42.7 MB/s eta 0:00:00
Installing collected packages: onnxs

In [None]:
##Comparing the Inference Time with TensorRT inference time.

In [8]:
# ✅ TensorRT Inference
def infer_trt(engine_path, img):
    runtime = trt.Runtime(TRT_LOGGER)
    with open(engine_path, "rb") as f:
        engine = runtime.deserialize_cuda_engine(f.read())

    context = engine.create_execution_context()

    # ✅ Fix: Use `engine.get_binding_index()` and `engine.get_tensor_name()`
    binding_index = 0  # Usually input is at index 0
    binding_name = engine[binding_index]  # Get input tensor name
    input_shape = context.get_tensor_shape(binding_name)  # Corrected way

    img = np.ascontiguousarray(img).astype(np.float32)

    d_input = torch.from_numpy(img).cuda()
    d_output = torch.empty(*input_shape).cuda()

    bindings = [int(d_input.data_ptr()), int(d_output.data_ptr())]
    context.execute_v2(bindings)

    return d_output.cpu().numpy()




start_time = time.time()
trt_output = infer_trt(trt_engine_path, img)
end_time = time.time()
tensorrt_inference_time= end_time - start_time
print(f"TensorRT Inference Time: {end_time - start_time:.4f} seconds")

# ✅ Compare Results
speedup = (pytorch_inference_time) / (tensorrt_inference_time)
print(f"🚀 TensorRT Speed-up: {speedup:.2f}x Faster than PyTorch")


TensorRT Inference Time: 0.0437 seconds
🚀 TensorRT Speed-up: 83.07x Faster than PyTorch
