Ultralytics libraries

In [2]:
# Change the current working directory to 'ultralytics'
%cd ultralytics

# Install the 'ultralytics' package
!pip install ultralytics

# Import the 'ultralytics' package
import ultralytics

# Run checks to ensure the 'ultralytics' package and its dependencies are properly installed
ultralytics.checks()

Ultralytics YOLOv8.2.32 🚀 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
Setup complete ✅ (2 CPUs, 12.7 GB RAM, 30.2/78.2 GB disk)


**Input required libraries**

In [3]:
import cv2  # OpenCV library for image and video processing

from ultralytics import YOLO   # Ultralytics YOLO library for object detection

import yaml # PyYAML library for parsing YAML configuration files


In [1]:
from google.colab import drive
# drive.flush_and_unmount()  # Unmount Google Drive if already mounted
drive.mount('/content/drive')


Mounted at /content/drive


**Train on custom dataset**

In [None]:
model = YOLO('yolov8n.pt')

# Train the model using the specified configuration file and settings
# - data: Path to the dataset configuration file
# - epochs: Number of training epochs
# - project: Directory to save training results

result = model.train(data ="/content/drive/MyDrive/image processing/config.yaml",epochs = 50,project = '/content/drive/MyDrive/image processing/image processing/image processing/runs/detect')

**Optimize using TensorRT**

In [None]:
#install required lib

# Install TensorRT, a high-performance deep learning inference library by NVIDIA
!pip install tensorrt

# Install TensorRT Lean, a leaner wrapper around TensorRT to simplify its usage
!pip install tensorrt_lean

# Install TensorRT Dispatch, a library for managing TensorRT model dispatching
!pip install tensorrt_dispatch

# Install ONNX, a framework to represent deep learning models
# Install ONNX Simplifier, a tool to simplify ONNX models
# Install ONNX Runtime GPU, a high-performance runtime for executing ONNX models on GPUs
!pip install onnx onnxsim onnxruntime-gpu

Collecting tensorrt
  Downloading tensorrt-10.0.1.tar.gz (16 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tensorrt-cu12 (from tensorrt)
  Downloading tensorrt-cu12-10.0.1.tar.gz (18 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: tensorrt, tensorrt-cu12
  Building wheel for tensorrt (setup.py) ... [?25l[?25hdone
  Created wheel for tensorrt: filename=tensorrt-10.0.1-py2.py3-none-any.whl size=16331 sha256=dc4702dd470a1e044f713fe9192e17b3f6145f7b7902e617a9d366cbab7fd56a
  Stored in directory: /root/.cache/pip/wheels/43/0e/35/f5de9a100ae769ece311b8f653e493f7e49bbcc25dcdc92e27
  Building wheel for tensorrt-cu12 (setup.py) ... [?25l[?25hdone
  Created wheel for tensorrt-cu12: filename=tensorrt_cu12-10.0.1-py2.py3-none-any.whl size=17551 sha256=e6613d9c600ce91dee3c5f748f1e9b0a6282047c68c13ba695548dd23b06050e
  Stored in directory: /root/.cache/pip/wheels/3d/65/7d/75f1bf55d7e57b0c82cb037d1d39d53851946315222eaf4c76
Suc

In [15]:
# Load the trained YOLOv8 model from the specified path
model = YOLO("/content/drive/MyDrive/image processing/runs/detect/train9/weights/best.pt")

# Export the trained model to TensorRT format for optimized inference
# - format: Specifies the export format ('engine' for TensorRT)
# - device: Specifies the device (0 for CPU in this context)
model.export(format="engine", device=0)  # creates 'best.engine'


Ultralytics YOLOv8.2.32 🚀 Python-3.10.12 torch-2.3.0+cu121 CUDA:0 (Tesla T4, 15102MiB)
Model summary (fused): 168 layers, 3005843 parameters, 0 gradients, 8.1 GFLOPs

[34m[1mPyTorch:[0m starting from '/content/drive/MyDrive/image processing/image processing/image processing/runs/detect/train9/weights/best.pt' with input shape (1, 3, 640, 640) BCHW and output shape(s) (1, 5, 8400) (5.9 MB)

[34m[1mONNX:[0m starting export with onnx 1.16.1 opset 17...
[34m[1mONNX:[0m export success ✅ 0.8s, saved as '/content/drive/MyDrive/image processing/image processing/image processing/runs/detect/train9/weights/best.onnx' (11.7 MB)

[34m[1mTensorRT:[0m starting export with TensorRT 8.4.3.1...
[34m[1mTensorRT:[0m input "images" with shape(1, 3, 640, 640) DataType.FLOAT
[34m[1mTensorRT:[0m output "output0" with shape(1, 5, 8400) DataType.FLOAT
[34m[1mTensorRT:[0m building FP32 engine as /content/drive/MyDrive/image processing/image processing/image processing/runs/detect/train9/wei

'/content/drive/MyDrive/image processing/image processing/image processing/runs/detect/train9/weights/best.engine'

**Test on Image**

In [16]:
# Load the trained YOLOv8 model from the specified path
model = YOLO('/content/drive/MyDrive/image processing/runs/detect/train9/weights/best.pt')

# Path to the input image for object detection
image_path = '/content/drive/MyDrive/image processing/image1.jpg'

# Read the input image from the specified path
image = cv2.imread(image_path)

# Perform object detection on the input image
# - source: The image to perform detection on
# - save: Save the output image with detected objects (set to True)
results = model.predict(source=image_path, save=True)



image 1/1 /content/drive/MyDrive/image processing/image processing/image processing/image1.jpg: 448x640 8 Humans, 14.2ms
Speed: 3.2ms preprocess, 14.2ms inference, 4.1ms postprocess per image at shape (1, 3, 448, 640)
Results saved to [1mruns/detect/predict2[0m


**Test on Video**

In [18]:



# Load the pre-trained YOLOv8 model
# model = YOLO('/content/drive/MyDrive/image processing/yolov8n.pt')
# model = YOLO ('/content/drive/MyDrive/image processing/runs/detect/train11/weights/best.pt')
model = YOLO ('/content/drive/MyDrive/image processing/runs/detect/train9/weights/best.onnx')


# Path to the video
video_path = '/content/drive/MyDrive/image processing/videoplayback.mp4.crdownload'
# video_path = '/content/drive/MyDrive/image processing/853889-hd_1920_1080_25fps.mp4'
output_path = '/content/drive/MyDrive/image processing/output/output_video7.mp4'

# Open video file
cap = cv2.VideoCapture(video_path)

# Check if video opened successfully
if not cap.isOpened():
    print("Error: Could not open video.")
    exit()

# Get the width and height of the video frames
frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create VideoWriter object
fourcc = cv2.VideoWriter_fourcc(*'mp4v')  # You can use other codecs like 'XVID' or 'MJPG'
out = cv2.VideoWriter(output_path, fourcc, 20.0, (frame_width, frame_height))

# Process video frames
while cap.isOpened():
    ret, frame = cap.read()
    if not ret:
        break

    # Perform object detection on the frame
    results = model.predict(source=frame, save=False)  # Use save=False for frames

    # Draw bounding boxes around detected objects
    for result in results:
        boxes = result.boxes.data.cpu().numpy()  # Get boxes as numpy array
        for box in boxes:
            x1, y1, x2, y2, score, class_id = box
            if int(class_id) == 0:  # Assuming '0' is the class ID for 'person'
                cv2.rectangle(frame, (int(x1), int(y1)), (int(x2), int(y2)), (0, 255, 0), 2)
                cv2.putText(frame, 'Person', (int(x1), int(y1) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 2)

    # Write the frame into the output video file
    out.write(frame)

    # Display the resulting frame (optional)
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the video capture and writer objects, and close display window
cap.release()
out.release()
cv2.destroyAllWindows()
print("Processing complete!")


Loading /content/drive/MyDrive/image processing/image processing/image processing/runs/detect/train9/weights/best.onnx for ONNX Runtime inference...

0: 640x640 1 Human, 176.3ms
Speed: 2.2ms preprocess, 176.3ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 Human, 143.1ms
Speed: 2.3ms preprocess, 143.1ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 Human, 139.4ms
Speed: 1.9ms preprocess, 139.4ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 Human, 155.3ms
Speed: 2.2ms preprocess, 155.3ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 Human, 152.2ms
Speed: 2.2ms preprocess, 152.2ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 Human, 155.4ms
Speed: 1.9ms preprocess, 155.4ms inference, 2.1ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 1 Human, 144.5ms
Speed: 2.0ms preprocess, 144.5ms inference, 1.6ms postproces