In [1]:
import torch
from torchvision import transforms
from torchvision.transforms import functional as F
import cv2
import csv
import time
import pandas as pd
import numpy as np
import os
from PIL import Image, ImageDraw
from pathlib import Path
from tqdm import tqdm
import subprocess
import torch.nn as nn
from sort.tracker import Sort
from scipy.spatial.distance import cdist
from filterpy.kalman import KalmanFilter

In [2]:
# Fetch the pretrained medium-sized YOLOv5 detector through torch.hub
# and switch it to inference mode.
model = torch.hub.load(
    repo_or_dir='ultralytics/yolov5',
    model='yolov5m',
    pretrained=True,
)
model.eval()
class Image_Classifier(nn.Module):
    """Small convolutional network that scores an image against ten vehicle classes.

    The network is five (3x3 convolution -> ReLU -> 2x2 max-pool) stages followed
    by a flatten and a single linear layer producing 10 logits. The linear layer
    takes 9216 features (256 channels x 6 x 6), which is what the convolutional
    stack yields for a 3 x 256 x 256 input.

    Attributes:
        model: the ``nn.Sequential`` holding every layer.
        classes: class names, indexed by the position of the corresponding logit.
    """

    def __init__(self):
        super().__init__()

        # Channel count entering the network and leaving each convolution stage.
        channel_plan = [3, 32, 64, 128, 256, 256]

        stages = []
        for in_channels, out_channels in zip(channel_plan[:-1], channel_plan[1:]):
            stages += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
            ]

        # Layer order (and therefore the state_dict keys) must stay fixed so that
        # previously saved weights keep loading.
        self.model = nn.Sequential(*stages, nn.Flatten(), nn.Linear(9216, 10))

        # Human-readable label for each output logit.
        self.classes = [
            'Bus', 'Heavy-Duty', 'Lorry', 'Luxury', 'Motorbike',
            'Pickup', 'SUV', 'Sedan', 'Three Wheel', 'Van',
        ]

    def forward(self, x):
        """Run ``x`` through the layer stack and return the raw class logits."""
        return self.model(x)
# Load Bedo model for car classification.
# Prefer the GPU when CUDA is available and fall back to the CPU otherwise, so
# this cell also runs on machines without a CUDA device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
classifier = Image_Classifier()
# map_location remaps tensors that were saved from a GPU so the checkpoint can
# still be loaded when only a CPU is present.
# NOTE(review): torch.load unpickles the file - only load checkpoints from a trusted source.
classifier.load_state_dict(torch.load("Bedo_model.pt", map_location=device))
classifier = classifier.to(device)
# Last expression of the cell: switches to inference mode and displays the architecture.
classifier.eval()

Using cache found in C:\Users\ahmed/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-4-18 Python-3.9.13 torch-2.0.0+cu118 CUDA:0 (NVIDIA GeForce GTX 1660 SUPER, 6144MiB)



[31m[1mrequirements:[0m C:\Users\ahmed\.cache\torch\hub\requirements.txt not found, check failed.


Fusing layers... 
YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


Image_Classifier(
  (model): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (9): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1))
    (10): ReLU()
    (11): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1))
    (13): ReLU()
    (14): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (15): Flatten(start_dim=1, end_dim=-1)
    (16): Linear(in_features=9216, out_features=10, bias=True)
  )
)

In [9]:
# YOLOv5 class names whose detections are passed on to the Bedo classifier.
_VEHICLE_LABELS = frozenset({"car", "truck", "bus", "bicycle", "motorcycle"})


def _classify_vehicle_crop(crop_bgr):
    """Return the Bedo class name predicted for one cropped vehicle image.

    Args:
        crop_bgr: H x W x 3 uint8 crop taken from an OpenCV (BGR) frame.

    Returns:
        The entry of ``classifier.classes`` with the highest logit.
    """
    # Run on whatever device the classifier weights live on instead of assuming CUDA.
    device = next(classifier.parameters()).device

    # The classifier's final Linear layer takes 9216 features, which corresponds
    # to a 256x256 input image.
    resized = cv2.resize(crop_bgr, (256, 256), interpolation=cv2.INTER_LINEAR)

    # HWC uint8 -> 1 x C x H x W float in [0, 1].
    # NOTE(review): the crop is fed in OpenCV's BGR channel order, as before -
    # confirm this matches how the Bedo model was trained.
    batch = (
        torch.from_numpy(resized)
        .to(device)
        .permute(2, 0, 1)
        .float()
        .div(255.0)
        .unsqueeze(0)
    )

    with torch.no_grad():
        logits = classifier(batch)
    return classifier.classes[int(logits.argmax(dim=1).item())]


def detect_Bedo(video_path, output_path):
    """Annotate every vehicle in a video with its Bedo class and save the result.

    Each frame is run through the global YOLOv5 ``model``. Every detection whose
    label is in ``_VEHICLE_LABELS`` is cropped, classified with the global
    ``classifier`` and drawn (red box plus class name) onto a copy of the frame.
    All frames - including those without any detection - are piped as raw BGR
    data into an ``ffmpeg`` process that encodes them with libx264.

    Args:
        video_path: path of the input video file.
        output_path: path of the video file to create (overwritten if present).

    Returns:
        None. Problems with the inputs or with ffmpeg are reported via ``print``.

    Raises:
        OSError: propagated from ``subprocess.Popen`` when the ``ffmpeg``
            executable cannot be started.
    """
    # Check that the models are loaded correctly
    if model is None:
        print("Error: could not load YOLOv5 model")
        return
    if classifier is None:
        print("Error: could not load Bedo model")
        return

    # Check that the input video file exists and is readable
    if not os.path.isfile(video_path):
        print(f"Error: input video file {video_path} does not exist or is not readable")
        return

    cap = cv2.VideoCapture(str(video_path))
    try:
        if not cap.isOpened():
            print(f"Error: could not open input video file {video_path}")
            return

        # Frame rate and frame size are needed to describe the raw stream to ffmpeg.
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # `not fps > 0` also rejects NaN.
        if frame_width <= 0 or frame_height <= 0 or not fps > 0:
            print(f"Error: could not read frame size / frame rate of {video_path}")
            return

        # Build the command as an argument list so paths containing spaces stay intact.
        ffmpeg_cmd = [
            "ffmpeg", "-y",
            "-f", "rawvideo",
            "-pix_fmt", "bgr24",
            "-s", f"{frame_width}x{frame_height}",
            "-r", str(fps),
            "-i", "-",
            "-c:v", "libx264",
            "-crf", "18",
            "-preset", "veryfast",
            str(output_path),
        ]
        output_file = subprocess.Popen(ffmpeg_cmd, stdin=subprocess.PIPE)

        try:
            while True:
                ret, frame = cap.read()
                # No more frames (or a read error): stop.
                if not ret:
                    break

                # Draw on a copy so crops for later detections in the same frame
                # are not polluted by boxes/labels drawn for earlier ones.
                annotated = frame.copy()

                # YOLOv5's AutoShape wrapper expects RGB for numpy input, while
                # OpenCV delivers BGR.
                yolov5_results = model(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

                # Each row is [x1, y1, x2, y2, confidence, class index].
                for x1, y1, x2, y2, _conf, label in yolov5_results.xyxy[0].tolist():
                    if model.names[int(label)] not in _VEHICLE_LABELS:
                        continue

                    # Clamp the box to the frame and skip degenerate boxes.
                    x1, y1 = max(0, int(x1)), max(0, int(y1))
                    x2 = min(frame.shape[1], int(x2))
                    y2 = min(frame.shape[0], int(y2))
                    if x1 >= x2 or y1 >= y2:
                        continue

                    class_name = _classify_vehicle_crop(frame[y1:y2, x1:x2])

                    # Red box, black class name anchored at the box's top-left corner.
                    cv2.rectangle(annotated, (x1, y1), (x2, y2), (0, 0, 255), 2)
                    cv2.putText(annotated, class_name, (x1, y1),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)

                # Always write the frame, even when nothing was detected, so the
                # output keeps the input's length and timing.
                try:
                    output_file.stdin.write(annotated.tobytes())
                except OSError:
                    print("Error: ffmpeg stopped accepting frames; the output video is incomplete")
                    break
        finally:
            # Always finish the encoder, even if inference raised.
            try:
                output_file.stdin.close()
            except OSError:
                # The pipe is already broken because ffmpeg exited; nothing left to flush.
                pass
            if output_file.wait() != 0:
                print(f"Error: ffmpeg exited with status {output_file.returncode}")
    finally:
        cap.release()

In [10]:
# Annotate the sample clip and write the result next to the notebook.
detect_Bedo(video_path='india.mp4', output_path='test2.mp4')