In [4]:
import torch
import torch.nn as nn
import cv2
import csv
import json
import torchvision.models as models
import numpy as np
import pandas as pd
import statistics
import threading
import math
import subprocess
from tqdm import tqdm
from torchvision import transforms
from object_tracking import *
import time

In [5]:
model = torch.hub.load('ultralytics/yolov5', 'yolov5m', pretrained=True)
model.eval();

class Image_Classifier(nn.Module):
    def init(self):
        super().init()
        self.model = nn.Sequential(
             Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1)),
             ReLU(),
             Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1)),
             ReLU(),
             MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
             Flatten(start_dim=1, end_dim=-1),
             Dropout(p=0.25, inplace=False),
             Linear(in_features=6272, out_features=132, bias=True),
             ReLU(),
             Dropout(p=0.5, inplace=False),
             Linear(in_features=132, out_features=11, bias=True),
        )

    def forward(self, x):
        return self.model(x)

color_classifier = torch.load("2layers_colour_model.pt")
color_classifier = color_classifier.cuda()
color_classifier.eval()

body_classifier = torch.load('model.pt')
body_classifier.eval()

Using cache found in C:\Users\ahmed/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-4-18 Python-3.9.13 torch-2.0.0+cu118 CUDA:0 (NVIDIA GeForce GTX 1660 SUPER, 6144MiB)

Fusing layers... 


[31m[1mrequirements:[0m C:\Users\ahmed\.cache\torch\hub\requirements.txt not found, check failed.


YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


EfficientNet(
  (features): Sequential(
    (0): Conv2dNormActivation(
      (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
      (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (2): SiLU(inplace=True)
    )
    (1): Sequential(
      (0): MBConv(
        (block): Sequential(
          (0): Conv2dNormActivation(
            (0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
            (1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
            (2): SiLU(inplace=True)
          )
          (1): SqueezeExcitation(
            (avgpool): AdaptiveAvgPool2d(output_size=1)
            (fc1): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (fc2): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (activation): SiLU(inplace=True)
            (scale_activation): Sigmoid()
          )
          (2): Conv2dNormActivat

In [8]:
if __name__ == '__main__':
    video_path = 'light traffic.mp4'
    output_path = 'track_test.mp4'
    cap = cv2.VideoCapture(video_path)

    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    ffmpeg_cmd = f"ffmpeg -y -f rawvideo -pix_fmt bgr24 -s {frame_width}x{frame_height} -r {fps} -i - -c:v libx264 -preset fast -crf 30 -pix_fmt nv12 -an -vcodec libx264 {output_path}"

    output_file = subprocess.Popen(ffmpeg_cmd.split(' '), stdin=subprocess.PIPE)
    
    mot_tracker = Sort(max_age=30, min_hits=60) 

    object_dict = {}

    frame_cut = 0
    frame_count = 0
    clf_state = False
#     color_thread = threading.Thread(target=color_classifier)
#     color_thread.start()

#     body_thread = threading.Thread(target=body_classifier)
#     body_thread.start()
    
    start = time.time()
    while True:
        ret, frame = cap.read()
        if not ret:
            print('Video processing completed')
            break

        frame_model = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        results = model(frame_model[frame_cut:])

        track_bbs_ids = mot_tracker.update(results.xyxy[0][:,:4].cpu())
        
        
        for x1, y1, x2, y2, obj_id in track_bbs_ids:
            cx1 = int((x1 + x2) / 2)
            cy1 = int((y1 + y2) / 2)
            width = abs(x2 - x1)
            height = abs(y2 - y1)
            diagonal = math.sqrt(width**2 + height**2)
            if obj_id not in object_dict:
                    object_dict[obj_id] = {
                        'bboxes': [(x1, y1, x2, y2)],
                        'frames': [frame_count],
                        'last_seen_frame': 0,
                        'color_classifier_preds': [],
                        'body_classifier_preds': []
                    }
            
            if diagonal > 300 :
                try:
                    car_image = frame_model[int(y1):int(y2), int(x1):int(x2)]
                    transform = transforms.Compose([transforms.ToTensor(),
                                                    transforms.Resize((32,32)),])
                    car_image = transform(car_image)
                    car_image = car_image.cuda()
                except:
                    print('no object detected')
                    continue
                with torch.no_grad():
                    color_output = color_classifier(car_image.unsqueeze(0))
                    color_prediction = torch.argmax(color_output).item()
                    color_name = ['black','blue','brown','green','grey','orange','pink','purple','red','white','yellow']
                    color_class_name = color_name[color_prediction]

                try:
                    body_car_image = frame_model[int(y1):int(y2), int(x1):int(x2)]
                    transform = transforms.Compose([transforms.ToTensor(),
                                                    transforms.Resize((256,256)),])
                    body_car_image = transform(body_car_image)
                    body_car_image = body_car_image.cuda()
                except:
                    print('no object detected')
                    continue

                with torch.no_grad():
                    body_output = body_classifier(body_car_image.unsqueeze(0))
                    body_prediction = torch.argmax(body_output).item()
                    body_name = ['Heavy-Duty', 'Lorry', 'Luxury', 'Pickup', 'SUV', 'Sedan', 'Van']
                    body_class_name = body_name[body_prediction]
                    clf_state = True

            
                if obj_id not in object_dict:
                    object_dict[obj_id] = {
                        'bboxes': [(x1, y1, x2, y2)],
                        'frames':[frame_count],
                        'last_seen_frame': 0,
                        'color_classifier_preds': [color_prediction],
                        'body_classifier_preds': [body_prediction]
                    }
                else:
                    object_dict[obj_id]['bboxes'].append(( x1, y1, x2, y2))
                    object_dict[obj_id]['frames'].append((frame_count))
                    object_dict[obj_id]['color_classifier_preds'].append(color_prediction)
                    object_dict[obj_id]['body_classifier_preds'].append(body_prediction)

                object_dict[obj_id]['last_seen_frame'] = frame_count

                # Calculate the mode prediction of the classifier for the tracked object
                color_mode_pred = statistics.mode(object_dict[obj_id]['color_classifier_preds'])
                object_dict[obj_id]['color_mode_pred'] = str(color_name[color_mode_pred])

                body_mode_pred = statistics.mode(object_dict[obj_id]['body_classifier_preds'])
                object_dict[obj_id]['body_mode_pred'] = str(body_name[body_mode_pred])

            cv2.putText(frame, str(obj_id), (cx1, cy1), 0, 0.5, (255, 255, 255), 2)
            if clf_state == True:
                cv2.putText(frame, color_name[color_mode_pred], (int(x1), int(y1)), cv2.FONT_HERSHEY_SIMPLEX, 1, (203, 192, 255), 2)
                cv2.putText(frame,  body_name[body_mode_pred], (int(cx1), int(y2)), cv2.FONT_HERSHEY_SIMPLEX, 1, (203, 192, 255), 2)
                clf_state = False

        output_file.stdin.write(frame.tobytes())


        frame_count += 1
#     color_thread.join()
#     body_thread.join()
    cap.release()
    output_file.stdin.close()
    output_file.wait()
    end = time.time()
    
    print(end - start)
    
    with open("object_tracks.json", "w") as f:
        json.dump(object_dict, f, indent=4)
        

no object detected
no object detected
no object detected
no object detected
no object detected
no object detected
no object detected
no object detected
no object detected
no object detected
no object detected
no object detected
Video processing completed
17.02169966697693


In [None]:
results.xyxy[0][:,:4]

In [None]:
for x1,y1,x2,y2, obj_id in track_bbs_ids:
    print(x1,y1,x2,y2, obj_id)

In [None]:
import torchvision.transforms as T
from PIL import Image
transform = T.ToPILImage()
img = transform(car_image)
img.show()
output=classifier(car_image.unsqueeze(0))
prediction = torch.argmax(output).item()
print(prediction)

In [14]:
def find_objects_by_prediction(color_mode, body_mode, object_dict):
    object_ids = []
    for obj_id, obj_data in object_dict.items():
        if obj_data['color_mode_pred'] == color_mode and obj_data['body_mode_pred'] == body_mode:
            object_ids.append(obj_id)
    return object_ids

In [15]:
def get_object_frames_and_last_seen(object_id, object_dict):
    obj_data = object_dict[object_id]
    frames = obj_data['frames']
    last_seen = obj_data['last_seen_frame']
    return frames, last_seen

In [19]:
color_mode = 'red'
body_mode = 'Sedan'
data = open('object_tracks.json')
object_dict = json.load(data)
print(object_dict)
object_ids = find_objects_by_prediction(color_mode, body_mode, object_dict)
for obj_id in object_ids:
    frames, last_seen = get_object_frames_and_last_seen(obj_id, object_dict)
    print(f'Object {obj_id} appeared in frames {frames} and was last seen in frame {last_seen}.')

{'131.0': {'bboxes': [[1367.3381959036108, 16.32118589066346, 1403.1466673776392, 39.76498049117248]], 'frames': [0], 'last_seen_frame': 0, 'color_classifier_preds': [], 'body_classifier_preds': []}, '130.0': {'bboxes': [[910.9993898426983, 4.266929583372277, 926.8303220713642, 18.809503598390418], [859.2452365962313, 491.6265268149396, 1044.1579959386224, 732.5995086592402], [853.7285966096248, 522.4345149546751, 1048.6957148986871, 779.6076002068709], [847.6843444609761, 556.517152449702, 1052.7016148128225, 831.2031668875265], [844.8932896485501, 594.1583298166172, 1059.4849300909602, 890.3076378794478], [842.3038286308769, 604.1532211281492, 1063.385270653494, 915.8309895279772], [838.6403771647651, 634.4012940514706, 1067.3267651274223, 965.2335016752222], [833.4128423363992, 678.1479851224151, 1074.0273940511797, 1032.155246777155], [829.5590097318333, 713.8605170573517, 1081.3369021066787, 1069.9403238275956], [824.0834768773373, 734.9445728840391, 1088.7259741533173, 1087.62820

KeyError: 'color_mode_pred'