In [4]:
import torch
import torch.nn as nn
import cv2
import csv
import json
import torchvision.models as models
import numpy as np
import pandas as pd
import statistics
import threading
import math
import subprocess
from tqdm import tqdm
from torchvision import transforms
from object_tracking import *
import time
from PIL import Image 

In [12]:
model = torch.hub.load('ultralytics/yolov5', 'yolov5m', pretrained=True)
model.eval();

class Image_Classifier(nn.Module):
    def init(self):
        super().init()
        self.model = nn.Sequential(
             Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1)),
             ReLU(),
             Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1)),
             ReLU(),
             MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
             Flatten(start_dim=1, end_dim=-1),
             Dropout(p=0.25, inplace=False),
             Linear(in_features=6272, out_features=132, bias=True),
             ReLU(),
             Dropout(p=0.5, inplace=False),
             Linear(in_features=132, out_features=11, bias=True),
        )

    def forward(self, x):
        return self.model(x)

color_classifier = torch.load("2layers_colour_model.pt")
color_classifier = color_classifier.cuda()
color_classifier.eval()

body_classifier = torch.load('model.pt')
body_classifier.eval();

Using cache found in C:\Users\bedo-/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-4-18 Python-3.9.16 torch-2.0.0 CUDA:0 (NVIDIA GeForce GTX 1660, 6144MiB)

Fusing layers... 


[31m[1mrequirements:[0m C:\Users\bedo-\.cache\torch\hub\requirements.txt not found, check failed.


YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


In [6]:
if __name__ == '__main__':
    video_path = 'light traffic.mp4'
    output_path = 'track_test.mp4'
    cap = cv2.VideoCapture(video_path)

    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    ffmpeg_cmd = f"ffmpeg -y -f rawvideo -pix_fmt bgr24 -s {frame_width}x{frame_height} -r {fps} -i - -c:v libx264 -preset fast -crf 30 -pix_fmt nv12 -an -vcodec libx264 {output_path}"

    output_file = subprocess.Popen(ffmpeg_cmd.split(' '), stdin=subprocess.PIPE)
    
    mot_tracker = Sort(max_age=30, min_hits=60) 

    object_dict = {}

    frame_cut = 0
    frame_count = 0
    clf_state = False
#     color_thread = threading.Thread(target=color_classifier)
#     color_thread.start()

#     body_thread = threading.Thread(target=body_classifier)
#     body_thread.start()
    
    start = time.time()
    while True:
        ret, frame = cap.read()
        if not ret:
            print('Video processing completed')
            break

        frame_model = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)

        results = model(frame_model[frame_cut:])

        track_bbs_ids = mot_tracker.update(results.xyxy[0][:,:4].cpu())
        
        for x1, y1, x2, y2, obj_id in track_bbs_ids:
            cx1 = int((x1 + x2) / 2)
            cy1 = int((y1 + y2) / 2)
            width = abs(x2 - x1)
            height = abs(y2 - y1)
            diagonal = math.sqrt(width**2 + height**2)
            
            
            if diagonal > 100 :
                try: #TODO optimize
                    car_image = frame_model[int(y1):int(y2), int(x1):int(x2)]
                    transform = transforms.Compose([transforms.ToTensor(),
                                                    transforms.Resize((32,32)),])
                    car_image = transform(car_image)
                    car_image = car_image.cuda()
                    
                    body_car_image = frame_model[int(y1):int(y2), int(x1):int(x2)]
                    transform = transforms.Compose([transforms.ToTensor(),
                                                    transforms.Resize((256,256)),])
                    body_car_image = transform(body_car_image)
                    body_car_image = body_car_image.cuda()
                except:
                    print('no object detected')
                    continue
                with torch.no_grad():
                    color_output = color_classifier(car_image.unsqueeze(0))
                    color_prediction = torch.argmax(color_output).item()
                    color_name = ['black','blue','brown','green','grey','orange','pink','purple','red','white','yellow']
                    color_class_name = color_name[color_prediction]

                    body_output = body_classifier(body_car_image.unsqueeze(0))
                    body_prediction = torch.argmax(body_output).item()
                    body_name = ['Heavy-Duty', 'Lorry', 'Luxury', 'Pickup', 'SUV', 'Sedan', 'Van']
                    body_class_name = body_name[body_prediction]
                    clf_state = True

            
                if obj_id not in object_dict:
                    object_dict[obj_id] = {
                        'bboxes': [(x1, y1, x2, y2)],
                        'frames':[frame_count],
                        'last_seen_frame': 0,
                        'color_classifier_preds': [color_prediction],
                        'body_classifier_preds': [body_prediction]
                    }
                else:
                    object_dict[obj_id]['bboxes'].append(( x1, y1, x2, y2))
                    object_dict[obj_id]['frames'].append((frame_count))
                    object_dict[obj_id]['color_classifier_preds'].append(color_prediction)
                    object_dict[obj_id]['body_classifier_preds'].append(body_prediction)

                object_dict[obj_id]['last_seen_frame'] = frame_count

                # Calculate the mode prediction of the classifier for the tracked object
                color_mode_pred = statistics.mode(object_dict[obj_id]['color_classifier_preds'])
                object_dict[obj_id]['color_mode_pred'] = str(color_name[color_mode_pred])

                body_mode_pred = statistics.mode(object_dict[obj_id]['body_classifier_preds'])
                object_dict[obj_id]['body_mode_pred'] = str(body_name[body_mode_pred])

            cv2.putText(frame, str(obj_id), (cx1, cy1), 0, 0.5, (255, 255, 255), 2)
            if clf_state == True:
                cv2.putText(frame, color_name[color_mode_pred], (int(x1), int(y1)), cv2.FONT_HERSHEY_SIMPLEX, 1, (203, 192, 255), 2)
                cv2.putText(frame,  body_name[body_mode_pred], (int(cx1), int(y2)), cv2.FONT_HERSHEY_SIMPLEX, 1, (203, 192, 255), 2)
                clf_state = False

        output_file.stdin.write(frame.tobytes())


        frame_count += 1
#     color_thread.join()
#     body_thread.join()
    cap.release()
    output_file.stdin.close()
    output_file.wait()
    end = time.time()
    
    print(end - start)
    
    with open("object_tracks.json", "w") as f:
        json.dump(object_dict, f, indent=4)
        

no object detected
no object detected
no object detected
no object detected
no object detected
no object detected
no object detected
no object detected
no object detected
no object detected
no object detected
no object detected
Video processing completed
26.762486934661865


In [7]:
results.xyxy[0][:,:4]

tensor([[1621.93909,  187.92599, 1765.44250,  283.95273],
        [1789.03381,  175.26503, 1919.26355,  277.11584],
        [ 830.76105,   86.43929,  874.58551,  127.79375],
        [   2.78865,  888.41809,  360.98422, 1078.31165],
        [1732.77502,   33.43250, 1788.35486,   68.20553],
        [ 862.72717,   18.53886,  900.86108,   49.74484],
        [ 995.87817,   73.63623, 1039.29956,  116.14877],
        [ 991.27368,   46.02561, 1035.75000,   96.28868],
        [1505.88318,    3.30035, 1546.27954,   25.46384],
        [1619.51416,  180.94038, 1920.00000,  277.43561]], device='cuda:0')

In [8]:
for x1,y1,x2,y2, obj_id in track_bbs_ids:
    print(x1,y1,x2,y2, obj_id)

1791.0547222461435 175.44359519107914 1924.1391523925397 279.381310085834 50.0
862.5076409704981 18.507258348607568 900.999890648785 50.0153013766313 46.0
1623.5478982117977 188.45009006717754 1768.832003121114 285.6204869503526 43.0
995.9485593610135 74.02485747631778 1039.6077471537883 116.63913170862818 41.0
830.4881467479073 86.60317803000335 874.670299832159 128.308336826105 40.0
-4.108739180834164 892.7713896274313 357.7180696017726 1086.2534768464468 23.0


In [9]:
def get_object_frames_and_last_seen(object_id, object_dict):
    obj_data = object_dict[object_id]
    frames = obj_data['frames']
    last_seen = obj_data['last_seen_frame']
    return frames, last_seen

In [10]:
def find_objects_by_prediction(color_mode, body_mode, object_dict):
    object_ids = []
    for obj_id, obj_data in object_dict.items():
        if obj_data['color_mode_pred'] == color_mode and obj_data['body_mode_pred'] == body_mode:
            object_ids.append(obj_id)
    return object_ids

In [11]:
color_mode = 'grey'
body_mode = 'Sedan'
data = open('object_tracks.json')
object_dict = json.load(data)

object_ids = find_objects_by_prediction(color_mode, body_mode, object_dict)
cap = cv2.VideoCapture(video_path)
for obj_id in object_ids:
    frames,bboxes = object_dict['frames'], object_dict['bboxes']
    centers = (bboxes[0] + bboxes[1])/2 + (bboxes[2] + bboxes[3])/2  
    cap.set(1,frames[0])
    ret, frame = cap.read()
    for frame, i in range(frames[0], frames[-1]), range(len(frames)):
        if frame == frames[i]:
            cv2.putText(frame, str(f"{obj_id} {color_mode} {body_mode}"), (cx1, cy1), 0, 0.5, (255, 255, 255), 2)
            
    cv2.imshow('z',frame)
    cv2.waitKey(0)
    print(f'Object {obj_id} appeared in frames {frames} and was last seen in frame {last_seen}.')

Object 4.0 appeared in frames [63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107] and was last seen in frame 107.
Object 26.0 appeared in frames [216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252] and was last seen in frame 252.


In [None]:
frames[0]

In [None]:
video_path = 'light traffic.mp4'
cap = cv2.VideoCapture(video_path)
ret, frame = cap.read()
cv2.startWindowThread()
cv2.namedWindow("preview")
cv2.imshow("preview", img)
cv2.waitKey()

In [None]:
cap = cv2.VideoCapture(video_path)
ret, frame = cap.read()
cv2.imshow('s',frame)

In [19]:
object_dict['1.0']['color_mode_pred']

'green'

In [50]:
video_path

'light traffic.mp4'

In [14]:
centers = (1,2)

In [18]:
*centers

SyntaxError: can't use starred expression here (4107792713.py, line 1)