In [1]:
import torch
import torch.nn as nn
import cv2
import csv
import json
import torchvision.models as models
import numpy as np
import pandas as pd
import statistics
import threading
import math
import subprocess
from tqdm import tqdm
from torchvision import transforms
from object_tracking import *
import time
from PIL import Image 

In [2]:
model = torch.hub.load('ultralytics/yolov5', 'yolov5m', pretrained=True)
model.eval();

class Image_Classifier(nn.Module):
    def init(self):
        super().init()
        self.model = nn.Sequential(
             Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1)),
             ReLU(),
             Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1)),
             ReLU(),
             MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
             Flatten(start_dim=1, end_dim=-1),
             Dropout(p=0.25, inplace=False),
             Linear(in_features=6272, out_features=132, bias=True),
             ReLU(),
             Dropout(p=0.5, inplace=False),
             Linear(in_features=132, out_features=11, bias=True),
        )

    def forward(self, x):
        return self.model(x)

color_classifier = torch.load("2layers_colour_model.pt")
color_classifier = color_classifier.cuda()
color_classifier.eval()

body_classifier = torch.load('model.pt')
body_classifier.eval();

Using cache found in C:\Users\ahmed/.cache\torch\hub\ultralytics_yolov5_master
YOLOv5  2023-4-18 Python-3.9.13 torch-2.0.0+cu118 CUDA:0 (NVIDIA GeForce GTX 1660 SUPER, 6144MiB)



[31m[1mrequirements:[0m C:\Users\ahmed\.cache\torch\hub\requirements.txt not found, check failed.


Fusing layers... 
YOLOv5m summary: 290 layers, 21172173 parameters, 0 gradients
Adding AutoShape... 


In [3]:
if __name__ == '__main__':
    video_path = 'light traffic.mp4'
    output_path = 'track_test.mp4'
    cap = cv2.VideoCapture(video_path)

    fps = cap.get(cv2.CAP_PROP_FPS)
    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

    ffmpeg_cmd = f"ffmpeg -y -f rawvideo -pix_fmt bgr24 -s {frame_width}x{frame_height} -r {fps} -i - -c:v libx264 -preset fast -crf 30 -pix_fmt nv12 -an -vcodec libx264 {output_path}"

    output_file = subprocess.Popen(ffmpeg_cmd.split(' '), stdin=subprocess.PIPE)
    
    mot_tracker = Sort(max_age=30, min_hits=60) 

    object_dict = {}

    frame_count = 0
    clf_state = False 


    transform = transforms.Compose([transforms.ToTensor(), transforms.Resize((32,32)),]) 
    body_transform = transforms.Compose([transforms.ToTensor(), transforms.Resize((256,256)),])
    
    batch_images = []
    batch_body_images = []
    
    color_name = ['black','blue','brown','green','grey','orange','pink','purple','red','white','yellow']
    body_name = ['Heavy-Duty', 'Lorry', 'Luxury', 'Pickup', 'SUV', 'Sedan', 'Van']

    start = time.time()     
    while True:         
        ret, frame = cap.read()        
        if not ret:             
            print('Video processing completed')            
            break  

        frame_model = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB) 
        results = model(frame_model)

        track_bbs_ids = mot_tracker.update(results.xyxy[0][:,:4].cpu())

        for x1, y1, x2, y2, obj_id in track_bbs_ids:
            cx1 = int((x1 + x2) / 2)
            cy1 = int((y1 + y2) / 2)
            width = abs(x2 - x1)
            height = abs(y2 - y1)
            diagonal = math.sqrt(width**2 + height**2)
            if obj_id not in object_dict:
                object_dict[obj_id] = {
                    'bboxes': [(x1, y1, x2, y2)],
                    'frames':[frame_count],
                    'color_classifier_preds': [],
                    'body_classifier_preds': [],
                    'color_mode_pred':str(),
                    'body_mode_pred':str()}

            if diagonal > 100:
                
                try:
                    car_image = frame_model[int(y1):int(y2), int(x1):int(x2)]
                    car_image = transform(car_image)
                    car_image = car_image.cuda()

                    body_car_image = frame_model[int(y1):int(y2), int(x1):int(x2)]
                    body_car_image = body_transform(body_car_image)
                    body_car_image = body_car_image.cuda()

                    batch_images.append(car_image)
                    batch_body_images.append(body_car_image)
                    print(len(batch_images))
                except:
                    print('no object detected')
                    continue

                if len(batch_images) >= 37:
                    with torch.no_grad():
                        color_output = color_classifier(torch.stack(batch_images).cuda())
                        color_prediction = torch.argmax(color_output, dim=1)
                    batch_images.clear()

                if len(batch_body_images) >= 37:
                    with torch.no_grad():
                        body_output = body_classifier(torch.stack(batch_body_images).cuda())
                        body_prediction = torch.argmax(body_output, dim=1)
                        clf_state = True
                    batch_body_images.clear()
                    if obj_id not in object_dict:
                        object_dict[obj_id] = {
                            'bboxes': [(x1, y1, x2, y2)],
                            'frames':[frame_count],
                            'last_seen_frame': 0,
                            'color_classifier_preds': [tuple(color_prediction.cpu().numpy())],
                            'body_classifier_preds': [tuple(body_prediction.cpu().numpy())]
                        }
                    else:
                        object_dict[obj_id]['bboxes'].append(( x1, y1, x2, y2))
                        object_dict[obj_id]['frames'].append((frame_count))
                        object_dict[obj_id]['color_classifier_preds'].append(tuple(color_prediction.cpu().numpy()))
                        object_dict[obj_id]['body_classifier_preds'].append(tuple(body_prediction.cpu().numpy()))
                    object_dict[obj_id]['last_seen_frame'] = frame_count

                    color_mode_pred = statistics.mode(object_dict[obj_id]['color_classifier_preds'])
                    color_mode_pred = int(color_mode_pred[0])
                    object_dict[obj_id]['color_mode_pred'] = color_name[color_mode_pred]

                    body_mode_pred = statistics.mode(object_dict[obj_id]['body_classifier_preds'])
                    body_mode_pred = int(body_mode_pred[0])
                    object_dict[obj_id]['body_mode_pred'] = body_name[body_mode_pred]
                    with open("object_tracks.json", "w") as f:        
                        json.dump(object_dict, f, indent=4, default=lambda x: x.tolist())

            cv2.putText(frame, str(obj_id), (cx1, cy1), 0, 0.5, (255, 255, 255), 2)
            if clf_state == True:               
                cv2.putText(frame, color_name[color_mode_pred], (int(x1), int(y1)), cv2.FONT_HERSHEY_SIMPLEX, 1, (203, 192, 255), 2)                 
                cv2.putText(frame, body_name[body_mode_pred], (int(cx1), int(y2)), cv2.FONT_HERSHEY_SIMPLEX, 1, (203, 192, 255), 2)               
                clf_state = False    
                    
        output_file.stdin.write(frame.tobytes())   
        frame_count += 1
    cap.release() 
    output_file.stdin.close()  
    output_file.wait()
    end = time.time()     
    print(end - start)  
    

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
3

In [7]:
results.xyxy[0][:,:4]

tensor([[1621.93909,  187.92599, 1765.44250,  283.95273],
        [1789.03381,  175.26503, 1919.26355,  277.11584],
        [ 830.76105,   86.43929,  874.58551,  127.79375],
        [   2.78865,  888.41809,  360.98422, 1078.31165],
        [1732.77502,   33.43250, 1788.35486,   68.20553],
        [ 862.72717,   18.53886,  900.86108,   49.74484],
        [ 995.87817,   73.63623, 1039.29956,  116.14877],
        [ 991.27368,   46.02561, 1035.75000,   96.28868],
        [1505.88318,    3.30035, 1546.27954,   25.46384],
        [1619.51416,  180.94038, 1920.00000,  277.43561]], device='cuda:0')

In [8]:
for x1,y1,x2,y2, obj_id in track_bbs_ids:
    print(x1,y1,x2,y2, obj_id)

1791.0547222461435 175.44359519107914 1924.1391523925397 279.381310085834 50.0
862.5076409704981 18.507258348607568 900.999890648785 50.0153013766313 46.0
1623.5478982117977 188.45009006717754 1768.832003121114 285.6204869503526 43.0
995.9485593610135 74.02485747631778 1039.6077471537883 116.63913170862818 41.0
830.4881467479073 86.60317803000335 874.670299832159 128.308336826105 40.0
-4.108739180834164 892.7713896274313 357.7180696017726 1086.2534768464468 23.0


In [4]:
def get_object_frames_and_last_seen(object_id, object_dict):
    obj_data = object_dict[object_id]
    frames = obj_data['frames']
    last_seen = obj_data['last_seen_frame']
    return frames, last_seen

In [5]:
def find_objects_by_prediction(color_mode, body_mode, object_dict):
    object_ids = []
    for obj_id, obj_data in object_dict.items():
        if obj_data['color_mode_pred'] == color_mode and obj_data['body_mode_pred'] == body_mode:
            object_ids.append(obj_id)
    return object_ids

In [16]:
def car_search(json_file, input_video, output_video='test.mp4',car_colour ='', car_body = ''):
    """
    Search for car's body and colour in a given video using the video's json file
    json_file : a file which contains all information about cars in the given videos
                where the value of the json is the car's ID
    """

    data = open('object_tracks.json')
    object_dict = json.load(data)

    object_ids = find_objects_by_prediction(car_colour, car_body, object_dict)
    cap = cv2.VideoCapture(input_video)
    ffmpeg_cmd = f"ffmpeg -y -f rawvideo -pix_fmt bgr24 -s {frame_width}x{frame_height} -r {fps} -i - -c:v libx264 -preset fast -crf 30 -pix_fmt nv12 -an -vcodec libx264 {output_video}"
    output_file = subprocess.Popen(ffmpeg_cmd.split(' '), stdin=subprocess.PIPE)


    for obj_id in object_ids:
        frames , bboxes = object_dict[obj_id]['frames'], object_dict[obj_id]['bboxes']

        cap.set(1,frames[1])
        j = 0
        for i in range(frames[1],frames[-1]):
            bbox = bboxes[j]
            ret, frame = cap.read()
            if not ret:
                print('Error, frame doesn\'t exist')
                break

            if i == frames[j]:
                x,y = int((bbox[0] + bbox[2])/2 ), int( (bbox[1] + bbox[3])/2)
                cv2.putText(frame,  f'{obj_id}, {car_colour}, {car_body}', (int(x), int(y)),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 0), 2)
                print(x,y)
            output_file.stdin.write(frame.tobytes())
            j+=1
        print(f'Object {obj_id} appeared in frames {frames}.')



    cap.release()
    output_file.stdin.close()
    output_file.wait()

In [19]:
car_search('tracked_objects.json','light traffic.mp4','test.mp4','grey','Sedan')

Object 144.0 appeared in frames [0, 44].
Object 186.0 appeared in frames [294, 310].
Object 193.0 appeared in frames [334, 334].


In [None]:
video_path = 'light traffic.mp4'
cap = cv2.VideoCapture(video_path)
ret, frame = cap.read()
cv2.startWindowThread()
cv2.namedWindow("preview")
cv2.imshow("preview", img)
cv2.waitKey()

In [None]:
cap = cv2.VideoCapture(video_path)
ret, frame = cap.read()
cv2.imshow('s',frame)

In [19]:
object_dict['1.0']['color_mode_pred']

'green'

In [50]:
video_path

'light traffic.mp4'

In [14]:
centers = (1,2)

In [18]:
*centers

SyntaxError: can't use starred expression here (4107792713.py, line 1)