In [10]:
import time
import os
import torch
import cv2
import numpy as np
from deep_sort_realtime.deepsort_tracker import DeepSort

In [13]:
# Load the YOLOv5s weights through the locally cached ultralytics/yolov5 checkout.
# torch.hub.get_dir() resolves the hub cache directory for the current user/machine,
# replacing the previous hard-coded, user-specific absolute path.
yolov5_repo_dir = os.path.join(torch.hub.get_dir(), 'ultralytics_yolov5_master')
model = torch.hub.load(yolov5_repo_dir, 'custom', path='yolov5s.pt', force_reload=True, source='local')


YOLOv5  2024-11-3 Python-3.11.7 torch-2.3.1+cu121 CUDA:0 (NVIDIA GeForce RTX 4050 Laptop GPU, 6140MiB)



Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


In [16]:
# Display the class-index -> label mapping exposed by the loaded model.
model.names

{0: 'person',
 1: 'bicycle',
 2: 'car',
 3: 'motorcycle',
 4: 'airplane',
 5: 'bus',
 6: 'train',
 7: 'truck',
 8: 'boat',
 9: 'traffic light',
 10: 'fire hydrant',
 11: 'stop sign',
 12: 'parking meter',
 13: 'bench',
 14: 'bird',
 15: 'cat',
 16: 'dog',
 17: 'horse',
 18: 'sheep',
 19: 'cow',
 20: 'elephant',
 21: 'bear',
 22: 'zebra',
 23: 'giraffe',
 24: 'backpack',
 25: 'umbrella',
 26: 'handbag',
 27: 'tie',
 28: 'suitcase',
 29: 'frisbee',
 30: 'skis',
 31: 'snowboard',
 32: 'sports ball',
 33: 'kite',
 34: 'baseball bat',
 35: 'baseball glove',
 36: 'skateboard',
 37: 'surfboard',
 38: 'tennis racket',
 39: 'bottle',
 40: 'wine glass',
 41: 'cup',
 42: 'fork',
 43: 'knife',
 44: 'spoon',
 45: 'bowl',
 46: 'banana',
 47: 'apple',
 48: 'sandwich',
 49: 'orange',
 50: 'broccoli',
 51: 'carrot',
 52: 'hot dog',
 53: 'pizza',
 54: 'donut',
 55: 'cake',
 56: 'chair',
 57: 'couch',
 58: 'potted plant',
 59: 'bed',
 60: 'dining table',
 61: 'toilet',
 62: 'tv',
 63: 'laptop',
 64: 'mou

In [7]:
# Parent directory of the (resolved) current working directory.
os.path.dirname(os.path.realpath('.'))

'C:\\Users\\Hanish\\Desktop\\IE643'

In [8]:
# Resolved absolute path of the current working directory.
os.path.realpath('.')

'C:\\Users\\Hanish\\Desktop\\IE643\\main'

In [9]:
# Move the working directory up one level, then show where we ended up.
# NOTE(review): the relative chdir is not idempotent -- re-running this cell climbs
# another level each time, so later relative paths depend on how often it was run.
os.chdir('..')
os.path.realpath('.')

'C:\\Users\\Hanish\\Desktop\\IE643'

In [11]:
class YoloDectector():
    """YOLOv5 wrapper that scores frames and extracts 'person' detections.

    The detections returned by ``plot_boxes`` are tuples of
    ``([left, top, width, height], confidence, 'person')`` in pixel units, which is
    the form this notebook hands to ``object_tracker.update_tracks``.
    """

    def __init__(self, model_name) -> None:
        """Load the model, cache its class names and choose the inference device.

        Args:
            model_name: Path to custom weights forwarded to ``torch.hub.load``.
                A falsy value (e.g. ``None``) loads the pretrained ``yolov5s`` model.
        """
        self.model = self.load_model(model_name)
        self.classes = self.model.names
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print('Using {} device'.format(self.device))

    def load_model(self, model_name):
        """Fetch a YOLOv5 model through ``torch.hub``.

        Args:
            model_name: Custom weights path, or a falsy value for pretrained ``yolov5s``.

        Returns:
            The model object returned by ``torch.hub.load``.
        """
        # force_reload=True is kept from the original code; it asks torch.hub to
        # refresh the cached repository on every instantiation.
        if model_name:
            model = torch.hub.load('ultralytics/yolov5', 'custom', path=model_name, force_reload=True)
        else:
            model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, force_reload=True)
        return model

    def score_frame(self, frame, downscale_factor=2):
        """Run the model on a downscaled copy of ``frame``.

        Args:
            frame: Image array; ``frame.shape[0]`` is used as height and
                ``frame.shape[1]`` as width.
            downscale_factor: Divisor applied to both dimensions before inference.
                Defaults to 2, the value that used to be hard-coded.

        Returns:
            ``(labels, cord)`` taken from ``results.xyxyn[0]``: ``labels`` is the last
            column, ``cord`` holds the remaining columns. ``plot_boxes`` reads columns
            0-3 of ``cord`` as box corners scaled by width/height and column 4 as the
            confidence score.
        """
        self.model.to(self.device)
        width = int(frame.shape[1] / downscale_factor)
        height = int(frame.shape[0] / downscale_factor)
        frame = cv2.resize(frame, (width, height))

        results = self.model(frame)
        labels, cord = results.xyxyn[0][:, -1], results.xyxyn[0][:, :-1]
        return labels, cord

    def class_to_label(self, x):
        """Translate a numeric class id into its name via ``self.classes``."""
        return self.classes[int(x)]

    def plot_boxes(self, results, frame, height, width, confidence):
        """Collect 'person' detections whose score reaches ``confidence``.

        Despite the name, nothing is drawn here; ``frame`` is returned untouched.

        Args:
            results: ``(labels, cord)`` pair as returned by ``score_frame``.
            frame: Passed through unchanged.
            height: Pixel height used to scale the y coordinates.
            width: Pixel width used to scale the x coordinates.
            confidence: Minimum score a detection needs in order to be kept.

        Returns:
            ``(frame, detections)`` where each detection is
            ``([left, top, width, height], score, 'person')``.
        """
        labels, cord = results
        detections = []

        for label, row in zip(labels, cord):
            # Compare every row against the caller's threshold. The previous code
            # reassigned `confidence` to the score of the first accepted person,
            # which raised the bar for all later rows and dropped valid detections.
            if row[4] >= confidence and self.class_to_label(label) == 'person':
                x1, y1 = int(row[0] * width), int(row[1] * height)
                x2, y2 = int(row[2] * width), int(row[3] * height)
                detections.append(([x1, y1, x2 - x1, y2 - y1], row[4].item(), 'person'))

        return frame, detections
                    
                    

In [20]:
# Open capture device index 0 (the camera) as the frame source.
cap = cv2.VideoCapture(0) # from camera
# Alternative source: read frames from a video file instead of the camera.
# video_path = "sample_vids/1.mp4"  
# cap = cv2.VideoCapture(video_path)

In [21]:
# Request a 1280x720 frame size from the capture; each call returns a bool.
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

True

In [22]:
# Build the detector; model_name=None takes the pretrained 'yolov5s' branch of load_model.
detector = YoloDectector(model_name=None)

Downloading: "https://github.com/ultralytics/yolov5/zipball/master" to C:\Users\Hanish/.cache\torch\hub\master.zip
YOLOv5  2024-11-3 Python-3.11.7 torch-2.3.1+cu121 CUDA:0 (NVIDIA GeForce RTX 4050 Laptop GPU, 6140MiB)

Fusing layers... 
YOLOv5s summary: 213 layers, 7225885 parameters, 0 gradients, 16.4 GFLOPs
Adding AutoShape... 


Using cuda device


In [23]:
# Set KMP_DUPLICATE_LIB_OK for this process.
# NOTE(review): presumably a workaround for the "duplicate OpenMP runtime" abort that
# can occur when several libraries bundle their own OpenMP -- confirm it is still needed.
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [24]:
## deep_sort_realtime is required for object tracking; uncomment the line below to install it

# ! pip install deep_sort_realtime -q --upgrade

In [26]:
# Create the DeepSort tracker that assigns persistent IDs to the detections.
object_tracker = DeepSort(
                max_age=5, # allow the tracker to miss up to this many frames before discarding a track
                n_init=2, # presumably the number of frames needed before a track is confirmed -- TODO confirm against deep_sort_realtime docs
                nms_max_overlap=1.0,
                max_cosine_distance=0.3,
                ### Default settings (left untouched; listed for reference):
                # nn_budget=None,
                # override_track_class=None,
                # embedder="mobilenet",
                # half=True,
                # bgr=True,
                # embedder_gpu= True,
                # embedder_model_name=None,
                # polygon=False,
                # today=None
                )

In [27]:
# Main tracking loop: read frames while the capture is open, detect people,
# update the tracker, draw the confirmed tracks and show the annotated frame.
# The previous guard was inverted (`if not cap.isOpened()`) and ran at most once,
# so with a working camera nothing was processed at all.
try:
    while cap.isOpened():
        ret, img = cap.read()
        if not ret:
            # No frame was delivered (e.g. camera lost or end of a video file).
            break
        start = time.perf_counter()

        results = detector.score_frame(img)
        img, detections = detector.plot_boxes(results, img, height=img.shape[0], width=img.shape[1], confidence=0.5)

        tracks = object_tracker.update_tracks(detections, frame=img)
        for track in tracks:
            # Only draw tracks that are confirmed and were updated recently.
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            track_id = track.track_id
            bbox = track.to_ltrb()  # left, top, right, bottom

            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,0,0), 2)
            cv2.putText(img, "ID: " + str(track_id), (int(bbox[0]), int(bbox[1] - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255,0,0), 2)

        elapsed = time.perf_counter() - start
        fps = 1 / elapsed if elapsed > 0 else 0.0  # guard against a zero-length interval

        cv2.putText(img, f'FPS: {int(fps)}', (20,70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
        cv2.imshow('output', img)

        # waitKey lets the HighGUI window refresh; press 'q' to stop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, even if a frame fails mid-loop.
    cap.release()
    cv2.destroyAllWindows()


In [None]:
# Self-contained tracking cell: make sure a capture is open, build the detector and
# tracker, then run the detect -> track -> draw loop until 'q' is pressed or frames stop.
# (The previous version mixed indentation levels, which is an IndentationError, and
# used an inverted `if not cap.isOpened()` guard instead of a loop.)
if not cap.isOpened():
    # An earlier cell may already have released the capture; reopen camera 0.
    cap = cv2.VideoCapture(0)
cap.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
cap.set(cv2.CAP_PROP_FRAME_HEIGHT, 720)

detector = YoloDectector(model_name=None)
os.environ['KMP_DUPLICATE_LIB_OK']='True'

object_tracker = DeepSort(
            max_age=5, # allow the tracker to miss up to this many frames before discarding a track
            n_init=2, # presumably the number of frames needed before a track is confirmed -- TODO confirm
            nms_max_overlap=1.0,
            max_cosine_distance=0.3,
            ### Default settings (left untouched; listed for reference):
            # nn_budget=None,
            # override_track_class=None,
            # embedder="mobilenet",
            # half=True,
            # bgr=True,
            # embedder_gpu= True,
            # embedder_model_name=None,
            # polygon=False,
            # today=None
            )

try:
    while cap.isOpened():
        ret, img = cap.read()
        if not ret:
            # No frame was delivered (e.g. camera lost or end of a video file).
            break
        start = time.perf_counter()

        results = detector.score_frame(img)
        img, detections = detector.plot_boxes(results, img, height=img.shape[0], width=img.shape[1], confidence=0.5)

        tracks = object_tracker.update_tracks(detections, frame=img)
        for track in tracks:
            # Only draw tracks that are confirmed and were updated recently.
            if not track.is_confirmed() or track.time_since_update > 1:
                continue
            track_id = track.track_id
            bbox = track.to_ltrb()  # left, top, right, bottom

            cv2.rectangle(img, (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3])), (255,0,0), 2)
            cv2.putText(img, "ID: " + str(track_id), (int(bbox[0]), int(bbox[1] - 10)), cv2.FONT_HERSHEY_SIMPLEX, 0.75, (255,0,0), 2)

        elapsed = time.perf_counter() - start
        fps = 1 / elapsed if elapsed > 0 else 0.0  # guard against a zero-length interval

        cv2.putText(img, f'FPS: {int(fps)}', (20,70), cv2.FONT_HERSHEY_SIMPLEX, 1.5, (0,255,0), 2)
        cv2.imshow('output', img)

        # waitKey lets the HighGUI window refresh; press 'q' to stop.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture and close the window, even if a frame fails mid-loop.
    cap.release()
    cv2.destroyAllWindows()