In [10]:
!pip install deep-sort-realtime



In [2]:
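# Disabled: copies the YOLO GPU helper files into the working directory; not referenced by the cells shown below.
# !cp -r /kaggle/input/yolov2/other/yolo_gpu/1/yolo_gpu/* /kaggle/working/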

In [11]:
# Copy the SORT tracker sources from the attached Kaggle dataset into the working directory.
# Presumably this is what makes `from sort import Sort` importable in a later cell — verify the dataset layout.
!cp -r /kaggle/input/sort/other/abewley_sort/2/sort/* /kaggle/working/

In [12]:
import cv2
import torch
from torchvision.models.detection import fasterrcnn_resnet50_fpn
from torchvision.transforms import functional as F
from deep_sort_realtime.deepsort_tracker import DeepSort
import os
from tqdm import tqdm
import numpy as np

# Run inference on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [13]:
#Detector and Tracker classes for a uniform interface
class Detector:
    """Common interface for the object detectors used by the tracking pipeline.

    Subclasses override the methods below. The base implementations do
    nothing and return ``None``.
    """

    def __init__(self):
        """No shared state; subclasses set up their own model."""

    def getDetections(self, frame):
        """Return the detections found in ``frame`` in the detector's generic format."""

    def getDetectionsSort(self, frame):
        """Return the detections found in ``frame`` in the format fed to the SORT tracker.

        This is the spelling the rest of the notebook calls and overrides.
        """

    def getDetectionsSORT(self, frame):
        """Backward-compatible alias for :meth:`getDetectionsSort`.

        The interface originally declared only this spelling while every
        implementation and caller used ``getDetectionsSort``. Delegating keeps
        old call sites working and makes both names resolve to whatever a
        subclass implements.
        """
        return self.getDetectionsSort(frame)

class Tracker:
    """Common interface for the multi-object trackers used by the pipeline.

    Subclasses override the methods below. The base implementations do
    nothing and return ``None``.
    """

    def __init__(self):
        """No shared state; subclasses create their own tracker backend."""

    def getTrackedObjects(self, detections, frame):
        """Update the tracker with ``detections`` for ``frame`` and return its tracked objects."""

    def id_and_bbox(self, output):
        """Extract a ``(track_id, bbox)`` pair from one tracked object."""

In [14]:
# Detectors : 

class DetectorFasterRCNN(Detector):
    """Vehicle detector backed by torchvision's pretrained Faster R-CNN (ResNet-50 FPN).

    Frames are expected in OpenCV's BGR channel order (what
    ``cv2.VideoCapture.read`` returns) and are converted to RGB before
    inference, because the pretrained torchvision model expects RGB input.
    """

    def __init__(self, score_threshold=0.8, vehicle_classes=(2, 3, 4, 6)):
        """Load the pretrained model and configure the detection filter.

        Args:
            score_threshold: Detections must score strictly above this value.
            vehicle_classes: Label ids that are kept.
        """
        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=`; kept as-is for compatibility with older versions.
        self.model = fasterrcnn_resnet50_fpn(pretrained = True)
        self.model.eval()
        self.model.to(device)
        # NOTE(review): in torchvision's COCO label map these ids appear to be
        # bicycle, car, motorcycle and bus; truck (8) is not included — confirm intent.
        self.vehicle_classes = list(vehicle_classes)
        self.score_threshold = score_threshold

    def _filteredDetections(self, frame):
        """Run the model on one BGR frame and return kept ``(box, score, label)`` tuples."""
        # OpenCV delivers BGR; the model expects RGB.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_tensor = F.to_tensor(rgb_frame).unsqueeze(0).to(device)
        with torch.no_grad():
            outputs = self.model(frame_tensor)
        boxes = outputs[0]['boxes'].cpu().numpy()
        scores = outputs[0]['scores'].cpu().numpy()
        labels = outputs[0]['labels'].cpu().numpy()
        return [
            (box, score, label)
            for box, score, label in zip(boxes, scores, labels)
            if score > self.score_threshold and label in self.vehicle_classes
        ]

    def getDetections(self, frame):
        """Return a list of ``(box, score, label)`` tuples for the vehicles in ``frame``.

        ``box`` is the model's box array (``[x1, y1, x2, y2]`` corner format).
        """
        return self._filteredDetections(frame)

    def getDetectionsSort(self, frame):
        """Return the vehicles in ``frame`` as an ``(N, 5)`` array of ``[x1, y1, x2, y2, score]``.

        The result always has two dimensions: a frame without detections gives
        an array of shape ``(0, 5)``, which is what SORT's ``update`` requires
        for empty frames (a flat ``(0,)`` array breaks its IoU matching).
        """
        rows = [
            [box[0], box[1], box[2], box[3], score]
            for box, score, _label in self._filteredDetections(frame)
        ]
        return np.array(rows).reshape(-1, 5)
    
# Single shared detector instance; constructing it loads the pretrained model onto `device`.
detectorFasterRCNN = DetectorFasterRCNN()

In [15]:
from sort import Sort

In [19]:
# Trackers
class TrackerSORT(Tracker):
    """Adapter exposing the SORT tracker through the ``Tracker`` interface."""

    def __init__(self):
        """Create a SORT tracker with its default settings."""
        self.tracker = Sort()

    def getTrackedObjects(self, detections, frame):
        """Advance SORT by one frame and return its tracked-object rows.

        Args:
            detections: ``(N, 5)`` array of ``[x1, y1, x2, y2, score]`` rows.
            frame: Unused; accepted for interface compatibility.
        """
        detections = np.asarray(detections)
        if detections.size == 0:
            # SORT must be updated on every frame and expects a (0, 5) array
            # when there are no detections; a flat empty array is not accepted.
            detections = np.empty((0, 5))
        return self.tracker.update(detections)

    def id_and_bbox(self, obj):
        """Split one SORT output row into ``(track_id, bbox)``.

        The id is returned as a plain ``int`` so labels read "7" rather than
        "7.0"; the box is the first four values cast to integers.
        """
        return int(obj[4]), obj[:4].astype("int")
    
class TrackerDeepSORT(Tracker):
    """Adapter exposing deep-sort-realtime's ``DeepSort`` through the ``Tracker`` interface."""

    def __init__(self):
        """Create a DeepSort tracker with its default settings."""
        self.tracker = DeepSort()

    def getTrackedObjects(self, detections, frame):
        """Advance DeepSORT by one frame and return its confirmed tracks.

        Args:
            detections: Iterable of ``(box, score, label)`` tuples whose box is
                in ``[x1, y1, x2, y2]`` corner format, as produced by
                ``DetectorFasterRCNN.getDetections``.
            frame: The frame the detections came from; passed to DeepSort,
                which uses it to compute appearance embeddings.
        """
        # DeepSort.update_tracks expects ([left, top, width, height], confidence, class);
        # passing corner-format boxes would make it read x2/y2 as width/height.
        ltwh_detections = []
        for box, score, label in detections:
            x1, y1, x2, y2 = (float(value) for value in box[:4])
            ltwh_detections.append(([x1, y1, x2 - x1, y2 - y1], float(score), label))

        tracks = self.tracker.update_tracks(ltwh_detections, frame = frame)
        # Skip tentative tracks, following the library's documented usage, so
        # short-lived false positives are neither drawn nor counted.
        return [track for track in tracks if track.is_confirmed()]

    def id_and_bbox(self, obj):
        """Return ``(track_id, bbox)`` for a track, with the box as integer ``[left, top, right, bottom]``."""
        return obj.track_id, obj.to_ltrb().astype("int")
    
# Module-level tracker instances used by the run cells below.
# NOTE(review): the underlying trackers presumably keep state between calls, so reusing
# one instance for a second run would continue from the earlier run — confirm that is intended.
trackerSORT = TrackerSORT()
trackerDeepSORT = TrackerDeepSORT()

In [26]:
class VehicleTracker:
    """Detect, track and count vehicles that cross a vertical line in a video.

    Combines a detector (``getDetections`` / ``getDetectionsSort``) with a
    tracker (``getTrackedObjects`` / ``id_and_bbox``) and writes an annotated
    copy of the input video.
    """

    def __init__(self, detector, tracker):
        """Store the detector and tracker used for every processed frame."""
        self.detector = detector
        self.tracker = tracker

    def writeVideo(self, frames, output_file='output.mp4', fps=30):
        """Encode an in-memory sequence of frames as an mp4v video.

        Args:
            frames: Sequence of equally sized image arrays; the first frame
                determines the output size.
            output_file: Path of the video to write.
            fps: Frame rate of the written video.

        Writes nothing when ``frames`` is empty.
        """
        if len(frames) == 0:
            return
        height, width = frames[0].shape[:2]
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_file, fourcc, fps, (width, height))
        try:
            for frame in frames:
                out.write(frame)
        finally:
            # Always finalise the container, even if a write fails.
            out.release()

    def putText(self, frame, text, top_left, bottom_right):
        """Draw ``text`` in white on a filled black box spanning ``top_left``..``bottom_right``."""
        cv2.rectangle(frame, top_left, bottom_right, (0, 0, 0), thickness=cv2.FILLED)
        cv2.putText(frame, text, (top_left[0] + 15, top_left[1] + 35), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)

    def getVideo(self, videofile, outputfile, xline=500):
        """Process ``videofile`` and write the annotated result to ``outputfile``.

        A track is counted once, the first time its bounding box spans the
        vertical line ``x == xline``. Frames are encoded as they are processed
        instead of being buffered, and the output uses the source frame rate
        (30 fps if the source does not report one).

        Args:
            videofile: Path of the input video.
            outputfile: Path of the annotated video to write.
            xline: x-coordinate (in pixels) of the vertical counting line.

        Returns:
            The number of distinct track ids that intersected the line.
            No output file is written if the video yields no frames.

        Raises:
            IOError: If ``videofile`` cannot be opened.
        """
        cap = cv2.VideoCapture(videofile)
        if not cap.isOpened():
            cap.release()
            raise IOError(f"Could not open video file: {videofile}")

        framect = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not (fps > 0):
            # Some containers report 0/NaN; fall back to the previous fixed rate.
            fps = 30

        print(f"Processing {framect} frames")

        # Ids of the tracked vehicles that intersected the line; the count is the size of this set.
        intersectedIds = set()
        # SORT consumes an (N, 5) array; every other tracker gets the generic detection list.
        use_sort_format = isinstance(self.tracker, TrackerSORT)
        writer = None

        try:
            # The reported frame count is only an estimate, so it sizes the
            # progress bar but the loop runs until the capture is exhausted.
            with tqdm(total=framect if framect > 0 else None) as progress:
                while True:
                    ret, frame = cap.read()
                    if not ret:
                        break

                    if use_sort_format:
                        detections = self.detector.getDetectionsSort(frame)
                    else:
                        detections = self.detector.getDetections(frame)
                    tracked_objects = self.tracker.getTrackedObjects(detections, frame)

                    video_frame = frame.copy()
                    linecolor = (252, 227, 3)
                    for obj in tracked_objects:
                        track_id, bbox = self.tracker.id_and_bbox(obj)
                        # Plain Python ints keep OpenCV's drawing functions happy.
                        x1, y1, x2, y2 = (int(value) for value in bbox[:4])
                        shade = int(track_id)
                        cv2.rectangle(video_frame, (x1, y1), (x2, y2), (shade, 255, shade), 2)
                        cv2.putText(video_frame, str(track_id), (x1, y1 - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0), 2)

                        if x1 <= xline <= x2 and track_id not in intersectedIds:
                            intersectedIds.add(track_id)
                            # Flash the line white on the frame where a new vehicle is counted.
                            linecolor = (255, 255, 255)

                    # Drawing the line and adding the count
                    h, w = video_frame.shape[:2]
                    cv2.line(video_frame, (xline, 0), (xline, h - 1), linecolor, 2)
                    self.putText(video_frame, f'Count: {len(intersectedIds)}', (w - 200, 50), (w - 45, 100))

                    if writer is None:
                        # Created lazily so the output size comes from the first real frame.
                        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
                        writer = cv2.VideoWriter(outputfile, fourcc, fps, (w, h))
                    writer.write(video_frame)
                    progress.update(1)
        finally:
            cap.release()
            if writer is not None:
                writer.release()

        return len(intersectedIds)
       

In [27]:
# Build a fresh SORT tracker for this run so no track state from an earlier run of the cell leaks into the count.
vehicleTracker = VehicleTracker(detectorFasterRCNN, TrackerSORT())
vehicleTracker.getVideo('/kaggle/input/intersectiondata01/vid2.mp4', "output2.mp4")

Processing 1775 frames


100%|██████████| 1775/1775 [04:45<00:00,  6.21it/s]


In [None]:
# Build a fresh DeepSORT tracker for this run so no track state from an earlier run of the cell leaks into the count.
vehicleTracker = VehicleTracker(detectorFasterRCNN, TrackerDeepSORT())
vehicleTracker.getVideo('/kaggle/input/intersectiondata01/vid2.mp4', "output1.mp4")