<a href="https://colab.research.google.com/github/Sidhtang/CNN-classification-using-mnist-dataset/blob/main/last_and_final_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install ultralytics scikit-learn deep_sort_realtime opencv-python-headless


Collecting ultralytics
  Downloading ultralytics-8.3.57-py3-none-any.whl.metadata (35 kB)
Collecting deep_sort_realtime
  Downloading deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.57-py3-none-any.whl (905 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m905.3/905.3 kB[0m [31m27.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading deep_sort_realtime-1.3.2-py3-none-any.whl (8.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m54.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.13-py3-none-any.whl (26 kB)
Installing collected packages: deep_sort_realtime, ultralytics-thop, ultralytics
Successfully installed deep_sort_realtime-1.3.2 ultralytics-8.3.57 ultralytics-thop-2.0.13


In [3]:
from ultralytics import YOLO
import cv2
import numpy as np
from collections import Counter
from sklearn.cluster import KMeans
from deep_sort_realtime.deepsort_tracker import DeepSort

# Constants
COLOR_MAP = {
    'car': (0, 255, 0),
    'truck': (255, 0, 0),
    'bus': (0, 0, 255),
    'motorcycle': (255, 255, 0),
    'bicycle': (255, 0, 255),
    'van': (0, 255, 255),
    'person': (255, 165, 0)
}

def setup_model():
    model = YOLO('yolov8x.pt')
    return model

def setup_tracker():
    return DeepSort(max_age=30, nn_budget=70, max_iou_distance=0.7)

def classify_color(rgb):
    bgr = rgb[::-1]  # Convert RGB to BGR
    hsv = cv2.cvtColor(np.uint8([[bgr]]), cv2.COLOR_BGR2HSV)[0][0]
    h, s, v = hsv

    if s < 50:  # Low saturation
        if v < 60:
            return "black"
        elif v > 200:
            return "white"
        else:
            return "gray"

    # Hue ranges for basic colors
    if 0 <= h < 10 or 160 <= h <= 180:
        return "red"
    elif 10 <= h < 25:
        return "orange"
    elif 25 <= h < 35:
        return "yellow"
    elif 35 <= h < 85:
        return "green"
    elif 85 <= h < 130:
        return "blue"
    elif 130 <= h < 160:
        return "purple"
    return "unknown"

def get_dominant_color(img, box):
    x1, y1, x2, y2 = map(int, box)
    region = img[y1:y2, x1:x2]
    pixels = region.reshape(-1, 3)
    if len(pixels) < 3:
        return "unknown"
    kmeans = KMeans(n_clusters=3, n_init=10)
    kmeans.fit(pixels)
    colors = kmeans.cluster_centers_
    labels = kmeans.labels_
    counts = Counter(labels)
    dominant_color = colors[counts.most_common(1)[0][0]]
    return classify_color(dominant_color)

def process_video(video_path, output_path, model, tracker):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"Could not open video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        detection_list = []
        results = model(frame)  # Get detections
        for box, conf, cls in zip(results[0].boxes.xyxy, results[0].boxes.conf, results[0].boxes.cls):
            x1, y1, x2, y2 = map(int, box.tolist())
            detection = ([x1, y1, x2 - x1, y2 - y1], conf.item(), int(cls.item()))
            detection_list.append(detection)

        tracks = tracker.update_tracks(detection_list, frame=frame)

        for track in tracks:
            if not track.is_confirmed():
                continue

            try:
                track_id = track.track_id
                ltwh = track.to_ltwh()
                x1, y1 = int(ltwh[0]), int(ltwh[1])
                x2, y2 = int(ltwh[0] + ltwh[2]), int(ltwh[1] + ltwh[3])

                cls_id = track.get_det_class()
                if cls_id is None:
                    continue

                class_name = model.names[cls_id]
                conf = track.get_det_conf() or 0.0

                color = get_dominant_color(frame, [x1, y1, x2, y2]) if class_name != 'person' else 'N/A'
                label = f'{class_name} #{track_id} ({color}): {conf:.2f}'

                bbox_color = COLOR_MAP.get(class_name.lower(), (200, 200, 200))
                cv2.rectangle(frame, (x1, y1), (x2, y2), bbox_color, 3)
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, bbox_color, 2)
            except Exception as e:
                print(f"Error processing track {track.track_id}: {e}")

        out.write(frame)

    cap.release()
    out.release()
    print(f"Processed video saved to {output_path}")

def main():
    video_path = "/content/WhatsApp Video 2025-01-03 at 01.11.52_2850e0bf (1).mp4"
    output_path = "output_video.mp4"

    model = setup_model()
    tracker = setup_tracker()
    process_video(video_path, output_path, model, tracker)

if __name__ == "__main__":
    main()


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x.pt to 'yolov8x.pt'...


100%|██████████| 131M/131M [00:00<00:00, 387MB/s]



0: 384x640 3 persons, 11 cars, 1 motorcycle, 67.6ms
Speed: 7.4ms preprocess, 67.6ms inference, 972.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 11 cars, 1 motorcycle, 1 bus, 53.7ms
Speed: 3.2ms preprocess, 53.7ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 11 cars, 1 motorcycle, 3 buss, 1 truck, 53.8ms
Speed: 4.2ms preprocess, 53.8ms inference, 1.9ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 9 cars, 1 motorcycle, 1 bus, 63.1ms
Speed: 5.4ms preprocess, 63.1ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 9 cars, 1 motorcycle, 3 buss, 1 truck, 66.3ms
Speed: 3.1ms preprocess, 66.3ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 9 cars, 1 motorcycle, 2 buss, 1 truck, 63.1ms
Speed: 8.5ms preprocess, 63.1ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 11 cars, 1 mo

  return fit_method(estimator, *args, **kwargs)



0: 384x640 2 persons, 12 cars, 1 bus, 61.8ms
Speed: 4.7ms preprocess, 61.8ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 12 cars, 1 bus, 61.8ms
Speed: 5.9ms preprocess, 61.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 13 cars, 1 bus, 61.7ms
Speed: 5.2ms preprocess, 61.7ms inference, 2.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 14 cars, 1 bus, 62.8ms
Speed: 4.1ms preprocess, 62.8ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 12 cars, 1 bus, 61.7ms
Speed: 3.7ms preprocess, 61.7ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 12 cars, 1 bus, 61.7ms
Speed: 7.3ms preprocess, 61.7ms inference, 1.4ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 2 persons, 13 cars, 1 bus, 61.7ms
Speed: 3.7ms preprocess, 61.7ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

In [None]:
from ultralytics import YOLO
import cv2
import numpy as np
from collections import Counter
from sklearn.cluster import KMeans
from deep_sort_realtime.deepsort_tracker import DeepSort

# Constants
COLOR_MAP = {
    'car': (0, 255, 0),
    'truck': (255, 0, 0),
    'bus': (0, 0, 255),
    'motorcycle': (255, 255, 0),
    'bicycle': (255, 0, 255),
    'van': (0, 255, 255),
    'person': (255, 165, 0)
}

def setup_model():
    model = YOLO('yolov8x.pt')
    return model

def setup_tracker():
    return DeepSort(max_age=30, nn_budget=70, max_iou_distance=0.7)

def classify_color(rgb):
    bgr = rgb[::-1]  # Convert RGB to BGR
    hsv = cv2.cvtColor(np.uint8([[bgr]]), cv2.COLOR_BGR2HSV)[0][0]
    h, s, v = hsv

    if s < 50:  # Low saturation
        if v < 60:
            return "black"
        elif v > 200:
            return "white"
        else:
            return "gray"

    # Hue ranges for basic colors
    if 0 <= h < 10 or 160 <= h <= 180:
        return "red"
    elif 10 <= h < 25:
        return "orange"
    elif 25 <= h < 35:
        return "yellow"
    elif 35 <= h < 85:
        return "green"
    elif 85 <= h < 130:
        return "blue"
    elif 130 <= h < 160:
        return "purple"
    return "unknown"

def get_dominant_color(img, box):
    x1, y1, x2, y2 = map(int, box)
    region = img[y1:y2, x1:x2]
    pixels = region.reshape(-1, 3)
    if len(pixels) < 3:
        return "unknown"
    kmeans = KMeans(n_clusters=3, n_init=10)
    kmeans.fit(pixels)
    colors = kmeans.cluster_centers_
    labels = kmeans.labels_
    counts = Counter(labels)
    dominant_color = colors[counts.most_common(1)[0][0]]
    return classify_color(dominant_color)

def process_video(video_path, output_path, model, tracker):
    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"Could not open video: {video_path}")

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    fps = int(cap.get(cv2.CAP_PROP_FPS))

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (frame_width, frame_height))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        detection_list = []
        results = model(frame)  # Get detections
        for box, conf, cls in zip(results[0].boxes.xyxy, results[0].boxes.conf, results[0].boxes.cls):
            x1, y1, x2, y2 = map(int, box.tolist())
            detection = ([x1, y1, x2 - x1, y2 - y1], conf.item(), int(cls.item()))
            detection_list.append(detection)

        tracks = tracker.update_tracks(detection_list, frame=frame)

        for track in tracks:
            if not track.is_confirmed():
                continue

            try:
                track_id = track.track_id
                ltwh = track.to_ltwh()
                x1, y1 = int(ltwh[0]), int(ltwh[1])
                x2, y2 = int(ltwh[0] + ltwh[2]), int(ltwh[1] + ltwh[3])

                cls_id = track.get_det_class()
                if cls_id is None:
                    continue

                class_name = model.names[cls_id]
                conf = track.get_det_conf() or 0.0

                color = get_dominant_color(frame, [x1, y1, x2, y2]) if class_name != 'person' else 'N/A'
                label = f'{class_name} #{track_id} ({color}): {conf:.2f}'

                bbox_color = COLOR_MAP.get(class_name.lower(), (200, 200, 200))
                cv2.rectangle(frame, (x1, y1), (x2, y2), bbox_color, 3)
                cv2.putText(frame, label, (x1, y1 - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, bbox_color, 2)
            except Exception as e:
                print(f"Error processing track {track.track_id}: {e}")

        out.write(frame)

    cap.release()
    out.release()
    print(f"Processed video saved to {output_path}")

def main():
    video_path = "/content/1734540857073 (1).mp4"
    output_path = "result_video.mp4"

    model = setup_model()
    tracker = setup_tracker()
    process_video(video_path, output_path, model, tracker)

if __name__ == "__main__":
    main()



0: 384x640 4 persons, 8 cars, 1 traffic light, 62.6ms
Speed: 3.2ms preprocess, 62.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 9 cars, 1 traffic light, 43.0ms
Speed: 3.0ms preprocess, 43.0ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 8 cars, 1 traffic light, 43.3ms
Speed: 3.0ms preprocess, 43.3ms inference, 1.5ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 6 persons, 7 cars, 1 traffic light, 61.8ms
Speed: 7.0ms preprocess, 61.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 persons, 7 cars, 1 traffic light, 61.8ms
Speed: 6.3ms preprocess, 61.8ms inference, 1.8ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 7 cars, 1 traffic light, 61.8ms
Speed: 7.2ms preprocess, 61.8ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 3 persons, 7 cars, 1 traffic light, 61.7ms
Speed: 3.5ms preprocess, 61.7ms i

  return fit_method(estimator, *args, **kwargs)



0: 384x640 5 cars, 1 airplane, 1 traffic light, 61.7ms
Speed: 3.1ms preprocess, 61.7ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 1 train, 1 traffic light, 61.7ms
Speed: 3.2ms preprocess, 61.7ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 1 traffic light, 61.7ms
Speed: 5.0ms preprocess, 61.7ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 1 traffic light, 62.2ms
Speed: 3.0ms preprocess, 62.2ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 1 traffic light, 61.8ms
Speed: 7.9ms preprocess, 61.8ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 1 train, 1 traffic light, 61.7ms
Speed: 3.1ms preprocess, 61.7ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 5 cars, 1 bus, 1 traffic light, 61.7ms
Speed: 9.5ms preprocess, 61.7ms inference, 1.7ms postprocess per image at