<a href="https://colab.research.google.com/github/Sidhtang/CNN-classification-using-mnist-dataset/blob/main/identify_the_class_of_vehicle_and_color_and_consistent_tracking_id.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ultralytics deep_sort_realtime pytesseract

Collecting ultralytics
  Downloading ultralytics-8.3.57-py3-none-any.whl.metadata (35 kB)
Collecting deep_sort_realtime
  Downloading deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Collecting pytesseract
  Downloading pytesseract-0.3.13-py3-none-any.whl.metadata (11 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.57-py3-none-any.whl (905 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m905.3/905.3 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading deep_sort_realtime-1.3.2-py3-none-any.whl (8.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m66.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading pytesseract-0.3.13-py3-none-any.whl (14 kB)
Downloading ultralytics_thop-2.0.13-py3-none-any.whl (26 kB)
Installing collected packages: pytesseract, deep_sort_realtime, ultralytics-thop, ultralytics
Successful

In [None]:
# @title Vehicle detection, subtype classification and colour logging (with tracking IDs)
from ultralytics import YOLO
import cv2
import numpy as np
from collections import Counter
from sklearn.cluster import KMeans
from deep_sort_realtime.deepsort_tracker import DeepSort
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image

class VehicleClassifier(nn.Module):
    """ResNet-34 network whose final layer is replaced by a small MLP head.

    The backbone is fetched through ``torch.hub`` with ``pretrained=True``.
    The head maps the backbone's feature width to 512 hidden units (ReLU,
    dropout 0.5) and then to ``len(VEHICLE_SUBTYPES)`` outputs, i.e. one
    output per top-level key of that mapping.
    """

    def __init__(self):
        super().__init__()

        # Pre-trained ResNet backbone; its stock ``fc`` layer is swapped out below.
        backbone = torch.hub.load('pytorch/vision:v0.10.0', 'resnet34', pretrained=True)
        feature_width = backbone.fc.in_features
        output_width = len(VEHICLE_SUBTYPES)

        # Custom classification head.
        backbone.fc = nn.Sequential(
            nn.Linear(feature_width, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, output_width),
        )
        self.features = backbone

    def forward(self, x):
        """Run ``x`` through the backbone and custom head; return the raw outputs."""
        logits = self.features(x)
        return logits

# Main vehicle class -> ordered list of finer-grained subtype labels.
# The order of each list matters: subtypes are looked up by position.
VEHICLE_SUBTYPES = {
    'car': ['sedan', 'suv', 'hatchback', 'wagon', 'coupe', 'sports_car', 'luxury', 'compact'],
    'truck': ['pickup', 'semi', 'delivery', 'dump_truck', 'box_truck'],
    'bus': ['city_bus', 'coach', 'mini_bus', 'school_bus'],
    'van': ['passenger_van', 'cargo_van', 'minivan', 'camper_van'],
}

# Vocabulary of the secondary attributes reported for each vehicle.
VEHICLE_ATTRIBUTES = dict(
    size=['compact', 'mid-size', 'full-size'],
    body_style=['2-door', '4-door', 'wagon', 'convertible'],
    purpose=['passenger', 'commercial', 'recreational'],
)

def setup_classifier():
    """Build a ``VehicleClassifier`` and return it in evaluation mode.

    No task-specific weights are loaded here, so the custom head keeps its
    freshly initialised parameters; in practice trained weights would be
    loaded before use.
    """
    # ``nn.Module.eval()`` returns the module itself, so the call can be chained.
    return VehicleClassifier().eval()

def preprocess_vehicle_image(frame, box):
    """Crop a vehicle from ``frame`` and turn it into a classifier input tensor.

    Args:
        frame: Image array indexed as ``frame[row, col]``; it is converted
            with ``cv2.COLOR_BGR2RGB``, so a BGR image is expected.
        box: ``(x1, y1, x2, y2)`` corner coordinates; values are cast to int.

    Returns:
        The crop resized to 224x224, converted to a tensor, normalised with
        the mean/std constants below, and given a leading batch dimension.

    Raises:
        ValueError: If the box has no area or lies entirely outside the frame.
    """
    x1, y1, x2, y2 = map(int, box)
    if x2 <= x1 or y2 <= y1:
        raise ValueError(f"Degenerate vehicle box: {(x1, y1, x2, y2)}")

    # Clamp to the frame: tracker boxes may extend past the image border, and
    # negative indices would otherwise wrap around in the slice below.
    frame_h, frame_w = frame.shape[:2]
    x1, x2 = min(max(x1, 0), frame_w), min(max(x2, 0), frame_w)
    y1, y2 = min(max(y1, 0), frame_h), min(max(y2, 0), frame_h)
    vehicle_img = frame[y1:y2, x1:x2]
    if vehicle_img.size == 0:
        raise ValueError(f"Vehicle box lies outside the frame: {(x1, y1, x2, y2)}")

    # Convert to PIL Image
    vehicle_img = cv2.cvtColor(vehicle_img, cv2.COLOR_BGR2RGB)
    vehicle_img = Image.fromarray(vehicle_img)

    # Apply transformations
    transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406],
                             std=[0.229, 0.224, 0.225]),
    ])

    return transform(vehicle_img).unsqueeze(0)

def estimate_vehicle_size(box):
    """Bucket a vehicle into a size class from its bounding-box area.

    ``box`` is indexed as ``(x1, y1, x2, y2)``. Areas below 10000 are
    'compact', areas below 20000 are 'mid-size', everything else is
    'full-size'.
    """
    footprint = (box[2] - box[0]) * (box[3] - box[1])

    # Upper (exclusive) area limits, checked in ascending order.
    for limit, label in ((10000, 'compact'), (20000, 'mid-size')):
        if footprint < limit:
            return label
    return 'full-size'

def analyze_vehicle_shape(frame, box):
    """Compute simple shape descriptors for the vehicle inside ``box``.

    Args:
        frame: Image array indexed as ``frame[row, col]``; it is converted
            with ``cv2.COLOR_BGR2GRAY``, so a BGR image is expected.
        box: ``(x1, y1, x2, y2)`` corner coordinates; values are cast to int.

    Returns:
        ``{'aspect_ratio': width / height, 'circularity': 4*pi*A / P**2}``
        where A and P are the area and perimeter of the largest Canny edge
        contour in the crop, or ``None`` when the box is degenerate, lies
        outside the frame, or no usable contour is found.
    """
    x1, y1, x2, y2 = map(int, box)
    width, height = x2 - x1, y2 - y1
    # A box without area has no defined aspect ratio (and would divide by zero).
    if width <= 0 or height <= 0:
        return None

    # Aspect ratio uses the box as reported, before clamping to the frame.
    aspect_ratio = width / height

    # Clamp the crop to the frame: tracker boxes may extend past the border and
    # negative indices would otherwise wrap around in the slice.
    frame_h, frame_w = frame.shape[:2]
    left, right = min(max(x1, 0), frame_w), min(max(x2, 0), frame_w)
    top, bottom = min(max(y1, 0), frame_h), min(max(y2, 0), frame_h)
    vehicle_region = frame[top:bottom, left:right]
    if vehicle_region.size == 0:
        return None

    # Analyze vehicle shape using contours
    gray = cv2.cvtColor(vehicle_region, cv2.COLOR_BGR2GRAY)
    edges = cv2.Canny(gray, 50, 150)
    contours, _ = cv2.findContours(edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if not contours:
        return None

    # Analyze the largest contour
    largest_contour = max(contours, key=cv2.contourArea)
    area = cv2.contourArea(largest_contour)
    perimeter = cv2.arcLength(largest_contour, True)
    if perimeter == 0:
        return None

    circularity = 4 * np.pi * area / (perimeter * perimeter)

    return {
        'aspect_ratio': aspect_ratio,
        'circularity': circularity,
    }

def classify_vehicle_details(frame, box, class_name, classifier_model):
    """Derive subtype, size, body style and purpose for one detected vehicle.

    Args:
        frame: Video frame the vehicle was detected in.
        box: ``(x1, y1, x2, y2)`` bounding box of the vehicle.
        class_name: Main vehicle class; used as a key into ``VEHICLE_SUBTYPES``.
        classifier_model: Callable model producing one row of scores per
            input image.

    Returns:
        A dict with keys ``main_type``, ``subtype``, ``size``, ``body_style``,
        ``purpose`` and ``shape_features``; or ``None`` when shape analysis
        yields nothing or any step raises (the error is printed, not raised,
        so one bad detection does not abort the caller).
    """
    try:
        # Preprocess image for classifier
        input_tensor = preprocess_vehicle_image(frame, box)

        # Get basic shape analysis
        shape_features = analyze_vehicle_shape(frame, box)
        if shape_features is None:
            return None

        # Estimate size
        size = estimate_vehicle_size(box)

        # Use classifier for detailed vehicle type
        subtypes = VEHICLE_SUBTYPES.get(class_name)
        if subtypes:
            with torch.no_grad():
                outputs = classifier_model(input_tensor)
            # Only consider as many scores as this class has subtypes, so the
            # chosen index is always valid regardless of the head's width.
            subtype_idx = int(outputs[0, :len(subtypes)].argmax().item())
            subtype = subtypes[subtype_idx]
        else:
            subtype = 'unknown'

        # Determine body style based on shape analysis
        aspect_ratio = shape_features['aspect_ratio']
        if aspect_ratio < 1.5:
            body_style = '2-door'
        elif aspect_ratio < 2.0:
            body_style = '4-door'
        elif aspect_ratio < 2.5:
            body_style = 'wagon'
        else:
            body_style = 'convertible'

        # Determine purpose based on class and features
        if class_name in ['truck', 'van']:
            purpose = 'commercial'
        elif class_name == 'bus':
            purpose = 'passenger'
        else:
            purpose = 'recreational' if shape_features['circularity'] > 0.7 else 'passenger'

        return {
            'main_type': class_name,
            'subtype': subtype,
            'size': size,
            'body_style': body_style,
            'purpose': purpose,
            'shape_features': shape_features,
        }
    except Exception as e:
        # Deliberate best-effort boundary: report and skip this detection.
        print(f"Error in vehicle classification: {e}")
        return None

def process_video(video_path, output_path, model, tracker, classifier_model, data_output_path):
    """Detect, track and classify vehicles in a video; write annotated video and CSV.

    For every frame the detector ``model`` is run, its boxes are handed to
    ``tracker`` as ``([left, top, width, height], confidence, class_id)``
    tuples, and each confirmed track whose class name is a key of
    ``VEHICLE_SUBTYPES`` is classified, drawn onto the frame and appended to
    the CSV rows.

    Args:
        video_path: Path of the input video.
        output_path: Path of the annotated output video (``mp4v`` codec).
        model: Detector; called as ``model(frame)`` and must expose ``names``.
        tracker: Tracker exposing ``update_tracks(detections, frame=frame)``.
        classifier_model: Model passed through to ``classify_vehicle_details``.
        data_output_path: Path of the CSV file with one row per drawn vehicle
            per frame. The file is left empty when nothing was recorded.

    Raises:
        ValueError: If the input video cannot be opened.

    Note:
        ``get_dominant_color`` and ``COLOR_MAP`` are used here but are not
        defined in this section of the file; they must be provided elsewhere.
    """
    # Local import: ``csv`` is not part of the module's visible import block.
    import csv

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        raise ValueError(f"Could not open video: {video_path}")

    data = []
    out = None
    try:
        # Setup video writer
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        # Some containers report 0 FPS; fall back to 30 so the writer stays usable.
        fps = int(cap.get(cv2.CAP_PROP_FPS)) or 30
        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                              (frame_width, frame_height))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            results = model(frame)
            boxes = results[0].boxes

            # Convert corner boxes to the (left, top, width, height) form the tracker takes.
            detection_list = []
            for box, conf, cls in zip(boxes.xyxy, boxes.conf, boxes.cls):
                x1, y1, x2, y2 = map(int, box.tolist())
                detection_list.append(([x1, y1, x2 - x1, y2 - y1], conf.item(), int(cls.item())))

            tracks = tracker.update_tracks(detection_list, frame=frame)

            for track in tracks:
                if not track.is_confirmed():
                    continue

                track_id = track.track_id
                ltwh = track.to_ltwh()
                box = [int(ltwh[0]), int(ltwh[1]),
                       int(ltwh[0] + ltwh[2]), int(ltwh[1] + ltwh[3])]

                cls_id = track.get_det_class()
                if cls_id is None:
                    continue

                class_name = model.names[cls_id]

                # Only process vehicles
                if class_name not in VEHICLE_SUBTYPES:
                    continue

                # Get detailed classification
                vehicle_details = classify_vehicle_details(frame, box, class_name, classifier_model)
                if not vehicle_details:
                    continue

                color = get_dominant_color(frame, box)

                # Create detailed label
                label = f"{vehicle_details['subtype']} ({vehicle_details['size']}) #{track_id}"

                # Draw bounding box and label
                bbox_color = COLOR_MAP.get(class_name.lower(), (200, 200, 200))
                cv2.rectangle(frame, (box[0], box[1]), (box[2], box[3]), bbox_color, 2)
                cv2.putText(frame, label, (box[0], box[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, bbox_color, 2)

                # Store detailed data
                data.append({
                    'Track ID': track_id,
                    'Main Type': class_name,
                    'Subtype': vehicle_details['subtype'],
                    'Size': vehicle_details['size'],
                    'Body Style': vehicle_details['body_style'],
                    'Purpose': vehicle_details['purpose'],
                    'Color': color,
                    'Confidence': track.get_det_conf() or 0.0
                })

            out.write(frame)
    finally:
        # Always release the capture and writer, even if a frame fails mid-run.
        cap.release()
        if out is not None:
            out.release()

    # Store the detailed data
    with open(data_output_path, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=data[0].keys() if data else [])
        if data:
            writer.writeheader()
            writer.writerows(data)

def main():
    """Run the full pipeline on the hard-coded input video.

    Builds the detector, tracker and subtype classifier, then writes the
    annotated video and the per-vehicle CSV next to the working directory.
    """
    source_video = "/content/WhatsApp Video 2025-01-03 at 01.11.52_2850e0bf (1).mp4"
    annotated_video = "output_video.mp4"
    csv_report = "detailed_vehicle_data.csv"

    # Setup models
    detector = setup_model()
    vehicle_tracker = setup_tracker()
    subtype_classifier = setup_classifier()

    process_video(
        video_path=source_video,
        output_path=annotated_video,
        model=detector,
        tracker=vehicle_tracker,
        classifier_model=subtype_classifier,
        data_output_path=csv_report,
    )

# Entry point: run the pipeline only when this module is executed directly.
if __name__ == "__main__":
    main()