<a href="https://colab.research.google.com/github/Sidhtang/vehicle-detection-/blob/main/efficient_b2_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install ultralytics colorthief deep_sort_realtime

Collecting ultralytics
  Downloading ultralytics-8.3.59-py3-none-any.whl.metadata (35 kB)
Collecting colorthief
  Downloading colorthief-0.2.1-py2.py3-none-any.whl.metadata (816 bytes)
Collecting deep_sort_realtime
  Downloading deep_sort_realtime-1.3.2-py3-none-any.whl.metadata (12 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.13-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.59-py3-none-any.whl (906 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m906.8/906.8 kB[0m [31m39.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading colorthief-0.2.1-py2.py3-none-any.whl (6.1 kB)
Downloading deep_sort_realtime-1.3.2-py3-none-any.whl (8.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.4/8.4 MB[0m [31m72.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.13-py3-none-any.whl (26 kB)
Installing collected packages: colorthief, deep_sort_realtime, ultralytics-thop, ultralytics
Su

In [None]:
# Upgrade torchvision to the newest release the package index offers.
# NOTE(review): the upgrade is unpinned, so the resolved torchvision version
# (and presumably any torch build it requires) can differ between runs —
# consider pinning once a known-good combination is identified.
!pip install torchvision --upgrade



In [None]:
from ultralytics import YOLO
import cv2
import numpy as np
from collections import Counter
from sklearn.cluster import KMeans
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torchvision.models import  resnet152
from PIL import Image
import csv
from colorthief import ColorThief
import io
import albumentations as A
from deep_sort_realtime.deepsort_tracker import DeepSort
import warnings
warnings.filterwarnings('ignore')

# Pick the compute device once; the models and input tensors created later in
# this file are moved to it with `.to(device)`.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# `scaler` is created only for CUDA mixed precision. It is bound on every path
# (None on CPU) so that referencing it never raises NameError.
scaler = None
if device == 'cuda':
    # Let cuDNN benchmark convolution algorithms for the input shapes it sees.
    torch.backends.cudnn.benchmark = True
    scaler = torch.cuda.amp.GradScaler()

print(f"Using device: {device}")
from torchvision.models import efficientnet_b7
import torch.nn as nn
import torch
import torchvision.transforms as transforms

class EnhancedVehicleClassifier(nn.Module):
    """EfficientNet-B7 backbone with a replacement classification head.

    The torchvision model's stock ``classifier`` is swapped for a
    BatchNorm -> Dropout -> Linear stack (2560 -> 1280 -> output) whose Linear
    layers are He-initialised here. The backbone is loaded with its ImageNet
    checkpoint; the new head starts from random weights.

    NOTE(review): the output width is ``len(VEHICLE_SUBTYPES)``, i.e. the
    number of top-level keys of that mapping rather than the number of
    individual subtype names — confirm this is the intended size.
    """

    def __init__(self):
        super().__init__()
        # Function-scope import: the weights enum ships with torchvision,
        # which this file already imports `efficientnet_b7` from.
        from torchvision.models import EfficientNet_B7_Weights

        # `weights=` is the supported replacement for the deprecated
        # `pretrained=True`; IMAGENET1K_V1 is the checkpoint that flag loaded.
        self.features = efficientnet_b7(weights=EfficientNet_B7_Weights.IMAGENET1K_V1)
        # Input width of the stock head's Linear layer (index 1 of `classifier`).
        num_ftrs = self.features.classifier[1].in_features

        # Replacement head; layer order is kept stable so parameter names do not change.
        self.features.classifier = nn.Sequential(
            nn.BatchNorm1d(num_ftrs),
            nn.Dropout(0.45),
            nn.Linear(num_ftrs, 2560),
            nn.ReLU(),
            nn.BatchNorm1d(2560),
            nn.Dropout(0.45),
            nn.Linear(2560, 1280),
            nn.ReLU(),
            nn.BatchNorm1d(1280),
            nn.Dropout(0.35),
            nn.Linear(1280, len(VEHICLE_SUBTYPES))
        )

        # He (Kaiming) initialisation for every Linear layer of the new head,
        # with zeroed biases.
        for m in self.features.classifier.modules():
            if isinstance(m, nn.Linear):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)

    def forward(self, x):
        """Run ``x`` through the backbone and head and return the logits."""
        # Mixed precision is enabled only when CUDA exists; on CPU the context
        # is an explicit no-op instead of relying on the deprecated
        # `torch.cuda.amp.autocast()` disabling itself.
        with torch.autocast(device_type='cuda', enabled=torch.cuda.is_available()):
            return self.features(x)

def get_enhanced_transforms():
    """
    Build the 600x600 preprocessing pipeline sized for EfficientNet-B7.

    The pipeline resizes, applies a random horizontal flip, a random affine
    transform and colour jitter, converts to a tensor and normalises with the
    ImageNet mean/std.

    NOTE: this file defines another function with the same name further down
    (384x384 variant). Python binds the name to the later definition, so this
    version is not the one callers of ``get_enhanced_transforms()`` receive.
    """
    imagenet_mean = [0.485, 0.456, 0.406]
    imagenet_std = [0.229, 0.224, 0.225]

    pipeline_steps = [
        transforms.Resize((600, 600)),
        transforms.RandomHorizontalFlip(p=0.3),
        transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=imagenet_mean, std=imagenet_std),
    ]
    return transforms.Compose(pipeline_steps)

class EnhancedColorClassifier(nn.Module):
    """ResNet-152 feature extractor with a gated classification head.

    ``forward`` drives the ResNet stages by hand up to the pooled feature
    vector (the backbone's own ``fc`` layer is never called), scales that
    vector by a learned sigmoid gate and feeds it to a small classifier that
    emits ``len(COLOR_CLASSES)`` logits. Only the backbone is pretrained; the
    gate and the classifier start from default initialisation.
    """

    def __init__(self):
        super().__init__()
        # Function-scope import: the weights enum ships with torchvision,
        # which this file already imports `resnet152` from.
        from torchvision.models import ResNet152_Weights

        # `weights=` is the supported replacement for the deprecated
        # `pretrained=True`; IMAGENET1K_V1 is the checkpoint that flag loaded.
        self.features = resnet152(weights=ResNet152_Weights.IMAGENET1K_V1)
        # Width of the pooled feature vector (input size of the stock `fc`).
        num_ftrs = self.features.fc.in_features

        # Produces one scalar per sample; after a sigmoid it gates the whole
        # feature vector in `forward`.
        self.attention = nn.Sequential(
            nn.Linear(num_ftrs, 512),
            nn.Tanh(),
            nn.Linear(512, 1)
        )

        self.classifier = nn.Sequential(
            nn.BatchNorm1d(num_ftrs),
            nn.Dropout(0.3),
            nn.Linear(num_ftrs, 512),
            nn.ReLU(),
            nn.BatchNorm1d(512),
            nn.Dropout(0.3),
            nn.Linear(512, len(COLOR_CLASSES))
        )

    def forward(self, x):
        """Return colour logits for the image batch ``x``."""
        backbone = self.features
        # Mixed precision is enabled only when CUDA exists; on CPU the context
        # is an explicit no-op instead of relying on the deprecated
        # `torch.cuda.amp.autocast()` disabling itself.
        with torch.autocast(device_type='cuda', enabled=torch.cuda.is_available()):
            # Stem: conv -> batch norm -> ReLU -> max pool.
            features = backbone.conv1(x)
            features = backbone.bn1(features)
            features = backbone.relu(features)
            features = backbone.maxpool(features)

            # The four residual stages, in order.
            for stage in (backbone.layer1, backbone.layer2, backbone.layer3, backbone.layer4):
                features = stage(features)

            # Global pooling, then flatten everything after the batch dimension.
            features = torch.flatten(backbone.avgpool(features), 1)

            # Sigmoid gate in (0, 1), broadcast across the feature dimension.
            gate = torch.sigmoid(self.attention(features))
            return self.classifier(features * gate)

# Lookup tables shared by the classification helpers.
#
# VEHICLE_SUBTYPES maps a main vehicle category to its ordered list of subtype
# names; the order matters because subtypes are selected by list index.
VEHICLE_SUBTYPES = {
    'car': [
        'sedan', 'suv', 'hatchback', 'wagon', 'coupe', 'sports_car',
        'luxury', 'compact', 'convertible', 'crossover', 'electric_vehicle',
    ],
    'truck': [
        'pickup', 'semi', 'delivery', 'dump_truck', 'box_truck',
        'flatbed', 'tanker', 'concrete_mixer', 'car_carrier',
    ],
    'bus': [
        'city_bus', 'coach', 'mini_bus', 'school_bus',
        'articulated_bus', 'double_decker', 'shuttle_bus',
    ],
    'van': [
        'passenger_van', 'cargo_van', 'minivan', 'camper_van',
        'panel_van', 'refrigerated_van', 'step_van',
    ],
    'two_wheeler': [
        'motorcycle', 'scooter', 'bicycle', 'electric_bike',
        'moped', 'sport_bike', 'cruiser', 'touring_bike',
    ],
}

# Ordered colour labels; a classifier output index is translated to a name by
# position in this list.
COLOR_CLASSES = [
    'black', 'white', 'gray', 'silver',
    'red', 'blue', 'green', 'yellow',
    'brown', 'orange', 'purple', 'gold',
    'beige', 'burgundy', 'navy', 'teal',
    'bronze', 'copper', 'champagne',
]

def get_enhanced_transforms(augment=False):
    """
    Create the 384x384 image preprocessing pipeline.

    Args:
        augment: When True, insert the random augmentations (horizontal flip,
            affine jitter, colour jitter) between resize and tensor
            conversion. Defaults to False because the callers in this file
            use the pipeline for inference, where random flips and colour
            jitter make predictions non-deterministic and alter the very
            colours being classified.

    Returns:
        A ``transforms.Compose`` that maps a PIL image to a normalised tensor
        (ImageNet mean/std).
    """
    steps = [transforms.Resize((384, 384))]

    if augment:
        steps.extend([
            transforms.RandomHorizontalFlip(p=0.3),
            transforms.RandomAffine(degrees=10, translate=(0.1, 0.1), scale=(0.9, 1.1)),
            transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),
        ])

    steps.extend([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    return transforms.Compose(steps)

def classify_vehicle_details(frame, box, class_name, vehicle_classifier, confidence_threshold=0.7):
    """
    Classify the vehicle inside ``box`` and derive size/body-style/purpose labels.

    Args:
        frame: BGR image array (converted with ``cv2.COLOR_BGR2RGB`` below).
        box: ``(x1, y1, x2, y2)`` corner coordinates; values are cast to int
            and clamped to the frame.
        class_name: Main vehicle category; must be a key of ``VEHICLE_SUBTYPES``.
        vehicle_classifier: Callable model returning logits for a 1-image batch.
        confidence_threshold: Minimum softmax probability required to accept
            the prediction.

    Returns:
        A dict with keys ``subtype``, ``size``, ``body_style``, ``purpose`` and
        ``confidence``, or ``None`` when the category is unknown, the box has
        no area inside the frame, the confidence is below the threshold, or
        any error occurs (the error is printed).
    """
    try:
        # Reject unknown categories before spending a forward pass on them.
        subtypes = VEHICLE_SUBTYPES.get(class_name)
        if not subtypes:
            return None

        frame_h, frame_w = frame.shape[:2]
        x1, y1, x2, y2 = map(int, box)

        # A box reaching outside the frame would otherwise be sliced with
        # negative indices, which NumPy interprets relative to the end of the
        # axis and so yields the wrong (or an empty) patch.
        x1, x2 = max(0, x1), min(frame_w, x2)
        y1, y2 = max(0, y1), min(frame_h, y2)
        if x2 <= x1 or y2 <= y1:
            return None

        vehicle_img = frame[y1:y2, x1:x2]

        # Convert BGR array -> RGB PIL image -> normalised 1-image batch.
        img_pil = Image.fromarray(cv2.cvtColor(vehicle_img, cv2.COLOR_BGR2RGB))
        transform = get_enhanced_transforms()
        input_tensor = transform(img_pil).unsqueeze(0).to(device)

        # Autocast is enabled only on CUDA; on CPU the context does nothing.
        with torch.no_grad(), torch.autocast(device_type='cuda', enabled=(device == 'cuda')):
            outputs = vehicle_classifier(input_tensor)
            probabilities = torch.softmax(outputs, dim=1)
            conf, predicted = probabilities.max(1)

        confidence = conf.item()
        if confidence < confidence_threshold:
            return None

        # The predicted index is wrapped onto this category's subtype list.
        subtype = subtypes[predicted.item() % len(subtypes)]

        # Size bucket from the share of the frame covered by the (clamped) box.
        area_ratio = ((x2 - x1) * (y2 - y1)) / (frame_h * frame_w)
        if area_ratio < 0.05:
            size = 'small'
        elif area_ratio < 0.15:
            size = 'medium'
        else:
            size = 'large'

        commercial_subtypes = ('semi', 'delivery', 'dump_truck', 'box_truck', 'tanker',
                               'concrete_mixer', 'car_carrier', 'refrigerated_van')
        passenger_subtypes = ('city_bus', 'school_bus', 'coach', 'shuttle_bus')

        if subtype in commercial_subtypes:
            body_style, purpose = 'commercial', 'commercial'
        elif subtype in passenger_subtypes:
            body_style, purpose = 'passenger', 'public'
        else:
            body_style, purpose = 'standard', 'personal'

        return {
            'subtype': subtype,
            'size': size,
            'body_style': body_style,
            'purpose': purpose,
            'confidence': confidence
        }
    except Exception as e:
        # Best-effort: a failure on one crop must not abort the whole video.
        print(f"Vehicle classification error: {e}")
        return None


def setup_models():
    """
    Build every model object the pipeline needs and return them in one dict.

    Returns:
        dict with keys ``'yolo'``, ``'vehicle_classifier'``,
        ``'color_classifier'``, ``'tracker'`` and ``'device'``. Both
        classifiers are moved to ``device`` and switched to eval mode.
    """
    # Detector.
    # NOTE(review): the thresholds are stored as plain attributes on the model
    # object — confirm the inference call actually honours them.
    detector = YOLO('yolov8x.pt')
    detector.conf = 0.35
    detector.iou = 0.65
    detector.to(device)

    # Subtype classifier (EfficientNet-B7 based), inference mode.
    subtype_net = EnhancedVehicleClassifier().to(device)
    subtype_net.eval()

    # Colour classifier, inference mode.
    colour_net = EnhancedColorClassifier().to(device)
    colour_net.eval()

    # Multi-object tracker.
    tracker_options = {
        'max_age': 45,
        'n_init': 4,
        'nms_max_overlap': 0.85,
        'max_cosine_distance': 0.25,
        'nn_budget': 150,
    }
    object_tracker = DeepSort(**tracker_options)

    bundle = {}
    bundle['yolo'] = detector
    bundle['vehicle_classifier'] = subtype_net
    bundle['color_classifier'] = colour_net
    bundle['tracker'] = object_tracker
    bundle['device'] = device
    return bundle
def detect_color(frame, box, color_classifier, confidence_threshold=0.6):
    """
    Estimate the colour of the vehicle inside ``box`` from two sources.

    Source 1 is ``color_classifier`` (accepted only when its softmax
    probability reaches ``confidence_threshold``). Source 2 is ColorThief's
    dominant colour and palette, each mapped to a name via
    ``get_closest_color`` and given a fixed confidence of 0.8.

    Args:
        frame: BGR image array.
        box: ``(x1, y1, x2, y2)`` corner coordinates; cast to int and clamped
            to the frame.
        color_classifier: Callable model returning logits over ``COLOR_CLASSES``.
        confidence_threshold: Minimum probability for the classifier's vote.

    Returns:
        dict with ``color`` (highest-confidence name, first one on ties),
        ``confidence`` and ``secondary_colors`` (first three collected names),
        or ``None`` when the box has no area inside the frame, nothing was
        collected, or an error occurs (the error is printed).
    """
    palette_confidence = 0.8  # fixed confidence assigned to palette matches

    try:
        frame_h, frame_w = frame.shape[:2]
        x1, y1, x2, y2 = map(int, box)

        # Clamp so out-of-frame coordinates are not treated as negative
        # (end-relative) NumPy indices.
        x1, x2 = max(0, x1), min(frame_w, x2)
        y1, y2 = max(0, y1), min(frame_h, y2)
        if x2 <= x1 or y2 <= y1:
            return None

        vehicle_img = frame[y1:y2, x1:x2]
        img_pil = Image.fromarray(cv2.cvtColor(vehicle_img, cv2.COLOR_BGR2RGB))

        colors = []
        confidences = []

        # Method 1: ML classification.
        transform = get_enhanced_transforms()
        input_tensor = transform(img_pil).unsqueeze(0).to(device)

        # Autocast is enabled only on CUDA; on CPU the context does nothing.
        with torch.no_grad(), torch.autocast(device_type='cuda', enabled=(device == 'cuda')):
            outputs = color_classifier(input_tensor)
            probabilities = torch.softmax(outputs, dim=1)
            conf, predicted = probabilities.max(1)

        if conf.item() >= confidence_threshold:
            colors.append(COLOR_CLASSES[predicted.item()])
            confidences.append(conf.item())

        # Method 2: ColorThief palette analysis. ColorThief reads from a file
        # object, so the crop is serialised to an in-memory PNG first. A
        # failure here is isolated so that it does not discard Method 1.
        try:
            img_byte_arr = io.BytesIO()
            img_pil.save(img_byte_arr, format='PNG')
            img_byte_arr.seek(0)

            color_thief = ColorThief(img_byte_arr)
            swatches = [color_thief.get_color(quality=1)]
            swatches += color_thief.get_palette(color_count=3, quality=1)
        except Exception as palette_error:
            print(f"Color palette extraction error: {palette_error}")
            swatches = []

        for rgb in swatches:
            color_name = get_closest_color(rgb)
            if color_name in colors:
                # Both sources agree on this name: it must score at least as
                # high as any palette-only colour instead of losing to one.
                idx = colors.index(color_name)
                confidences[idx] = max(confidences[idx], palette_confidence)
            else:
                colors.append(color_name)
                confidences.append(palette_confidence)

        if not colors:
            return None

        # First index holding the maximum confidence.
        best_idx = max(range(len(confidences)), key=confidences.__getitem__)
        return {
            'color': colors[best_idx],
            'confidence': confidences[best_idx],
            'secondary_colors': colors[:3]
        }

    except Exception as e:
        # Best-effort: a failure on one crop must not abort the whole video.
        print(f"Color detection error: {e}")
        return None

# Reference RGB value for each colour name that get_closest_color can return.
# Built once at import time instead of on every call. Insertion order matters:
# on an exact distance tie the earlier entry wins.
_NAMED_COLOR_RGB = {
    'black': (0, 0, 0),
    'white': (255, 255, 255),
    'gray': (128, 128, 128),
    'silver': (192, 192, 192),
    'red': (255, 0, 0),
    'blue': (0, 0, 255),
    'green': (0, 255, 0),
    'yellow': (255, 255, 0),
    'brown': (165, 42, 42),
    'orange': (255, 165, 0),
    'purple': (128, 0, 128),
    'gold': (255, 215, 0),
    'beige': (245, 245, 220),
    'burgundy': (128, 0, 32),
    'navy': (0, 0, 128),
    'teal': (0, 128, 128),
    'bronze': (205, 127, 50),
    'copper': (184, 115, 51),
    'champagne': (247, 231, 206),
    'pink': (255, 192, 203),
    'cyan': (0, 255, 255),
    'lime': (191, 255, 0),
    'magenta': (255, 0, 255),
    'olive': (128, 128, 0),
    'peach': (255, 229, 180),
}


def get_closest_color(rgb):
    """
    Return the name of the reference colour nearest to ``rgb``.

    Distance is the squared Euclidean distance between channel values; the
    comparison pairs channels with ``zip``, so any channels beyond the third
    are ignored.

    Args:
        rgb: Sequence of channel values, e.g. ``(r, g, b)``.

    Returns:
        The colour name (str) with the smallest distance; on a tie, the name
        listed first in the reference table.
    """
    def squared_distance(color_name):
        reference = _NAMED_COLOR_RGB[color_name]
        return sum((a - b) ** 2 for a, b in zip(rgb, reference))

    # min() keeps the first minimal element, matching a strict "<" scan.
    return min(_NAMED_COLOR_RGB, key=squared_distance)

def _write_detection_csv(csv_path, rows):
    """Write ``rows`` (a non-empty list of dicts with identical keys) to ``csv_path`` as CSV."""
    with open(csv_path, 'w', newline='') as f:
        writer = csv.DictWriter(f, fieldnames=list(rows[0].keys()))
        writer.writeheader()
        writer.writerows(rows)


def process_video(video_path, output_path, models, data_output_path):
    """
    Detect, track and classify vehicles in a video; write an annotated copy and a CSV.

    For every frame: run the detector, feed its boxes to the tracker, and for
    each confirmed track whose class name is a key of ``VEHICLE_SUBTYPES``
    classify subtype and colour, smooth the result over the last few frames of
    that track, draw it, and record one CSV row.

    Args:
        video_path: Path of the input video.
        output_path: Path of the annotated output video (``mp4v`` codec).
        models: Dict with keys ``'yolo'``, ``'tracker'``,
            ``'vehicle_classifier'`` and ``'color_classifier'``.
        data_output_path: Path of the CSV file for per-detection rows.

    Raises:
        ValueError: If the input video cannot be opened.

    Whatever rows were collected are written in the ``finally`` clause, so a
    failure part-way through still leaves a CSV. If writing to
    ``data_output_path`` fails, a ``backup_``-prefixed file in the same
    directory is attempted; failures of either write are printed, not raised.
    """
    import os  # function-scope import; only the backup path needs it

    max_history = 5      # frames of per-track history used for smoothing
    fallback_fps = 30.0  # used when the container reports no usable frame rate

    cap = cv2.VideoCapture(video_path)
    if not cap.isOpened():
        cap.release()
        raise ValueError(f"Could not open video: {video_path}")

    detector = models['yolo']
    tracker = models['tracker']

    # Thresholds assigned as attributes on the detector object (as
    # `setup_models` does with `conf` / `iou`) are forwarded explicitly,
    # because the predict call takes them as per-call arguments.
    predict_kwargs = {'augment': True}  # test-time augmentation
    for option in ('conf', 'iou'):
        value = getattr(detector, option, None)
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            predict_kwargs[option] = value

    data = []
    frame_count = 0
    temporal_buffer = {}  # track_id -> recent per-frame results for that track
    out = None

    try:
        frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

        # Keep the fractional rate (e.g. 29.97) instead of truncating it, and
        # never hand the writer a zero/negative/NaN rate.
        fps = cap.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            fps = fallback_fps

        out = cv2.VideoWriter(output_path, cv2.VideoWriter_fourcc(*'mp4v'), fps,
                              (frame_width, frame_height))

        while cap.isOpened():
            ret, frame = cap.read()
            if not ret:
                break

            frame_count += 1

            results = detector(frame, **predict_kwargs)
            boxes = results[0].boxes

            # Convert corner boxes to the ([left, top, w, h], conf, class) tuples
            # passed to the tracker.
            detection_list = []
            for xyxy, conf, cls in zip(boxes.xyxy, boxes.conf, boxes.cls):
                x1, y1, x2, y2 = map(int, xyxy.tolist())
                detection_list.append(([x1, y1, x2 - x1, y2 - y1], conf.item(), int(cls.item())))

            tracks = tracker.update_tracks(detection_list, frame=frame)

            # Draw on a copy so crops for later tracks in this frame are taken
            # from clean pixels, not from boxes/labels already drawn.
            annotated = frame.copy()

            for track in tracks:
                if not track.is_confirmed():
                    continue

                cls_id = track.get_det_class()
                if cls_id is None:
                    continue

                # NOTE(review): only labels that are keys of VEHICLE_SUBTYPES are
                # processed. The run log in this notebook shows the detector
                # emitting 'motorcycle', which is not a key — confirm whether
                # such labels should be mapped to 'two_wheeler'.
                class_name = detector.names[cls_id]
                if class_name not in VEHICLE_SUBTYPES:
                    continue

                track_id = track.track_id
                left, top, width, height = track.to_ltwh()
                box = [int(left), int(top), int(left + width), int(top + height)]

                history = temporal_buffer.setdefault(track_id, {
                    'vehicle_details': [],
                    'color_info': [],
                    'frame_history': []
                })

                vehicle_details = classify_vehicle_details(
                    frame, box, class_name, models['vehicle_classifier']
                )
                if not vehicle_details:
                    continue  # skip the colour pass when there is nothing to pair it with

                color_info = detect_color(frame, box, models['color_classifier'])
                if not color_info:
                    continue

                history['vehicle_details'].append(vehicle_details)
                history['color_info'].append(color_info)
                history['frame_history'].append(frame_count)

                # Keep only the most recent `max_history` entries.
                for key in ('vehicle_details', 'color_info', 'frame_history'):
                    history[key] = history[key][-max_history:]

                smoothed_details = get_smoothed_predictions(history)
                if smoothed_details is None:
                    continue

                label = " ".join([
                    f"{smoothed_details['subtype']} ({smoothed_details['size']})",
                    f"#{track_id}",
                    f"{smoothed_details['color']}",
                    f"{smoothed_details['confidence']:.2f}"
                ])

                # OpenCV colours are (B, G, R): low confidence -> red,
                # high confidence -> green.
                confidence = smoothed_details['confidence']
                bbox_color = (
                    0,
                    int(255 * confidence),
                    int(255 * (1 - confidence))
                )

                cv2.rectangle(annotated, (box[0], box[1]), (box[2], box[3]), bbox_color, 2)

                # Filled strip behind the label so the white text stays readable.
                text_size = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 2)[0]
                cv2.rectangle(annotated,
                              (box[0], box[1] - 25),
                              (box[0] + text_size[0], box[1]),
                              bbox_color, -1)
                cv2.putText(annotated, label, (box[0], box[1] - 10),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (255, 255, 255), 2)

                data.append({
                    'Frame': frame_count,
                    'Track_ID': track_id,
                    'Main_Type': class_name,
                    'Subtype': smoothed_details['subtype'],
                    'Size': smoothed_details['size'],
                    'Body_Style': smoothed_details['body_style'],
                    'Purpose': smoothed_details['purpose'],
                    'Primary_Color': smoothed_details['color'],
                    'Secondary_Colors': smoothed_details['secondary_colors'],
                    'Detection_Confidence': track.get_det_conf() or 0.0,
                    'Classification_Confidence': smoothed_details['confidence'],
                    'Box_Coordinates': box,
                    # Fraction of the smoothing window that is filled.
                    'Temporal_Confidence': len(history['vehicle_details']) / max_history
                })

            # Forget history of tracks that are no longer confirmed.
            current_track_ids = {track.track_id for track in tracks if track.is_confirmed()}
            temporal_buffer = {k: v for k, v in temporal_buffer.items() if k in current_track_ids}

            cv2.putText(annotated, f"Frame: {frame_count}", (10, 30),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)

            out.write(annotated)

    finally:
        cap.release()
        if out is not None:
            out.release()

        if data:
            try:
                _write_detection_csv(data_output_path, data)
                print(f"Successfully saved detection data to {data_output_path}")
            except Exception as e:
                print(f"Error saving detection data: {e}")
                # Prefix the file name (not the whole path) so a path that
                # contains a directory still yields a valid backup location.
                backup_path = os.path.join(
                    os.path.dirname(data_output_path),
                    "backup_" + os.path.basename(data_output_path)
                )
                try:
                    _write_detection_csv(backup_path, data)
                    print(f"Saved backup detection data to {backup_path}")
                except Exception as backup_error:
                    # Never raise from `finally`: that would mask the original error.
                    print(f"Error saving backup detection data: {backup_error}")

def get_smoothed_predictions(track_buffer):
    """
    Collapse a track's recent per-frame results into one consensus prediction.

    Args:
        track_buffer: Dict holding ``'vehicle_details'`` (list of dicts with
            ``subtype``, ``size``, ``body_style``, ``purpose``, ``confidence``)
            and ``'color_info'`` (list of dicts with ``color`` and optionally
            ``secondary_colors``).

    Returns:
        ``None`` if either list is empty; otherwise a dict with the most
        frequent value of each categorical field, the three most frequent
        secondary colours, and the mean classification confidence.
    """
    detail_history = track_buffer['vehicle_details']
    color_history = track_buffer['color_info']

    if not (detail_history and color_history):
        return None

    def majority(values):
        # Most frequent value; Counter resolves ties by first occurrence.
        return Counter(values).most_common(1)[0][0]

    smoothed = {
        field: majority(entry[field] for entry in detail_history)
        for field in ('subtype', 'size', 'body_style', 'purpose')
    }
    smoothed['color'] = majority(entry['color'] for entry in color_history)

    # Pool every secondary colour seen in the window and keep the top three.
    pooled_secondary = [
        name
        for entry in color_history
        if 'secondary_colors' in entry
        for name in entry['secondary_colors']
    ]
    smoothed['secondary_colors'] = [
        name for name, _ in Counter(pooled_secondary).most_common(3)
    ]

    # Confidence is averaged over the vehicle classifications only.
    smoothed['confidence'] = np.mean([entry['confidence'] for entry in detail_history])

    return smoothed

def main(video_path="/content/WhatsApp Video 2025-01-03 at 01.11.52_2850e0bf (1).mp4",
         output_path="output_video.mp4",
         data_output_path="vehicle_tracking_data.csv"):
    """
    Run the full pipeline: build the models, then process one video.

    Args:
        video_path: Input video. The default is the Colab upload path this
            notebook was written against; pass a different path to process
            another file.
        output_path: Where the annotated video is written.
        data_output_path: Where the per-detection CSV is written.

    Raises:
        Exception: Any error from model setup or video processing is printed
            and then re-raised unchanged.
    """
    try:
        print("Initializing models...")
        models = setup_models()
        print("Models initialized successfully")

        print("Starting video processing...")
        process_video(video_path, output_path, models, data_output_path)
        print("Video processing completed successfully")

    except Exception as e:
        # Log for the notebook output, then propagate so the failure is visible.
        print(f"Error in main execution: {e}")
        raise


if __name__ == "__main__":
    main()

Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
Using device: cuda
Initializing models...
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8x.pt to 'yolov8x.pt'...


  check_for_updates()
100%|██████████| 131M/131M [00:00<00:00, 235MB/s]
Downloading: "https://download.pytorch.org/models/efficientnet_b7_lukemelas-c5b4e57e.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b7_lukemelas-c5b4e57e.pth
100%|██████████| 255M/255M [00:02<00:00, 130MB/s]
Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth
100%|██████████| 230M/230M [00:01<00:00, 161MB/s]


Models initialized successfully
Starting video processing...

0: 384x640 4 persons, 13 cars, 1 motorcycle, 1 truck, 7630.1ms
Speed: 10.2ms preprocess, 7630.1ms inference, 608.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 11 cars, 1 motorcycle, 2 buss, 87.4ms
Speed: 5.9ms preprocess, 87.4ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 11 cars, 1 motorcycle, 3 buss, 1 truck, 85.7ms
Speed: 4.3ms preprocess, 85.7ms inference, 1.7ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 10 cars, 1 motorcycle, 2 buss, 1 truck, 86.6ms
Speed: 4.8ms preprocess, 86.6ms inference, 1.6ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 11 cars, 1 motorcycle, 3 buss, 1 truck, 127.8ms
Speed: 3.9ms preprocess, 127.8ms inference, 1.3ms postprocess per image at shape (1, 3, 384, 640)

0: 384x640 4 persons, 9 cars, 1 motorcycle, 2 buss, 1 truck, 111.7ms
Speed: 3.3ms preprocess, 111.7ms inference