<a href="https://colab.research.google.com/github/Richraj14/EdgeModels/blob/main/Comparision.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [4]:
!pip install thop



In [5]:
 !pip install ultralytics

Collecting ultralytics
  Downloading ultralytics-8.3.86-py3-none-any.whl.metadata (35 kB)
Collecting ultralytics-thop>=2.0.0 (from ultralytics)
  Downloading ultralytics_thop-2.0.14-py3-none-any.whl.metadata (9.4 kB)
Downloading ultralytics-8.3.86-py3-none-any.whl (922 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m922.6/922.6 kB[0m [31m31.4 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ultralytics_thop-2.0.14-py3-none-any.whl (26 kB)
Installing collected packages: ultralytics-thop, ultralytics
Successfully installed ultralytics-8.3.86 ultralytics-thop-2.0.14


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time
import numpy as np
import gc
from tqdm import tqdm
from thop import profile  # FLOPs Calculation
import pandas as pd
from ultralytics import YOLO  # YOLO Model for Edge AI


Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.


In [None]:
# ✅ Define device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cuda


In [None]:
# ✅ Load Pascal VOC Classification Dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])


In [None]:
dataset = torchvision.datasets.VOCSegmentation(  # Use segmentation for consistent sizes
    root="./data", year="2012", image_set="val", download=True, transform=transform
)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=2, collate_fn=lambda x: (torch.stack([i[0] for i in x]), None))


Downloading http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar to ./data/VOCtrainval_11-May-2012.tar


100%|██████████| 2.00G/2.00G [01:50<00:00, 18.1MB/s]


Extracting ./data/VOCtrainval_11-May-2012.tar to ./data


In [None]:
# ✅ Models to evaluate (EDGE + HIGH-PERFORMANCE)
models_to_test = [
    "mobilenet_v3_large", "efficientnet_b0", "squeezenet1_1", "shufflenet_v2_x0_5",  # EDGE AI
    "resnet50", "vit_b_16", "convnext_tiny",  # HIGH-PERFORMANCE
]


In [None]:
# ✅ Function to measure GPU memory usage
def get_memory_usage():
    return torch.cuda.memory_allocated(device) / 1e6 if torch.cuda.is_available() else 0


In [None]:

# ✅ Function to evaluate a model
def evaluate_model(model_name, num_trials=5):
    print(f"\n🚀 Evaluating {model_name} on Pascal VOC...")

    # ✅ Load pre-trained model
    if "yolo" in model_name:
        model = YOLO("yolov8n.pt").to(device)  # Load YOLO-Nano for edge AI
    else:
        model = getattr(torchvision.models, model_name)(pretrained=True).to(device)

    model.eval()

    # ✅ Measure inference time
    total_times = []
    for _ in range(num_trials):
        dummy_input = torch.randn(1, 3, 224, 224).to(device)  # Adjust input size as needed
        torch.cuda.synchronize()
        start_time = torch.cuda.Event(enable_timing=True)
        end_time = torch.cuda.Event(enable_timing=True)

        start_time.record()
        with torch.no_grad():
            model(dummy_input)
        end_time.record()
        torch.cuda.synchronize()

        total_times.append(start_time.elapsed_time(end_time) / 1000)  # Convert ms to seconds

    # ✅ Compute final metrics
    avg_time = np.mean(total_times)
    min_time, max_time = np.min(total_times), np.max(total_times)
    fps = 1 / avg_time if avg_time > 0 else 0
    throughput = num_trials / sum(total_times) if sum(total_times) > 0 else 0

    # ✅ Track VRAM Usage
    max_vram = torch.cuda.max_memory_allocated() / (1024**2) if torch.cuda.is_available() else 0

    # ✅ Clear GPU memory for next model
    torch.cuda.empty_cache()
    gc.collect()

    return {
        "Model": model_name,
        "Category": "Edge AI" if model_name in ["mobilenet_v3_large", "efficientnet_b0", "squeezenet1_1", "shufflenet_v2_x0_5", "yolov8n"] else "High-Performance",
        "Avg Inference Time (s)": round(avg_time, 4),
        "Min Inference Time (s)": round(min_time, 4),
        "Max Inference Time (s)": round(max_time, 4),
        "FPS": round(fps, 2),
        "Throughput (images/sec)": round(throughput, 2),
        "Max VRAM Usage (MB)": round(max_vram, 2),
    }

# ✅ Run evaluation for all models
models_to_test = ["mobilenet_v3_large", "efficientnet_b0", "squeezenet1_1", "shufflenet_v2_x0_5", "yolov8n"]
results = [evaluate_model(model) for model in models_to_test]

# ✅ Print results
for res in results:
    print(res)


🚀 Evaluating mobilenet_v3_large on Pascal VOC...


Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth



🚀 Evaluating efficientnet_b0 on Pascal VOC...


100%|██████████| 20.5M/20.5M [00:00<00:00, 77.6MB/s]
Downloading: "https://download.pytorch.org/models/squeezenet1_1-b8a52dc0.pth" to /root/.cache/torch/hub/checkpoints/squeezenet1_1-b8a52dc0.pth



🚀 Evaluating squeezenet1_1 on Pascal VOC...


100%|██████████| 4.73M/4.73M [00:00<00:00, 45.0MB/s]
Downloading: "https://download.pytorch.org/models/shufflenetv2_x0.5-f707e7126e.pth" to /root/.cache/torch/hub/checkpoints/shufflenetv2_x0.5-f707e7126e.pth



🚀 Evaluating shufflenet_v2_x0_5 on Pascal VOC...


100%|██████████| 5.28M/5.28M [00:00<00:00, 31.6MB/s]



🚀 Evaluating yolov8n on Pascal VOC...
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8n.pt to 'yolov8n.pt'...


100%|██████████| 6.25M/6.25M [00:00<00:00, 109MB/s]



0: 224x224 (no detections), 67.4ms
Speed: 0.1ms preprocess, 67.4ms inference, 38.7ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 7.3ms
Speed: 0.0ms preprocess, 7.3ms inference, 0.7ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 6.9ms
Speed: 0.0ms preprocess, 6.9ms inference, 0.7ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 6.9ms
Speed: 0.0ms preprocess, 6.9ms inference, 0.7ms postprocess per image at shape (1, 3, 224, 224)

0: 224x224 (no detections), 7.0ms
Speed: 0.0ms preprocess, 7.0ms inference, 0.7ms postprocess per image at shape (1, 3, 224, 224)
{'Model': 'mobilenet_v3_large', 'Category': 'Edge AI', 'Avg Inference Time (s)': 0.0068, 'Min Inference Time (s)': 0.0063, 'Max Inference Time (s)': 0.0078, 'FPS': 147.48, 'Throughput (images/sec)': 147.48, 'Max VRAM Usage (MB)': 58.34}
{'Model': 'efficientnet_b0', 'Category': 'Edge AI', 'Avg Inference Time (s)': 0.0235, 'Min Inference Time

In [None]:
# ✅ Compute final metrics
  avg_time = np.mean(total_times)
  min_time, max_time = np.min(total_times), np.max(total_times)
  fps = 1 / avg_time if avg_time > 0 else 0
  throughput = len(dataset) / sum(total_times) if sum(total_times) > 0 else 0

    # ✅ Clear GPU memory for next model
  torch.cuda.empty_cache()
  gc.collect()

    return {
        "Model": model_name,
        "Category": "Edge AI" if model_name in ["mobilenet_v3_large", "efficientnet_b0", "squeezenet1_1", "shufflenet_v2_x0_5", "yolov8n"] else "High-Performance",
        "Model Size (MB)": round(model_size, 2),
        "FLOPs (G)": round(macs / 1e9, 2),
        "Avg Inference Time (s)": round(avg_time, 4),
        "Min Inference Time (s)": round(min_time, 4),
        "Max Inference Time (s)": round(max_time, 4),
        "FPS": round(fps, 2),
        "Throughput (images/sec)": round(throughput, 2),
        "Max VRAM Usage (MB)": round(max_vram, 2),
    }

# ✅ Run evaluation for all models
results = []
for model_name in models_to_test:



IndentationError: unexpected indent (<ipython-input-18-894c94dfe29c>, line 2)

In [None]:

import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time
import numpy as np
import gc
from tqdm import tqdm
from thop import profile  # FLOPs Calculation
import pandas as pd
from ultralytics import YOLO  # YOLO Model for Edge AI

# ✅ Define device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ✅ Load Pascal VOC Classification Dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

dataset = torchvision.datasets.VOCSegmentation(  # Use segmentation for consistent sizes
    root="./data", year="2012", image_set="val", download=True, transform=transform
)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=2, collate_fn=lambda x: (torch.stack([i[0] for i in x]), None))

# ✅ Models to evaluate (EDGE + HIGH-PERFORMANCE)
models_to_test = [
    "mobilenet_v3_large", "efficientnet_b0", "squeezenet1_1", "shufflenet_v2_x0_5",  # EDGE AI
    "resnet50", "vit_b_16", "convnext_tiny",  # HIGH-PERFORMANCE
]

# ✅ Function to measure GPU memory usage
def get_memory_usage():
    return torch.cuda.memory_allocated(device) / 1e6 if torch.cuda.is_available() else 0

# ✅ Function to evaluate a model
def evaluate_model(model_name, num_trials=5):
    print(f"\n🚀 Evaluating {model_name} on Pascal VOC...")

    # ✅ Load pre-trained model
    if "yolo" in model_name:
        model = YOLO("yolov8n.pt").to(device)  # Load YOLO-Nano for edge AI
    else:
        model = getattr(torchvision.models, model_name)(pretrained=True).to(device)

    model.eval()

    # ✅ Compute FLOPs and Model Size
    dummy_input = torch.randn(1, 3, 224, 224).to(device)
    macs, params = profile(model, inputs=(dummy_input,), verbose=False)

    # ✅ Model size in MB
    model_size = (sum(p.numel() for p in model.parameters()) * 4) / (1024 ** 2)

    # ✅ Performance tracking
    total_times = []
    max_vram = 0

    with torch.no_grad():
        for _ in range(num_trials):
            for images, _ in tqdm(dataloader, desc=f"Running {model_name}", leave=False):
                images = images.to(device)

                # ✅ Measure inference time
                start_time = time.time()
                _ = model(images)
                total_times.append(time.time() - start_time)

                # ✅ Track max VRAM usage
                max_vram = max(max_vram, get_memory_usage())

    # ✅ Compute final metrics
    avg_time = np.mean(total_times)
    min_time, max_time = np.min(total_times), np.max(total_times)
    fps = 1 / avg_time if avg_time > 0 else 0
    throughput = len(dataset) / sum(total_times) if sum(total_times) > 0 else 0

    # ✅ Clear GPU memory for next model
    torch.cuda.empty_cache()
    gc.collect()

    return {
        "Model": model_name,
        "Category": "Edge AI" if model_name in ["mobilenet_v3_large", "efficientnet_b0", "squeezenet1_1", "shufflenet_v2_x0_5", "yolov8n"] else "High-Performance",
        "Model Size (MB)": round(model_size, 2),
        "FLOPs (G)": round(macs / 1e9, 2),
        "Avg Inference Time (s)": round(avg_time, 4),
        "Min Inference Time (s)": round(min_time, 4),
        "Max Inference Time (s)": round(max_time, 4),
        "FPS": round(fps, 2),
        "Throughput (images/sec)": round(throughput, 2),
        "Max VRAM Usage (MB)": round(max_vram, 2),
    }

# ✅ Run evaluation for all models
results = []
for model_name in models_to_test:
    results.append(evaluate_model(model_name))

# ✅ Display results in a table
df_results = pd.DataFrame(results)
print("\n📊 Model Performance Summary:")
print(df_results)

# ✅ Save results as CSV
df_results.to_csv("model_performance_results.csv", index=False)

Using device: cuda
Using downloaded and verified file: ./data/VOCtrainval_11-May-2012.tar
Extracting ./data/VOCtrainval_11-May-2012.tar to ./data

🚀 Evaluating mobilenet_v3_large on Pascal VOC...





🚀 Evaluating efficientnet_b0 on Pascal VOC...





🚀 Evaluating squeezenet1_1 on Pascal VOC...





🚀 Evaluating shufflenet_v2_x0_5 on Pascal VOC...





🚀 Evaluating resnet50 on Pascal VOC...


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 168MB/s]



🚀 Evaluating vit_b_16 on Pascal VOC...


Downloading: "https://download.pytorch.org/models/vit_b_16-c867db91.pth" to /root/.cache/torch/hub/checkpoints/vit_b_16-c867db91.pth
100%|██████████| 330M/330M [00:02<00:00, 165MB/s]



🚀 Evaluating convnext_tiny on Pascal VOC...


Downloading: "https://download.pytorch.org/models/convnext_tiny-983f1562.pth" to /root/.cache/torch/hub/checkpoints/convnext_tiny-983f1562.pth
100%|██████████| 109M/109M [00:01<00:00, 105MB/s]



📊 Model Performance Summary:
                Model          Category  Model Size (MB)  FLOPs (G)  \
0  mobilenet_v3_large           Edge AI            20.92       0.23   
1     efficientnet_b0           Edge AI            20.17       0.42   
2       squeezenet1_1           Edge AI             4.71       0.35   
3  shufflenet_v2_x0_5           Edge AI             5.21       0.04   
4            resnet50  High-Performance            97.49       4.13   
5            vit_b_16  High-Performance           330.23      11.29   
6       convnext_tiny  High-Performance           109.06       4.46   

   Avg Inference Time (s)  Min Inference Time (s)  Max Inference Time (s)  \
0                  0.0222                  0.0073                  0.1213   
1                  0.0269                  0.0093                  0.1031   
2                  0.0066                  0.0027                  0.0764   
3                  0.0223                  0.0073                  0.0620   
4               

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time
import numpy as np
import gc
from tqdm import tqdm
from thop import profile  # FLOPs Calculation
import pandas as pd
from ultralytics import YOLO  # YOLO Model for Edge AI

# ✅ Define device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ✅ Load Pascal VOC Classification Dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

dataset = torchvision.datasets.VOCSegmentation(  # Use segmentation for consistent sizes
    root="./data", year="2012", image_set="val", download=True, transform=transform
)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=2, collate_fn=lambda x: (torch.stack([i[0] for i in x]), None))

# ✅ Models to evaluate (EDGE + HIGH-PERFORMANCE)
models_to_test = [
    # EDGE AI (Optimized for speed & efficiency)
    "mobilenet_v2", "efficientnet_b1", "mnasnet1_0", "ghostnet", "regnet_x_400mf",

    # HIGH-PERFORMANCE (For stronger accuracy, but still efficient)
    "resnet18", "resnext50_32x4d", "vit_s_16", "swin_t", "convnext_small"
]

# ✅ Function to evaluate a model
def evaluate_model(model_name, num_trials=5):
    print(f"\n🚀 Evaluating {model_name} on Pascal VOC...")

    # ✅ Load pre-trained model
    if "yolo" in model_name:
        model = YOLO(f"{model_name}.pt").to(device)  # Load YOLO Model
    else:
        model_func = getattr(torchvision.models, model_name, None)
        if model_func is None:
            print(f"❌ Model {model_name} is not available in torchvision.")
            return None

        model = model_func(pretrained=True).to(device)

    model.eval()

    # ✅ Compute FLOPs and Model Size (For non-YOLO models)
    if "yolo" not in model_name:
        dummy_input = torch.randn(1, 3, 224, 224).to(device)
        try:
            macs, params = profile(model, inputs=(dummy_input,), verbose=False)
            model_size = (sum(p.numel() for p in model.parameters()) * 4) / (1024 ** 2)
        except Exception as e:
            print(f"⚠️ Could not compute FLOPs for {model_name}: {e}")
            macs, model_size = 0, 0
    else:
        macs, model_size = 0, 0  # Skip FLOPs calculation for YOLO

    # ✅ Performance tracking
    total_times = []
    max_vram = 0

    with torch.no_grad():
        for _ in range(num_trials):
            for images, _ in tqdm(dataloader, desc=f"Running {model_name}", leave=False):
                images = images.to(device)

                # ✅ Measure inference time
                start_time = time.time()
                _ = model(images)
                total_times.append(time.time() - start_time)

                # ✅ Track max VRAM usage
                max_vram = max(max_vram, get_memory_usage())

    # ✅ Compute final metrics
    avg_time = np.mean(total_times)
    min_time, max_time = np.min(total_times), np.max(total_times)
    fps = 1 / avg_time if avg_time > 0 else 0
    throughput = len(dataset) / sum(total_times) if sum(total_times) > 0 else 0

    # ✅ Clear GPU memory for next model
    torch.cuda.empty_cache()
    gc.collect()

    # ✅ Determine model category dynamically
    category = "Edge AI" if model_name in ["mobilenet_v2", "efficientnet_b1", "mnasnet1_0", "ghostnet", "regnet_x_400mf"] else "High-Performance"

    return {
        "Model": model_name,
        "Category": category,
        "Model Size (MB)": round(model_size, 2),
        "FLOPs (G)": round(macs / 1e9, 2),
        "Avg Inference Time (s)": round(avg_time, 4),
        "Min Inference Time (s)": round(min_time, 4),
        "Max Inference Time (s)": round(max_time, 4),
        "FPS": round(fps, 2),
        "Throughput (images/sec)": round(throughput, 2),
        "Max VRAM Usage (MB)": round(max_vram, 2),
    }

# ✅ Run evaluation for all models
results = []
for model_name in models_to_test:
    results.append(evaluate_model(model_name))

# ✅ Display results in a table
df_results = pd.DataFrame(results)
print("\n📊 Model Performance Summary:")
print(df_results)

# ✅ Save results as CSV
df_results.to_csv("model_performance_results.csv", index=False)


Using device: cuda
Using downloaded and verified file: ./data/VOCtrainval_11-May-2012.tar
Extracting ./data/VOCtrainval_11-May-2012.tar to ./data

🚀 Evaluating mobilenet_v2 on Pascal VOC...


Downloading: "https://download.pytorch.org/models/mobilenet_v2-b0353104.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-b0353104.pth
100%|██████████| 13.6M/13.6M [00:01<00:00, 9.90MB/s]



🚀 Evaluating efficientnet_b1 on Pascal VOC...


Downloading: "https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b1_rwightman-bac287d4.pth
100%|██████████| 30.1M/30.1M [00:00<00:00, 41.1MB/s]



🚀 Evaluating mnasnet1_0 on Pascal VOC...


Downloading: "https://download.pytorch.org/models/mnasnet1.0_top1_73.512-f206786ef8.pth" to /root/.cache/torch/hub/checkpoints/mnasnet1.0_top1_73.512-f206786ef8.pth
100%|██████████| 16.9M/16.9M [00:00<00:00, 70.8MB/s]



🚀 Evaluating ghostnet on Pascal VOC...
❌ Model ghostnet is not available in torchvision.

🚀 Evaluating regnet_x_400mf on Pascal VOC...


Downloading: "https://download.pytorch.org/models/regnet_x_400mf-adf1edd5.pth" to /root/.cache/torch/hub/checkpoints/regnet_x_400mf-adf1edd5.pth
100%|██████████| 21.3M/21.3M [00:00<00:00, 47.8MB/s]



🚀 Evaluating resnet18 on Pascal VOC...


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 109MB/s]



🚀 Evaluating resnext50_32x4d on Pascal VOC...


Downloading: "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth" to /root/.cache/torch/hub/checkpoints/resnext50_32x4d-7cdf4587.pth
100%|██████████| 95.8M/95.8M [00:00<00:00, 116MB/s]



🚀 Evaluating vit_s_16 on Pascal VOC...
❌ Model vit_s_16 is not available in torchvision.

🚀 Evaluating swin_t on Pascal VOC...


Downloading: "https://download.pytorch.org/models/swin_t-704ceda3.pth" to /root/.cache/torch/hub/checkpoints/swin_t-704ceda3.pth
100%|██████████| 108M/108M [00:00<00:00, 153MB/s] 



🚀 Evaluating convnext_small on Pascal VOC...


Downloading: "https://download.pytorch.org/models/convnext_small-0c510722.pth" to /root/.cache/torch/hub/checkpoints/convnext_small-0c510722.pth
100%|██████████| 192M/192M [00:01<00:00, 174MB/s]


AttributeError: 'NoneType' object has no attribute 'keys'

In [None]:
!pip install pytorchcv

Collecting pytorchcv
  Downloading pytorchcv-0.0.73-py2.py3-none-any.whl.metadata (134 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/134.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.2/134.2 kB[0m [31m5.8 MB/s[0m eta [36m0:00:00[0m
Downloading pytorchcv-0.0.73-py2.py3-none-any.whl (585 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/585.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m585.2/585.2 kB[0m [31m27.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pytorchcv
Successfully installed pytorchcv-0.0.73


In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time
import numpy as np
import gc
from tqdm import tqdm
from thop import profile  # FLOPs Calculation
import pandas as pd
from ultralytics import YOLO  # YOLO Model for Edge AI

# ✅ Define device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ✅ VRAM Usage Tracking
def get_memory_usage():
    if torch.cuda.is_available():
        return torch.cuda.max_memory_allocated() / (1024 ** 2)  # Convert bytes to MB
    return 0  # Return 0 if using CPU

# ✅ Load Pascal VOC Dataset
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

dataset = torchvision.datasets.VOCSegmentation(
    root="./data", year="2012", image_set="val", download=True, transform=transform
)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=2, collate_fn=lambda x: (torch.stack([i[0] for i in x]), None))

# ✅ Models to evaluate (EDGE + HIGH-PERFORMANCE)
models_to_test = [
    # EDGE AI (Optimized for speed & efficiency)
    "mobilenet_v2", "efficientnet_b1", "mnasnet1_0", "regnet_x_400mf",

    # HIGH-PERFORMANCE (For stronger accuracy, but still efficient)
    "resnet18", "resnext50_32x4d", "vit_s_16", "swin_t", "convnext_small"
]

# ✅ Function to evaluate a model
def evaluate_model(model_name, num_trials=5):
    print(f"\n🚀 Evaluating {model_name} on Pascal VOC...")

    # ✅ Load pre-trained model
    try:
        if "yolo" in model_name:
            model = YOLO(f"{model_name}.pt").to(device)  # Load YOLO Model
        else:
            model_func = getattr(torchvision.models, model_name, None)
            if model_func is None:
                print(f"❌ Model {model_name} is not available in torchvision.")
                return None

            model = model_func(weights="DEFAULT").to(device)  # Updated weight loading

        model.eval()
    except Exception as e:
        print(f"⚠️ Error loading {model_name}: {e}")
        return None

    # ✅ Compute FLOPs and Model Size (For non-YOLO models)
    if "yolo" not in model_name:
        dummy_input = torch.randn(1, 3, 224, 224).to(device)
        try:
            macs, params = profile(model, inputs=(dummy_input,), verbose=False)
            model_size = (sum(p.numel() for p in model.parameters()) * 4) / (1024 ** 2)
        except Exception as e:
            print(f"⚠️ Could not compute FLOPs for {model_name}: {e}")
            macs, model_size = 0, 0
    else:
        macs, model_size = 0, 0  # Skip FLOPs calculation for YOLO

    # ✅ Performance tracking
    total_times = []
    max_vram = 0

    with torch.no_grad():
        for _ in range(num_trials):
            for images, _ in tqdm(dataloader, desc=f"Running {model_name}", leave=False):
                images = images.to(device)

                # ✅ Measure inference time
                start_time = time.time()
                _ = model(images)
                total_times.append(time.time() - start_time)

                # ✅ Track max VRAM usage
                max_vram = max(max_vram, get_memory_usage())

    # ✅ Compute final metrics
    avg_time = np.mean(total_times)
    min_time, max_time = np.min(total_times), np.max(total_times)
    fps = 1 / avg_time if avg_time > 0 else 0
    throughput = len(dataset) / sum(total_times) if sum(total_times) > 0 else 0

    # ✅ Clear GPU memory for next model
    torch.cuda.empty_cache()
    gc.collect()

    # ✅ Determine model category dynamically
    category = "Edge AI" if model_name in ["mobilenet_v2", "efficientnet_b1", "mnasnet1_0", "regnet_x_400mf"] else "High-Performance"

    return {
        "Model": model_name,
        "Category": category,
        "Model Size (MB)": round(model_size, 2),
        "FLOPs (G)": round(macs / 1e9, 2),
        "Avg Inference Time (s)": round(avg_time, 4),
        "Min Inference Time (s)": round(min_time, 4),
        "Max Inference Time (s)": round(max_time, 4),
        "FPS": round(fps, 2),
        "Throughput (images/sec)": round(throughput, 2),
        "Max VRAM Usage (MB)": round(max_vram, 2),
    }

# ✅ Run evaluation for all models
results = []
for model_name in models_to_test:
    result = evaluate_model(model_name)
    if result:  # Only append if evaluation was successful
        results.append(result)

# ✅ Display results in a table
df_results = pd.DataFrame(results)
print("\n📊 Model Performance Summary:")
print(df_results)

# ✅ Save results as CSV
df_results.to_csv("model_performance_results.csv", index=False)


Using device: cuda
Using downloaded and verified file: ./data/VOCtrainval_11-May-2012.tar
Extracting ./data/VOCtrainval_11-May-2012.tar to ./data


Downloading: "https://download.pytorch.org/models/mobilenet_v2-7ebf99e0.pth" to /root/.cache/torch/hub/checkpoints/mobilenet_v2-7ebf99e0.pth



🚀 Evaluating mobilenet_v2 on Pascal VOC...


100%|██████████| 13.6M/13.6M [00:00<00:00, 68.2MB/s]



🚀 Evaluating efficientnet_b1 on Pascal VOC...


Downloading: "https://download.pytorch.org/models/efficientnet_b1-c27df63c.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b1-c27df63c.pth
100%|██████████| 30.1M/30.1M [00:00<00:00, 99.9MB/s]



🚀 Evaluating mnasnet1_0 on Pascal VOC...


Downloading: "https://download.pytorch.org/models/regnet_x_400mf-62229a5f.pth" to /root/.cache/torch/hub/checkpoints/regnet_x_400mf-62229a5f.pth



🚀 Evaluating regnet_x_400mf on Pascal VOC...


100%|██████████| 21.3M/21.3M [00:00<00:00, 134MB/s] 



🚀 Evaluating resnet18 on Pascal VOC...





🚀 Evaluating resnext50_32x4d on Pascal VOC...


Downloading: "https://download.pytorch.org/models/resnext50_32x4d-1a0047aa.pth" to /root/.cache/torch/hub/checkpoints/resnext50_32x4d-1a0047aa.pth
100%|██████████| 95.8M/95.8M [00:00<00:00, 171MB/s]



🚀 Evaluating vit_s_16 on Pascal VOC...
❌ Model vit_s_16 is not available in torchvision.

🚀 Evaluating swin_t on Pascal VOC...





🚀 Evaluating convnext_small on Pascal VOC...





📊 Model Performance Summary:
             Model          Category  Model Size (MB)  FLOPs (G)  \
0     mobilenet_v2           Edge AI            13.37       0.33   
1  efficientnet_b1           Edge AI            29.73       0.61   
2       mnasnet1_0           Edge AI            16.72       0.34   
3   regnet_x_400mf           Edge AI            20.97       0.43   
4         resnet18  High-Performance            44.59       1.82   
5  resnext50_32x4d  High-Performance            95.48       4.29   
6           swin_t  High-Performance           107.91       2.98   
7   convnext_small  High-Performance           191.59       8.70   

   Avg Inference Time (s)  Min Inference Time (s)  Max Inference Time (s)  \
0                  0.0178                  0.0061                  0.0819   
1                  0.0351                  0.0124                  0.0974   
2                  0.0168                  0.0055                  0.0480   
3                  0.0329                  0.0091

In [None]:
import torch
import time
import numpy as np
import gc
from tqdm import tqdm
import pandas as pd
from ultralytics import YOLO  # YOLOv8
import os

# ✅ Define device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ✅ VRAM Usage Tracking
def get_memory_usage():
    if torch.cuda.is_available():
        return torch.cuda.max_memory_allocated() / (1024 ** 2)  # Convert bytes to MB
    return 0  # Return 0 if using CPU

# ✅ YOLO Model Variants
yolo_models = [
    # ✅ YOLOv5 Variants
    "yolov5s", "yolov5m", "yolov5l", "yolov5x",

    # ✅ YOLOv6 Variants
    "yolov6n", "yolov6s", "yolov6m", "yolov6l",

    # ✅ YOLOv7 Variants
    "yolov7", "yolov7-tiny", "yolov7x",

    # ✅ YOLOv8 Variants
    "yolov8n", "yolov8s", "yolov8m", "yolov8l"
]

# ✅ Function to evaluate YOLO models
def evaluate_yolo(model_name, num_trials=5):
    print(f"\n🚀 Evaluating {model_name}...")

    # ✅ Load YOLO Model
    try:
        model = YOLO(f"{model_name}.pt").to(device)  # Load Model
        model.eval()
    except Exception as e:
        print(f"⚠️ Error loading {model_name}: {e}")
        return None

    # ✅ Performance tracking
    total_times = []
    max_vram = 0

    dummy_input = torch.randn(1, 3, 640, 640).to(device)  # YOLO expects 640x640

    with torch.no_grad():
        for _ in range(num_trials):
            # ✅ Measure inference time
            start_time = time.time()
            _ = model(dummy_input)  # Run inference
            total_times.append(time.time() - start_time)

            # ✅ Track max VRAM usage
            max_vram = max(max_vram, get_memory_usage())

    # ✅ Compute final metrics
    avg_time = np.mean(total_times)
    min_time, max_time = np.min(total_times), np.max(total_times)
    fps = 1 / avg_time if avg_time > 0 else 0

    # ✅ Clear GPU memory for next model
    torch.cuda.empty_cache()
    gc.collect()

    return {
        "Model": model_name,
        "Avg Inference Time (s)": round(avg_time, 4),
        "Min Inference Time (s)": round(min_time, 4),
        "Max Inference Time (s)": round(max_time, 4),
        "FPS": round(fps, 2),
        "Max VRAM Usage (MB)": round(max_vram, 2),
    }

# ✅ Run evaluation for all YOLO models
results = []
for model_name in yolo_models:
    result = evaluate_yolo(model_name)
    if result:  # Only append if evaluation was successful
        results.append(result)

# ✅ Display results in a table
df_results = pd.DataFrame(results)
print("\n📊 YOLO Model Performance Summary:")
print(df_results)

# ✅ Save results as CSV
df_results.to_csv("yolo_performance_results.csv", index=False)


Using device: cuda

🚀 Evaluating yolov5s...
PRO TIP 💡 Replace 'model=yolov5s.pt' with new 'model=yolov5su.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.

Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov5su.pt to 'yolov5su.pt'...


100%|██████████| 17.7M/17.7M [00:00<00:00, 123MB/s] 



0: 640x640 (no detections), 15.0ms
Speed: 0.1ms preprocess, 15.0ms inference, 1.9ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 15.0ms
Speed: 0.0ms preprocess, 15.0ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 15.3ms
Speed: 0.0ms preprocess, 15.3ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 15.0ms
Speed: 0.0ms preprocess, 15.0ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 14.7ms
Speed: 0.0ms preprocess, 14.7ms inference, 1.1ms postprocess per image at shape (1, 3, 640, 640)

🚀 Evaluating yolov5m...
PRO TIP 💡 Replace 'model=yolov5m.pt' with new 'model=yolov5mu.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.

Downloading https://github.com/ultralytics/assets/rel

100%|██████████| 48.2M/48.2M [00:00<00:00, 109MB/s]



0: 640x640 (no detections), 32.3ms
Speed: 0.0ms preprocess, 32.3ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 32.4ms
Speed: 0.0ms preprocess, 32.4ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 32.4ms
Speed: 0.2ms preprocess, 32.4ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 29.9ms
Speed: 0.2ms preprocess, 29.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 19.4ms
Speed: 0.2ms preprocess, 19.4ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

🚀 Evaluating yolov5l...
PRO TIP 💡 Replace 'model=yolov5l.pt' with new 'model=yolov5lu.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.

Downloading https://github.com/ultralytics/assets/rel

100%|██████████| 102M/102M [00:01<00:00, 86.8MB/s]



0: 640x640 (no detections), 53.4ms
Speed: 0.0ms preprocess, 53.4ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 49.9ms
Speed: 0.0ms preprocess, 49.9ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 49.3ms
Speed: 0.0ms preprocess, 49.3ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 49.1ms
Speed: 0.0ms preprocess, 49.1ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 35.8ms
Speed: 0.0ms preprocess, 35.8ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

🚀 Evaluating yolov5x...
PRO TIP 💡 Replace 'model=yolov5x.pt' with new 'model=yolov5xu.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.

Downloading https://github.com/ultralytics/assets/rel

100%|██████████| 186M/186M [00:01<00:00, 108MB/s]



0: 640x640 (no detections), 99.1ms
Speed: 0.0ms preprocess, 99.1ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 76.8ms
Speed: 0.0ms preprocess, 76.8ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 61.1ms
Speed: 0.0ms preprocess, 61.1ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 60.9ms
Speed: 0.0ms preprocess, 60.9ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 62.4ms
Speed: 0.0ms preprocess, 62.4ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

🚀 Evaluating yolov6n...
⚠️ Error loading yolov6n: [Errno 2] No such file or directory: 'yolov6n.pt'

🚀 Evaluating yolov6s...
⚠️ Error loading yolov6s: [Errno 2] No such file or directory: 'yolov6s.pt'

🚀 Evaluating yolov6m...
⚠️ Error loading yolov6m: [Errno 2] No such file or directory: 'yolov6m.pt'

🚀 Evaluating yolov6l...
⚠️ Erro

100%|██████████| 21.5M/21.5M [00:00<00:00, 209MB/s]



0: 640x640 (no detections), 16.1ms
Speed: 0.0ms preprocess, 16.1ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 16.1ms
Speed: 0.0ms preprocess, 16.1ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 16.1ms
Speed: 0.0ms preprocess, 16.1ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 16.1ms
Speed: 0.0ms preprocess, 16.1ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 16.1ms
Speed: 0.0ms preprocess, 16.1ms inference, 3.4ms postprocess per image at shape (1, 3, 640, 640)

🚀 Evaluating yolov8m...
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8m.pt to 'yolov8m.pt'...


100%|██████████| 49.7M/49.7M [00:00<00:00, 296MB/s]



0: 640x640 (no detections), 36.5ms
Speed: 0.0ms preprocess, 36.5ms inference, 2.8ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 36.6ms
Speed: 0.0ms preprocess, 36.6ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 36.5ms
Speed: 0.0ms preprocess, 36.5ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 36.5ms
Speed: 0.0ms preprocess, 36.5ms inference, 1.6ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 29.2ms
Speed: 0.0ms preprocess, 29.2ms inference, 1.5ms postprocess per image at shape (1, 3, 640, 640)

🚀 Evaluating yolov8l...
Downloading https://github.com/ultralytics/assets/releases/download/v8.3.0/yolov8l.pt to 'yolov8l.pt'...


100%|██████████| 83.7M/83.7M [00:01<00:00, 52.3MB/s]



0: 640x640 (no detections), 61.5ms
Speed: 0.0ms preprocess, 61.5ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 61.6ms
Speed: 0.0ms preprocess, 61.6ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 38.4ms
Speed: 0.0ms preprocess, 38.4ms inference, 1.3ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 37.2ms
Speed: 0.0ms preprocess, 37.2ms inference, 1.2ms postprocess per image at shape (1, 3, 640, 640)

0: 640x640 (no detections), 37.8ms
Speed: 0.0ms preprocess, 37.8ms inference, 1.4ms postprocess per image at shape (1, 3, 640, 640)

📊 YOLO Model Performance Summary:
     Model  Avg Inference Time (s)  Min Inference Time (s)  \
0  yolov5s                  0.0557                  0.0196   
1  yolov5m                  0.1476                  0.0257   
2  yolov5l                  0.1638                  0.0413   
3  yolov5x                  0.2625                  

In [1]:
!pip install thop
!pip install pycocotools

Collecting thop
  Downloading thop-0.1.1.post2209072238-py3-none-any.whl.metadata (2.7 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->thop)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->thop)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->thop)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch->thop)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch->thop)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch->thop)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2

**Downloading coc2017 dataset**

In [8]:
import os
import requests
import zipfile

# ✅ Define the COCO dataset directory in Google Drive
coco_dir = "/content/drive/My Drive/Dataset/COCO2017"
os.makedirs(coco_dir, exist_ok=True)

# ✅ COCO dataset URLs
coco_urls = {
    "train_images": "http://images.cocodataset.org/zips/train2017.zip",
    "val_images": "http://images.cocodataset.org/zips/val2017.zip",
    "annotations": "http://images.cocodataset.org/annotations/annotations_trainval2017.zip",
}

# ✅ Function to download and extract files
def download_and_extract(url, save_path, extract_path):
    filename = url.split("/")[-1]
    filepath = os.path.join(save_path, filename)

    # Download file if not already exists
    if not os.path.exists(filepath):
        print(f"⬇️ Downloading {filename}...")
        response = requests.get(url, stream=True)
        with open(filepath, "wb") as file:
            for chunk in response.iter_content(chunk_size=1024):
                file.write(chunk)
        print(f"✅ Downloaded {filename}")

    # Extract file
    print(f"📦 Extracting {filename}...")
    with zipfile.ZipFile(filepath, "r") as zip_ref:
        zip_ref.extractall(extract_path)
    print(f"✅ Extracted {filename}")

# ✅ Download & extract train, val images and annotations
for key, url in coco_urls.items():
    download_and_extract(url, coco_dir, coco_dir)

print("\n🚀 COCO 2017 dataset is stored in Google Drive at:", coco_dir)



⬇️ Downloading train2017.zip...
✅ Downloaded train2017.zip
📦 Extracting train2017.zip...
✅ Extracted train2017.zip
⬇️ Downloading val2017.zip...
✅ Downloaded val2017.zip
📦 Extracting val2017.zip...
✅ Extracted val2017.zip
⬇️ Downloading annotations_trainval2017.zip...
✅ Downloaded annotations_trainval2017.zip
📦 Extracting annotations_trainval2017.zip...
✅ Extracted annotations_trainval2017.zip

🚀 COCO 2017 dataset is stored in Google Drive at: /content/drive/My Drive/Dataset/COCO2017


Trying various olo models with coco 2017 data set

In [1]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
import time
import numpy as np
import gc
from tqdm import tqdm
from thop import profile  # FLOPs Calculation
import pandas as pd
from ultralytics import YOLO  # YOLO Model for Edge AI
from pycocotools.coco import COCO
from torchvision.datasets import CocoDetection
import os

# ✅ Set device (Use T4 GPU on Google Colab)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ✅ VRAM Usage Tracking
def get_memory_usage():
    if torch.cuda.is_available():
        return torch.cuda.max_memory_allocated() / (1024 ** 2)  # Convert bytes to MB
    return 0  # Return 0 if using CPU

# ✅ Define dataset paths (Stored Locally)
data_root = "/content/drive/MyDrive/Dataset/COCO2017"
ann_file = os.path.join(data_root, "annotations/instances_val2017.json")
img_dir = os.path.join(data_root, "val2017")

# ✅ Transformations
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
])

# ✅ Load COCO 2017 Dataset
dataset = CocoDetection(root=img_dir, annFile=ann_file, transform=transform)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True, num_workers=2, collate_fn=lambda x: (torch.stack([i[0] for i in x]), None))

# ✅ Models to evaluate
models_to_test = [
    "mobilenet_v2", "efficientnet_b1", "mnasnet1_0", "regnet_x_400mf",  # EDGE AI
    "resnet18", "resnext50_32x4d", "vit_s_16", "swin_t", "convnext_small"  # HIGH-PERFORMANCE
]

# ✅ Function to evaluate a model
def evaluate_model(model_name, num_trials=5):
    print(f"\n🚀 Evaluating {model_name} on COCO 2017...")
    try:
        if "yolo" in model_name:
            model = YOLO(f"{model_name}.pt").to(device)  # Load YOLO Model
        else:
            model_func = getattr(torchvision.models, model_name, None)
            if model_func is None:
                print(f"❌ Model {model_name} is not available in torchvision.")
                return None
            model = model_func(weights="DEFAULT").to(device)
        model.eval()
    except Exception as e:
        print(f"⚠️ Error loading {model_name}: {e}")
        return None

    # ✅ Compute FLOPs and Model Size (For non-YOLO models)
    if "yolo" not in model_name:
        dummy_input = torch.randn(1, 3, 224, 224).to(device)
        try:
            macs, params = profile(model, inputs=(dummy_input,), verbose=False)
            model_size = (sum(p.numel() for p in model.parameters()) * 4) / (1024 ** 2)
        except Exception as e:
            print(f"⚠️ Could not compute FLOPs for {model_name}: {e}")
            macs, model_size = 0, 0
    else:
        macs, model_size = 0, 0

    # ✅ Performance tracking
    total_times = []
    max_vram = 0

    with torch.no_grad():
        for _ in range(num_trials):
            for images, _ in tqdm(dataloader, desc=f"Running {model_name}", leave=False):
                images = images.to(device)
                start_time = time.time()
                _ = model(images)
                total_times.append(time.time() - start_time)
                max_vram = max(max_vram, get_memory_usage())

    # ✅ Compute final metrics
    avg_time = np.mean(total_times)
    min_time, max_time = np.min(total_times), np.max(total_times)
    fps = 1 / avg_time if avg_time > 0 else 0
    throughput = len(dataset) / sum(total_times) if sum(total_times) > 0 else 0

    torch.cuda.empty_cache()
    gc.collect()

    category = "Edge AI" if model_name in ["mobilenet_v2", "efficientnet_b1", "mnasnet1_0", "regnet_x_400mf"] else "High-Performance"

    return {
        "Model": model_name,
        "Category": category,
        "Model Size (MB)": round(model_size, 2),
        "FLOPs (G)": round(macs / 1e9, 2),
        "Avg Inference Time (s)": round(avg_time, 4),
        "Min Inference Time (s)": round(min_time, 4),
        "Max Inference Time (s)": round(max_time, 4),
        "FPS": round(fps, 2),
        "Throughput (images/sec)": round(throughput, 2),
        "Max VRAM Usage (MB)": round(max_vram, 2),
    }

# ✅ Run evaluation for all models
results = []
for model_name in models_to_test:
    result = evaluate_model(model_name)
    if result:
        results.append(result)

# ✅ Display results in a table
df_results = pd.DataFrame(results)
print("\n📊 Model Performance Summary:")
print(df_results)

# ✅ Save results as CSV
df_results.to_csv("coco_model_performance.csv", index=False)


Using device: cuda
loading annotations into memory...
Done (t=1.03s)
creating index...
index created!

🚀 Evaluating mobilenet_v2 on COCO 2017...





🚀 Evaluating efficientnet_b1 on COCO 2017...


Downloading: "https://download.pytorch.org/models/efficientnet_b1-c27df63c.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b1-c27df63c.pth
100%|██████████| 30.1M/30.1M [00:00<00:00, 101MB/s]
Downloading: "https://download.pytorch.org/models/mnasnet1.0_top1_73.512-f206786ef8.pth" to /root/.cache/torch/hub/checkpoints/mnasnet1.0_top1_73.512-f206786ef8.pth



🚀 Evaluating mnasnet1_0 on COCO 2017...


100%|██████████| 16.9M/16.9M [00:00<00:00, 83.8MB/s]



🚀 Evaluating regnet_x_400mf on COCO 2017...


Downloading: "https://download.pytorch.org/models/regnet_x_400mf-62229a5f.pth" to /root/.cache/torch/hub/checkpoints/regnet_x_400mf-62229a5f.pth
100%|██████████| 21.3M/21.3M [00:00<00:00, 30.3MB/s]



🚀 Evaluating resnet18 on COCO 2017...


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 72.9MB/s]



🚀 Evaluating resnext50_32x4d on COCO 2017...


Downloading: "https://download.pytorch.org/models/resnext50_32x4d-1a0047aa.pth" to /root/.cache/torch/hub/checkpoints/resnext50_32x4d-1a0047aa.pth
100%|██████████| 95.8M/95.8M [00:01<00:00, 82.0MB/s]



🚀 Evaluating vit_s_16 on COCO 2017...
❌ Model vit_s_16 is not available in torchvision.

🚀 Evaluating swin_t on COCO 2017...


Downloading: "https://download.pytorch.org/models/swin_t-704ceda3.pth" to /root/.cache/torch/hub/checkpoints/swin_t-704ceda3.pth
100%|██████████| 108M/108M [00:01<00:00, 78.4MB/s]



🚀 Evaluating convnext_small on COCO 2017...


Downloading: "https://download.pytorch.org/models/convnext_small-0c510722.pth" to /root/.cache/torch/hub/checkpoints/convnext_small-0c510722.pth
100%|██████████| 192M/192M [00:01<00:00, 147MB/s]



📊 Model Performance Summary:
             Model          Category  Model Size (MB)  FLOPs (G)  \
0     mobilenet_v2           Edge AI            13.37       0.33   
1  efficientnet_b1           Edge AI            29.73       0.61   
2       mnasnet1_0           Edge AI            16.72       0.34   
3   regnet_x_400mf           Edge AI            20.97       0.43   
4         resnet18  High-Performance            44.59       1.82   
5  resnext50_32x4d  High-Performance            95.48       4.29   
6           swin_t  High-Performance           107.91       2.98   
7   convnext_small  High-Performance           191.59       8.70   

   Avg Inference Time (s)  Min Inference Time (s)  Max Inference Time (s)  \
0                  0.0171                  0.0062                  0.2239   
1                  0.0373                  0.0126                  0.1497   
2                  0.0162                  0.0056                  0.0688   
3                  0.0328                  0.0108

In [8]:
import os
import requests
import zipfile

# ✅ Define COCO dataset directory in Google Drive
coco_dir = "/content/drive/My Drive/Dataset/COCO2017"
os.makedirs(coco_dir, exist_ok=True)

# ✅ COCO dataset URLs and corresponding folders
coco_data = {
    "val_images": {
        "url": "http://images.cocodataset.org/zips/val2017.zip",
        "folder": os.path.join(coco_dir, "val2017")
    },
    "annotations": {
        "url": "http://images.cocodataset.org/annotations/annotations_trainval2017.zip",
        "folder": os.path.join(coco_dir, "annotations")
    },
}

# ✅ Function to download and extract files
def download_and_extract(url, save_path, extract_path):
    filename = url.split("/")[-1]
    filepath = os.path.join(save_path, filename)

    # Download file if not already exists
    if not os.path.exists(filepath):
        print(f"⬇️ Downloading {filename}...")
        response = requests.get(url, stream=True)
        with open(filepath, "wb") as file:
            for chunk in response.iter_content(chunk_size=1024):
                file.write(chunk)
        print(f"✅ Downloaded {filename}")

    # Extract file
    print(f"📦 Extracting {filename}...")
    with zipfile.ZipFile(filepath, "r") as zip_ref:
        zip_ref.extractall(extract_path)
    print(f"✅ Extracted {filename}")

# ✅ Check and download only missing folders (Skip train2017)
for key, data in coco_data.items():
    folder_path = data["folder"]

    if not os.path.exists(folder_path):
        print(f"\n🚀 {folder_path} is missing. Downloading...")
        download_and_extract(data["url"], coco_dir, coco_dir)
    else:
        print(f"✅ {folder_path} already exists. Skipping download.")

print("\n🎉 COCO 2017 dataset is fully available in Google Drive at:", coco_dir)



🚀 /content/drive/My Drive/Dataset/COCO2017/val2017 is missing. Downloading...
⬇️ Downloading val2017.zip...
✅ Downloaded val2017.zip
📦 Extracting val2017.zip...
✅ Extracted val2017.zip

🚀 /content/drive/My Drive/Dataset/COCO2017/annotations is missing. Downloading...
⬇️ Downloading annotations_trainval2017.zip...
✅ Downloaded annotations_trainval2017.zip
📦 Extracting annotations_trainval2017.zip...
✅ Extracted annotations_trainval2017.zip

🎉 COCO 2017 dataset is fully available in Google Drive at: /content/drive/My Drive/Dataset/COCO2017


**Running Yolo models on COCO2017 Dataset**

In [17]:
import torch
import time
import numpy as np
import gc
import copy
from tqdm import tqdm
import pandas as pd
from ultralytics import YOLO  # YOLOv8
from torchinfo import summary  # Alternative to `thop`

# ✅ Define device (GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# ✅ VRAM Usage Tracking
def get_memory_usage():
    if torch.cuda.is_available():
        return torch.cuda.max_memory_allocated() / (1024 ** 2)  # Convert bytes to MB
    return 0  # Return 0 if using CPU

# ✅ YOLO Model Variants
yolo_models = {
    "yolov5s": "yolov5s.pt",
    "yolov5m": "yolov5m.pt",
    "yolov5l": "yolov5l.pt",
    "yolov5x": "yolov5x.pt",
    "yolov8n": "yolov8n.pt",
    "yolov8s": "yolov8s.pt",
    "yolov8m": "yolov8m.pt",
    "yolov8l": "yolov8l.pt",
}

# ✅ Function to compute FLOPs and Model Size
def compute_flops_and_size(model):
    try:
        dummy_input = torch.randn(1, 3, 640, 640).to(device)  # Dummy input

        # ✅ Compute Model Size (MB)
        model_size = sum(p.numel() for p in model.parameters()) * 4 / (1024 ** 2)  # MB

        # ✅ Compute FLOPs using `torchinfo.summary()`
        model_summary = summary(model, input_size=(1, 3, 640, 640), verbose=0)
        total_flops = model_summary.total_mult_adds / 1e9  # Convert to GFLOPs

        return round(model_size, 2), round(total_flops, 2)
    except Exception as e:
        print(f"⚠️ Could not compute FLOPs for model: {e}")
        return 0, 0

# ✅ Function to evaluate YOLO models
def evaluate_yolo(model_name, num_trials=5):
    print(f"\n🚀 Evaluating {model_name}...")

    # ✅ Load YOLO Model (Reinitialize each time)
    try:
        model = YOLO(yolo_models[model_name]).to(device)  # Load & move to CUDA
        model.eval()
    except Exception as e:
        print(f"⚠️ Error loading {model_name}: {e}")
        return None

    # ✅ Compute FLOPs & Model Size
    model_size, flops = compute_flops_and_size(model)

    # ✅ Performance tracking
    total_times = []
    max_vram = 0

    with torch.no_grad():
        for _ in range(num_trials):
            dummy_input = torch.randn(1, 3, 640, 640).to(device) / 255.0  # Normalize & move to CUDA
            start_time = time.time()
            _ = model(dummy_input)  # Run inference
            total_times.append(time.time() - start_time)

            # ✅ Track max VRAM usage
            max_vram = max(max_vram, get_memory_usage())

    # ✅ Compute final metrics
    avg_time = np.mean(total_times)
    min_time, max_time = np.min(total_times), np.max(total_times)
    fps = 1 / avg_time if avg_time > 0 else 0
    throughput = num_trials / avg_time if avg_time > 0 else 0

    # ✅ Clear GPU memory for next model
    torch.cuda.empty_cache()
    gc.collect()

    # ✅ Determine model category (Edge AI vs High-Performance)
    edge_models = ["yolov5s", "yolov8n", "yolov5m", "yolov8s"]
    category = "Edge AI" if model_name in edge_models else "High-Performance"

    return {
        "Model": model_name,
        "Category": category,
        "Model Size (MB)": model_size,
        "FLOPs (G)": flops,
        "Avg Inference Time (s)": round(avg_time, 4),
        "Min Inference Time (s)": round(min_time, 4),
        "Max Inference Time (s)": round(max_time, 4),
        "FPS": round(fps, 2),
        "Throughput (images/sec)": round(throughput, 2),
        "Max VRAM Usage (MB)": round(max_vram, 2),
    }

# ✅ Run evaluation for all YOLO models
results = []
for model_name in yolo_models.keys():
    result = evaluate_yolo(model_name)
    if result:  # Only append if evaluation was successful
        results.append(result)

# ✅ Display results in a table
df_results = pd.DataFrame(results)
print("\n📊 YOLO Model Performance Summary:")
print(df_results)

# ✅ Save results as CSV
df_results.to_csv("yolo_performance_results.csv", index=False)








Using device: cuda

🚀 Evaluating yolov5s...
PRO TIP 💡 Replace 'model=yolov5s.pt' with new 'model=yolov5su.pt'.
YOLOv5 'u' models are trained with https://github.com/ultralytics/ultralytics and feature improved performance vs standard YOLOv5 models trained with https://github.com/ultralytics/yolov5.


0: 640x640 (no detections), 30.4ms
Speed: 0.0ms preprocess, 30.4ms inference, 2.6ms postprocess per image at shape (1, 3, 640, 640)
[34m[1mengine/trainer: [0mtask=detect, mode=train, model=yolov5s.pt, data=coco.yaml, epochs=100, time=None, patience=100, batch=16, imgsz=640, save=True, save_period=-1, cache=False, device=cuda:0, workers=8, project=None, name=train, exist_ok=False, pretrained=True, optimizer=auto, verbose=True, seed=0, deterministic=True, single_cls=False, rect=False, cos_lr=False, close_mosaic=10, resume=False, amp=True, fraction=1.0, profile=False, freeze=None, multi_scale=False, overlap_mask=True, mask_ratio=4, dropout=0.0, val=True, split=val, save_json=False, save_hy

100%|██████████| 169M/169M [00:04<00:00, 39.0MB/s]
Unzipping /content/datasets/coco2017labels-segments.zip to /content/datasets/coco...: 100%|██████████| 122232/122232 [00:20<00:00, 5982.28file/s]

Downloading http://images.cocodataset.org/zips/train2017.zip to '/content/datasets/coco/images/train2017.zip'...
Downloading http://images.cocodataset.org/zips/val2017.zip to '/content/datasets/coco/images/val2017.zip'...
Downloading http://images.cocodataset.org/zips/test2017.zip to '/content/datasets/coco/images/test2017.zip'...





Dataset download success ✅ (1398.8s), saved to [1m/content/datasets[0m

Downloading https://ultralytics.com/assets/Arial.ttf to '/root/.config/Ultralytics/Arial.ttf'...


100%|██████████| 755k/755k [00:00<00:00, 106MB/s]



                   from  n    params  module                                       arguments                     
  0                  -1  1      3520  ultralytics.nn.modules.conv.Conv             [3, 32, 6, 2, 2]              
  1                  -1  1     18560  ultralytics.nn.modules.conv.Conv             [32, 64, 3, 2]                
  2                  -1  1     18816  ultralytics.nn.modules.block.C3              [64, 64, 1]                   
  3                  -1  1     73984  ultralytics.nn.modules.conv.Conv             [64, 128, 3, 2]               
  4                  -1  2    115712  ultralytics.nn.modules.block.C3              [128, 128, 2]                 
  5                  -1  1    295424  ultralytics.nn.modules.conv.Conv             [128, 256, 3, 2]              
  6                  -1  3    625152  ultralytics.nn.modules.block.C3              [256, 256, 3]                 
  7                  -1  1   1180672  ultralytics.nn.modules.conv.Conv             [256

100%|██████████| 5.35M/5.35M [00:00<00:00, 301MB/s]


[34m[1mAMP: [0mchecks passed ✅


[34m[1mtrain: [0mScanning /content/datasets/coco/labels/train2017... 117266 images, 1021 backgrounds, 0 corrupt: 100%|██████████| 118287/118287 [05:59<00:00, 328.68it/s]


[34m[1mtrain: [0mNew cache created: /content/datasets/coco/labels/train2017.cache
[34m[1malbumentations: [0mBlur(p=0.01, blur_limit=(3, 7)), MedianBlur(p=0.01, blur_limit=(3, 7)), ToGray(p=0.01, num_output_channels=3, method='weighted_average'), CLAHE(p=0.01, clip_limit=(1.0, 4.0), tile_grid_size=(8, 8))


[34m[1mval: [0mScanning /content/datasets/coco/labels/val2017... 4952 images, 48 backgrounds, 0 corrupt: 100%|██████████| 5000/5000 [00:14<00:00, 335.96it/s]


[34m[1mval: [0mNew cache created: /content/datasets/coco/labels/val2017.cache
Plotting labels to runs/detect/train/labels.jpg... 
[34m[1moptimizer:[0m 'optimizer=auto' found, ignoring 'lr0=0.01' and 'momentum=0.937' and determining best 'optimizer', 'lr0' and 'momentum' automatically... 
[34m[1moptimizer:[0m SGD(lr=0.01, momentum=0.9) with parameter groups 69 weight(decay=0.0), 76 weight(decay=0.0005), 75 bias(decay=0.0)
[34m[1mTensorBoard: [0mmodel graph visualization added ✅
Image sizes 640 train, 640 val
Using 2 dataloader workers
Logging results to [1mruns/detect/train[0m
Starting training for 100 epochs...

      Epoch    GPU_mem   box_loss   cls_loss   dfl_loss  Instances       Size


      1/100      4.24G      1.656      2.704      1.558        237        640:  46%|████▌     | 3375/7393 [34:10<40:41,  1.65it/s]


KeyboardInterrupt: 