<a href="https://colab.research.google.com/github/RH00000/UH_RTS_Research_ML/blob/main/profiling_stats_for_abcd_models_for_10000_images.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install torch torchvision tensorflow-datasets

In [7]:
import time
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import IterableDataset, DataLoader
from torchvision import models, transforms
from PIL import Image
import tensorflow_datasets as tfds

In [8]:
# 1: Load the ImageNetV2 TopImages split via TFDS
builder = tfds.builder("imagenet_v2", config="topimages")
builder.download_and_prepare()
tfds_ds = builder.as_dataset(split="test", as_supervised=True)

In [9]:
# 2: Wrap TFDS dataset in a PyTorch IterableDataset
class ImageNetV2TopImages(IterableDataset):
    def __init__(self, tfds_dataset, transform=None):
        self.ds = tfds_dataset
        self.transform = transform

    def __iter__(self):
        for img, label in tfds.as_numpy(self.ds):
            pil = Image.fromarray(img)
            if self.transform:
                pil = self.transform(pil)
            yield pil, label

In [10]:
# 3: Define preprocessing to match ResNet expectations
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.485,0.456,0.406], [0.229,0.224,0.225]),
])


In [11]:
# 4: Instantiate DataLoader
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dataset = ImageNetV2TopImages(tfds_ds, transform=preprocess)
loader  = DataLoader(dataset, batch_size=1, num_workers=4)



In [12]:
# 5: Load pretrained ResNet models (A-D)
resnet18  = models.resnet18(pretrained=True).to(device).eval()   # model A
resnet34  = models.resnet34(pretrained=True).to(device).eval()   # model B
resnet50  = models.resnet50(pretrained=True).to(device).eval()   # model C
resnet152 = models.resnet152(pretrained=True).to(device).eval()  # model D

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 132MB/s]
Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 108MB/s]
Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 169MB/s]
Downloading: "https://download.pytorch.org/models/resnet152-394f9c45.pth" to /root/.cache/torch/hub/checkpoints/resnet152-394f9c45.pth
100%|██████████| 230M/230M [00:01<00:00, 124MB/s]


In [13]:
# 6: Profile all models on the validation set
N = 10000  # number of images in the TopImages split
confs = np.zeros((N, 4), dtype=np.float32)
times = np.zeros((N, 4), dtype=np.float32)
ok_flags = np.zeros((N, 4), dtype=bool)
models_list = [resnet18, resnet34, resnet50, resnet152]

for idx, (img, label) in enumerate(loader):
    img = img.to(device)
    label = label.to(device).item()

    for i, model in enumerate(models_list):
        start = time.perf_counter()
        with torch.no_grad():
            logits = model(img)
            probs  = F.softmax(logits, dim=1)
            conf, pred = torch.max(probs, dim=1)
        elapsed = time.perf_counter() - start

        confs[idx, i]    = conf.item()
        times[idx, i]    = elapsed
        ok_flags[idx, i] = (pred.item() == label)

    if (idx + 1) % 500 == 0:
        print(f"Processed {idx+1}/{N} images")
    if idx >= N - 1:
        break

# 7: Save profiling data
np.savez('validation_stats.npz', confs=confs, times=times, oks=ok_flags)
print("Saved profiling data to 'validation_stats.npz'")

Processed 500/10000 images
Processed 1000/10000 images
Processed 1500/10000 images
Processed 2000/10000 images
Processed 2500/10000 images
Processed 3000/10000 images
Processed 3500/10000 images
Processed 4000/10000 images
Processed 4500/10000 images
Processed 5000/10000 images
Processed 5500/10000 images
Processed 6000/10000 images
Processed 6500/10000 images
Processed 7000/10000 images
Processed 7500/10000 images
Processed 8000/10000 images
Processed 8500/10000 images
Processed 9000/10000 images
Processed 9500/10000 images
Processed 10000/10000 images
Saved profiling data to 'validation_stats.npz'


In [14]:
# 8: Preview the first 5 entries
data = np.load('validation_stats.npz')
confs = data['confs']; times = data['times']; oks = data['oks']
print("\nFirst 5 confidences (A,B,C,D):\n", confs[:5])
print("\nFirst 5 times (sec) (A,B,C,D):\n", times[:5])
print("\nFirst 5 correctness flags (A,B,C,D):\n", oks[:5])


First 5 confidences (A,B,C,D):
 [[0.22229803 0.2532501  0.11593659 0.7026045 ]
 [0.22229803 0.2532501  0.11593659 0.7026045 ]
 [0.22229803 0.2532501  0.11593659 0.7026045 ]
 [0.22229803 0.2532501  0.11593659 0.7026045 ]
 [0.8048309  0.4974504  0.9526833  0.877165  ]]

First 5 times (sec) (A,B,C,D):
 [[1.392876   0.0063738  0.1137031  0.02299892]
 [0.0049773  0.00786057 0.00701183 0.02092551]
 [0.00403045 0.0080063  0.00741818 0.01948753]
 [0.00507551 0.00809344 0.00792632 0.01992278]
 [0.00437528 0.0077288  0.00675506 0.01818722]]

First 5 correctness flags (A,B,C,D):
 [[False False False False]
 [False False False False]
 [False False False False]
 [False False False False]
 [False False False False]]


In [15]:
# 9: Quick summary statistics
import pandas as pd
# Compute per-model averages
avg_conf = confs.mean(axis=0)
std_conf = confs.std(axis=0)
avg_time = times.mean(axis=0)
std_time = times.std(axis=0)
acc      = oks.mean(axis=0)
# Assemble into DataFrame
summary = pd.DataFrame({
    'Avg Confidence': avg_conf,
    'Std Confidence': std_conf,
    'Avg Time (s)'  : avg_time,
    'Std Time (s)'  : std_time,
    'Accuracy'      : acc
}, index=['ResNet18','ResNet34','ResNet50','ResNet152'])

print("Summary statistics by model:")
print(summary)

Summary statistics by model:
           Avg Confidence  Std Confidence  Avg Time (s)  Std Time (s)  \
ResNet18         0.727814        0.273904      0.004681      0.013954   
ResNet34         0.775295        0.253715      0.006958      0.001803   
ResNet50         0.799098        0.244461      0.007820      0.002676   
ResNet152        0.840349        0.216998      0.021013      0.006444   

           Accuracy  
ResNet18     0.7244  
ResNet34     0.7508  
ResNet50     0.7716  
ResNet152    0.8016  


In [16]:
# Optional: convert arrays to CSV files for easier inspection:
import pandas as pd
# Create DataFrames
idxs = np.arange(N)
cols = ['ResNet18','ResNet34','ResNet50','ResNet152']
df_confs = pd.DataFrame(confs, index=idxs, columns=cols)
df_times = pd.DataFrame(times, index=idxs, columns=cols)
df_oks   = pd.DataFrame(oks.astype(int), index=idxs, columns=cols)

# Save to CSV
df_confs.to_csv('confs.csv', index_label='sample')
df_times.to_csv('times.csv', index_label='sample')
df_oks.to_csv('oks.csv',   index_label='sample')

print("Saved CSVs: confs.csv, times.csv, oks.csv")

Saved CSVs: confs.csv, times.csv, oks.csv


In [None]:
# Download CSVs in Colab (uncomment to use)
# files.download('confs.csv')
# files.download('times.csv')
# files.download('oks.csv')