In [1]:
# Check GPU
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)


!pip install timm --quiet


Device: cuda


In [2]:
from google.colab import drive

# Attach Google Drive to the Colab filesystem at /content/drive.
drive.mount("/content/drive")

# Drive folder from which the saved model weights are read (and where the
# prediction CSV is written later in the notebook).
SAVE_DIR = "/content/drive/MyDrive/terafac_models"




Mounted at /content/drive


In [8]:
from google.colab import drive
import os
import zipfile

# Drive is mounted again here; per the recorded output, a repeat call only
# reports that the drive is already mounted.
drive.mount("/content/drive")

# Source archive on Drive and the local directory it is unpacked into.
ZIP_PATH = "/content/drive/MyDrive/cars196/dataset/archive (7).zip"
BASE_DIR = "/content/cars196"

# Create the target directory if needed, then unpack the whole archive into it.
os.makedirs(BASE_DIR, exist_ok=True)
with zipfile.ZipFile(ZIP_PATH, mode="r") as archive:
    archive.extractall(path=BASE_DIR)

print("Dataset extracted!")
print(os.listdir(BASE_DIR))


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Dataset extracted!
['car_devkit', 'cars_test', 'cars_train']


In [3]:
import os
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image
import timm


In [9]:
class CarsDataset(Dataset):
    """Dataset that serves bounding-box crops of images listed in a DataFrame.

    Each row of ``df`` must provide:
      * ``"image"`` - path handed to ``Image.open``,
      * ``"bbox"``  - four values unpacked as ``(x1, y1, x2, y2)`` and passed
        to ``Image.crop``,
      * ``"label"`` - returned unchanged alongside the image.

    Args:
        df: DataFrame with the columns described above.
        transform: Optional callable applied to the cropped RGB image.
    """

    def __init__(self, df, transform=None):
        self.df = df
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the backing DataFrame."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at integer position ``idx``."""
        record = self.df.iloc[idx]

        # Decode to RGB first, then cut out the annotated box.
        picture = Image.open(record["image"]).convert("RGB")
        left, top, right, bottom = record["bbox"]
        picture = picture.crop((left, top, right, bottom))

        target = record["label"]
        if self.transform:
            picture = self.transform(picture)
        return picture, target

# Evaluation-time preprocessing: fixed 224x224 resize, tensor conversion,
# then per-channel normalisation with the mean/std values given below.
val_tf = transforms.Compose(
    [
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(
            mean=[0.485, 0.456, 0.406],
            std=[0.229, 0.224, 0.225],
        ),
    ]
)


In [None]:
from sklearn.model_selection import StratifiedShuffleSplit

# .mat annotation file for the training images (passed to load_annotations below).
TRAIN_ANNO = "/content/cars196/car_devkit/devkit/cars_train_annos.mat"
# Directory that the annotation file names are joined onto to form image paths.
TRAIN_IMG_DIR = "/content/cars196/cars_train/cars_train"

# load annotations (reusing our level 3 code)
import scipy.io
def load_annotations(mat_file, img_dir, has_labels=True):
    """Read a .mat annotation file into a DataFrame.

    The file's ``'annotations'`` entry is read and its first row iterated.
    For every record, positions 0-3 supply the bounding box. When
    ``has_labels`` is true, position 4 is the class id (stored minus one) and
    position 5 the file name; otherwise the label is ``-1`` and the file name
    is taken from position 4.

    Args:
        mat_file: Path passed to ``scipy.io.loadmat``.
        img_dir: Directory joined with each file name to build the image path.
        has_labels: Whether the records carry a class field.

    Returns:
        DataFrame with columns ``image`` (path), ``label`` (int) and
        ``bbox`` (tuple ``(x1, y1, x2, y2)`` of ints).
    """
    records = scipy.io.loadmat(mat_file)['annotations'][0]

    # The file name sits one slot later when a class field is present.
    fname_pos = 5 if has_labels else 4

    rows = []
    for rec in records:
        box = tuple(int(rec[k][0][0]) for k in range(4))
        # Class ids are shifted down by one (presumably 1-based in the file).
        cls = int(rec[4][0][0]) - 1 if has_labels else -1
        rows.append({
            "image": os.path.join(img_dir, rec[fname_pos][0]),
            "label": cls,
            "bbox": box,
        })
    return pd.DataFrame(rows)

train_df_full = load_annotations(TRAIN_ANNO, TRAIN_IMG_DIR, has_labels=True)

# Single stratified 80/20 split with a fixed seed; only the validation part
# is used below.
# NOTE(review): presumably this mirrors the split used when the models were
# trained - confirm, otherwise validation images may overlap training data.
splitter = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, val_idx = next(
    splitter.split(train_df_full["image"], train_df_full["label"])
)
val_df = train_df_full.iloc[val_idx].reset_index(drop=True)

# Deterministic (unshuffled) loader over the validation crops.
val_ds = CarsDataset(val_df, transform=val_tf)
val_loader = DataLoader(
    val_ds,
    batch_size=16,
    shuffle=False,
    num_workers=2,
)


In [11]:
# Rebuild both architectures with a 196-way head and restore the saved weights.
# map_location=device makes torch.load remap the stored tensors onto the
# device selected earlier, so checkpoints written from a GPU session still
# load when this notebook falls back to CPU.

# EfficientNet-B4
efficientnet_model = timm.create_model("efficientnet_b4", pretrained=False, num_classes=196)
efficientnet_model.load_state_dict(
    torch.load(f"{SAVE_DIR}/efficientnet_cars196_final.pth", map_location=device)
)
efficientnet_model.to(device)
efficientnet_model.eval()  # inference mode for dropout / batch-norm layers

# ConvNeXt-Tiny
convnext_model = timm.create_model("convnext_tiny", pretrained=False, num_classes=196)
convnext_model.load_state_dict(
    torch.load(f"{SAVE_DIR}/convnext_tiny_best.pth", map_location=device)
)
convnext_model.to(device)
convnext_model.eval()


ConvNeXt(
  (stem): Sequential(
    (0): Conv2d(3, 96, kernel_size=(4, 4), stride=(4, 4))
    (1): LayerNorm2d((96,), eps=1e-06, elementwise_affine=True)
  )
  (stages): Sequential(
    (0): ConvNeXtStage(
      (downsample): Identity()
      (blocks): Sequential(
        (0): ConvNeXtBlock(
          (conv_dw): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)
          (norm): LayerNorm((96,), eps=1e-06, elementwise_affine=True)
          (mlp): Mlp(
            (fc1): Linear(in_features=96, out_features=384, bias=True)
            (act): GELU()
            (drop1): Dropout(p=0.0, inplace=False)
            (norm): Identity()
            (fc2): Linear(in_features=384, out_features=96, bias=True)
            (drop2): Dropout(p=0.0, inplace=False)
          )
          (shortcut): Identity()
          (drop_path): Identity()
        )
        (1): ConvNeXtBlock(
          (conv_dw): Conv2d(96, 96, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), groups=96)


In [None]:
# Soft-voting ensemble evaluation over the validation loader.
all_labels = []
all_preds = []

# Pure inference: disable autograd for the whole pass, not just the forwards.
with torch.no_grad():
    for imgs, labels in val_loader:
        imgs = imgs.to(device)

        # Per-model class probabilities.
        preds1 = torch.softmax(efficientnet_model(imgs), dim=1)
        preds2 = torch.softmax(convnext_model(imgs), dim=1)

        # soft voting: average probabilities
        ensemble_preds = (preds1 + preds2) / 2
        ensemble_labels = ensemble_preds.argmax(1)

        # Labels are only compared on the host afterwards, so they are kept
        # as delivered by the loader instead of being copied to the device
        # and straight back; .cpu() is a no-op for tensors already on CPU.
        all_labels.append(labels.cpu())
        all_preds.append(ensemble_labels.cpu())

all_labels = torch.cat(all_labels)
all_preds = torch.cat(all_preds)

# Fraction of validation samples whose ensemble prediction matches the label.
accuracy = (all_labels == all_preds).float().mean()
print(f"Ensemble Validation Accuracy: {accuracy:.4f}")


Ensemble Validation Accuracy: 0.9233


In [13]:
# Persist per-sample ground truth and ensemble predictions next to the models.
df_results = pd.DataFrame(
    dict(
        true_label=all_labels.numpy(),
        pred_label=all_preds.numpy(),
    )
)
df_results.to_csv(
    f"{SAVE_DIR}/ensemble_val_predictions.csv",
    index=False,
)
print("Saved ensemble predictions!")


Saved ensemble predictions!
