# Baseline EfficientNet-B0

In [None]:
import time
import csv
import math
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as T
from torch.utils.data import DataLoader
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights

from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Use the GPU when PyTorch reports one as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Device:", device)

Device: cuda


In [None]:

# 1. CIFAR-10 Dataset (Resize to 128x128)

# Both pipelines resize every image to 128x128 and convert it to a tensor;
# only the training pipeline adds a random horizontal flip.
transform_test = T.Compose([T.Resize((128, 128)), T.ToTensor()])
transform_train = T.Compose(
    [T.Resize((128, 128)), T.RandomHorizontalFlip(), T.ToTensor()]
)

# download=True asks torchvision to fetch CIFAR-10 into ./data if needed.
trainset = torchvision.datasets.CIFAR10(
    "./data", train=True, transform=transform_train, download=True
)
testset = torchvision.datasets.CIFAR10(
    "./data", train=False, transform=transform_test, download=True
)

# Training batches are reshuffled every epoch; the test order stays fixed.
trainloader = DataLoader(
    dataset=trainset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)
testloader = DataLoader(
    dataset=testset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)

num_classes = 10
classes = trainset.classes

100%|██████████| 170M/170M [00:15<00:00, 11.1MB/s] 


In [None]:

# 2. Simple FLOPs Counter (Conv2d + Linear only)

def count_flops(model, input_size=(1, 3, 128, 128)):
    """Estimate the multiply-accumulate (MAC) count of one forward pass.

    Forward hooks are attached to every ``nn.Conv2d`` and ``nn.Linear`` found
    in ``model.modules()``. A random tensor of shape ``input_size`` is then
    pushed through the model in eval mode under ``torch.no_grad()`` and the
    per-call costs recorded by the hooks are summed. A layer that is invoked
    several times during the forward pass is counted once per invocation.

    Only the two layer types above are counted, bias additions are ignored,
    and the batch dimension is not multiplied in, so the result is a
    per-sample MAC estimate rather than an exact FLOP count.

    The hooks are always removed and the model's original train/eval mode is
    restored, even if the forward pass raises.

    Args:
        model: The ``nn.Module`` to profile.
        input_size: Shape of the random dummy input.

    Returns:
        int: Sum of the MACs recorded by the hooks.
    """
    hooks = []
    macs = []

    def conv_hook(layer, inp, out):
        # inp[0] is indexed as (B, Cin, H, W); out as (B, Cout, out_h, out_w).
        in_per_group = inp[0].shape[1] // layer.groups
        k_h, k_w = layer.kernel_size
        out_h, out_w = out.shape[2], out.shape[3]
        # MACs = Cout * (Cin/groups) * kH * kW * out_h * out_w
        macs.append(layer.out_channels * in_per_group * k_h * k_w * out_h * out_w)

    def linear_hook(layer, inp, out):
        # One MAC per (input feature, output feature) pair.
        macs.append(inp[0].shape[-1] * out.shape[-1])

    # Place the dummy input wherever the model lives; a model without
    # parameters falls back to the CPU instead of raising StopIteration.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device("cpu")
    dummy = torch.randn(*input_size).to(target)

    was_training = model.training
    try:
        for layer in model.modules():
            if isinstance(layer, nn.Conv2d):
                hooks.append(layer.register_forward_hook(conv_hook))
            elif isinstance(layer, nn.Linear):
                hooks.append(layer.register_forward_hook(linear_hook))

        model.eval()
        with torch.no_grad():
            model(dummy)
    finally:
        for handle in hooks:
            handle.remove()
        # Do not leave the caller's model silently switched to eval mode.
        model.train(was_training)

    return sum(macs)

In [None]:

# 3. Build Baseline EfficientNet-B0

def build_efficientnet_b0_baseline(num_classes=10):
    """Create an ImageNet-pretrained EfficientNet-B0 with a new output layer.

    The layer at ``classifier[1]`` is swapped for a fresh ``nn.Linear`` that
    keeps the same input width and emits ``num_classes`` outputs. The model
    is moved to the module-level ``device`` before it is returned.
    """
    net = efficientnet_b0(weights=EfficientNet_B0_Weights.IMAGENET1K_V1)
    old_head = net.classifier[1]
    net.classifier[1] = nn.Linear(old_head.in_features, num_classes)
    net = net.to(device)
    return net

# Run configuration: model_name is the label used in log lines and in the
# names of the files written later on.
model_name = "EfficientNetB0"
num_epochs = 10

model = build_efficientnet_b0_baseline(num_classes=num_classes)

# Cross-entropy loss; Adam updates every model parameter at lr=1e-4.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-4,
)

Downloading: "https://download.pytorch.org/models/efficientnet_b0_rwightman-7f5810bc.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b0_rwightman-7f5810bc.pth
100%|██████████| 20.5M/20.5M [00:00<00:00, 224MB/s]


In [None]:

# 4. Training & Evaluation Helpers

def train_one_epoch(model, loader, optimizer, criterion):
    """Run one optimisation pass over ``loader`` in training mode.

    Each batch is moved to the module-level ``device``, followed by a
    forward pass, a backward pass and one optimizer step.

    Returns:
        tuple: ``(mean of the per-batch losses, accuracy in percent)``, where
        accuracy is measured on the predictions made during the pass.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_imgs, batch_labels in loader:
        batch_imgs = batch_imgs.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_imgs)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        predicted = logits.argmax(dim=1)
        n_seen += batch_labels.size(0)
        n_correct += (predicted == batch_labels).sum().item()

    mean_loss = loss_sum / len(loader)
    accuracy = 100.0 * n_correct / n_seen
    return mean_loss, accuracy

def evaluate(model, loader):
    """Score ``model`` on ``loader`` in eval mode without tracking gradients.

    Returns:
        tuple: ``(accuracy in percent, labels, predictions)``; the last two
        are NumPy arrays collected across all batches in loader order.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    label_log = []
    pred_log = []

    with torch.no_grad():
        for batch_imgs, batch_labels in loader:
            batch_imgs = batch_imgs.to(device)
            batch_labels = batch_labels.to(device)

            predicted = model(batch_imgs).argmax(dim=1)

            n_seen += batch_labels.size(0)
            n_correct += (predicted == batch_labels).sum().item()
            label_log.extend(batch_labels.cpu().numpy())
            pred_log.extend(predicted.cpu().numpy())

    accuracy = 100.0 * n_correct / n_seen
    return accuracy, np.array(label_log), np.array(pred_log)

def plot_learning_curves(name, train_losses, train_accs, test_accs):
    """Save the loss curve and the accuracy curves as two PNG files.

    Epochs are numbered from 1 on the x-axis. The files are written to
    ``/kaggle/working/`` as ``{name}_loss_curve.png`` and
    ``{name}_accuracy_curves.png``.
    """
    epochs = range(1, len(train_losses) + 1)

    def _draw(curves, y_label, title, out_path):
        # One figure per call: plot each (values, legend) pair, save, close.
        plt.figure()
        for values, legend in curves:
            plt.plot(epochs, values, label=legend)
        plt.xlabel("Epoch")
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()
        plt.tight_layout()
        plt.savefig(out_path)
        plt.close()

    _draw(
        [(train_losses, "Train Loss")],
        "Loss",
        f"{name} - Training Loss",
        f"/kaggle/working/{name}_loss_curve.png",
    )
    _draw(
        [(train_accs, "Train Acc"), (test_accs, "Test Acc")],
        "Accuracy (%)",
        f"{name} - Accuracy Curves",
        f"/kaggle/working/{name}_accuracy_curves.png",
    )

def save_confusion_matrix(name, y_true, y_pred, classes):
    """Plot the confusion matrix of ``y_pred`` against ``y_true`` and save it.

    The heatmap shows the integer count in every cell and, when possible,
    labels both axes with ``classes``. The figure is written to
    ``/kaggle/working/{name}_confusion_matrix.png``.

    Args:
        name: Model label used in the title and the output filename.
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        classes: Class names used as tick labels.
            NOTE(review): assumes the names are ordered like the sorted
            label values - confirm for datasets other than CIFAR-10.
    """
    cm = confusion_matrix(y_true, y_pred)

    # confusion_matrix only has rows for labels that actually occur, so use
    # the class names only when there is exactly one name per row; otherwise
    # the names would be attached to the wrong rows.
    if classes is not None and len(classes) == cm.shape[0]:
        tick_labels = list(classes)
    else:
        tick_labels = "auto"

    plt.figure(figsize=(8, 6))
    sns.heatmap(
        cm,
        cmap="Blues",
        cbar=False,
        annot=True,
        fmt="d",
        xticklabels=tick_labels,
        yticklabels=tick_labels,
    )
    plt.title(f"{name} - CIFAR-10 Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(f"/kaggle/working/{name}_confusion_matrix.png")
    plt.close()

In [None]:

# 5. Training Loop

# Per-epoch history, later used for the learning-curve plots.
train_losses = []
train_accs = []
test_accs = []

# The overall timer spans the whole loop, so it includes the per-epoch
# test-set evaluation as well as the training passes.
train_start = time.time()

for epoch in range(num_epochs):
    t0 = time.time()
    loss, train_acc = train_one_epoch(model, trainloader, optimizer, criterion)
    test_acc = evaluate(model, testloader)[0]
    t1 = time.time()
    epoch_secs = t1 - t0

    train_losses.append(loss)
    train_accs.append(train_acc)
    test_accs.append(test_acc)

    epoch_report = (
        f"[{model_name}] Epoch {epoch+1}/{num_epochs} | "
        f"Loss={loss:.4f} | TrainAcc={train_acc:.2f}% | "
        f"TestAcc={test_acc:.2f}% | Time={epoch_secs:.2f}s"
    )
    print(epoch_report)

train_end = time.time()
total_train_time = train_end - train_start

# One more test pass to collect the labels/predictions for the confusion
# matrix; the train accuracy reported is the one from the last epoch.
final_test_acc, y_true, y_pred = evaluate(model, testloader)
final_train_acc = train_accs[-1]

total_minutes = total_train_time / 60
print(f"\n[{model_name}] Total training time: {total_train_time:.2f}s "
      f"({total_minutes:.2f} min)")
print(f"[{model_name}] Final Train Acc: {final_train_acc:.2f}%")
print(f"[{model_name}] Final Test  Acc: {final_test_acc:.2f}%")

plot_learning_curves(model_name, train_losses, train_accs, test_accs)
save_confusion_matrix(model_name, y_true, y_pred, classes)

[EfficientNetB0] Epoch 1/10 | Loss=0.6185 | TrainAcc=81.23% | TestAcc=93.14% | Time=69.49s
[EfficientNetB0] Epoch 2/10 | Loss=0.1828 | TrainAcc=93.93% | TestAcc=94.84% | Time=68.55s
[EfficientNetB0] Epoch 3/10 | Loss=0.1166 | TrainAcc=96.16% | TestAcc=95.51% | Time=68.23s
[EfficientNetB0] Epoch 4/10 | Loss=0.0803 | TrainAcc=97.35% | TestAcc=95.64% | Time=68.48s
[EfficientNetB0] Epoch 5/10 | Loss=0.0584 | TrainAcc=98.04% | TestAcc=95.67% | Time=68.47s
[EfficientNetB0] Epoch 6/10 | Loss=0.0451 | TrainAcc=98.52% | TestAcc=95.59% | Time=68.19s
[EfficientNetB0] Epoch 7/10 | Loss=0.0371 | TrainAcc=98.75% | TestAcc=95.90% | Time=68.74s
[EfficientNetB0] Epoch 8/10 | Loss=0.0311 | TrainAcc=98.99% | TestAcc=95.73% | Time=68.34s
[EfficientNetB0] Epoch 9/10 | Loss=0.0266 | TrainAcc=99.12% | TestAcc=95.74% | Time=68.21s
[EfficientNetB0] Epoch 10/10 | Loss=0.0228 | TrainAcc=99.27% | TestAcc=95.78% | Time=68.19s

[EfficientNetB0] Total training time: 684.90s (11.42 min)
[EfficientNetB0] Final Train Acc: 99.27%
[EfficientNetB0] Final Test  Acc: 95.78%

In [None]:

# 6. FLOPs & Params

# Total element count over every tensor returned by model.parameters().
params = sum(map(torch.numel, model.parameters()))

# Cost estimate for a single 3x128x128 input.
flops = count_flops(model, input_size=(1, 3, 128, 128))

print(f"[{model_name}] Params: {params}")
print(f"[{model_name}] FLOPs:  {flops:.3g}")

[EfficientNetB0] Params: 4020358
[EfficientNetB0] FLOPs:  1.26e+08


In [None]:

# 7. Inference & Training Runtime (100 runs)

# Benchmark single-image latency (forward only, then a full SGD step) and
# peak GPU memory on a random 1x3x128x128 input.
#
# The timed SGD steps really do update the model's weights and buffers. The
# trained state is therefore snapshotted first and restored afterwards, so
# that the model saved later is the one that was actually trained and
# evaluated, not one perturbed by steps on random noise.

_N_WARMUP = 10   # untimed iterations run before the clock starts
_N_TIMED = 100   # iterations averaged into the reported latency


def _cuda_sync():
    """Wait for queued CUDA work so the timestamps bracket the real work."""
    if device == "cuda":
        torch.cuda.synchronize()


def _reset_peak_memory():
    """Empty the CUDA cache and reset the peak-allocation counter."""
    if device == "cuda":
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.synchronize()


def _peak_memory_mb():
    """Return the peak allocated CUDA memory in MiB, or 0.0 without CUDA."""
    if device == "cuda":
        return torch.cuda.max_memory_allocated() / (1024**2)
    return 0.0


dummy_x = torch.randn(1, 3, 128, 128).to(device)

# CPU copy of all parameters and buffers; kept off the GPU so the snapshot
# does not count towards the peak-memory measurements below.
_trained_state = {
    key: value.detach().cpu().clone()
    for key, value in model.state_dict().items()
}

# Inference time
model.eval()
_reset_peak_memory()

with torch.no_grad():
    for _ in range(_N_WARMUP):
        _ = model(dummy_x)

    _cuda_sync()
    t0 = time.perf_counter()
    for _ in range(_N_TIMED):
        _ = model(dummy_x)
    _cuda_sync()
    t1 = time.perf_counter()

infer_ms = (t1 - t0) / _N_TIMED * 1000.0
mem_infer_mb = _peak_memory_mb()

# Training time (single-batch steps)
model.train()
dummy_label = torch.randint(0, num_classes, (1,), device=device)
optimizer_step = optim.SGD(model.parameters(), lr=0.01)
criterion_step = nn.CrossEntropyLoss()


def _train_step():
    """Run one forward/backward/update step on the dummy sample."""
    optimizer_step.zero_grad()
    step_out = model(dummy_x)
    step_loss = criterion_step(step_out, dummy_label)
    step_loss.backward()
    optimizer_step.step()


_reset_peak_memory()
try:
    for _ in range(_N_WARMUP):
        _train_step()

    _cuda_sync()
    t0 = time.perf_counter()
    for _ in range(_N_TIMED):
        _train_step()
    _cuda_sync()
    t1 = time.perf_counter()

    # Read the peak before the restore below touches GPU memory again.
    mem_train_mb = _peak_memory_mb()
finally:
    # Undo the benchmark's weight and buffer updates.
    model.load_state_dict(_trained_state)
    model.eval()

train_ms = (t1 - t0) / _N_TIMED * 1000.0

mem_mb = max(mem_infer_mb, mem_train_mb)

print(f"\n[{model_name}] Inference time / image: {infer_ms:.3f} ms")
print(f"[{model_name}] Train step / image: {train_ms:.3f} ms")
print(f"[{model_name}] Peak GPU memory: {mem_mb:.2f} MB")


[EfficientNetB0] Inference time / image: 8.088 ms
[EfficientNetB0] Train step / image: 26.770 ms
[EfficientNetB0] Peak GPU memory: 100.27 MB


In [None]:

# 8. Save Model & Metrics CSV

# Write the model's state dict next to the other run artefacts.
weights_path = f"/kaggle/working/{model_name}_cifar10_128x128.pth"
torch.save(model.state_dict(), weights_path)

# One header row plus one value row; the comparison section reads this file.
csv_path = f"/kaggle/working/{model_name}_metrics.csv"
metrics_header = [
    "Model", "Params", "FLOPs",
    "Infer_ms", "Train_ms",
    "Memory_MB", "TotalTrainTimeSec",
    "FinalTrainAcc", "FinalTestAcc"
]
metrics_row = [
    model_name,
    params,
    flops,
    infer_ms,
    train_ms,
    mem_mb,
    total_train_time,
    final_train_acc,
    final_test_acc,
]
with open(csv_path, "w", newline="") as f:
    csv.writer(f).writerows([metrics_header, metrics_row])

print(f"\n[{model_name}] Metrics CSV saved to: {csv_path}")
print(f"[{model_name}] Done.")


[EfficientNetB0] Metrics CSV saved to: /kaggle/working/EfficientNetB0_metrics.csv
[EfficientNetB0] Done.


# Oriented 1D EfficientNet-B0

In [None]:
import time
import csv
import math
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as T
from torch.utils.data import DataLoader
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights

from torchvision.transforms.functional import rotate
from torchvision.transforms import InterpolationMode

from sklearn.metrics import confusion_matrix
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd

# Use the GPU when PyTorch reports one as available; otherwise run on the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print("Device:", device)

Device: cuda


In [None]:

# 1. CIFAR-10 Dataset (Resize to 128x128)

# Both pipelines resize every image to 128x128 and convert it to a tensor;
# only the training pipeline adds a random horizontal flip.
transform_test = T.Compose([T.Resize((128, 128)), T.ToTensor()])
transform_train = T.Compose(
    [T.Resize((128, 128)), T.RandomHorizontalFlip(), T.ToTensor()]
)

# download=True asks torchvision to fetch CIFAR-10 into ./data if needed.
trainset = torchvision.datasets.CIFAR10(
    "./data", train=True, transform=transform_train, download=True
)
testset = torchvision.datasets.CIFAR10(
    "./data", train=False, transform=transform_test, download=True
)

# Training batches are reshuffled every epoch; the test order stays fixed.
trainloader = DataLoader(
    dataset=trainset,
    batch_size=128,
    shuffle=True,
    num_workers=2,
)
testloader = DataLoader(
    dataset=testset,
    batch_size=128,
    shuffle=False,
    num_workers=2,
)

num_classes = 10
classes = trainset.classes

In [None]:

# 2. Simple FLOPs Counter

def count_flops(model, input_size=(1, 3, 128, 128)):
    """Estimate the multiply-accumulate (MAC) count of one forward pass.

    Forward hooks are attached to every ``nn.Conv2d`` and ``nn.Linear`` found
    in ``model.modules()``. A random tensor of shape ``input_size`` is then
    pushed through the model in eval mode under ``torch.no_grad()`` and the
    per-call costs recorded by the hooks are summed. A layer that is invoked
    several times during the forward pass is counted once per invocation.

    Only the two layer types above are counted, bias additions are ignored,
    and the batch dimension is not multiplied in. Work done outside those
    layers (for example image rotations performed by a wrapper module) is
    not included, so the result is a per-sample MAC estimate rather than an
    exact FLOP count.

    The hooks are always removed and the model's original train/eval mode is
    restored, even if the forward pass raises.

    Args:
        model: The ``nn.Module`` to profile.
        input_size: Shape of the random dummy input.

    Returns:
        int: Sum of the MACs recorded by the hooks.
    """
    hooks = []
    macs = []

    def conv_hook(layer, inp, out):
        # inp[0] is indexed as (B, Cin, H, W); out as (B, Cout, out_h, out_w).
        in_per_group = inp[0].shape[1] // layer.groups
        k_h, k_w = layer.kernel_size
        out_h, out_w = out.shape[2], out.shape[3]
        # MACs = Cout * (Cin/groups) * kH * kW * out_h * out_w
        macs.append(layer.out_channels * in_per_group * k_h * k_w * out_h * out_w)

    def linear_hook(layer, inp, out):
        # One MAC per (input feature, output feature) pair.
        macs.append(inp[0].shape[-1] * out.shape[-1])

    # Place the dummy input wherever the model lives; a model without
    # parameters falls back to the CPU instead of raising StopIteration.
    first_param = next(model.parameters(), None)
    target = first_param.device if first_param is not None else torch.device("cpu")
    dummy = torch.randn(*input_size).to(target)

    was_training = model.training
    try:
        for layer in model.modules():
            if isinstance(layer, nn.Conv2d):
                hooks.append(layer.register_forward_hook(conv_hook))
            elif isinstance(layer, nn.Linear):
                hooks.append(layer.register_forward_hook(linear_hook))

        model.eval()
        with torch.no_grad():
            model(dummy)
    finally:
        for handle in hooks:
            handle.remove()
        # Do not leave the caller's model silently switched to eval mode.
        model.train(was_training)

    return sum(macs)

In [None]:

# 3. Oriented 1D Conv2d

class Oriented1DConv2d(nn.Module):
    """Apply one shared vertical 1D convolution at several orientations.

    A single ``(kernel_size, 1)`` ``nn.Conv2d`` is applied to rotated copies
    of the input. For each of ``num_angles`` angles, evenly spaced over
    [0, 180) degrees, the input is rotated by the angle, convolved, and the
    result rotated back by the negative angle. The per-angle results are
    averaged.

    Args:
        in_channels: Input channels of the underlying convolution.
        out_channels: Output channels of the underlying convolution.
        kernel_size: Length of the 1D kernel; must be odd so that padding
            ``kernel_size // 2`` keeps it centred.
        num_angles: Number of orientations; must be at least 1.
        stride: Stride of the convolution, an int or an ``(h, w)`` pair.
        bias: Whether the convolution has a bias term.
        groups: Groups of the convolution.

    Raises:
        ValueError: If ``kernel_size`` is even or ``num_angles`` is below 1.
    """

    def __init__(
        self,
        in_channels,
        out_channels,
        kernel_size=7,
        num_angles=8,
        stride=(1,1),
        bias=True,
        groups=1,
    ):
        super().__init__()
        if kernel_size % 2 != 1:
            raise ValueError(f"kernel_size must be odd, got {kernel_size}")
        if num_angles < 1:
            raise ValueError(f"num_angles must be >= 1, got {num_angles}")
        if isinstance(stride, int):
            stride = (stride, stride)

        self.num_angles = num_angles
        # Angles in degrees: 0, 180/n, 2*180/n, ...
        self.angles = [i * 180.0 / num_angles for i in range(num_angles)]

        pad = kernel_size // 2
        # vertical 1D kernel (K x 1), orientation via rotate()
        self.conv1d = nn.Conv2d(
            in_channels,
            out_channels,
            kernel_size=(kernel_size, 1),
            stride=stride,
            padding=(pad, 0),
            bias=bias,
            groups=groups,
        )

    def forward(self, x):
        """Return the average of the rotate -> conv -> rotate-back outputs."""
        outs = []
        for ang in self.angles:
            xr = rotate(x, angle=ang, interpolation=InterpolationMode.BILINEAR)
            y = self.conv1d(xr)
            # Undo the rotation so every response is in the input's frame.
            y = rotate(y, angle=-ang, interpolation=InterpolationMode.BILINEAR)
            outs.append(y)
        return sum(outs) / self.num_angles


In [None]:

# 4. Replace depthwise convs with Oriented1DConv2d

def replace_depthwise_with_oriented(module, kernel_size=7, num_angles=8):
    """Swap square depthwise convolutions for ``Oriented1DConv2d``, in place.

    The module tree is walked recursively. A child is replaced when it is an
    ``nn.Conv2d`` whose ``groups`` equals its ``in_channels`` and whose kernel
    is square and larger than 1x1. The replacement takes over the channel
    counts, stride, groups and bias flag of the original layer, but uses the
    ``kernel_size`` and ``num_angles`` given here and is a newly constructed
    layer (no weights are copied from the layer it replaces).
    """
    for child_name, child in list(module.named_children()):
        is_square_depthwise = (
            isinstance(child, nn.Conv2d)
            and child.groups == child.in_channels
            and child.kernel_size[0] == child.kernel_size[1]
            and child.kernel_size[0] > 1
        )

        if not is_square_depthwise:
            # Not a target itself: keep searching below it.
            replace_depthwise_with_oriented(child, kernel_size, num_angles)
            continue

        oriented = Oriented1DConv2d(
            in_channels=child.in_channels,
            out_channels=child.out_channels,
            kernel_size=kernel_size,
            num_angles=num_angles,
            stride=child.stride,
            bias=(child.bias is not None),
            groups=child.groups,
        )
        setattr(module, child_name, oriented)

def build_oriented_efficientnet_b0(num_classes=10, kernel_size=7, num_angles=8):
    """Create an EfficientNet-B0 whose depthwise convs use oriented 1D kernels.

    Starts from the ImageNet-pretrained model, rewrites ``model.features``
    with ``replace_depthwise_with_oriented``, replaces ``classifier[1]`` with
    a fresh ``nn.Linear`` producing ``num_classes`` outputs, and moves the
    result to the module-level ``device``.
    """
    net = efficientnet_b0(weights=EfficientNet_B0_Weights.IMAGENET1K_V1)
    replace_depthwise_with_oriented(
        net.features,
        kernel_size=kernel_size,
        num_angles=num_angles,
    )
    old_head = net.classifier[1]
    net.classifier[1] = nn.Linear(old_head.in_features, num_classes)
    net = net.to(device)
    return net

# Run configuration: model_name is the label used in log lines and in the
# names of the files written later on.
model_name = "OrientedEfficientNetB0"
num_epochs = 10

model = build_oriented_efficientnet_b0(
    num_classes=num_classes,
    kernel_size=7,
    num_angles=8,
)

# Cross-entropy loss; Adam updates every model parameter at lr=1e-4.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(
    model.parameters(),
    lr=1e-4,
)


In [None]:

# 5. Training & Evaluation Helpers

def train_one_epoch(model, loader, optimizer, criterion):
    """Run one optimisation pass over ``loader`` in training mode.

    Each batch is moved to the module-level ``device``, followed by a
    forward pass, a backward pass and one optimizer step.

    Returns:
        tuple: ``(mean of the per-batch losses, accuracy in percent)``, where
        accuracy is measured on the predictions made during the pass.
    """
    model.train()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0

    for batch_imgs, batch_labels in loader:
        batch_imgs = batch_imgs.to(device)
        batch_labels = batch_labels.to(device)

        optimizer.zero_grad()
        logits = model(batch_imgs)
        batch_loss = criterion(logits, batch_labels)
        batch_loss.backward()
        optimizer.step()

        loss_sum += batch_loss.item()
        predicted = logits.argmax(dim=1)
        n_seen += batch_labels.size(0)
        n_correct += (predicted == batch_labels).sum().item()

    mean_loss = loss_sum / len(loader)
    accuracy = 100.0 * n_correct / n_seen
    return mean_loss, accuracy

def evaluate(model, loader):
    """Score ``model`` on ``loader`` in eval mode without tracking gradients.

    Returns:
        tuple: ``(accuracy in percent, labels, predictions)``; the last two
        are NumPy arrays collected across all batches in loader order.
    """
    model.eval()
    n_correct = 0
    n_seen = 0
    label_log = []
    pred_log = []

    with torch.no_grad():
        for batch_imgs, batch_labels in loader:
            batch_imgs = batch_imgs.to(device)
            batch_labels = batch_labels.to(device)

            predicted = model(batch_imgs).argmax(dim=1)

            n_seen += batch_labels.size(0)
            n_correct += (predicted == batch_labels).sum().item()
            label_log.extend(batch_labels.cpu().numpy())
            pred_log.extend(predicted.cpu().numpy())

    accuracy = 100.0 * n_correct / n_seen
    return accuracy, np.array(label_log), np.array(pred_log)

def plot_learning_curves(name, train_losses, train_accs, test_accs):
    """Save the loss curve and the accuracy curves as two PNG files.

    Epochs are numbered from 1 on the x-axis. The files are written to
    ``/kaggle/working/`` as ``{name}_loss_curve.png`` and
    ``{name}_accuracy_curves.png``.
    """
    epochs = range(1, len(train_losses) + 1)

    def _draw(curves, y_label, title, out_path):
        # One figure per call: plot each (values, legend) pair, save, close.
        plt.figure()
        for values, legend in curves:
            plt.plot(epochs, values, label=legend)
        plt.xlabel("Epoch")
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()
        plt.tight_layout()
        plt.savefig(out_path)
        plt.close()

    _draw(
        [(train_losses, "Train Loss")],
        "Loss",
        f"{name} - Training Loss",
        f"/kaggle/working/{name}_loss_curve.png",
    )
    _draw(
        [(train_accs, "Train Acc"), (test_accs, "Test Acc")],
        "Accuracy (%)",
        f"{name} - Accuracy Curves",
        f"/kaggle/working/{name}_accuracy_curves.png",
    )

def save_confusion_matrix(name, y_true, y_pred, classes):
    """Plot the confusion matrix of ``y_pred`` against ``y_true`` and save it.

    The heatmap shows the integer count in every cell and, when possible,
    labels both axes with ``classes``. The figure is written to
    ``/kaggle/working/{name}_confusion_matrix.png``.

    Args:
        name: Model label used in the title and the output filename.
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        classes: Class names used as tick labels.
            NOTE(review): assumes the names are ordered like the sorted
            label values - confirm for datasets other than CIFAR-10.
    """
    cm = confusion_matrix(y_true, y_pred)

    # confusion_matrix only has rows for labels that actually occur, so use
    # the class names only when there is exactly one name per row; otherwise
    # the names would be attached to the wrong rows.
    if classes is not None and len(classes) == cm.shape[0]:
        tick_labels = list(classes)
    else:
        tick_labels = "auto"

    plt.figure(figsize=(8, 6))
    sns.heatmap(
        cm,
        cmap="Blues",
        cbar=False,
        annot=True,
        fmt="d",
        xticklabels=tick_labels,
        yticklabels=tick_labels,
    )
    plt.title(f"{name} - CIFAR-10 Confusion Matrix")
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(f"/kaggle/working/{name}_confusion_matrix.png")
    plt.close()

In [None]:

# 6. Training Loop (10 epochs)

# Per-epoch history, later used for the learning-curve plots.
train_losses = []
train_accs = []
test_accs = []

# The overall timer spans the whole loop, so it includes the per-epoch
# test-set evaluation as well as the training passes.
train_start = time.time()

for epoch in range(num_epochs):
    t0 = time.time()
    loss, train_acc = train_one_epoch(model, trainloader, optimizer, criterion)
    test_acc = evaluate(model, testloader)[0]
    t1 = time.time()
    epoch_secs = t1 - t0

    train_losses.append(loss)
    train_accs.append(train_acc)
    test_accs.append(test_acc)

    epoch_report = (
        f"[{model_name}] Epoch {epoch+1}/{num_epochs} | "
        f"Loss={loss:.4f} | TrainAcc={train_acc:.2f}% | "
        f"TestAcc={test_acc:.2f}% | Time={epoch_secs:.2f}s"
    )
    print(epoch_report)

train_end = time.time()
total_train_time = train_end - train_start

# One more test pass to collect the labels/predictions for the confusion
# matrix; the train accuracy reported is the one from the last epoch.
final_test_acc, y_true, y_pred = evaluate(model, testloader)
final_train_acc = train_accs[-1]

total_minutes = total_train_time / 60
print(f"\n[{model_name}] Total training time: {total_train_time:.2f}s "
      f"({total_minutes:.2f} min)")
print(f"[{model_name}] Final Train Acc: {final_train_acc:.2f}%")
print(f"[{model_name}] Final Test  Acc: {final_test_acc:.2f}%")

plot_learning_curves(model_name, train_losses, train_accs, test_accs)
save_confusion_matrix(model_name, y_true, y_pred, classes)

[OrientedEfficientNetB0] Epoch 1/10 | Loss=1.6654 | TrainAcc=38.76% | TestAcc=50.62% | Time=455.55s
[OrientedEfficientNetB0] Epoch 2/10 | Loss=1.2829 | TrainAcc=53.46% | TestAcc=57.87% | Time=455.57s
[OrientedEfficientNetB0] Epoch 3/10 | Loss=1.1135 | TrainAcc=60.03% | TestAcc=61.80% | Time=455.25s
[OrientedEfficientNetB0] Epoch 4/10 | Loss=1.0021 | TrainAcc=64.38% | TestAcc=65.20% | Time=454.77s
[OrientedEfficientNetB0] Epoch 5/10 | Loss=0.9120 | TrainAcc=67.45% | TestAcc=67.76% | Time=455.69s
[OrientedEfficientNetB0] Epoch 6/10 | Loss=0.8433 | TrainAcc=70.13% | TestAcc=68.90% | Time=455.37s
[OrientedEfficientNetB0] Epoch 7/10 | Loss=0.7810 | TrainAcc=72.41% | TestAcc=70.00% | Time=455.64s
[OrientedEfficientNetB0] Epoch 8/10 | Loss=0.7298 | TrainAcc=73.99% | TestAcc=71.27% | Time=454.63s
[OrientedEfficientNetB0] Epoch 9/10 | Loss=0.6827 | TrainAcc=75.96% | TestAcc=72.27% | Time=455.20s
[OrientedEfficientNetB0] Epoch 10/10 | Loss=0.6414 | TrainAcc=77.35% | TestAcc=73.10% | Time=454.70s

In [None]:

# 7. FLOPs & Params

# Total element count over every tensor returned by model.parameters().
params = sum(map(torch.numel, model.parameters()))

# Cost estimate for a single 3x128x128 input.
flops = count_flops(model, input_size=(1, 3, 128, 128))

print(f"[{model_name}] Params: {params}")
print(f"[{model_name}] FLOPs:  {flops:.3g}")

[OrientedEfficientNetB0] Params: 3901062
[OrientedEfficientNetB0] FLOPs:  1.57e+08


In [None]:

# 8. Inference & Training Runtime

# Benchmark single-image latency (forward only, then a full SGD step) and
# peak GPU memory on a random 1x3x128x128 input.
#
# The timed SGD steps really do update the model's weights and buffers. The
# trained state is therefore snapshotted first and restored afterwards, so
# that the model saved later is the one that was actually trained and
# evaluated, not one perturbed by steps on random noise.

_N_WARMUP = 10   # untimed iterations run before the clock starts
_N_TIMED = 100   # iterations averaged into the reported latency


def _cuda_sync():
    """Wait for queued CUDA work so the timestamps bracket the real work."""
    if device == "cuda":
        torch.cuda.synchronize()


def _reset_peak_memory():
    """Empty the CUDA cache and reset the peak-allocation counter."""
    if device == "cuda":
        torch.cuda.empty_cache()
        torch.cuda.reset_peak_memory_stats()
        torch.cuda.synchronize()


def _peak_memory_mb():
    """Return the peak allocated CUDA memory in MiB, or 0.0 without CUDA."""
    if device == "cuda":
        return torch.cuda.max_memory_allocated() / (1024**2)
    return 0.0


dummy_x = torch.randn(1, 3, 128, 128).to(device)

# CPU copy of all parameters and buffers; kept off the GPU so the snapshot
# does not count towards the peak-memory measurements below.
_trained_state = {
    key: value.detach().cpu().clone()
    for key, value in model.state_dict().items()
}

# Inference
model.eval()
_reset_peak_memory()

with torch.no_grad():
    for _ in range(_N_WARMUP):
        _ = model(dummy_x)

    _cuda_sync()
    t0 = time.perf_counter()
    for _ in range(_N_TIMED):
        _ = model(dummy_x)
    _cuda_sync()
    t1 = time.perf_counter()

infer_ms = (t1 - t0) / _N_TIMED * 1000.0
mem_infer_mb = _peak_memory_mb()

# Training
model.train()
dummy_label = torch.randint(0, num_classes, (1,), device=device)
optimizer_step = optim.SGD(model.parameters(), lr=0.01)
criterion_step = nn.CrossEntropyLoss()


def _train_step():
    """Run one forward/backward/update step on the dummy sample."""
    optimizer_step.zero_grad()
    step_out = model(dummy_x)
    step_loss = criterion_step(step_out, dummy_label)
    step_loss.backward()
    optimizer_step.step()


_reset_peak_memory()
try:
    for _ in range(_N_WARMUP):
        _train_step()

    _cuda_sync()
    t0 = time.perf_counter()
    for _ in range(_N_TIMED):
        _train_step()
    _cuda_sync()
    t1 = time.perf_counter()

    # Read the peak before the restore below touches GPU memory again.
    mem_train_mb = _peak_memory_mb()
finally:
    # Undo the benchmark's weight and buffer updates.
    model.load_state_dict(_trained_state)
    model.eval()

train_ms = (t1 - t0) / _N_TIMED * 1000.0

mem_mb = max(mem_infer_mb, mem_train_mb)

print(f"\n[{model_name}] Inference time / image: {infer_ms:.3f} ms")
print(f"[{model_name}] Train step   / image: {train_ms:.3f} ms")
print(f"[{model_name}] Peak GPU memory: {mem_mb:.2f} MB")


[OrientedEfficientNetB0] Inference time / image: 102.514 ms
[OrientedEfficientNetB0] Train step   / image: 212.608 ms
[OrientedEfficientNetB0] Peak GPU memory: 183.01 MB


In [None]:

# 9. Save Model & Metrics CSV

# Write the model's state dict next to the other run artefacts.
weights_path = f"/kaggle/working/{model_name}_cifar10_128x128.pth"
torch.save(model.state_dict(), weights_path)

# One header row plus one value row; the comparison section reads this file.
csv_path = f"/kaggle/working/{model_name}_metrics.csv"
metrics_header = [
    "Model", "Params", "FLOPs",
    "Infer_ms", "Train_ms",
    "Memory_MB", "TotalTrainTimeSec",
    "FinalTrainAcc", "FinalTestAcc"
]
metrics_row = [
    model_name,
    params,
    flops,
    infer_ms,
    train_ms,
    mem_mb,
    total_train_time,
    final_train_acc,
    final_test_acc,
]
with open(csv_path, "w", newline="") as f:
    csv.writer(f).writerows([metrics_header, metrics_row])

print(f"\n[{model_name}] Metrics CSV saved to: {csv_path}")
print(f"[{model_name}] Done.")


[OrientedEfficientNetB0] Metrics CSV saved to: /kaggle/working/OrientedEfficientNetB0_metrics.csv
[OrientedEfficientNetB0] Done.


# Compare both models

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


# 1. Load Metrics from Both Models


baseline_csv = "/kaggle/working/EfficientNetB0_metrics.csv"
oriented_csv = "/kaggle/working/OrientedEfficientNetB0_metrics.csv"

# Read each run's one-row metrics file and tag it with a short "Type" label
# that the later selections and plots key on.
df_base = pd.read_csv(baseline_csv).assign(Type="Baseline")
df_ori = pd.read_csv(oriented_csv).assign(Type="Oriented")

# Stack the two rows into one table with a fresh 0..n-1 index.
df = pd.concat([df_base, df_ori], ignore_index=True)

print("\n===== FULL METRICS TABLE =====")
display(df)


# 2. Extract Rows for Comparison

# First row of each type from the combined table.
b = df.loc[df["Type"] == "Baseline"].iloc[0]
o = df.loc[df["Type"] == "Oriented"].iloc[0]


def _oriented_over_baseline(column):
    """Return the oriented value of ``column`` divided by the baseline one."""
    return o[column] / b[column]


flo_ratio = _oriented_over_baseline("FLOPs")
infer_ratio = _oriented_over_baseline("Infer_ms")
train_ratio = _oriented_over_baseline("Train_ms")
memory_ratio = _oriented_over_baseline("Memory_MB")
# Slow-down in inference time relative to the increase in counted FLOPs.
efficiency = infer_ratio / flo_ratio

print("\n===== RATIOS (Oriented / Baseline) =====")
print(f"FLOPs Ratio        : {flo_ratio:.4f}")
print(f"Inference Ratio    : {infer_ratio:.4f}")
print(f"Training Ratio     : {train_ratio:.4f}")
print(f"Memory Ratio       : {memory_ratio:.4f}")
print(f"Efficiency         : {efficiency:.4f}")


# 4. Save Comparison CSV

# Raw metrics are copied for both models; the derived ratios only have a
# value in the "Oriented" column and an empty string under "Baseline".
raw_metrics = [
    "Params",
    "FLOPs",
    "Infer_ms",
    "Train_ms",
    "Memory_MB",
    "TotalTrainTimeSec",
    "FinalTrainAcc",
    "FinalTestAcc",
]
ratio_rows = [
    ("FLOPsRatio", flo_ratio),
    ("InferenceRatio", infer_ratio),
    ("TrainingRatio", train_ratio),
    ("MemoryRatio", memory_ratio),
    ("Efficiency", efficiency),
]

comparison = pd.DataFrame({
    "Metric": raw_metrics + [label for label, _ in ratio_rows],
    "Baseline": [b[metric] for metric in raw_metrics] + [""] * len(ratio_rows),
    "Oriented": [o[metric] for metric in raw_metrics]
    + [value for _, value in ratio_rows],
})

comp_path = "/kaggle/working/EfficientNet_Comparison.csv"
comparison.to_csv(comp_path, index=False)

print(f"\nComparison CSV saved to: {comp_path}")


# 5. Visualization


# FLOPs plot
# Bar charts: (metric column, figure title, output file), one figure each.
bar_specs = [
    ("FLOPs", "FLOPs Comparison", "/kaggle/working/FLOPs_Comparison.png"),
    ("Infer_ms", "Inference Time (ms/image)",
     "/kaggle/working/Inference_Comparison.png"),
    ("Train_ms", "Training Step Time (ms/image)",
     "/kaggle/working/TrainTime_Comparison.png"),
]
for column, title, out_path in bar_specs:
    plt.figure(figsize=(6, 4))
    sns.barplot(data=df, x="Type", y=column)
    plt.title(title)
    plt.savefig(out_path)
    plt.close()

# Speed vs FLOPs: one point per model, labelled just right of the marker.
plt.figure(figsize=(6, 4))
plt.scatter(df["FLOPs"], df["Infer_ms"], s=100)
for _, row in df.iterrows():
    label_x = row["FLOPs"]*1.02
    plt.text(label_x, row["Infer_ms"], row["Type"])
plt.xlabel("FLOPs")
plt.ylabel("Inference Time (ms)")
plt.title("Speed vs FLOPs")
plt.savefig("/kaggle/working/Speed_vs_FLOPs.png")
plt.close()

print("\nAll comparison plots saved to /kaggle/working/")


===== FULL METRICS TABLE =====


Unnamed: 0,Model,Params,FLOPs,Infer_ms,Train_ms,Memory_MB,TotalTrainTimeSec,FinalTrainAcc,FinalTestAcc,Type
0,EfficientNetB0,4020358,125997568,8.088479,26.770236,100.270996,684.900757,99.268,95.78,Baseline
1,OrientedEfficientNetB0,3901062,156898304,102.513957,212.607546,183.006836,4552.374904,77.348,73.1,Oriented



===== RATIOS (Oriented / Baseline) =====
FLOPs Ratio        : 1.2452
Inference Ratio    : 12.6741
Training Ratio     : 7.9419
Memory Ratio       : 1.8251
Efficiency         : 10.1779

Comparison CSV saved to: /kaggle/working/EfficientNet_Comparison.csv

All comparison plots saved to /kaggle/working/
