In [5]:
import torch
import torch.nn as nn
import pandas as pd
from torchsummary import summary

class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_planes, planes, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_planes, planes, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_planes != planes:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_planes, planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes)
            )

    def forward(self, x):
        out = torch.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        out += self.shortcut(x)
        return torch.relu(out)

class CustomResNet(nn.Module):
    def __init__(self, block, num_blocks, channels, num_classes=10):
        super(CustomResNet, self).__init__()
        self.in_planes = channels[0]

        self.conv1 = nn.Conv2d(3, channels[0], kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(channels[0])
        self.layer1 = self._make_layer(block, channels[0], num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, channels[1], num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, channels[2], num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, channels[3], num_blocks[3], stride=2)
        self.linear = nn.Linear(channels[3] * block.expansion, num_classes)

    def _make_layer(self, block, planes, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_planes, planes, stride))
            self.in_planes = planes * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = torch.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)
        out = torch.nn.functional.avg_pool2d(out, 4)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out

def test_resnet_architectures():
    architectures = []
    
    layer_configs = [
        [2, 2],       # 2-layer ResNet
        [3, 4, 6],    # 3-layer deeper model
        [3, 4, 6, 3], # 4-layer ResNet-34 style
    ]
    
    channel_configs = [
        [32, 64],       # Small 2-layer model
        [48, 96, 192],  # Balanced 3-layer
        [64, 128, 256, 512],  # Full 4-layer ResNet
    ]

    for layers in layer_configs:
        for channels in channel_configs:
            try:
                model = CustomResNet(BasicBlock, layers, channels)
                num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
                
                architectures.append({
                    "Layers": len(layers),
                    "Blocks per Layer": layers,
                    "Channels": channels,
                    "Parameters": num_params
                })
            except:
                pass

    df = pd.DataFrame(architectures)
    df = df.sort_values(by="Parameters", ascending=True)
    return df

df_results = test_resnet_architectures()

# Display the DataFrame normally
print("ResNet Architecture Comparisons:")
print(df_results)

# Or use Pandas' built-in method in Jupyter
from IPython.display import display
display(df_results)


ResNet Architecture Comparisons:
   Layers Blocks per Layer             Channels  Parameters
0       4     [3, 4, 6, 3]  [64, 128, 256, 512]    21282122


Unnamed: 0,Layers,Blocks per Layer,Channels,Parameters
0,4,"[3, 4, 6, 3]","[64, 128, 256, 512]",21282122


In [7]:

import itertools

# Define hyperparameter search space
learning_rates = [0.1, 0.01, 0.001]
batch_sizes = [128, 256]
weight_decays = [1e-4, 5e-4]
optimizers = ['SGD', 'AdamW']
schedulers = ['CosineAnnealingLR', 'OneCycleLR']

# Generate all hyperparameter combinations
hyperparam_combinations = list(itertools.product(learning_rates, batch_sizes, weight_decays, optimizers, schedulers))

hyperparam_results = []

for lr, batch_size, wd, optim, sched in hyperparam_combinations:
    hyperparam_results.append({
        "Learning Rate": lr,
        "Batch Size": batch_size,
        "Weight Decay": wd,
        "Optimizer": optim,
        "Scheduler": sched
    })

df_hyperparams = pd.DataFrame(hyperparam_results)

# Display the DataFrame normally
print("ResNet Architecture Comparisons:")
print(df_hyperparams)

# Or use Pandas' built-in method in Jupyter
from IPython.display import display
display(df_hyperparams)

ResNet Architecture Comparisons:
    Learning Rate  Batch Size  Weight Decay Optimizer          Scheduler
0           0.100         128        0.0001       SGD  CosineAnnealingLR
1           0.100         128        0.0001       SGD         OneCycleLR
2           0.100         128        0.0001     AdamW  CosineAnnealingLR
3           0.100         128        0.0001     AdamW         OneCycleLR
4           0.100         128        0.0005       SGD  CosineAnnealingLR
5           0.100         128        0.0005       SGD         OneCycleLR
6           0.100         128        0.0005     AdamW  CosineAnnealingLR
7           0.100         128        0.0005     AdamW         OneCycleLR
8           0.100         256        0.0001       SGD  CosineAnnealingLR
9           0.100         256        0.0001       SGD         OneCycleLR
10          0.100         256        0.0001     AdamW  CosineAnnealingLR
11          0.100         256        0.0001     AdamW         OneCycleLR
12          0.100 

Unnamed: 0,Learning Rate,Batch Size,Weight Decay,Optimizer,Scheduler
0,0.1,128,0.0001,SGD,CosineAnnealingLR
1,0.1,128,0.0001,SGD,OneCycleLR
2,0.1,128,0.0001,AdamW,CosineAnnealingLR
3,0.1,128,0.0001,AdamW,OneCycleLR
4,0.1,128,0.0005,SGD,CosineAnnealingLR
5,0.1,128,0.0005,SGD,OneCycleLR
6,0.1,128,0.0005,AdamW,CosineAnnealingLR
7,0.1,128,0.0005,AdamW,OneCycleLR
8,0.1,256,0.0001,SGD,CosineAnnealingLR
9,0.1,256,0.0001,SGD,OneCycleLR


In [None]:

import pandas as pd

# Load model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = CustomResNet(BasicBlock, [3, 4, 6], [64, 128, 256]).to(device)  # Adjust based on the best model
model_path = "optimized_model.pth"  # Local model path

# Load weights
net.load_state_dict(torch.load(model_path, map_location=device))
net.eval()

# Run inference on local test data
predictions = []
with torch.no_grad():
    for images, indices in test_loader:
        images = images.to(device)
        outputs = net(images)
        _, predicted = outputs.max(1)
        for idx, label in zip(indices.numpy(), predicted.cpu().numpy()):
            predictions.append((idx, label))

# Save local submission file
submission_df = pd.DataFrame(predictions, columns=["ID", "Label"])
submission_file = "submission.csv"
submission_df.to_csv(submission_file, index=False)
print(f"Local Submission file saved: {submission_file}")
