In [1]:
# Step 1: Import Libraries
import copy
import os
import sys

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

# Step 2: Set Up Kaggle Environment
# Seed PyTorch's global RNG so weight initialisation and DataLoader shuffling
# are repeatable across "Restart Kernel -> Run All". GPU kernels may still
# introduce small run-to-run differences.
SEED = 42
torch.manual_seed(SEED)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f'Using device: {device}')

# Step 3: Load and Preprocess the Dataset
# NOTE(review): the mean/std passed to Normalize are the widely used ImageNet
# channel statistics, not values computed from CIFAR-10 -- confirm this is intended.
data_transforms = transforms.Compose([
    transforms.Resize((32, 32)),  # CIFAR-10 images are 32x32
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# The same preprocessing pipeline is applied to both splits; the CIFAR-10
# test split (train=False) is used as the validation set.
train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=data_transforms)
val_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=data_transforms)

# Only the training loader shuffles; validation order is fixed.
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=128, shuffle=True)
val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=128, shuffle=False)

# Step 4: Define the CNN Model
class CNN(nn.Module):
    """Small two-convolution classifier for 3-channel 32x32 images.

    Layer sequence: conv(3->32) -> activation -> 2x2 max-pool ->
    conv(32->64) -> activation -> 2x2 max-pool -> flatten ->
    fc(64*8*8 -> 128) -> activation -> fc(128 -> 10).

    Args:
        activation_func: Callable applied after each convolution and after the
            first fully connected layer (e.g. ``nn.ReLU()``).
        weight_init: ``'xavier'``, ``'kaiming'`` or ``'random'``. Any other
            value leaves the layers' default initialisation untouched. Only
            the weights are re-initialised; biases keep their defaults.
    """

    def __init__(self, activation_func, weight_init):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 8 * 8, 128)  # two 2x2 pools shrink 32x32 to 8x8
        self.fc2 = nn.Linear(128, 10)  # 10 classes for CIFAR-10

        # Weight initialization: one in-place initializer per scheme, applied
        # to every layer in definition order.
        initializers = {
            'xavier': nn.init.xavier_uniform_,
            # The Kaiming gain is always computed for ReLU, whatever
            # activation_func is actually used.
            'kaiming': lambda weight: nn.init.kaiming_uniform_(
                weight, mode='fan_in', nonlinearity='relu'),
            # uniform_ with its default bounds, i.e. U(0, 1).
            'random': nn.init.uniform_,
        }
        init_fn = initializers.get(weight_init)
        if init_fn is not None:
            for layer in (self.conv1, self.conv2, self.fc1, self.fc2):
                init_fn(layer.weight)

        self.activation_func = activation_func

    def forward(self, x):
        """Return class logits of shape (batch, 10) for a batch of 3x32x32 images.

        Raises:
            RuntimeError: if the spatial size is not 32x32, because the
                flattened features no longer match ``fc1``.
        """
        x = F.max_pool2d(self.activation_func(self.conv1(x)), 2)
        x = F.max_pool2d(self.activation_func(self.conv2(x)), 2)
        # Flatten per sample and keep the batch dimension. A view(-1, 4096)
        # here would silently fold extra spatial positions into the batch
        # dimension for wrongly sized inputs instead of failing.
        x = x.flatten(1)
        x = self.activation_func(self.fc1(x))
        return self.fc2(x)

# Candidate non-linearities for the sweep, keyed by the short name that
# appears in each configuration label.
activation_functions = dict(
    relu=nn.ReLU(),
    tanh=nn.Tanh(),
    leaky_relu=nn.LeakyReLU(0.1),
)

# Step 5: Define the Training Function
def train_model(model, train_loader, val_loader, optimizer, criterion, num_epochs=10, config_name=None,
                save_dir='/kaggle/working', run_device=None):
    """Train ``model``, validate after every epoch and checkpoint the best epoch.

    Args:
        model: Network to train; it is moved to the run device.
        train_loader: Iterable of ``(inputs, labels)`` training batches.
        val_loader: Iterable of ``(inputs, labels)`` validation batches.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss callable taking ``(outputs, labels)``.
        num_epochs: Number of passes over ``train_loader``.
        config_name: Label used in the checkpoint file name and log messages.
        save_dir: Directory that receives ``{config_name}_best.pth``; created
            if missing. The default reproduces the original Kaggle path.
        run_device: Device to train on. ``None`` uses the notebook-level
            ``device``.

    Returns:
        The highest validation accuracy (in percent) seen over all epochs.
    """
    if run_device is None:
        run_device = device  # notebook-level default
    model = model.to(run_device)
    best_accuracy = 0.0
    best_model_state = None

    for epoch in range(num_epochs):
        # ---- training pass ----
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs, labels = inputs.to(run_device), labels.to(run_device)
            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

        # ---- validation pass (no gradients) ----
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for inputs, labels in val_loader:
                inputs, labels = inputs.to(run_device), labels.to(run_device)
                outputs = model(inputs)
                val_loss += criterion(outputs, labels).item()
                predicted = outputs.argmax(dim=1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        val_accuracy = 100 * correct / total
        print(f'Epoch {epoch+1}, Loss: {running_loss/len(train_loader)}, Val Loss: {val_loss/len(val_loader)}, Val Acc: {val_accuracy}')

        if val_accuracy > best_accuracy:
            best_accuracy = val_accuracy
            # state_dict() hands back references to the live parameter
            # tensors, which later epochs keep updating in place. Deep-copy so
            # the snapshot really holds the weights of the best epoch.
            best_model_state = copy.deepcopy(model.state_dict())

    if best_model_state is not None:
        os.makedirs(save_dir, exist_ok=True)
        best_model_path = os.path.join(save_dir, f'{config_name}_best.pth')
        torch.save(best_model_state, best_model_path)
        print(f'Best model for {config_name} saved at {best_model_path} with accuracy: {best_accuracy}')

    return best_accuracy

# Step 6: Redirect Output and Train the Model
class Tee:
    """Write-only stream that duplicates every write across several file-like objects.

    Args:
        *files: Objects exposing ``write`` and ``flush``; each receives every
            call, in the order given.
    """

    def __init__(self, *files):
        self.files = files

    def write(self, obj):
        """Write ``obj`` to every target and return the number of characters written."""
        for f in self.files:
            f.write(obj)
        # Text streams report the character count from write(); mirror that
        # so callers that use the return value keep working.
        return len(obj)

    def flush(self):
        """Flush every target stream."""
        for f in self.files:
            f.flush()

# Save the original sys.stdout
original_stdout = sys.stdout

# Best result seen across every configuration of the sweep.
best_accuracy_all = 0.0
best_config_all = None
best_model_all = None

criterion = nn.CrossEntropyLoss()

# Optimizer constructors keyed by the name used in the configuration label;
# all three use lr=0.001 and SGD additionally uses momentum=0.9.
optimizer_factories = {
    'SGD': lambda params: optim.SGD(params, lr=0.001, momentum=0.9),
    'Adam': lambda params: optim.Adam(params, lr=0.001),
    'RMSprop': lambda params: optim.RMSprop(params, lr=0.001),
}

# Open the output file; the `with` block closes it even if training fails.
with open('/kaggle/working/training_log.txt', 'w') as output_file:
    # Redirect stdout to both the file and the notebook
    sys.stdout = Tee(original_stdout, output_file)
    try:
        # Grid: every activation x weight-init scheme x optimizer.
        for activation_name, activation_func in activation_functions.items():
            for weight_init in ['xavier', 'kaiming', 'random']:
                for optimizer_name, make_optimizer in optimizer_factories.items():
                    config_name = f'{activation_name}_{weight_init}_{optimizer_name}'
                    print(f'\nTraining with {config_name}')
                    model = CNN(activation_func, weight_init)
                    optimizer = make_optimizer(model.parameters())

                    best_accuracy = train_model(model, train_loader, val_loader, optimizer, criterion, num_epochs=10, config_name=config_name)

                    if best_accuracy > best_accuracy_all:
                        best_accuracy_all = best_accuracy
                        best_config_all = config_name
                        # State of the model as it stands when train_model returns.
                        best_model_all = model.state_dict()
    finally:
        # Restore original stdout even when a configuration raises, so later
        # cells do not keep printing into a closed log file.
        sys.stdout = original_stdout

print(f'Best overall accuracy: {best_accuracy_all} with configuration: {best_config_all}')

Using device: cuda
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:05<00:00, 31.8MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified

Training with relu_xavier_SGD
Epoch 1, Loss: 1.8668865752037225, Val Loss: 1.619702493088155, Val Acc: 42.88
Epoch 2, Loss: 1.4994408264184547, Val Loss: 1.3960141652747045, Val Acc: 50.7
Epoch 3, Loss: 1.341345333992063, Val Loss: 1.3300545321235173, Val Acc: 52.7
Epoch 4, Loss: 1.2525737957881236, Val Loss: 1.2322311642803723, Val Acc: 56.39
Epoch 5, Loss: 1.1876769512510665, Val Loss: 1.1736443321916121, Val Acc: 58.85
Epoch 6, Loss: 1.128165486676004, Val Loss: 1.1349170155163053, Val Acc: 60.07
Epoch 7, Loss: 1.0775853587538384, Val Loss: 1.0988043410868584, Val Acc: 61.65
Epoch 8, Loss: 1.0337715613872498, Val Loss: 1.0920892516268959, Val Acc: 61.48
Epoch 9, Loss: 0.991312546955655, Val Loss: 1.0294663476038584, Val Acc: 64.19
Epoch 10, Loss: 0.9528283347254214, Val Loss: 1.0143030225476133, Val Acc: 64.48
Best model for relu_xavier_SGD saved at /kaggle/working/relu_xavier_SGD_best.pth with