In [None]:
import torch
import torch.nn as nn
import torchvision
from torchsummary import summary
from torchvision import datasets, transforms
from torch.utils.data import Subset, DataLoader
from sklearn.model_selection import StratifiedShuffleSplit

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [None]:
class CNN(nn.Module):
    '''
    Configurable CNN: `num_conv` Conv2d -> activation -> MaxPool2d blocks, followed by
    global average pooling and a two-layer fully connected classifier head.
    '''

    # Supported activation names (lower-case) mapped to their module constructors.
    _ACTIVATIONS = {
        'relu': nn.ReLU,
        'gelu': nn.GELU,
        'silu': nn.SiLU,
        'mish': nn.Mish,
    }

    def __init__(self, num_filters=32, size_filters=3, activation_func='relu', filter_org=1, num_dense=128, input_channels=3, num_classes=10, num_conv=5):
        '''
        num_filters: Number of filters in the first conv layer --> 32,64,etc
        size_filters: Size of each filter (=F) --> 5,10,etc
        activation_func: Activation used after every conv layer and in the dense head;
                         one of 'relu', 'gelu', 'silu', 'mish' (case-insensitive)
        filter_org: Ratio of number of filters in i+1th layer to number of filters in ith layer --> 1,0.5,2,etc
        num_dense: Number of neurons in dense layer --> 128
        input_channels: number of channels in input layer --> 3 (RGB)
        num_classes: Number of output classes --> 10
        num_conv: number of Conv-activation-maxpool blocks in the CNN model --> given:5

        Raises:
            ValueError: if `activation_func` is not one of the supported names.
        '''
        super().__init__()

        # Validate up front: an unknown name previously left the activation unbound
        # and surfaced as an UnboundLocalError further down.
        activation_key = str(activation_func).lower()
        if activation_key not in self._ACTIVATIONS:
            raise ValueError(
                "Unsupported activation_func {!r}; expected one of {}".format(
                    activation_func, sorted(self._ACTIVATIONS)))
        make_activation = self._ACTIVATIONS[activation_key]

        self.layers = nn.ModuleList()
        in_channels = input_channels
        for layer in range(num_conv):
            # Geometric schedule num_filters * filter_org**layer; clamp to 1 so that
            # shrinking ratios (filter_org < 1) never truncate to a 0-channel conv.
            out_channels = max(1, int(num_filters * filter_org ** layer))
            self.layers.append(nn.Conv2d(in_channels=in_channels,
                                         out_channels=out_channels,
                                         kernel_size=size_filters,
                                         padding=size_filters // 2))
            # Fresh activation module per block rather than one shared instance.
            self.layers.append(make_activation())
            self.layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            in_channels = out_channels

        # Global average pooling makes the head independent of the input resolution.
        self.adaptive_pool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc_layers = nn.Sequential(
            nn.Linear(in_channels, num_dense),
            make_activation(),
            nn.Linear(num_dense, num_classes),
        )

    def forward(self, x):
        '''Map a batch of images (N, C, H, W) to class logits of shape (N, num_classes).'''
        for layer in self.layers:
            x = layer(x)
        x = self.adaptive_pool(x)
        # (N, C, 1, 1) -> (N, C)
        x = torch.flatten(x, 1)
        return self.fc_layers(x)

In [None]:
def get_dataloaders(dir='/kaggle/input/inaturalist-10-class/train',split=0.2,batch_size=64):
    """Build stratified train/validation DataLoaders from an ImageFolder directory.

    Args:
        dir: Root folder laid out as one sub-directory per class.
        split: Fraction of the samples held out for validation.
        batch_size: Batch size used by both loaders.

    Returns:
        (train_loader, val_loader); only the training loader shuffles.
    """
    # Resize to 224x224, convert to tensor, then normalise each channel with the
    # mean/std values commonly used for ImageNet-style inputs.
    preprocess = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])
    full_set = datasets.ImageFolder(root=dir, transform=preprocess)
    targets = full_set.targets

    # The splitter only needs the labels; the feature argument is a placeholder
    # of matching length. The fixed random_state keeps the split reproducible.
    placeholder_features = torch.zeros(len(targets))
    stratified = StratifiedShuffleSplit(n_splits=1, test_size=split, random_state=42)
    train_idx, val_idx = next(stratified.split(placeholder_features, targets))

    def make_loader(indices, shuffle):
        # Both loaders view the same underlying dataset through an index subset.
        return DataLoader(Subset(full_set, indices), batch_size=batch_size,
                          shuffle=shuffle, num_workers=2)

    return make_loader(train_idx, True), make_loader(val_idx, False)

In [None]:
def get_optimizer(optim,lr,model):
    """Create the optimizer selected by name for `model`'s parameters.

    Args:
        optim: 'sgd' (plain SGD), 'momentum' (SGD with momentum 0.9) or
               'adam' (Adam with weight_decay 0.005).
        lr: Learning rate.
        model: Module whose parameters will be optimised.

    Returns:
        A torch.optim.Optimizer instance.

    Raises:
        ValueError: if `optim` is not a supported name (previously this fell
            through and returned None, failing later at the first optimizer call).
    """
    if optim == 'sgd':
        return torch.optim.SGD(model.parameters(), lr, weight_decay=0, momentum=0)
    if optim == 'momentum':
        return torch.optim.SGD(model.parameters(), lr, weight_decay=0, momentum=0.9)
    if optim == 'adam':
        return torch.optim.Adam(model.parameters(), lr, weight_decay=0.005)
    raise ValueError(
        "Unsupported optimizer {!r}; expected 'sgd', 'momentum' or 'adam'".format(optim))

In [None]:
# Smoke test: build one CNN with a fixed configuration and move it to `device`.
# With filter_org=2 the filter count doubles from one conv block to the next.
my_model = CNN(
    num_filters=32,
    size_filters=3,
    activation_func='relu',
    filter_org=2,
    num_dense=128
).to(device)

# Report the model's layers for a single 3x224x224 (C, H, W) input.
summary(my_model, input_size=(3, 224, 224))

In [None]:
import wandb

# Hyperparameter sweep definition for the CNN training runs.
sweep_config = {
    # Bayesian search instead of 'grid': grid search can only enumerate discrete
    # values, but learning_rate below is a continuous min/max range.
    'method': 'bayes',
    'metric': {
        # Must match the key that train() passes to wandb.log ("val_accuracy").
        'name': 'val_accuracy',
        'goal': 'maximize'
    },
    # Upper bound on the number of runs, since a non-grid search never exhausts
    # its space on its own. Adjust to the available compute budget.
    'run_cap': 50,
    'parameters': {
        'num_filters':{
            'values':[32,64]
        },
        'size_filters':{
            'values':[3,5,10]
        },
        'activation_func':{
            'values':['relu','gelu','silu','mish']
        },
        'filter_org':{
            'values':[1,0.5,2]
        },
        'num_dense':{
            'values':[128,256,512]
        },
        'batch_size':{
            'values':[16,32,64]
        },
        'optimizer':{
            'values':['sgd','momentum','adam']
        },
        # Continuous range; sampled by the search strategy.
        'learning_rate':{
            'min':0.0001,
            'max':0.01
        }
    }
}

# Register the sweep with W&B; the returned id is what the agent attaches to.
sweep_id = wandb.sweep(sweep_config, project="DA6401_Assign2", entity="ishita49-indian-institute-of-technology-madras")

In [None]:
def _evaluate_accuracy(model, data_loader):
    """Return the top-1 accuracy (in percent) of `model` over `data_loader`."""
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    # An empty loader would otherwise raise ZeroDivisionError.
    return 100 * correct / total if total else 0.0


def train():
    """Run one W&B-tracked training job: train the CNN for 20 epochs, then validate.

    Hyperparameters come from the W&B run config; `config_defaults` supplies the
    values used when the function is run outside a sweep. Per-epoch mean training
    loss is logged as "train_loss" and the final validation accuracy (percent)
    as "val_accuracy".
    """
    config_defaults = {
        'num_filters':32,
        'size_filters':3,
        'activation_func':'relu',
        'filter_org':2,
        'num_dense':128,
        'batch_size':64,
        'optimizer':'sgd',
        'learning_rate':0.005
    }
    num_epochs = 20

    # The context manager finishes the run even if training raises, so a failed
    # trial does not leave a dangling run behind for the next one.
    with wandb.init(config=config_defaults) as run:
        config = run.config

        model = CNN(num_filters=config.num_filters,
                    size_filters=config.size_filters,
                    activation_func=config.activation_func,
                    filter_org=config.filter_org,
                    num_dense=config.num_dense,
                    num_classes=10).to(device)
        criterion = nn.CrossEntropyLoss()
        optimizer = get_optimizer(optim=config.optimizer, lr=config.learning_rate, model=model)
        train_loader, val_loader = get_dataloaders(batch_size=config.batch_size)

        # Train
        for epoch in range(num_epochs):
            model.train()
            total_loss = 0.0
            for images, labels in train_loader:
                images = images.to(device)
                labels = labels.to(device)

                outputs = model(images)
                loss = criterion(outputs, labels)

                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            # Mean of per-batch losses; max(..., 1) guards against an empty loader.
            avg_loss = total_loss / max(len(train_loader), 1)
            print('Epoch [{}/{}], Loss: {:.4f}'.format(epoch+1, num_epochs, avg_loss))
            run.log({"epoch": epoch + 1, "train_loss": avg_loss})

        # Validation
        accuracy = _evaluate_accuracy(model, val_loader)
        print('Validation Accuracy: {:.2f}%'.format(accuracy))

        run.log({"val_accuracy": accuracy})

In [None]:
# Start a sweep agent in this process; it invokes train() for the sweep identified by sweep_id.
# NOTE(review): no `count` is passed, so the agent presumably keeps requesting runs until the
# sweep is exhausted or stopped — confirm against the W&B agent documentation.
wandb.agent(sweep_id, function=train)