<a href="https://colab.research.google.com/github/CS23M005/Assignment2_PARTB/blob/main/dl_assn2_partb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np

import math

# Prefer the GPU when CUDA is usable; otherwise everything runs on the CPU.
_backend = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_backend)

In [None]:
# Show which device was selected (GPU vs. CPU) before starting any training.
device

device(type='cuda')

In [None]:
# Download the nature_12K archive (saved as nature_12K.zip) and unpack it quietly.
!wget https://storage.googleapis.com/wandb_datasets/nature_12K.zip -O nature_12K.zip
!unzip -q nature_12K.zip

--2024-04-06 15:55:32--  https://storage.googleapis.com/wandb_datasets/nature_12K.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 108.177.121.207, 142.250.1.207, 108.177.120.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|108.177.121.207|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 3816687935 (3.6G) [application/zip]
Saving to: 'nature_12K.zip'


2024-04-06 15:55:45 (278 MB/s) - 'nature_12K.zip' saved [3816687935/3816687935]



In [None]:
# Delete the downloaded archive; it was already unpacked by the previous cell.
!rm nature_12K.zip

In [None]:
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
from torch.utils.data import Subset, DataLoader
import torchvision
from torch import optim
from tqdm import tqdm
from torch.utils.data import DataLoader
from torchvision import datasets
import torchvision.models as models


In [None]:
# Map an optimizer name to a torch optimizer built over the model's parameters.
def getOptim(model,optim_name, learning_rate):
    """Create the optimizer selected by ``optim_name`` for ``model``.

    Args:
        model: module whose ``parameters()`` are handed to the optimizer.
        optim_name: ``'sgd'`` selects SGD and ``'adam'`` selects Adam; every
            other value (including ``'nadam'``) selects NAdam.
        learning_rate: value passed as ``lr`` to the optimizer.

    Returns:
        The constructed ``torch.optim`` optimizer instance.
    """
    if optim_name == 'sgd':
        return optim.SGD(model.parameters(), lr=learning_rate)
    if optim_name == 'adam':
        return optim.Adam(model.parameters(), lr=learning_rate)
    # Fallback branch: anything that is not 'sgd' or 'adam' ends up here.
    return optim.NAdam(model.parameters(), lr=learning_rate)


In [None]:
# Run a forward pass over a whole loader and report accuracy and summed loss.
def check_accuracy(loader,model,criterion,batchSize):
    """Evaluate ``model`` on every batch of ``loader`` without tracking gradients.

    Args:
        loader: iterable yielding ``(inputs, labels)`` batches.
        model: module producing one score per class along dimension 1.
        criterion: callable used as ``criterion(scores, labels)``. Its result is
            treated as the mean loss of the batch (assumption: default 'mean'
            reduction, as with ``nn.CrossEntropyLoss()`` -- confirm if changed).
        batchSize: nominal batch size. Kept only so existing callers keep
            working; the real size of each batch is used instead, because the
            last batch of a loader can be shorter than ``batchSize``.

    Returns:
        ``(accuracy_percent, total_loss)`` where ``total_loss`` is the loss
        summed over all evaluated samples. ``(0.0, 0.0)`` for an empty loader.
    """
    correct = 0
    seen = 0
    total_loss = 0.0

    # Move each batch to wherever the model's weights live, so inputs and
    # weights are always on the same device. A parameter-free model leaves the
    # batches where they are.
    first_param = next(model.parameters(), None)

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            for x, y in loader:
                if first_param is not None:
                    x = x.to(device=first_param.device)
                    y = y.to(device=first_param.device)
                scores = model(x) # forward propagation
                loss = criterion(scores, y)
                batch_len = y.size(0)
                # Undo the per-batch averaging with the real batch length.
                total_loss += loss.item() * batch_len
                _, predictions = scores.max(1)
                correct += (predictions == y).sum().item()
                seen += batch_len
    finally:
        # Hand the model back in the mode the caller had it in.
        model.train(was_training)

    if seen == 0:
        return 0.0, 0.0
    return (correct / seen) * 100, total_loss

In [None]:
    # Build the datasets used for fine-tuning: a plain view and an augmented view of the
    # same training folder, partitioned once into train / validation indices.

    # without augmentation (also used for every validation set)
    transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,),(0.5,))])

    # with augmentation (applied to training images only)
    transform2 = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize((0.5,),(0.5,))])
    # NOTE(review): the pretrained ResNet-50 weights were presumably trained with ImageNet
    # mean/std normalisation rather than 0.5/0.5 -- confirm before changing.

    plain_folder = datasets.ImageFolder(root='inaturalist_12K/train',transform=transform)
    augmented_folder = datasets.ImageFolder(root='inaturalist_12K/train',transform=transform2)

    # A single shuffled permutation is shared by both views, so the "aug" and "no aug"
    # settings train and validate on exactly the same images. Holding out one fifth
    # reproduces the former 8000 / 1999 split for a folder of 9999 images without
    # hard-coding those sizes.
    num_images = len(plain_folder)
    num_val = num_images // 5
    shuffled_indices = torch.randperm(
        num_images, generator=torch.Generator().manual_seed(42)).tolist()
    val_indices = shuffled_indices[:num_val]
    train_indices = shuffled_indices[num_val:]

    train_dataset = torch.utils.data.Subset(plain_folder, train_indices)
    val_dataset = torch.utils.data.Subset(plain_folder, val_indices)

    # Augmentation belongs on the training side only: the validation set paired with the
    # augmented training set reads un-augmented images, so validation metrics are
    # deterministic and comparable across runs.
    train_dataset2 = torch.utils.data.Subset(augmented_folder, train_indices)
    val_dataset2 = torch.utils.data.Subset(plain_folder, val_indices)

    # Build the (train, validation) loader pair for the requested augmentation setting.
    def getData(data_aug, batchSize):
        """Return ``(train_loader, val_loader)`` for one augmentation setting.

        Args:
            data_aug: ``"no"`` selects ``train_dataset`` / ``val_dataset``; any
                other value selects ``train_dataset2`` / ``val_dataset2``.
            batchSize: batch size given to both loaders.

        Returns:
            Two shuffling ``DataLoader`` objects using 2 workers and pinned memory.
        """
        if data_aug == "no":
            train_split, val_split = train_dataset, val_dataset
        else:
            train_split, val_split = train_dataset2, val_dataset2

        # Both loaders are configured identically.
        loader_options = dict(batch_size=batchSize, shuffle=True, num_workers=2, pin_memory=True)
        train_loader = torch.utils.data.DataLoader(train_split, **loader_options)
        val_loader = torch.utils.data.DataLoader(val_split, **loader_options)
        return train_loader, val_loader

In [None]:
# Pretrained ResNet-50 with a frozen backbone and a new trainable classification head.
def resnet50_ud(output_size):
    """Build a ResNet-50 whose only trainable layer is a new final ``fc`` layer.

    Args:
        output_size: number of output units of the replacement ``fc`` layer.

    Returns:
        The model, with every parameter frozen except those of ``fc``.
    """
    model = models.resnet50(pretrained=True)

    # Freeze everything that came with the pretrained network ...
    model.requires_grad_(False)

    # ... then swap in a new head; a freshly built Linear layer is trainable by default.
    model.fc = torch.nn.Linear(model.fc.in_features, output_size)
    return model

In [None]:
# Fine-tune the ResNet-50 head: fetch loaders, train epoch by epoch, report metrics.
def train_cnn_ud(output_size,optim_name,batchSize,num_epochs,learning_rate, data_aug):
    """Train the model from ``resnet50_ud`` and report metrics after every epoch.

    Args:
        output_size: number of classes, forwarded to ``resnet50_ud``.
        optim_name: optimizer name, forwarded to ``getOptim``.
        batchSize: batch size, forwarded to ``getData`` and ``check_accuracy``.
        num_epochs: number of passes over the training loader.
        learning_rate: learning rate, forwarded to ``getOptim``.
        data_aug: augmentation setting, forwarded to ``getData``.

    Returns:
        None. Metrics are printed each epoch and, when a Weights & Biases run
        is active, logged to it.
    """
    # Imported here so the function does not rely on a later notebook cell having
    # imported wandb; a plain training run works without it.
    try:
        import wandb
    except ImportError:
        wandb = None

    train_loader, val_loader = getData(data_aug, batchSize)
    model = resnet50_ud(output_size).to(device)
    optimizer = getOptim(model,optim_name, learning_rate)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        for data, targets in tqdm(train_loader):
            data = data.to(device=device)
            targets = targets.to(device=device)
            scores = model(data) #forward propagation
            loss = criterion(scores,targets)
            optimizer.zero_grad()
            loss.backward() #backward propagation to compute gradients
            optimizer.step()

        train_accuracy,train_loss = check_accuracy(train_loader, model,criterion,batchSize)
        validation_accuracy,validation_loss = check_accuracy(val_loader, model,criterion,batchSize)
        print(f"train_accuracy:{train_accuracy:.4f},train_loss:{train_loss:.4f}")
        print(f"validation_accuracy:{validation_accuracy:.4f},validation_loss:{validation_loss:.4f}")

        # One log call per epoch keeps all four metrics on the same W&B step;
        # separate calls would each advance the step counter.
        if wandb is not None and wandb.run is not None:
            wandb.log({
                'train_accuracy': train_accuracy,
                'train_loss': train_loss,
                'val_accuracy': validation_accuracy,
                'val_loss': validation_loss,
            })


In [None]:
# Settings passed to train_cnn_ud for a single stand-alone training run.
output_size = 10
optim_name, learning_rate = 'adam', 1e-3
batchSize, num_epochs = 32, 5
data_aug = "no"

# Remaining settings: defined in this cell but not passed to the call below.
dropOut = 0.1
input_channel, num_filters, filter_size = 3, 16, 3
activation_fun, filter_config = "relu", "same"
stride, poolstride, poolsize = 1, 2, 2

train_cnn_ud(
    output_size=output_size,
    optim_name=optim_name,
    batchSize=batchSize,
    num_epochs=num_epochs,
    learning_rate=learning_rate,
    data_aug=data_aug,
)

100%|██████████| 250/250 [00:46<00:00,  5.35it/s]


train_accuracy:75.5125,train_loss:5980.1779
validation_accuracy:73.0365,validation_loss:1675.5604


NameError: name 'wandb' is not defined

In [None]:
!pip install wandb
import wandb
wandb.login()



[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········································


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [None]:
def main_fun():
    """Entry point executed by the sweep agent for each trial.

    Starts exactly one W&B run, reads the hyper-parameters chosen for this
    trial from the run's config, names the run after them and trains the model.
    The run is closed when the ``with`` block exits.
    """
    # A single init per trial: the config is only available after init, so the
    # descriptive name is assigned to the run afterwards instead of opening a
    # second run just to pass ``name=``.
    with wandb.init(project = 'Assignment2_PartB') as run:
        params = run.config
        run.name = ('optim_'+str(params.optim_name)
                    +'epochs'+str(params.num_epochs) + 'batch_size_'+str(params.batchSize)
                    +'lear_rate_'+str(params.learning_rate) + 'data_aug_'+ str(params.data_aug))
        train_cnn_ud(output_size,params.optim_name,params.batchSize,params.num_epochs,params.learning_rate, params.data_aug)

# Bayesian hyper-parameter search that maximises the logged 'val_accuracy'.
sweep_params = dict(
    method='bayes',
    name='cs23m005',
    metric=dict(goal='maximize', name='val_accuracy'),
    parameters=dict(
        optim_name=dict(values=['sgd','adam','nadam']),
        batchSize=dict(values=[32,64]),
        data_aug=dict(values=['yes','no']),
        num_epochs=dict(values=[5,10]),
        learning_rate=dict(values=[1e-3,1e-4]),
    ),
)

# Register the sweep, let one agent execute 10 trials through main_fun, then close W&B.
sweepId = wandb.sweep(sweep_params, project='Assignment2_PartB')
wandb.agent(sweepId, function=main_fun, count=10)
wandb.finish()

Create sweep with ID: 7687mpbh
Sweep URL: https://wandb.ai/cs23m005/Assignment2_PartB/sweeps/7687mpbh


[34m[1mwandb[0m: Agent Starting Run: xei231sm with config:
[34m[1mwandb[0m: 	batchSize: 32
[34m[1mwandb[0m: 	data_aug: no
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 10
[34m[1mwandb[0m: 	optim_name: nadam




VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

100%|██████████| 250/250 [00:52<00:00,  4.79it/s]


train_accuracy:73.6500,train_loss:6341.6382
validation_accuracy:70.6853,validation_loss:1791.8653


100%|██████████| 250/250 [00:45<00:00,  5.48it/s]


train_accuracy:78.5000,train_loss:5378.3509
validation_accuracy:74.7874,validation_loss:1580.6254


100%|██████████| 250/250 [00:45<00:00,  5.54it/s]


train_accuracy:79.6375,train_loss:5004.2228
validation_accuracy:74.4872,validation_loss:1576.8329


100%|██████████| 250/250 [00:45<00:00,  5.50it/s]


train_accuracy:77.9250,train_loss:5270.6567
validation_accuracy:70.8854,validation_loss:1744.9621


100%|██████████| 250/250 [00:45<00:00,  5.52it/s]


train_accuracy:81.3875,train_loss:4556.3213
validation_accuracy:75.1376,validation_loss:1575.5219


100%|██████████| 250/250 [00:45<00:00,  5.53it/s]


train_accuracy:81.7375,train_loss:4487.3464
validation_accuracy:75.3377,validation_loss:1577.9817


100%|██████████| 250/250 [00:45<00:00,  5.50it/s]


train_accuracy:81.9125,train_loss:4352.0603
validation_accuracy:74.3372,validation_loss:1601.4466


100%|██████████| 250/250 [00:44<00:00,  5.61it/s]


train_accuracy:81.7250,train_loss:4440.2895
validation_accuracy:75.6878,validation_loss:1582.6076


100%|██████████| 250/250 [00:44<00:00,  5.62it/s]


train_accuracy:82.7750,train_loss:4209.5522
validation_accuracy:74.9375,validation_loss:1592.7119


100%|██████████| 250/250 [00:46<00:00,  5.40it/s]


train_accuracy:82.8625,train_loss:4134.3779
validation_accuracy:74.0870,validation_loss:1612.1130


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_accuracy,▁▅▆▄▇▇▇▇██
train_loss,█▅▄▅▂▂▂▂▁▁
val_accuracy,▁▇▆▁▇█▆█▇▆
val_loss,█▁▁▆▁▁▂▁▂▂

0,1
train_accuracy,82.8625
train_loss,4134.37787
val_accuracy,74.08704
val_loss,1612.11303


[34m[1mwandb[0m: Agent Starting Run: kgfpy7xt with config:
[34m[1mwandb[0m: 	batchSize: 32
[34m[1mwandb[0m: 	data_aug: yes
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	optim_name: adam




VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

100%|██████████| 250/250 [01:12<00:00,  3.43it/s]


train_accuracy:68.6500,train_loss:7642.6066
validation_accuracy:67.8339,validation_loss:1943.1111


100%|██████████| 250/250 [01:05<00:00,  3.81it/s]


train_accuracy:69.4625,train_loss:7314.7036
validation_accuracy:69.7849,validation_loss:1848.3105


100%|██████████| 250/250 [01:05<00:00,  3.82it/s]


train_accuracy:69.1125,train_loss:7415.6193
validation_accuracy:68.4342,validation_loss:1901.7484


100%|██████████| 250/250 [01:05<00:00,  3.81it/s]


train_accuracy:71.0250,train_loss:6963.4734
validation_accuracy:69.5348,validation_loss:1827.0083


100%|██████████| 250/250 [01:05<00:00,  3.83it/s]


train_accuracy:69.7250,train_loss:7280.6234
validation_accuracy:67.3837,validation_loss:1984.1631


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
train_accuracy,▁▃▂█▄
train_loss,█▅▆▁▄
val_accuracy,▂█▄▇▁
val_loss,▆▂▄▁█

0,1
train_accuracy,69.725
train_loss,7280.62339
val_accuracy,67.38369
val_loss,1984.16309


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mytoixt8 with config:
[34m[1mwandb[0m: 	batchSize: 64
[34m[1mwandb[0m: 	data_aug: yes
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_epochs: 5
[34m[1mwandb[0m: 	optim_name: sgd




VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

100%|██████████| 125/125 [01:12<00:00,  1.72it/s]


train_accuracy:37.8375,train_loss:16614.5603
validation_accuracy:38.9695,validation_loss:4249.6530


100%|██████████| 125/125 [01:05<00:00,  1.92it/s]


train_accuracy:51.9500,train_loss:15179.4118
validation_accuracy:50.3752,validation_loss:3902.0290


100%|██████████| 125/125 [01:04<00:00,  1.94it/s]


train_accuracy:57.1000,train_loss:13949.4571
validation_accuracy:57.6288,validation_loss:3584.9539


100%|██████████| 125/125 [01:05<00:00,  1.90it/s]


train_accuracy:59.5000,train_loss:13055.8025
validation_accuracy:58.2291,validation_loss:3376.9994


 73%|███████▎  | 91/125 [00:48<00:19,  1.73it/s]

In [None]:
import argparse


def parse_args(argv=None):
    """Parse the optional training parameters from the command line.

    Args:
        argv: list of argument strings to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``; note that inside a notebook kernel
            that list holds the kernel's own arguments, so pass an explicit
            list (for example ``[]``) there.

    Returns:
        ``argparse.Namespace`` with ``wandb_project``, ``optim_name``,
        ``batchSize``, ``data_aug``, ``num_epochs`` and ``learning_rate``.
    """
    p = argparse.ArgumentParser(description = "provide optinal parameters for training")
    p.add_argument('-wp', '--wandb_project', type=str, default="Assignment2_PartB", help="wandb project name")
    p.add_argument('-opt', '--optim_name', type=str, default="nadam", choices = ['sgd','adam','nadam'], help="optimizer for backprop")
    p.add_argument('-bS', '--batchSize', type=int, default=32, choices = [32, 64], help="batch size")
    p.add_argument('-ag', '--data_aug', type=str, default="no", choices = ['yes', 'no'], help="data augmentation")
    p.add_argument('-nE', '--num_epochs', type=int, default=5, choices = [5, 10], help="number of epochs")
    p.add_argument('-lR', '--learning_rate', type=float, default=1e-3, choices = [1e-3, 1e-4], help="learning rate")
    # The parser was previously built but never run, so callers received None.
    return p.parse_args(argv)

# Command-line driver: parse the options, open a W&B run named after them, train.
args = parse_args()
# The option is declared as --wandb_project, so the attribute is `wandb_project`.
wandb.init(
    project = args.wandb_project,
    name = f'optimizer {str(args.optim_name)} epochs {str(args.num_epochs)} learning rate {args.learning_rate}',
)

train_cnn_ud(output_size,args.optim_name,args.batchSize,args.num_epochs,args.learning_rate, args.data_aug)


