In [1]:
import torch
from torchvision.datasets import ImageFolder
import torchvision.transforms as transforms
from torch.utils.data import Subset, DataLoader
import torchvision
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from tqdm import tqdm
from torch.utils.data import DataLoader
from torchvision import datasets
import torchvision.models as models

In [2]:
!pip install --upgrade wandb
import wandb
# import socket
# socket.setdefaulttimeout(30)
wandb.login(key='1d2423ec9b728fe6cc1e2c0b9a2af0e67a45183c')


Collecting wandb
  Downloading wandb-0.19.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (10 kB)
Downloading wandb-0.19.9-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (20.9 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m20.9/20.9 MB[0m [31m82.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: wandb
  Attempting uninstall: wandb
    Found existing installation: wandb 0.19.6
    Uninstalling wandb-0.19.6:
      Successfully uninstalled wandb-0.19.6
Successfully installed wandb-0.19.9


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mcs24m047[0m ([33mcs24m047-iitm-ac-in[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [3]:
# from google.colab import drive
# drive.mount('/content/drive')

In [4]:
train_directory='/kaggle/input/dataset2/inaturalist_12K/train'
test_directory='/kaggle/input/dataset2/inaturalist_12K/val'

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

cuda


In [6]:
transform_basic = transforms.Compose([
    transforms.Resize((224,224)), #reshaped the data to be used by RESNET50
    transforms.ToTensor(),
    transforms.Normalize((0.5,),(0.5,))]) # normalized for better accuracy.

train_dataset = datasets.ImageFolder(root=train_directory,transform=transform_basic) # train_data loading
training_dataset,validation_dataset = torch.utils.data.random_split(train_dataset,[8000,1999]) #splitting the data into 80%(training) and 20%(validation) The overall data size is 9999

transform_augmented = transforms.Compose([
    transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
    transforms.RandomRotation(10),      # Randomly rotate the image by a maximum of 10 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),  # Adjust brightness, contrast, saturation, and hue
    transforms.RandomResizedCrop(224),  # Randomly crop and resize the image to 224x224
    transforms.ToTensor(),              # Convert the image to a PyTorch tensor
    transforms.Normalize((0.5,),(0.5,))  # Normalize the image
]) # for augumenting the training data
train_dataset2 = datasets.ImageFolder(root=train_directory,transform=transform_augmented)
training_dataset_aug,validation_dataset_aug = torch.utils.data.random_split(train_dataset2,[8000,1999]) #  #splitting the data into 80%(training) and 20%(validation) The overall data size is 9999

test_dataset = datasets.ImageFolder(root=test_directory,transform=transform_basic); # test data loading.

In [7]:
def data_loader_creator(augmentation_flag,batch_size): # function to return the data loaders depending on augumentation.
    if(augmentation_flag == 'no'):
        train_loader = torch.utils.data.DataLoader(training_dataset,batch_size =batch_size,shuffle = True,num_workers=2,pin_memory=True)
        val_loader = torch.utils.data.DataLoader(validation_dataset,batch_size =batch_size,shuffle = True,num_workers=2,pin_memory=True)
        return train_loader,val_loader
    else:
        train_loader_aug = torch.utils.data.DataLoader(training_dataset_aug,batch_size =batch_size,shuffle = True,num_workers=4,pin_memory=True)
        val_loader_aug = torch.utils.data.DataLoader(validation_dataset_aug,batch_size =batch_size,shuffle = True,num_workers=4,pin_memory=True)
        return train_loader_aug,val_loader_aug

In [8]:
def RESNET50(NUM_OF_CLASSES): # this function returns the model by freezing all but not last layer
    model = models.resnet50(pretrained=True)
    num_ftrs = model.fc.in_features
    model.fc = torch.nn.Linear(num_ftrs, NUM_OF_CLASSES) # modifying output layer to 10 neurons 
    
    for param in model.parameters(): # freezing
        param.requires_grad = False
        
    for param in model.fc.parameters(): #unfreezing
        param.requires_grad = True
   
    return model

In [9]:
def RESNET50_1(k,NUM_OF_CLASSES): #this function returns the model by freezing first k layers only
    model = models.resnet50(pretrained=True)    
    
    params = list(model.parameters())
    for param in params[:k]:
        param.requires_grad = False #freezing
        
    num_ftrs = model.fc.in_features
    
    model.fc = torch.nn.Linear(num_ftrs, NUM_OF_CLASSES)
    
    return model

In [10]:
def RESNET50_2(neurons_dense,NUM_OF_CLASSES): #this function returns the model by freezing all but not last layer after adding dense layer
    
    model = models.resnet50(pretrained=True)    
    
    activation_function_layer = nn.ReLU()
    
    for params in model.parameters():
        params.requires_grad = False #freezing
        
    num_ftrs = model.fc.in_features
    
    model.fc = nn.Sequential(
      nn.Linear(num_ftrs,neurons_dense), #adding dense layer
      activation_function_layer,
      nn.Dropout(0.4),
      nn.Linear(neurons_dense, 10)
    )

    for param in model.fc.parameters():
        param.requires_grad = True  #unfreezing
    return model

In [11]:
def Accuracy_calculator(loader,model,criterion,batch_size): # function to clculate the accuracy and loss
    no_of_correct_predictions = 0
    no_of_samples = 0
    total_loss = 0.0
    model.eval()
    with torch.no_grad():
        for x, y in loader:
            x = x.to(device=device)
            y = y.to(device=device)
            scores = model(x)
            loss = criterion(scores, y)
            total_loss += loss.item()*batch_size # sum of cross entropies
            _, predictions = scores.max(1)
            no_of_correct_predictions += (predictions == y).sum().item() # correctly classified data
            no_of_samples += predictions.size(0)
    model.train()
    return (no_of_correct_predictions / no_of_samples)*100 , total_loss / no_of_samples

In [12]:
def train_the_model(batch_size,no_of_epochs,learning_rate,augmentation_flag,strategy_flag,NUM_OF_CLASSES):

    train_loader,val_loader = data_loader_creator(augmentation_flag,batch_size)  # getting dataloaders.

    #test_loader = torch.utils.data.DataLoader(test_data,batch_size =batchSize,shuffle = True,num_workers=2,pin_memory=True)

    no_of_input_channels=3
    no_of_classes=10

    if(strategy_flag == 0):
        model = RESNET50(NUM_OF_CLASSES).to(device)
    elif(strategy_flag == 1):
        model = RESNET50_1(10,NUM_OF_CLASSES).to(device)
    else:
        model = RESNET50_2(256,NUM_OF_CLASSES).to(device)

    # model=CNN(no_of_input_channels, no_of_classes,no_of_filters,size_of_filter,no_of_neurons,
    #           activation_function_name,dropout_probability,batch_normalization)
    # model=nn.DataParallel(model)
    # model=model.to(device)

    # if(optimizer_name == 'sgd'):
    #     optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    # elif(optimizer_name == 'adam'):
    #     optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    # else:
    #   optimizer = optim.NAdam(model.parameters(), lr=learning_rate) # optimzers selection

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss() # since it is classification problem corss entropy loss is used.

    for epoch in range(no_of_epochs): # performs the training.
        for batchId, (input_images, target_classes) in enumerate(tqdm(train_loader)):
            # Get data to cuda if possible
            input_images = input_images.to(device=device)
            target_classes = target_classes.to(device=device)
            # forward
            scores = model(input_images) # give the last layer pre-activation values.
            loss = criterion(scores,target_classes) # gets the overll cross entropy loss for each batch
            optimizer.zero_grad() # gradients are made to zero for each batch.
            loss.backward()  # calculaing the gradients
            optimizer.step() #updates the parameters
        training_accuracy,training_loss = Accuracy_calculator(train_loader, model,criterion,batch_size) # calculates the accuracy and loss at one go
        validation_accuracy,validation_loss = Accuracy_calculator(val_loader, model,criterion,batch_size)
        #  the below line can be uncommenteed for test accuracy and loss
        #test_accuracy,test_loss = check_accuracy(test_loader, model,criterion,batchSize)
        print(f"training_accuracy:{training_accuracy:.4f},training_loss:{training_loss:.4f}")
        print(f"validation_accuracy:{validation_accuracy:.4f},validation_loss:{validation_loss:.4f}")
        #print(f"test_accuracy:{test_accuracy:.4f},test_loss:{test_loss:.4f}")
        wandb.log({'training_accuracy':training_accuracy}) # plotting  the data in wandb
        wandb.log({'training_loss':training_loss})
        wandb.log({'validation_accuracy':validation_accuracy})
        wandb.log({'validation_loss':validation_loss})

In [13]:
# Sweep config for wandb plotting
# wandb.init(project ='DA6401_Assignment_2')
sweep_config = {
    'name'  : "run_part_b",
    'method': 'bayes',
    'metric': {
      'name': 'validation_accuracy',
      'goal': 'maximize'
    },
    'parameters': {
        'batch_size': {
            'values': [32, 64]
        },
        'no_of_epochs': {
            'values': [5,10]
        },
        'learning_rate': {
            'values': [1e-3, 1e-4]
        },
        'augmentation_flag': {
            'values': ['yes','no']
        },
        'strategy_flag': {
            'values': [2]
        }
    }
}

# sweep_id = wandb.sweep(sweep_config, project="DA6401_Assignment_2")

In [14]:
def run_experiment():
    try:
        run = wandb.init()  # No config argument here
        cfg = run.config
        run.name = (
            f"Batch_size: {cfg.batch_size}, "
            f"No_of_epochs: {cfg.no_of_epochs}, "
            f"Learning_Rate: {cfg.learning_rate}, "
            f"Augmentation_flag: {cfg.augmentation_flag}, "
            f"Strategy_flag: {cfg.strategy_flag}"
        )
        train_the_model(
            cfg.batch_size,
            cfg.no_of_epochs,
            cfg.learning_rate,
            cfg.augmentation_flag,
            cfg.strategy_flag,
            10
        )
    except Exception as e:
        print(f"Error during training: {e}")
        if wandb.run:
            wandb.finish(exit_code=1)
        raise
    finally:
        if wandb.run:
            wandb.finish
if __name__=="__main__":
    sweep_id = wandb.sweep(sweep_config, project="DA6401_Assignment_2")
    wandb.agent(sweep_id, run_experiment ,  count=1)

Create sweep with ID: t8hxnsc8
Sweep URL: https://wandb.ai/cs24m047-iitm-ac-in/DA6401_Assignment_2/sweeps/t8hxnsc8


[34m[1mwandb[0m: Agent Starting Run: 8myk31o4 with config:
[34m[1mwandb[0m: 	augmentation_flag: no
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	no_of_epochs: 5
[34m[1mwandb[0m: 	strategy_flag: 2


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 204MB/s]
100%|██████████| 125/125 [01:11<00:00,  1.74it/s]


training_accuracy:72.2625,training_loss:0.8466
validation_accuracy:70.2851,validation_loss:0.9271


100%|██████████| 125/125 [00:46<00:00,  2.70it/s]


training_accuracy:76.7875,training_loss:0.6943
validation_accuracy:74.0870,validation_loss:0.8264


100%|██████████| 125/125 [00:45<00:00,  2.76it/s]


training_accuracy:78.0375,training_loss:0.6665
validation_accuracy:75.3377,validation_loss:0.8039


100%|██████████| 125/125 [00:46<00:00,  2.68it/s]


training_accuracy:78.4250,training_loss:0.6405
validation_accuracy:74.5873,validation_loss:0.7988


100%|██████████| 125/125 [00:45<00:00,  2.72it/s]


training_accuracy:80.0000,training_loss:0.5997
validation_accuracy:75.0375,validation_loss:0.7836


0,1
training_accuracy,▁▅▆▇█
training_loss,█▄▃▂▁
validation_accuracy,▁▆█▇█
validation_loss,█▃▂▂▁

0,1
training_accuracy,80.0
training_loss,0.5997
validation_accuracy,75.03752
validation_loss,0.78355
