# Imports

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
#for dirname, _, filenames in os.walk('/kaggle/input'):
 #   for filename in filenames:
        #print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import zipfile
import glob
from PIL import Image
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import torchvision
from torchvision import datasets, transforms
import torchvision.models as models
from datetime import datetime
import wandb
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
random_state = 2137
np.random.seed(random_state)
torch.manual_seed(random_state)
torch.cuda.manual_seed(random_state)



In [3]:
os.environ["WANDB_API_KEY"] = 'cac5e1e8113d6c2054d4bc95b0b518086be2b55d'

# Initialising the project on wandb

In [4]:
def initialize_wandb(sweep_config):
    current_time = datetime.now().strftime("%m-%d_%H:%M")
    #info = my_config.get_configuration_info()
    run_name = f"tuning_{current_time}"
    #print(run_name)
    wandb.init()
    #wandb.init(project='cats&dogs_ML&DL_project', save_code=True, config = sweep_config, name = run_name)
    return

# Parameters

In [5]:
class configuration:
    def __init__(self, sweep_config, model, model_transforms, last_checkpoint_path=None):
        config = wandb.init().config
        self.last_checkpoint_path = last_checkpoint_path
        self.model = model
        print("loading parameters")
        self.batch_size = config.batch_size
        print("batch")
        self.init_learning_rate = config.learning_rate
        print("lr")
        self.optimizer_name = config.optimizer
        print("optimizer")
        print("loaded parameters")
        self.optimizer = self.initialize_optimizer()
        print("got optimizer")
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(self.optimizer, 'min', patience=3, verbose=True, min_lr=self.init_learning_rate/32, factor=0.5)
        self.criterion = nn.CrossEntropyLoss()
        
        self.test_transforms = model_transforms
        # combine with some basic transforms
        self.custom_transforms = get_costum_transforms()
        self.train_transforms = transforms.Compose([self.custom_transforms,model_transforms])
    def initialize_optimizer(self):
        print("getting optimizer")
        if self.optimizer_name == 'sgd':
            optimizer = optim.SGD(self.model.parameters(), lr=self.init_learning_rate, momentum=0.9)
            print('sgd')
        elif self.optimizer_name == 'adam':
            optimizer = optim.Adam(self.model.parameters(), lr=self.init_learning_rate)
            print('adam')
        else:
            print("Unsupported optimizer:")
            raise ValueError(f"Unsupported optimizer: {optimizer_name}")
        return optimizer
    def get_configuration_info(self):
        info = f"{type(self.model).__name__}_"
        info += f"{type(self.optimizer).__name__}_"
        info += f"Batch={self.batch_size}_"
        info += f"lr={self.optimizer.param_groups[0]['lr']}"
        return info
    def get_configuration_dictionary(self):
        configuration_info = {
            'model_type': type(self.model).__name__,
            'criterion': str(self.criterion),
            'optimizer': type(self.optimizer).__name__,
            'batch_size': self.batch_size,
            'scheduler': type(self.scheduler).__name__,
            'optimizer_params': {
                'initial_lr': self.optimizer.param_groups[0]['lr']
            },
            'scheduler_params': {
                'min_lr': self.scheduler.min_lrs,
                'patience': self.scheduler.patience,
                'factor': self.scheduler.factor
            },
            'custom_transforms': [str(transform) for transform in self.custom_transforms.transforms] if self.custom_transforms else None
        }

        return configuration_info

# loading datasets

In [6]:
#reading csv
annotations = pd.read_csv('../input/cats-and-dogs-breeds-classification-oxford-dataset/annotations/annotations/list.txt')

#The first 4 rows consists of the information about breeds
#Reading the data after 5th row
annotations = annotations.loc[5:,]

#Processing the columns
annotations[['CLASS-ID','SPECIES','BREED','ID']] = annotations['#Image CLASS-ID SPECIES BREED ID'].str.split(expand=True) 

#Dropping unnecessary columns
annotations = annotations.drop('#Image CLASS-ID SPECIES BREED ID',axis=1)

#renaming the columns
annotations = annotations.rename(columns={"CLASS-ID": "image", "SPECIES": "CLASS-ID", 'BREED' : "SPECIES", "ID":"BREED ID"})


#converting the object type to int type
annotations[["CLASS-ID","SPECIES","BREED ID"]] = annotations[["CLASS-ID","SPECIES","BREED ID"]].astype(int)

In [7]:
annotations

Unnamed: 0,image,CLASS-ID,SPECIES,BREED ID
5,Abyssinian_100,1,1,1
6,Abyssinian_101,1,1,1
7,Abyssinian_102,1,1,1
8,Abyssinian_103,1,1,1
9,Abyssinian_104,1,1,1
...,...,...,...,...
7349,yorkshire_terrier_96,37,2,25
7350,yorkshire_terrier_97,37,2,25
7351,yorkshire_terrier_98,37,2,25
7352,yorkshire_terrier_99,37,2,25


Species stands here for dog (2) or a cat (1), breed ID is the ID of a breed given we know what type of animal it is, and class-ID is a unique ID for each species and breed together. Overall there is 25 cats breeds and 12 dog breeds. image is a column of filenames. All the files here are in jpg format

### RUN THIS CELL ONLY ONCE!!!

In [8]:
# adding the extension to image so it can be used to access the real image
annotations['image'] = annotations['image'].apply(lambda x : str(x)+'.jpg')
annotations = annotations.reset_index()
annotations = annotations.drop('index',axis=1)

#Extracting the classname/breed of the animal
annotations['classname'] = annotations['image'].apply(lambda x: str(x)[:str(x).rindex('_')])

# Adding information about cat or dog based on the 'Species' column to the 'classname' column
annotations['classname'] = annotations.apply(lambda row: f"{('dog' if row['SPECIES'] == 2 else 'cat')}_{row['classname']}", axis=1)
annotations

Unnamed: 0,image,CLASS-ID,SPECIES,BREED ID,classname
0,Abyssinian_100.jpg,1,1,1,cat_Abyssinian
1,Abyssinian_101.jpg,1,1,1,cat_Abyssinian
2,Abyssinian_102.jpg,1,1,1,cat_Abyssinian
3,Abyssinian_103.jpg,1,1,1,cat_Abyssinian
4,Abyssinian_104.jpg,1,1,1,cat_Abyssinian
...,...,...,...,...,...
7344,yorkshire_terrier_96.jpg,37,2,25,dog_yorkshire_terrier
7345,yorkshire_terrier_97.jpg,37,2,25,dog_yorkshire_terrier
7346,yorkshire_terrier_98.jpg,37,2,25,dog_yorkshire_terrier
7347,yorkshire_terrier_99.jpg,37,2,25,dog_yorkshire_terrier


# Adding transformations

In [9]:
def get_costum_transforms():
    costum_transforms = torchvision.transforms.Compose([
        transforms.RandomRotation(degrees=(-30, 30),fill=None),
        transforms.Resize((300,300)),
        transforms.RandomApply([transforms.Compose([
                transforms.CenterCrop(200),
                transforms.RandomCrop(80),
            ]),], p=0.3),
        transforms.RandomHorizontalFlip(p=0.4),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.1),
        transforms.GaussianBlur(kernel_size=(5,5), sigma=0.3),
        transforms.RandomApply([transforms.Compose([
                transforms.GaussianBlur(kernel_size=(9,9), sigma=0.7),
            ]),], p=0.4),
      ])
    return costum_transforms



# Creating the dataset class to make it easily accessable and short data analysis

In [10]:
class CatsDogsDataset(Dataset):
    def __init__(self, file_list, transform=None):
        self.folder_patch = '/kaggle/input/cats-and-dogs-breeds-classification-oxford-dataset/images/images/'
        self.annotations = file_list
        self.transform = transform
        self.filelength = len(file_list)

    def __len__(self):
        return self.filelength

    def __getitem__(self, idx):
        classID = self.annotations['CLASS-ID'].iloc[idx]
        img_path = self.annotations['image'].iloc[idx]
        img_path = self.folder_patch + img_path
        img = Image.open(img_path)
        
        has_alpha_channel = img.mode == 'RGBA'
        if has_alpha_channel == True:
            print("image has Alpha channel")
            img = img.convert('RGB')
        if self.transform is not None:
            try:
                img = self.transform(img)
            except RuntimeError as e:
                print(f"Exception: {e}")
                print("Shape before normalization:", img.size)
                print(img_path)
                tot = transforms.ToTensor()
                img_tensor = tot(img)
                print("Input Tensor Shape:", img_tensor.shape)
                print("Input Tensor Values:", img_tensor)
        #else:
            #print("No transformations to be done")
        return img, classID-1

### Plotting the histogram of images resolutions (I couldn't find any information about the images resolutions in a dataset)

In [11]:
all_images = CatsDogsDataset(annotations)

dataset_size = len(all_images)
print(dataset_size)
image_sizes = []
for i in range(dataset_size):
    img, _ = all_images[i]
    image_sizes.append(img.size)

unique_count = len(set(image_sizes))
print("Number of unique elements:", unique_count)

x_values, y_values = zip(*image_sizes)
plt.title('x_values')
plt.hist(x_values,bins=100)
plt.show()
plt.title('y_values')
plt.hist(y_values,bins=100)
plt.show()

resoulutions above 1000 pixels in one axis are not representative

filtered_data = [(x, y) for x, y in image_sizes if x < 1000 and y < 1000]

x_values, y_values = zip(*filtered_data)
plt.hist2d(x_values, y_values, bins=(50, 50), cmap='viridis', cmin = 1)

plt.xlim(0, 1000)
plt.ylim(0, 1000)

# Add color bar for reference
cbar = plt.colorbar()
cbar.set_label('Frequency')

# Add labels and title
plt.xlabel('horizontal')
plt.ylabel('vertical')
plt.title('2D Histogram for resolutions')

# Show the plot
plt.show()

Most of the images have rectangular shapes with proportions around 5x4 or 3x5 which should be also true in real life scenarions

# Create a 2D histogram
hist, x_edges, y_edges, _ = plt.hist2d(x_values, y_values, bins=(30, 30), cmap='inferno', cmin = 1, vmax=30)

plt.xlim(0, 1000)
plt.ylim(0, 1000)

# Add color bar for reference
cbar = plt.colorbar()
cbar.set_label('Frequency')

cbar.set_ticks([0, 2, 4, 6, 10, 20, 30])

plt.xlabel('horizontal')
plt.ylabel('vertical')
plt.title('2D Histogram for rare resolutions')
plt.show()

It can be obserwed that resolutions below 250 and 550 pixels in any axis also appear rarely

### Plotting the distribution of animal breed to check if they are really distributed uniformally

plt.hist(annotations['classname'], bins=37, edgecolor='black',rwidth=0.5)
plt.xticks(rotation='vertical')
plt.xlabel('breed indexes')
plt.ylabel('Frequency')
plt.title('Frequency of different breeds in dataset')
plt.show()

The breeds are actually almost uniformally distributed

# Splitting datasets and creating the dataloaders

In [12]:
def get_dataloaders(annotations, config): 
    train_set_temp, test_annotations = train_test_split(annotations, test_size=0.2, random_state=random_state, stratify=annotations['CLASS-ID'])
    train_annotations, validation_annotations = train_test_split(train_set_temp, test_size=0.2, random_state=random_state, stratify=train_set_temp['CLASS-ID'])

    train_data = CatsDogsDataset(train_annotations, transform=config.train_transforms)
    valid_data = CatsDogsDataset(validation_annotations, transform=config.test_transforms)
    test_data = CatsDogsDataset(test_annotations, transform=config.test_transforms)

    train_loader = DataLoader(dataset=train_data, batch_size=config.batch_size , shuffle=True)
    valid_loader = DataLoader(dataset=valid_data, batch_size=config.batch_size, shuffle=True)
    test_loader = DataLoader(dataset=test_data, batch_size=config.batch_size, shuffle=False)
    
    return train_loader,  valid_loader, test_loader

# Training

### Accuracy function

In [13]:
def my_accuracy(predictions, labels):
    predictions = torch.argmax(predictions,dim=1)
    #print(predictions)
    #print(labels)
    correct = (predictions == labels)
    #print(correct)
    acc = sum(correct) / len(predictions)
    #print(acc.item())
    return acc.item()

### Saving checkpoint

In [14]:
def save_checkpoint(epoch, model, optimizer, loss, config):
    if epoch%3 == 0:
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(prefix='modified_fc.'),
            'optimizer_state_dict': optimizer.state_dict(), # contains information like lr scheduler state
            'loss': loss,  # Save the current training loss if needed
        }
        info = config.get_configuration_info()
        checkpoint_path = f'/kaggle/working/{epoch}_{info}.pth'
        torch.save(checkpoint, checkpoint_path)
        config.last_checkpoint_path = checkpoint_path
    return

### Training loop

In [15]:
class TrainingManager: # singleton class
    _instance = None  # Class variable to store the instance

    def __new__(cls, *args, **kwargs):
        if not cls._instance:
            cls._instance = super(TrainingManager, cls).__new__(cls, *args, **kwargs)
        return cls._instance
    
    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        
        self.net = None
        self.optimizer = None
        self.scheduler = None
        self.criterion = None
        
        self.train_loader = None
        self.valid_loader = None
        self.test_loader = None
        self.resume_epoch = None
        self.training_accuracy_epoch = 0
        self.validation_accuracy_epoch = 0
        self.training_loss_epoch = 0
        self.validation_loss_epoch = 100
        self.total_train_batches = 0
        self.total_valid_batches = 0
        self.resume_epoch = 0
        self.cnt = 0
        
        self.log_interval = 1
        self.mean_loss = 0.0
        self.mean_lr = 0.0
        self.mean_grad_magnitude = 0.0
        self.termination_counter = 0
        self.last_validation_loss = 0

    def setup_configuration(self, config, annotations):
        self.resume_epoch = 0
        self.net = config.model.to(self.device)
        self.optimizer = config.optimizer
        self.scheduler = config.scheduler
        self.criterion = config.criterion
        self.train_loader, self.valid_loader, self.test_loader = get_dataloaders(annotations, config)
        
        self.total_train_batches = len(self.train_loader)
        self.total_valid_batches = len(self.valid_loader)
        
        if config.last_checkpoint_path != None:
            checkpoint = torch.load(config.last_checkpoint_path)
            self.optimizer.load_state_dict(checkpoint['optimizer_state_dict'])
            self.resume_epoch = checkpoint['epoch']
            # Remove the prefix from the keys
            state_dict = {k.replace('modified_fc.', ''): v for k, v in checkpoint['model_state_dict'].items()}
            # It is required after DataParallel wraps the module
            state_dict = {k.replace('module.', ''): v for k, v in state_dict.items()}
            self.net.load_state_dict(state_dict)
            
        d = next(self.net.parameters()).device
        if d.type == 'cuda':
            print("Model is on GPU")
            self.net = torch.nn.DataParallel(self.net) # if multiple GPUs use them
        else:
            print("Model is on CPU")
                
    def training(self, config, annotations):
        num_epochs = 25
        self.termination_counter = 0
        self.setup_configuration(config, annotations)
        self.net.train()
        for epoch in range(num_epochs - self.resume_epoch):
            self.training_loss_epoch = 0
            self.training_accuracy_epoch = 0
            self.cnt = 0

            for train_features_batch, train_labels_batch in self.train_loader:
                train_features_batch, train_labels_batch = train_features_batch.to(self.device), train_labels_batch.to(self.device)
                self.optimizer.zero_grad()

                y_hat = self.net(train_features_batch)

                training_loss = self.criterion(y_hat, train_labels_batch)
                training_loss.backward()
                self.optimizer.step()

                self.training_loss_epoch += training_loss.item()
                train_accuracy = my_accuracy(y_hat, train_labels_batch)
                self.training_accuracy_epoch += train_accuracy
                
                self.cnt += 1
                self.batch_log_metrics(training_loss)
                
            #save_checkpoint(epoch+self.resume_epoch, self.net, self.optimizer, self.training_loss_epoch, config)
            self.last_validation_loss = self.validation_loss_epoch
            self.validation_loss_epoch = 0
            self.validation_accuracy_epoch = 0
            self.cnt = 0

            for val_features_batch, val_labels_batch in self.valid_loader:
                val_features_batch, val_labels_batch = val_features_batch.to(self.device), val_labels_batch.to(self.device)
                with torch.no_grad():
                    y_hat_val = self.net(val_features_batch)
                    validation_loss = self.criterion(y_hat_val, val_labels_batch)
                    self.validation_loss_epoch += validation_loss.item()
                    val_accuracy = my_accuracy(y_hat_val, val_labels_batch)
                    self.validation_accuracy_epoch += val_accuracy
                    self.cnt += 1
                    
            self.normalize_metrics()      
            self.log_metrics()
            self.print_metrics(epoch + self.resume_epoch)
            self.scheduler.step(self.validation_loss_epoch)
            # Early stopping
            if  (self.validation_accuracy_epoch - self.training_accuracy_epoch < -0.20) and (self.training_loss_epoch - self.validation_loss_epoch < -0.15):
                save_checkpoint(epoch+self.resume_epoch, self.net, self.optimizer, self.training_loss_epoch, config)
                print('Early stopping! Overfitting')
                return
            if  self.validation_loss_epoch >= self.last_validation_loss:
                self.termination_counter += 1
                save_checkpoint(epoch+self.resume_epoch, self.net, self.optimizer, self.training_loss_epoch, config)
                if self.termination_counter > 3:
                    print('Early stopping! Overfitting')
                    return
            else:
                self.termination_counter = 0
        save_checkpoint(epoch+self.resume_epoch, self.net, self.optimizer, self.training_loss_epoch, config)
        return
    def normalize_metrics(self):
            self.training_accuracy_epoch = self.training_accuracy_epoch / self.total_train_batches
            self.validation_accuracy_epoch = self.validation_accuracy_epoch / self.total_valid_batches
            self.training_loss_epoch = self.training_loss_epoch / self.total_train_batches
            self.validation_loss_epoch = self.validation_loss_epoch / self.total_valid_batches
            
    def log_metrics(self):
        wandb.log({f'validation_loss_epoch': self.validation_loss_epoch,
                   f'training_loss_epoch': self.training_loss_epoch})
        wandb.log({f'training_accuracy_epoch': self.training_accuracy_epoch,
                   f'validation_accuracy_epoch': self.validation_accuracy_epoch})

    def print_metrics(self, epoch):
        print('~~~~~~~~~~~~~~~~~~~~~ Epoch: ', epoch, ' ~~~~~~~~~~~~~~~~~~~~~')
        print({f'training_loss_epoch': self.training_loss_epoch,
               f'training_accuracy_epoch': self.training_accuracy_epoch})
        print({f'validation_loss_epoch': self.validation_loss_epoch,
               f'validation_accuracy_epoch': self.validation_accuracy_epoch})
        
    def batch_log_metrics(self, train_loss):
        # Calculate mean values every n batches
        self.mean_loss += train_loss.item()
        self.mean_lr += self.optimizer.param_groups[0]['lr']
        self.mean_grad_magnitude += self.calculate_gradient_magnitude()

        if self.cnt % self.log_interval == 0:
            # Log mean values
            mean_loss_batch = self.mean_loss / self.log_interval
            mean_lr_batch = self.mean_lr / self.log_interval
            mean_grad_magnitude_batch = self.mean_grad_magnitude / self.log_interval

            wandb.log({'training_loss_every_n_batches': mean_loss_batch,
                       'training_learning_rate_every_n_batches': mean_lr_batch,
                       'training_gradient_magnitude_every_n_batches': mean_grad_magnitude_batch})

            # Reset mean values
            self.mean_loss = 0.0
            self.mean_lr = 0.0
            self.mean_grad_magnitude = 0.0
            
    def calculate_gradient_magnitude(self):
        # Example implementation of calculating the gradient magnitude
        total_norm = 0.0
        for param in self.net.parameters():
            if param.grad is not None:
                total_norm += param.grad.data.norm(2).item()
        return total_norm
    def evaluate_on_test_dataset(config, annotations):
        #setup_configuration(self, config, annotations)
        #self.net.eval()
        config.model.eval()
        self.train_loader, self.valid_loader, self.test_loader = get_dataloaders(annotations, config)
        predictions = []
        labels = []
        with torch.no_grad():
            for inputs_batch, labels_batch in self.test_loader:
                outputs = model(inputs_batch)
                predictions.append(outputs)
                labels.append(labels_batch)
        print(my_accuracy(predictions,labels))
        
    def evaluate_on_test_dataset(self, config, annotations):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.setup_configuration(config, annotations)
        self.net.eval()
        predictions = []
        labels = []
        with torch.no_grad():
            for inputs_batch, labels_batch in self.test_loader:
                inputs_batch = inputs_batch.to(self.device)
                labels_batch = labels_batch.tolist()
                outputs = self.net(inputs_batch)
                predicted_classes = [torch.argmax(pred).item() for pred in outputs]
                predictions += predicted_classes
                labels += labels_batch
        return accuracy_score(labels, predictions), confusion_matrix(labels, predictions)

# Defining models

### ResNet101

In [16]:
def get_pretrained_resnet101():
    model = models.resnet101(weights=torchvision.models.ResNet101_Weights.DEFAULT)
    num_classes = 37
    model.fc = nn.Sequential(
        nn.Linear(model.fc.in_features, num_classes),
        nn.Softmax(dim=1)
    )
    nn.init.xavier_normal_(model.fc[0].weight)
    nn.init.constant_(model.fc[0].bias, 0)
    
    weights = torchvision.models.ResNet101_Weights.DEFAULT
    preprocess_transforms = weights.transforms()
    
    return model, preprocess_transforms

In [17]:
passed_config = {
        'optimizer': 'sgd',
        'learning_rate': {
            'sgd': 0.01,
            'adam': 0.0001,
        },
        'batch_size': 64,
    }

In [18]:
sweep_config = {
    'method': 'bayes',
    'name': 'final_tuning_ResNet101',
    "metric": {"goal": "minimize", "name": "validation_loss_epoch"},
    'parameters': {
        'model': {'values': ['ResNet101']},
        'optimizer': {'values': ['sgd', 'adam']},
        'learning_rate': {'min': 0.0001, 'max': 0.07},
        'batch_size': {'values': [32, 64]},
    }
}

In [19]:
#initialize_wandb(sweep_config)
#print(wandb.config)

In [20]:
def start_training_with_sweep():
    print(wandb.config)
    print("*")
    model, preprocess_transforms = get_pretrained_resnet101()
    traning_config = configuration(wandb.config, model, preprocess_transforms)
    manager = TrainingManager()
    manager.training(traning_config, annotations)

In [None]:
initialize_wandb(sweep_config)
sweep_id = wandb.sweep(sweep=sweep_config, project="cats&dogs_ML&DL_project")
print(wandb.config)
wandb.agent(sweep_id, function=start_training_with_sweep, count=20)

[34m[1mwandb[0m: Currently logged in as: [33mjerzyjerzu[0m ([33mjerze[0m). Use [1m`wandb login --relogin`[0m to force relogin




Create sweep with ID: 5de3bz4p
Sweep URL: https://wandb.ai/jerze/cats%26dogs_ML%26DL_project/sweeps/5de3bz4p
{}


[34m[1mwandb[0m: Agent Starting Run: tdqe3mcp with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.057675771560222806
[34m[1mwandb[0m: 	model: ResNet101
[34m[1mwandb[0m: 	optimizer: adam


VBox(children=(Label(value='0.000 MB of 0.000 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))
<IPython.core.display.HTML object>
<IPython.core.display.HTML object>
{}
*


Downloading: "https://download.pytorch.org/models/resnet101-cd907fc2.pth" to /root/.cache/torch/hub/checkpoints/resnet101-cd907fc2.pth
100%|██████████| 171M/171M [00:00<00:00, 274MB/s] 


loading parameters
batch
lr
optimizer
loaded parameters
getting optimizer
adam
got optimizer
Model is on GPU
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  0  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.6136058625720797, 'training_accuracy_epoch': 0.02317862628268547}
{'validation_loss_epoch': 3.6114711890349516, 'validation_accuracy_epoch': 0.028153153186714328}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  1  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.6135798473747407, 'training_accuracy_epoch': 0.022328286146631047}
{'validation_loss_epoch': 3.612875442247133, 'validation_accuracy_epoch': 0.02533783783783784}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  2  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.6119993981860934, 'training_accuracy_epoch': 0.025935374149659865}
{'validation_loss_epoch': 3.611706147322

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
training_accuracy_epoch,▃▂▄▃▃▄▁▃▅█▇
training_gradient_magnitude_every_n_batches,▃▁▇▇▁▁▂▁▁▁▁▁▁▁▂▁▁▁▃▁▂▂▁▁▁█▂▁▂▁▃▄▂▂▁▁▁▁▂▃
training_learning_rate_every_n_batches,█████████████████████████████▁▁▁▁▁▁▁▁▁▁▁
training_loss_epoch,▇▇▃▄▂▅██▁▂▃
training_loss_every_n_batches,▇▇▇▇▆▇▆▇▇▆▇▇▇▆▇▇▇▇▇▁▇█▇▆▇▇▇▇█▇▆▆▇▇█▂▇▁█▆
validation_accuracy_epoch,▃▁▃▃▁▃▁▂▆█▇
validation_loss_epoch,▄▅▄▄▄█▆▅▃▁▃

0,1
training_accuracy_epoch,0.03104
training_gradient_magnitude_every_n_batches,0.00498
training_learning_rate_every_n_batches,0.02884
training_loss_epoch,3.61191
training_loss_every_n_batches,3.6152
validation_accuracy_epoch,0.0366
validation_loss_epoch,3.6105


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: mey3w6it with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.022109318296587136
[34m[1mwandb[0m: 	model: ResNet101
[34m[1mwandb[0m: 	optimizer: sgd


<wandb.sdk.lib.preinit.PreInitObject object at 0x7e2ed3ef86d0>
*


loading parameters
batch
lr
optimizer
loaded parameters
getting optimizer
sgd
got optimizer
Model is on GPU
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  0  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.5938762974094702, 'training_accuracy_epoch': 0.14142191571158333}
{'validation_loss_epoch': 3.5175186960320723, 'validation_accuracy_epoch': 0.3574561413965727}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  1  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.258423747243108, 'training_accuracy_epoch': 0.4852264056334624}
{'validation_loss_epoch': 2.9786567437021354, 'validation_accuracy_epoch': 0.7088815789473685}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  2  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.0084819117107906, 'training_accuracy_epoch': 0.6724267102576591}
{'validation_loss_epoch': 2.8781134329344096, 'v

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
training_accuracy_epoch,▁▅▆▇▇█████████
training_gradient_magnitude_every_n_batches,▁▁▁▂▂▃▄▅▅▆▄▅▅▄▄▄▅▄▃▅▄▆▃▄▄▆▄█▅▄▄▃▃▅▃▄▆▄▅▂
training_learning_rate_every_n_batches,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss_epoch,█▅▃▂▂▁▁▁▁▁▁▁▁▁
training_loss_every_n_batches,███▇▅▄▃▃▄▂▂▂▃▂▃▂▂▁▁▁▂▂▁▁▂▂▂▂▂▂▁▁▁▁▁▂▂▂▂▁
validation_accuracy_epoch,▁▆▇███████████
validation_loss_epoch,█▃▂▁▁▁▁▁▁▁▁▁▁▁

0,1
training_accuracy_epoch,0.81326
training_gradient_magnitude_every_n_batches,34.49673
training_learning_rate_every_n_batches,0.02211
training_loss_epoch,2.84831
training_loss_every_n_batches,2.87077
validation_accuracy_epoch,0.89145
validation_loss_epoch,2.77132


[34m[1mwandb[0m: Agent Starting Run: trhhka76 with config:
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	learning_rate: 0.04644151467360942
[34m[1mwandb[0m: 	model: ResNet101
[34m[1mwandb[0m: 	optimizer: sgd


<wandb.sdk.lib.preinit.PreInitObject object at 0x7e2ed0cb6710>
*


loading parameters
batch
lr
optimizer
loaded parameters
getting optimizer
sgd
got optimizer
Model is on GPU
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  0  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.3377371710173938, 'training_accuracy_epoch': 0.36364384451690984}
{'validation_loss_epoch': 3.047783445667576, 'validation_accuracy_epoch': 0.6196509019748585}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  1  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.206893950092549, 'training_accuracy_epoch': 0.4564406407528183}
{'validation_loss_epoch': 3.0908426336340002, 'validation_accuracy_epoch': 0.5703828834198617}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  2  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.226043026463515, 'training_accuracy_epoch': 0.4324939652365081}
{'validation_loss_epoch': 3.0861191234073124, 'val

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
training_accuracy_epoch,▁▃▃▃▃▅▅▆▇▇█▇██
training_gradient_magnitude_every_n_batches,▁▃▅▆▃█▇▄▄▅▇▃▄▆▆▃▅▅▃▇▄▄▄▅▄▃▃▇▃▄▆▂▄▃▂▃▄▂▂▄
training_learning_rate_every_n_batches,██████████████▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss_epoch,█▅▅▅▅▄▃▃▂▁▁▂▁▁
training_loss_every_n_batches,█▆▂▄▄▅▆▄▅▃▄▄▅▅▃▄▄▂▃▄▃▃▃▃▄▃▃▄▁▃▂▃▁▁▃▄▃▂▃▁
validation_accuracy_epoch,▃▁▁▁▃▅▅▇▇▇▇███
validation_loss_epoch,▆███▆▄▄▂▂▂▁▁▁▁

0,1
training_accuracy_epoch,0.62748
training_gradient_magnitude_every_n_batches,18.9284
training_learning_rate_every_n_batches,0.02322
training_loss_epoch,3.0322
training_loss_every_n_batches,3.00513
validation_accuracy_epoch,0.71227
validation_loss_epoch,2.94664


[34m[1mwandb[0m: Agent Starting Run: 6dt2o338 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.046143960773363864
[34m[1mwandb[0m: 	model: ResNet101
[34m[1mwandb[0m: 	optimizer: adam


<wandb.sdk.lib.preinit.PreInitObject object at 0x7e2ed0c9e5f0>
*


loading parameters
batch
lr
optimizer
loaded parameters
getting optimizer
adam
got optimizer
Model is on GPU
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  0  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.6127423176894315, 'training_accuracy_epoch': 0.020903716216216218}
{'validation_loss_epoch': 3.612959309628135, 'validation_accuracy_epoch': 0.02549342105263158}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  1  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.6119277992764034, 'training_accuracy_epoch': 0.022592905405405407}
{'validation_loss_epoch': 3.6116315314644263, 'validation_accuracy_epoch': 0.02631578947368421}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  2  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.6118993340311825, 'training_accuracy_epoch': 0.022592905405405407}
{'validation_loss_epoch': 3.610752143357

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
training_accuracy_epoch,▄▇▇▅▅▆▁█
training_gradient_magnitude_every_n_batches,▆▂█▁▂▁▂▁▁▁▁▅▁▂▇▂▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_learning_rate_every_n_batches,███████████████████████████████████▁▁▁▁▁
training_loss_epoch,█▄▄▅▅▅▄▁
training_loss_every_n_batches,▇▇▆▆▆▆█▁▆▆▆▆▅▆▆▆▇▅▆▆▅▇▆▆▆▆▆▆▇▇▆▆▆▆▅▆▆▅▆▇
validation_accuracy_epoch,▁▃▅▃▄▆█▃
validation_loss_epoch,█▄▁▂▄▁▄▄

0,1
training_accuracy_epoch,0.02302
training_gradient_magnitude_every_n_batches,0.00456
training_learning_rate_every_n_batches,0.02307
training_loss_epoch,3.61134
training_loss_every_n_batches,3.61398
validation_accuracy_epoch,0.02632
validation_loss_epoch,3.61157


[34m[1mwandb[0m: Agent Starting Run: 6shcpczb with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.02379107616933932
[34m[1mwandb[0m: 	model: ResNet101
[34m[1mwandb[0m: 	optimizer: adam


<wandb.sdk.lib.preinit.PreInitObject object at 0x7e2ed4bf52d0>
*


loading parameters
batch
lr
optimizer
loaded parameters
getting optimizer
adam
got optimizer
Model is on GPU
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  0  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.6114884872694275, 'training_accuracy_epoch': 0.02364864864864865}
{'validation_loss_epoch': 3.6119837384474907, 'validation_accuracy_epoch': 0.0276864035741279}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  1  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.611693501472473, 'training_accuracy_epoch': 0.027674095453442755}
{'validation_loss_epoch': 3.6111355580781637, 'validation_accuracy_epoch': 0.02549342105263158}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  2  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.611541309872189, 'training_accuracy_epoch': 0.025984906264253566}
{'validation_loss_epoch': 3.612842208460757

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
training_accuracy_epoch,▁▂▂▂▄▅▅▅▆▆▇▇█▇
training_gradient_magnitude_every_n_batches,▁▁▁▁▁▁▂▁▁▃▂▁▃▁▃▂▂▃▂▂▂▁▃▂▂▂▂▂▂▂▂▂▂▂▂▃▂▃▃█
training_learning_rate_every_n_batches,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss_epoch,████▆▆▆▅▄▂▂▁▁▂
training_loss_every_n_batches,▆▆▆▆▆▆▆▆▇▆▆▆▃▃██▆▄▇▅▆▆▆▅▆▆▄▅▇▅▇▄▄▇▆▆▅▁▄▂
validation_accuracy_epoch,▃▂▁▄▃▄▄▅▆▆▇▆█▇
validation_loss_epoch,█▇█▇▇▆▆▅▃▃▃▃▁▃

0,1
training_accuracy_epoch,0.04497
training_gradient_magnitude_every_n_batches,0.07285
training_learning_rate_every_n_batches,0.02379
training_loss_epoch,3.60122
training_loss_every_n_batches,3.61982
validation_accuracy_epoch,0.04688
validation_loss_epoch,3.60091


[34m[1mwandb[0m: Agent Starting Run: xlpy5lhi with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.026059992327453
[34m[1mwandb[0m: 	model: ResNet101
[34m[1mwandb[0m: 	optimizer: sgd


<wandb.sdk.lib.preinit.PreInitObject object at 0x7e2ed4afb040>
*


loading parameters
batch
lr
optimizer
loaded parameters
getting optimizer
sgd
got optimizer
Model is on GPU
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  0  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.5790636958302677, 'training_accuracy_epoch': 0.14771550772963343}
{'validation_loss_epoch': 3.4493753031680456, 'validation_accuracy_epoch': 0.3673245624492043}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  1  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.1643828797984765, 'training_accuracy_epoch': 0.5716815606967823}
{'validation_loss_epoch': 2.854821054559005, 'validation_accuracy_epoch': 0.8262061413965727}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  2  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 2.9221909883859993, 'training_accuracy_epoch': 0.7517777351108758}
{'validation_loss_epoch': 2.811593670594065, 'va

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
training_accuracy_epoch,▁▅▇█▇█████████
training_gradient_magnitude_every_n_batches,▁▁▂▂▃▃▅▅▇▃▅█▄▇▃▅▅▆▇▇▄▇▅▇▇▄▆██▇▅▄▆▇█▆▃▅▄▆
training_learning_rate_every_n_batches,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss_epoch,█▄▂▁▂▁▁▁▁▁▁▁▁▁
training_loss_every_n_batches,██▇▆▃▄▃▂▃▂▂▂▂▂▂▂▁▂▂▂▃▂▂▂▂▂▂▂▂▂▂▁▁▂▂▁▂▂▂▂
validation_accuracy_epoch,▁▇▇▇▇▇████████
validation_loss_epoch,█▂▂▁▂▁▁▁▁▁▁▁▁▁

0,1
training_accuracy_epoch,0.81642
training_gradient_magnitude_every_n_batches,32.55459
training_learning_rate_every_n_batches,0.02606
training_loss_epoch,2.84368
training_loss_every_n_batches,2.86676
validation_accuracy_epoch,0.89172
validation_loss_epoch,2.7668


[34m[1mwandb[0m: Agent Starting Run: ald60ed2 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.020342289804041555
[34m[1mwandb[0m: 	model: ResNet101
[34m[1mwandb[0m: 	optimizer: sgd


<wandb.sdk.lib.preinit.PreInitObject object at 0x7e2ed3efb010>
*


loading parameters
batch
lr
optimizer
loaded parameters
getting optimizer
sgd
got optimizer
Model is on GPU
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  0  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.594311975144051, 'training_accuracy_epoch': 0.12620559052841082}
{'validation_loss_epoch': 3.529814318606728, 'validation_accuracy_epoch': 0.3623903519228885}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  1  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.290136279286565, 'training_accuracy_epoch': 0.4738243786064354}
{'validation_loss_epoch': 2.939739315133346, 'validation_accuracy_epoch': 0.756578947368421}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  2  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 2.973248504303597, 'training_accuracy_epoch': 0.7091938208889317}
{'validation_loss_epoch': 2.8433588931435034, 'valida

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
training_accuracy_epoch,▁▄▇▇▇▇███████
training_gradient_magnitude_every_n_batches,▁▁▁▂▂▄▄▄▄▃▃▄▃▆▆▅▇▅▄▆▄▄▅▄█▃▄▄▃▅▃▅▆▅▄▆▅▄▅▅
training_learning_rate_every_n_batches,██████████████████████████████████▁▁▁▁▁▁
training_loss_epoch,█▅▃▂▂▂▁▁▁▁▁▁▁
training_loss_every_n_batches,███▇▆▄▄▃▃▃▂▂▃▂▂▃▂▂▂▂▂▂▂▂▃▁▂▂▁▂▂▁▂▂▁▂▁▂▁▂
validation_accuracy_epoch,▁▆▇▇▇████████
validation_loss_epoch,█▃▂▂▁▁▁▁▁▁▁▁▁

0,1
training_accuracy_epoch,0.86713
training_gradient_magnitude_every_n_batches,35.64826
training_learning_rate_every_n_batches,0.01017
training_loss_epoch,2.79364
training_loss_every_n_batches,2.78748
validation_accuracy_epoch,0.929
validation_loss_epoch,2.73336


[34m[1mwandb[0m: Agent Starting Run: iid4tpov with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.01895447688718646
[34m[1mwandb[0m: 	model: ResNet101
[34m[1mwandb[0m: 	optimizer: sgd


<wandb.sdk.lib.preinit.PreInitObject object at 0x7e2eb0cf09d0>
*


loading parameters
batch
lr
optimizer
loaded parameters
getting optimizer
sgd
got optimizer
Model is on GPU
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  0  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.5999471400235152, 'training_accuracy_epoch': 0.11434039876267717}
{'validation_loss_epoch': 3.5610333618364836, 'validation_accuracy_epoch': 0.29523026315789475}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  1  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.327373659288561, 'training_accuracy_epoch': 0.4631239095249692}
{'validation_loss_epoch': 2.9780752658843994, 'validation_accuracy_epoch': 0.7305372827931454}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  2  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 2.98154536131266, 'training_accuracy_epoch': 0.7169790751225239}
{'validation_loss_epoch': 2.804545101366545, 'val

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
training_accuracy_epoch,▁▄▇██████████
training_gradient_magnitude_every_n_batches,▁▁▁▁▂▂▃▄▅▅▆▅▇▅▅▄▃▄▅▅▆▅▄▄█▅▅▅▇▇▆▅▄▆█▅▄▆▄▇
training_learning_rate_every_n_batches,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss_epoch,█▆▂▂▁▁▁▁▁▁▁▁▁
training_loss_every_n_batches,████▆▄▄▃▃▂▃▂▃▂▂▁▁▁▂▃▂▂▁▂▂▃▂▂▁▁▁▂▂▂▂▂▁▂▂▂
validation_accuracy_epoch,▁▆████▇██████
validation_loss_epoch,█▃▁▁▁▁▁▁▁▁▁▁▁

0,1
training_accuracy_epoch,0.82955
training_gradient_magnitude_every_n_batches,17.98096
training_learning_rate_every_n_batches,0.01895
training_loss_epoch,2.83104
training_loss_every_n_batches,2.78368
validation_accuracy_epoch,0.90461
validation_loss_epoch,2.75553


[34m[1mwandb[0m: Agent Starting Run: 8ljia6cn with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.02683765530552515
[34m[1mwandb[0m: 	model: ResNet101
[34m[1mwandb[0m: 	optimizer: sgd


<wandb.sdk.lib.preinit.PreInitObject object at 0x7e2ed0cb4550>
*


loading parameters
batch
lr
optimizer
loaded parameters
getting optimizer
sgd
got optimizer
Model is on GPU
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  0  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.5778316807102515, 'training_accuracy_epoch': 0.1790608650929219}
{'validation_loss_epoch': 3.3817342833468786, 'validation_accuracy_epoch': 0.4895833354247244}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  1  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.1205188551464595, 'training_accuracy_epoch': 0.6029179378135784}
{'validation_loss_epoch': 2.877543248628315, 'validation_accuracy_epoch': 0.7957785098176253}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  2  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 2.955684977608758, 'training_accuracy_epoch': 0.7186682643117132}
{'validation_loss_epoch': 2.8461920211189673, 'val

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
training_accuracy_epoch,▁▆▇█████████
training_gradient_magnitude_every_n_batches,▁▁▁▂▃▄▃▃▄▅▆▅▃█▂▃▄▂▆▄▇▃▅▆▅▅▆▂▅▄▅▇▇▆▄▅▆▄▅▆
training_learning_rate_every_n_batches,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss_epoch,█▃▂▁▁▁▁▁▁▁▁▁
training_loss_every_n_batches,███▇▅▄▄▂▂▂▂▃▂▃▂▁▃▂▂▂▂▂▂▂▁▂▂▂▂▁▂▂▂▂▁▂▁▂▂▂
validation_accuracy_epoch,▁▇▇▇████████
validation_loss_epoch,█▂▂▁▁▁▁▁▁▁▁▁

0,1
training_accuracy_epoch,0.76764
training_gradient_magnitude_every_n_batches,28.89072
training_learning_rate_every_n_batches,0.02684
training_loss_epoch,2.89559
training_loss_every_n_batches,2.90351
validation_accuracy_epoch,0.85197
validation_loss_epoch,2.8079


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: rzusyey5 with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.02250976598190853
[34m[1mwandb[0m: 	model: ResNet101
[34m[1mwandb[0m: 	optimizer: sgd


<wandb.sdk.lib.preinit.PreInitObject object at 0x7e2ed1d421a0>
*


loading parameters
batch
lr
optimizer
loaded parameters
getting optimizer
sgd
got optimizer
Model is on GPU
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  0  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.595784200204385, 'training_accuracy_epoch': 0.1323016563782821}
{'validation_loss_epoch': 3.521827459335327, 'validation_accuracy_epoch': 0.3070175443824969}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  1  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.294494068300402, 'training_accuracy_epoch': 0.4551138837595244}
{'validation_loss_epoch': 2.9721349038575826, 'validation_accuracy_epoch': 0.7371162301615665}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  2  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 2.9793934242145435, 'training_accuracy_epoch': 0.712122656203605}
{'validation_loss_epoch': 2.8283550990255257, 'valid

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
training_accuracy_epoch,▁▄▇▇▇███████
training_gradient_magnitude_every_n_batches,▁▁▁▂▃▄▄▅▄▃▇▆▅▃▇▇█▇▇▆█▅▅▇▄▆▇▅▅▅▇▇▅▄▆▃▆▅█▅
training_learning_rate_every_n_batches,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss_epoch,█▅▂▂▂▁▁▁▁▁▁▁
training_loss_every_n_batches,███▇▆▅▄▃▃▂▂▂▂▂▂▂▃▂▂▂▂▂▃▁▂▂▂▁▁▁▂▂▁▂▂▁▁▁▁▂
validation_accuracy_epoch,▁▆▇▇████████
validation_loss_epoch,█▃▂▁▁▁▁▁▁▁▁▁

0,1
training_accuracy_epoch,0.82907
training_gradient_magnitude_every_n_batches,17.51988
training_learning_rate_every_n_batches,0.02251
training_loss_epoch,2.83447
training_loss_every_n_batches,2.8964
validation_accuracy_epoch,0.89227
validation_loss_epoch,2.76652


[34m[1mwandb[0m: Sweep Agent: Waiting for job.
[34m[1mwandb[0m: Job received.
[34m[1mwandb[0m: Agent Starting Run: 9b9t7ggb with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.024254882405332377
[34m[1mwandb[0m: 	model: ResNet101
[34m[1mwandb[0m: 	optimizer: sgd


<wandb.sdk.lib.preinit.PreInitObject object at 0x7e2ed1d78f70>
*


loading parameters
batch
lr
optimizer
loaded parameters
getting optimizer
sgd
got optimizer
Model is on GPU
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  0  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.585335654181403, 'training_accuracy_epoch': 0.15030378143529635}
{'validation_loss_epoch': 3.476905207884939, 'validation_accuracy_epoch': 0.3415570180667074}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  1  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.2202391302263416, 'training_accuracy_epoch': 0.5077035201562418}
{'validation_loss_epoch': 2.9173278934077214, 'validation_accuracy_epoch': 0.7700109670036718}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  2  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 2.956588854660859, 'training_accuracy_epoch': 0.7228639931292147}
{'validation_loss_epoch': 2.8221622266267476, 'val

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
training_accuracy_epoch,▁▅▇████████
training_gradient_magnitude_every_n_batches,▁▁▁▂▂▃▃▄▄▃▅▃▄▅▅▃▃▆▃▄█▃▅▃▄▇▃▄▃▄▅▅▄▅▆▄▅▅▅▅
training_learning_rate_every_n_batches,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
training_loss_epoch,█▄▂▁▁▁▁▁▁▁▁
training_loss_every_n_batches,████▆▅▅▃▃▃▂▂▂▃▃▂▂▂▂▂▂▂▂▂▃▂▂▂▁▁▂▂▂▂▂▂▂▁▂▂
validation_accuracy_epoch,▁▇█████████
validation_loss_epoch,█▂▁▁▁▁▁▁▁▁▁

0,1
training_accuracy_epoch,0.78321
training_gradient_magnitude_every_n_batches,25.71805
training_learning_rate_every_n_batches,0.02425
training_loss_epoch,2.87698
training_loss_every_n_batches,3.00815
validation_accuracy_epoch,0.85526
validation_loss_epoch,2.8043


[34m[1mwandb[0m: Agent Starting Run: moez940a with config:
[34m[1mwandb[0m: 	batch_size: 64
[34m[1mwandb[0m: 	learning_rate: 0.022022429723317652
[34m[1mwandb[0m: 	model: ResNet101
[34m[1mwandb[0m: 	optimizer: sgd


<wandb.sdk.lib.preinit.PreInitObject object at 0x7e2ed2deab30>
*


loading parameters
batch
lr
optimizer
loaded parameters
getting optimizer
sgd
got optimizer
Model is on GPU
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  0  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.5881423692445495, 'training_accuracy_epoch': 0.15020842407200788}
{'validation_loss_epoch': 3.474503015217028, 'validation_accuracy_epoch': 0.3459429835018359}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  1  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 3.2490236888060697, 'training_accuracy_epoch': 0.491363338522009}
{'validation_loss_epoch': 2.904381425757157, 'validation_accuracy_epoch': 0.7897478091089349}
image has Alpha channel
image has Alpha channel
image has Alpha channel
~~~~~~~~~~~~~~~~~~~~~ Epoch:  2  ~~~~~~~~~~~~~~~~~~~~~
{'training_loss_epoch': 2.9553980666237907, 'training_accuracy_epoch': 0.7296207498859715}
{'validation_loss_epoch': 2.82947039604187, 'valid

In [None]:
manager = TrainingManager()
manager.training(traning_config, annotations)

In [None]:
batch_size = 64
#optimizer = optim.Adam(model.parameters(), lr=0.0001)
optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, verbose=True, min_lr=5e-4, factor=0.3)
config = configuration(batch_size, model, optimizer, scheduler, preprocess_transforms, costum_transforms)
print(config.get_configuration_info())

In [None]:
initialize_wandb(config)

In [None]:
manager = TrainingManager()
manager.training(config, annotations)

In [None]:
config.last_checkpoint_path = "/kaggle/input/checkpoint/21_ResNet_SGD_Batch64_lr0.01.pth"
manager = TrainingManager()
acc, confusion_mat = manager.evaluate_on_test_dataset(config, annotations)
print(acc)

In [None]:
print(confusion_mat)

In [None]:
def evaluate_on_test_dataset(config, annotations):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        #setup_configuration(self, config, annotations)
        #self.net.eval()
        config.model.eval()
        train_loader, valid_loader, test_loader = get_dataloaders(annotations, config)
        predictions = []
        labels = []
        with torch.no_grad():
            for inputs_batch, labels_batch in test_loader:
                inputs_batch = inputs_batch.to(device)
                labels_batch = labels_batch.tolist()
                outputs = model(inputs_batch)
                predicted_classes = [torch.argmax(pred).item() for pred in outputs]
                predicted_classes = predicted_classes
                predictions += predicted_classes
                labels += labels_batch
            #print(predictions)
            #print(labels)
            confusion_mat = confusion_matrix(labels, predictions)
        print(accuracy_score(labels, predictions))
        print(confusion_mat)

In [None]:
evaluate_on_test_dataset(config,annotations)

### ResNet18

resnet18model = models.resnet18(weights=torchvision.models.ResNet18_Weights.DEFAULT)

num_classes = 37
resnet18model.fc = nn.Sequential(
    nn.Linear(resnet18model.fc.in_features, num_classes),
    nn.Softmax(dim=1)
)
nn.init.xavier_normal_(resnet18model.fc[0].weight)
nn.init.constant_(resnet18model.fc[0].bias, 0)

costum_transforms = torchvision.transforms.Compose([
    #transforms.RandomCrop(32, padding=4),
    transforms.RandomHorizontalFlip(p=0.4),
  ])
weights = torchvision.models.ResNet18_Weights.DEFAULT
preprocess_transforms = weights.transforms()

batch_size = 64
#optimizer = torch.optim.SGD(resnet18model.parameters(), lr=0.8)
optimizer = optim.Adam(resnet18model.parameters(), lr=0.0001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=3, verbose=True, min_lr=7e-6, factor=0.5)
config = configuration(batch_size, resnet18model, optimizer, scheduler, preprocess_transforms, costum_transforms)
print(config.get_configuration_info())

config.get_configuration_dictionary()

initialize_wandb(config)

manager = TrainingManager()
manager.training(config, annotations)

mobileNet_model = torchvision.models.mobilenet_v2(pretrained=True)
preprocess_transforms = mobileNet_model.transforms()

num_classes = 37
mobileNet_model.classifier[1] = nn.Sequential(
    nn.Linear(mobileNet_model.last_channel, num_classes),
    nn.Softmax(dim=1)
)
mobileNet_model.classifier[1][0].apply(initialize_weights)

batch_size = 64
optimizer = torch.optim.SGD(resnet18model.parameters(), lr=0.000001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=20, verbose=True, min_lr=7e-5, factor=0.5)
config2 = configuration(batch_size, mobileNet_model, optimizer, scheduler, preprocess_transforms, costum_transforms)

print(config2.get_configuration_info())

preprocess_transforms

manager = TrainingManager()
manager.training(config2, annotations)