<a href="https://colab.research.google.com/github/AhmedEssam19/Graduation-Project/blob/Hyperparameter-Tuning/Hyperparameters_Tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/drive/',force_remount=True)

Mounted at /content/drive/


In [None]:
!unzip '/content/drive/MyDrive/Colab Notebooks/data.zip' -d '/content/'

In [None]:
pip install torch torchvision pytorch-lightning

In [None]:
!pip install wandb --upgrade

In [None]:
import wandb

wandb.login()

In [15]:
import torch
import torch.nn as nn
import torch.optim as optim  
import torchvision.transforms as transforms
import torchvision
import os
from torchvision.io import decode_jpeg
import pandas as pd
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader, TensorDataset
from torchvision import datasets,models
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt
from torchvision.io import read_image
import pytorch_lightning as pl
import torchmetrics
from torch import nn
import math
from pytorch_lightning.loggers import TensorBoardLogger, WandbLogger

In [8]:
NUM_CLASSES = 10
BATCH_SIZE = 32

data_dir = '/content/data/'
ckpt_dir = '/content/results/checkpoints/'
log_dir = '/content/results/logs/'
submission_dir = '/content/results/submissions/'

In [9]:
class CreateDataset(Dataset):
    def __init__(self, df,transform=False):
        self.df = df
        self.transform = transform

    def __len__(self):
        return len(self.df)

    def __getitem__(self, index):   
        img_path = self.df.iloc[index, 0]
        image = read_image(img_path) / 255.0
        label = self.df.iloc[index, 1]

        if self.transform:
            image = self.transform(image)

        return image, label

In [10]:
transformers = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

In [11]:
#search space
import math

sweep_config = {
    'method': 'random',
    'metric':{
      'name': 'loss',
      'goal': 'minimize'   
      },
    'parameters':{
        'learning_rate':{
            #'values':[1e-5,1e-4,1e-3,1e-2,1e-1],
            'distribution': 'uniform',
            'min': 0,
            'max': 0.1
            },
        'dropout':{
            #'values':[0.1,0.15,0.2,0.25],
            'distribution': 'uniform',
            'min': 0,
            'max': 0.3
            },
        'batch_size':{
            #'values':[16,32,64,128],
            'distribution': 'q_log_uniform',
            'q': 1,
            'min': math.log(32),
            'max': math.log(256),
            }
    }
}

In [12]:
import pprint

pprint.pprint(sweep_config)

{'method': 'random',
 'metric': {'goal': 'minimize', 'name': 'loss'},
 'parameters': {'batch_size': {'distribution': 'q_log_uniform',
                               'max': 5.545177444479562,
                               'min': 3.4657359027997265,
                               'q': 1},
                'dropout': {'distribution': 'uniform', 'max': 0.3, 'min': 0},
                'learning_rate': {'distribution': 'uniform',
                                  'max': 0.1,
                                  'min': 0}}}


In [13]:
class DistractionDataModule(pl.LightningDataModule):
  def __init__(self, data_dir, batch_size):
        super().__init__()
        self.data_dir = data_dir
        self.batch_size = batch_size

  def setup(self,stage=None):
        train_df = pd.read_csv(self.data_dir+"train.csv")
        self.train_set = CreateDataset(train_df,transformers)

        val_df = pd.read_csv(self.data_dir+"val.csv")
        self.val_set = CreateDataset(val_df,transformers)

        test_df = pd.read_csv(self.data_dir+"test.csv")
        self.test_set = CreateDataset(test_df,transformers)

  def train_dataloader(self):
        return DataLoader(self.train_set, self.batch_size, shuffle=True)

  def val_dataloader(self):
        return DataLoader(self.val_set, self.batch_size, shuffle=False)
    
  def test_dataloader(self):
        return DataLoader(self.test_set, self.batch_size, shuffle=False)

In [14]:
class Model(pl.LightningModule):
    def __init__(self, output_units, learning_rate, dropout, freeze_base=False):
        super().__init__()
        self.base_model = torchvision.models.resnet50(pretrained=True)
        
        freezing_layers = [
            self.base_model.conv1,
            self.base_model.bn1,
            self.base_model.layer1,
            self.base_model.layer2,
        ]
        for layer in freezing_layers:
          for param in layer.parameters():
            param.requires_grad = False
    
                
        self.base_model.fc = torch.nn.Linear(in_features=self.base_model.fc.in_features, out_features=500)
        self.dropout = torch.nn.Dropout(p=dropout)
        self.clf = torch.nn.Linear(in_features=500, out_features=output_units)
        
        self.criterion = nn.CrossEntropyLoss()
        self.train_acc = torchmetrics.Accuracy()
        self.val_acc = torchmetrics.Accuracy()

        self.learning_rate = learning_rate
        self.save_hyperparameters()
        
    def forward(self, input_data):
        features = self.base_model(input_data)
        features = self.dropout(features)
        return self.clf(features)

    def training_step(self, batch, batch_nb):
        input_data, targets = batch
        preds = self(input_data)
        loss = self.criterion(preds, targets)
        self.log('train_loss', loss)
        self.train_acc(preds, targets)
        self.log('train_acc', self.train_acc, on_step=True, on_epoch=False, prog_bar=True)
        wandb.log({"train_loss": loss})
        return loss
    
    def validation_step(self, batch, batch_nb):
        input_data, targets = batch
        preds = self(input_data)
        loss = self.criterion(preds, targets)
        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True)
        self.val_acc(preds, targets)
        self.log('val_acc', self.val_acc, on_step=False, on_epoch=True, prog_bar=True)

    def test_step(self, batch, batch_nb):
        self.validation_step(batch, batch_nb)
        
    def predict_step(self, batch, batch_nb):
        input_data, targets = batch
        preds = self(input_data)
        return torch.argmax(preds, dim=1)
    
    def configure_optimizers(self):
        return torch.optim.AdamW(self.parameters(), lr=self.learning_rate, weight_decay=0)

In [19]:
def train(config={"learning_rate": 1e-5, "batch_size": 16, "dropout": 0.2}):
    # Initialize a new wandb run
    with wandb.init(job_type="train",config=config) as run :
        # If called by wandb.agent, as below,
        # this config will be set by Sweep Controller
        config = run.config

        # set up W&B logger
        wandb_logger = WandbLogger(experiment=run, log_model=True)

        # setup data
        dm = DistractionDataModule(data_dir,config.batch_size)
        dm.setup()

        # setup model
        model = Model(NUM_CLASSES,config.learning_rate,config.dropout)

        callbacks = [
          pl.callbacks.ModelCheckpoint(monitor='val_acc', dirpath=ckpt_dir, verbose=True, mode='max', filename='resnet50-t3-{val_acc:.4f}'),
          pl.callbacks.EarlyStopping(monitor='val_acc', patience=20, verbose=True, mode='max')
        ]

        gpus = 1 if torch.cuda.is_available() else 0

        # setup Trainer
        trainer = pl.Trainer(
            logger=wandb_logger,    # W&B integration
            gpus=gpus,                # use all GPU's
            max_epochs=2,            # number of epochs
            callbacks=callbacks
            )

        # train
        trainer.fit(model, dm.train_dataloader(), dm.val_dataloader())

In [17]:
sweep_id = wandb.sweep(sweep_config, project="distraction")

Create sweep with ID: abthe5m2
Sweep URL: https://wandb.ai/youssef-mostafa/distraction/sweeps/abthe5m2


In [None]:
wandb.agent(sweep_id, function=train)

[34m[1mwandb[0m: Agent Starting Run: e5zrp4gx with config:
[34m[1mwandb[0m: 	batch_size: 128
[34m[1mwandb[0m: 	dropout: 0.14948090757121912
[34m[1mwandb[0m: 	learning_rate: 0.061384109053975656


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs

  | Name       | Type             | Params
------------------------------------------------
0 | base_model | ResNet           | 24.5 M
1 | dropout    | Dropout          | 0     
2 | clf        | Linear           | 5.0 K 
3 | criterion  | CrossEntropyLoss | 0     
4 | train_acc  | Accuracy         | 0     
5 | val_acc    | Accuracy         | 0     
------------------------------------------------
23.1 M    Trainable params
1.4 M     Non-trainable params
24.5 M    Total params
98.150    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]