<a href="https://colab.research.google.com/github/RaviShah1/COVID-19-Impact-On-Digital-Learning/blob/main/notebooks/try.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
! pip install -q kaggle
! mkdir ~/.kaggle
! cp kaggle.json ~/.kaggle/
! chmod 600 ~/.kaggle/kaggle.json
! kaggle competitions download -c 'cassava-leaf-disease-classification'
! mkdir train
! unzip /content/cassava-leaf-disease-classification.zip -d train

In [2]:
!pip install timm

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.5.4-py3-none-any.whl (431 kB)
[K     |████████████████████████████████| 431 kB 9.9 MB/s 
Installing collected packages: timm
Successfully installed timm-0.5.4


In [4]:
import pandas as pd
import numpy as np
from tqdm import tqdm
import gc
import cv2

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim import AdamW
from torch.optim import lr_scheduler
from torch.utils.data import DataLoader, Dataset
from torch.cuda.amp import autocast, GradScaler

from sklearn.model_selection import StratifiedKFold

from albumentations import (
    Compose, OneOf, Normalize, Resize, RandomCrop, HorizontalFlip, VerticalFlip, 
    RandomBrightness, RandomContrast, RandomBrightnessContrast, Rotate, ShiftScaleRotate, Cutout, 
    IAAAdditiveGaussianNoise, Transpose
    )
from albumentations.pytorch import ToTensor
from albumentations import ImageOnlyTransform

import timm

import matplotlib.pyplot as plt

# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
_device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(_device_name)
print(device)

cuda


In [5]:
# Load the training table; later cells read its `image_id` and `label` columns.
train_df = pd.read_csv(filepath_or_buffer="/content/train/train.csv")

In [6]:
# Tag every row with the index of the stratified fold in which it is held out.
kf = StratifiedKFold(n_splits=5)
fold_splits = kf.split(X=train_df, y=train_df['label'].values)
for f, (_, holdout_rows) in enumerate(fold_splits):
    train_df.loc[holdout_rows, 'fold'] = f

# Fold 0 becomes the validation split; the remaining folds are used for training.
in_fold_zero = train_df.fold == 0
train = train_df[~in_fold_zero]
valid = train_df[in_fold_zero]

In [7]:
class CFG:
    """Hyper-parameters for the training run, gathered in one place."""

    # Identifiers for the backbone and the optimiser.
    model = "resnext50_32x4d"
    optimizer = "AdamW"

    # Optimiser settings.
    lr = 1e-5
    betas = (0.9, 0.999)
    weight_decay = 0.1

    # Number of epochs to train for.
    epochs = 3

In [8]:
class LeafDataset(Dataset):
    """Dataset that reads an image from disk and pairs it with its label.

    Args:
        df: table with an ``image_id`` column (file names) and a ``label``
            column.
        transform: callable invoked as ``transform(image=...)`` that returns a
            mapping with an ``'image'`` entry; pass a falsy value to skip it.
        image_dir: directory that contains the files named in ``image_id``.
    """

    def __init__(self, df, transform, image_dir="/content/train/train_images"):
        self.files = list(df["image_id"])
        self.labels = list(df["label"])
        self.transform = transform
        self.image_dir = image_dir

    def __len__(self):
        """Return the number of samples."""
        return len(self.files)

    def __getitem__(self, idx):
        """Return ``{"image": ..., "label": ...}`` for sample ``idx``.

        Raises:
            FileNotFoundError: if the image file cannot be read.
        """
        file_name = self.files[idx]
        file_path = f"{self.image_dir}/{file_name}"
        image = cv2.imread(file_path)
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None
            # instead of raising; fail here with the offending path.
            raise FileNotFoundError(f"Could not read image: {file_path}")
        # OpenCV loads channels as BGR; convert to RGB.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        if self.transform:
            image = self.transform(image=image)['image']
        label = torch.tensor(self.labels[idx], dtype=torch.long)
        return {
            "image": image,
            "label": label,
        }

In [9]:
# Per-channel normalisation constants shared by both pipelines.
_CHANNEL_MEAN = [0.485, 0.456, 0.406]
_CHANNEL_STD = [0.229, 0.224, 0.225]

# Training pipeline: resize, random geometric augmentations, normalise, to tensor.
train_transforms = Compose([
    Resize(256, 256),
    Transpose(p=0.5),
    HorizontalFlip(p=0.5),
    VerticalFlip(p=0.5),
    ShiftScaleRotate(p=0.5),
    Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
    ToTensor(),
])

# Validation pipeline: same resize and normalisation, no random augmentation.
valid_transforms = Compose([
    Resize(256, 256),
    Normalize(mean=_CHANNEL_MEAN, std=_CHANNEL_STD),
    ToTensor(),
])

In [10]:
def criterion(outputs, labels):
    """Return the cross-entropy loss of ``outputs`` (logits) against ``labels``."""
    return F.cross_entropy(outputs, labels)

def get_optimizer(model, lr, weight_decay, betas):
    """Build an AdamW optimiser with weight decay switched off for biases and norm parameters.

    Args:
        model: module whose ``named_parameters()`` are optimised.
        lr: learning rate passed to AdamW.
        weight_decay: decay applied to the first parameter group.
        betas: AdamW ``betas`` tuple.

    Returns:
        An ``AdamW`` instance with two parameter groups: ``[decay, no_decay]``.
    """
    no_decay = ('bias', 'LayerNorm.bias', 'LayerNorm.weight')
    decay_params, no_decay_params = [], []
    for name, param in model.named_parameters():
        # Name matching alone only catches biases and modules literally called
        # "LayerNorm". Parameters with at most one dimension (biases and the
        # per-channel scale/shift of normalisation layers such as BatchNorm)
        # are exempted as well, whatever the layer is named.
        if param.ndim <= 1 or any(nd in name for nd in no_decay):
            no_decay_params.append(param)
        else:
            decay_params.append(param)

    optimizer_grouped_parameters = [
        {'params': decay_params, 'weight_decay': weight_decay},
        {'params': no_decay_params, 'weight_decay': 0.0},
    ]
    optimizer = AdamW(optimizer_grouped_parameters,
                      lr=lr,
                      weight_decay=weight_decay,
                      betas=betas,
                      )
    return optimizer

def get_scheduler(optimizer, T_max=300):
    """Wrap ``optimizer`` in a ``CosineAnnealingLR`` schedule with the given ``T_max``."""
    return lr_scheduler.CosineAnnealingLR(optimizer, T_max=T_max)

In [11]:
class LeafModel(nn.Module):
    """timm backbone whose ``fc`` head is replaced by a new linear classifier.

    Args:
        model_name: architecture name passed to ``timm.create_model``; the
            created model must expose its classifier as ``fc``.
        pretrained: forwarded to ``timm.create_model``.
        num_classes: number of output logits of the new head.
    """

    def __init__(self, model_name='resnext50_32x4d', pretrained=True, num_classes=5):
        super().__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        # Keep the backbone's feature width and attach a fresh head sized to the label set.
        n_features = self.model.fc.in_features
        self.model.fc = nn.Linear(n_features, num_classes)

    def forward(self, x):
        """Return the backbone's output for batch ``x``."""
        return self.model(x)

In [12]:
class AverageMeter(object):
    """Keeps the most recent value together with a running weighted mean."""

    def __init__(self):
        self.reset()

    def reset(self):
        """Zero the last value, mean, weighted sum and sample count."""
        self.val = self.avg = self.sum = self.count = 0

    def update(self, val, n=1):
        """Record ``val`` as observed ``n`` times and refresh the mean."""
        self.val = val
        self.count += n
        self.sum += val * n
        self.avg = self.sum / self.count

In [25]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    """Run one training pass over ``dataloader`` and return the mean loss.

    Args:
        model: network to optimise; put into training mode here.
        optimizer: optimiser stepped (and zeroed) once per batch.
        scheduler: LR scheduler stepped once per batch after the optimiser,
            or ``None`` to skip scheduling.
        dataloader: yields dicts with an ``'image'`` and a ``'label'`` tensor.
        device: device the batch tensors are moved to.
        epoch: epoch index, shown in the progress bar only.

    Returns:
        The sample-weighted average loss over the epoch.
    """
    model.train()

    losses = AverageMeter()

    bar = tqdm(enumerate(dataloader), total=len(dataloader))
    for step, data in bar:
        # Cast as part of the transfer. A stand-alone `.type(torch.FloatTensor)`
        # call does not convert in place; it only builds a CPU copy.
        image = data['image'].to(device, dtype=torch.float)
        targets = data['label'].to(device, dtype=torch.long)

        batch_size = image.size(0)

        outputs = model(image)

        loss = criterion(outputs, targets)
        losses.update(loss.item(), batch_size)
        loss.backward()

        optimizer.step()
        optimizer.zero_grad()

        if scheduler is not None:
            scheduler.step()

        bar.set_postfix(Epoch=epoch, Train_Loss=losses.avg,
                        LR=optimizer.param_groups[0]['lr'])
    gc.collect()

    return losses.avg

In [26]:
@torch.no_grad()
def valid_one_epoch(model, optimizer, dataloader, device, epoch):
    """Evaluate ``model`` on ``dataloader`` without gradients and return the mean loss.

    Args:
        model: network to evaluate; put into eval mode here.
        optimizer: only read to show the current learning rate in the progress bar.
        dataloader: yields dicts with an ``'image'`` and a ``'label'`` tensor.
        device: device the batch tensors are moved to.
        epoch: epoch index, shown in the progress bar only.

    Returns:
        The sample-weighted average loss over the validation data.
    """
    model.eval()

    losses = AverageMeter()

    bar = tqdm(enumerate(dataloader), total=len(dataloader))
    for step, data in bar:
        # Cast as part of the transfer. A stand-alone `.type(torch.FloatTensor)`
        # call does not convert in place; it only builds a CPU copy.
        image = data['image'].to(device, dtype=torch.float)
        targets = data['label'].to(device, dtype=torch.long)

        batch_size = image.size(0)

        outputs = model(image)

        loss = criterion(outputs, targets)
        losses.update(loss.item(), batch_size)

        bar.set_postfix(Epoch=epoch, Valid_Loss=losses.avg,
                        LR=optimizer.param_groups[0]['lr'])

    gc.collect()

    return losses.avg

In [31]:
def train_one_fold(model, optimizer, scheduler, train_loader, valid_loader, fold):
    """Train for ``CFG.epochs`` epochs, validating after each, and report the best loss.

    The model is moved to the module-level ``device``. ``fold`` is accepted
    from the caller but is not read inside this function. Nothing is returned;
    progress is reported via ``print``.
    """
    model.to(device)
    best_epoch_loss = np.inf

    for epoch in range(CFG.epochs):
        gc.collect()
        train_one_epoch(model, optimizer, scheduler,
                        dataloader=train_loader, device=device, epoch=epoch)
        val_epoch_loss = valid_one_epoch(model, optimizer, valid_loader,
                                         device=device, epoch=epoch)

        # Skip the bookkeeping unless this epoch matched or beat the best loss so far.
        if not (val_epoch_loss <= best_epoch_loss):
            continue
        print(f"Validation Loss Improved ({best_epoch_loss} ---> {val_epoch_loss})")
        best_epoch_loss = val_epoch_loss

    print("Best Loss: {:.4f}".format(best_epoch_loss))

In [32]:
# Wrap each split in a dataset using its matching transform pipeline.
train_ds = LeafDataset(df=train, transform=train_transforms)
valid_ds = LeafDataset(df=valid, transform=valid_transforms)

# Shuffle only the training batches; validation keeps a fixed order.
train_loader = DataLoader(dataset=train_ds, batch_size=16, shuffle=True)
valid_loader = DataLoader(dataset=valid_ds, batch_size=32, shuffle=False)

In [33]:
# Build the backbone named in CFG so the config value actually drives the run.
model = LeafModel(CFG.model)
optimizer = get_optimizer(model, CFG.lr, CFG.weight_decay, CFG.betas)
# NOTE(review): train_one_epoch steps the scheduler once per batch while T_max
# defaults to 300, so the LR follows a repeating cosine rather than a single
# decay over the run. Confirm this is intended.
scheduler = get_scheduler(optimizer)

In [34]:
# Fold 0 is the held-out split, so pass it explicitly instead of relying on the
# loop variable left over from the fold-assignment cell (which ends at 4).
train_one_fold(model, optimizer, scheduler, train_loader, valid_loader, fold=0)

100%|██████████| 1070/1070 [06:34<00:00,  2.71it/s, Epoch=0, LR=6.04e-6, Train_Loss=1.33]
100%|██████████| 134/134 [00:56<00:00,  2.39it/s, Epoch=0, LR=6.04e-6, Valid_Loss=1.14]


Validation Loss Improved (inf ---> 1.144788203284005)


100%|██████████| 1070/1070 [06:12<00:00,  2.87it/s, Epoch=1, LR=4.32e-7, Train_Loss=1.03]
100%|██████████| 134/134 [00:54<00:00,  2.44it/s, Epoch=1, LR=4.32e-7, Valid_Loss=0.97]


Validation Loss Improved (1.144788203284005 ---> 0.9696341545782357)


100%|██████████| 1070/1070 [06:12<00:00,  2.87it/s, Epoch=2, LR=2.06e-6, Train_Loss=0.932]
100%|██████████| 134/134 [00:54<00:00,  2.44it/s, Epoch=2, LR=2.06e-6, Valid_Loss=0.881]

Validation Loss Improved (0.9696341545782357 ---> 0.8807327479959648)
Best Loss: 0.8807





In [35]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

Mon Jun 20 02:14:46 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   43C    P0    34W / 250W |   4225MiB / 16280MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces