In [1]:
!pip install timm

Collecting timm
  Downloading timm-0.6.7-py3-none-any.whl (509 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m510.0/510.0 kB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: timm
Successfully installed timm-0.6.7
[0m

In [2]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import torch
from torch.utils.data import Dataset,DataLoader
import torch.nn as nn
import torch.nn.functional as F
import os
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedGroupKFold
import timm
from tqdm import tqdm
import albumentations as A
from collections import defaultdict
from albumentations.pytorch import ToTensorV2
from sklearn.metrics import roc_auc_score,f1_score
from torch.cuda import amp
import torch.optim as optim
from torch.optim import lr_scheduler
import time
import gc
import copy

In [3]:
class CFG:
    seed          = 42
    debug         = False # set debug=False for Full Training
    model         =  'vit_tiny_patch16_224'
    batch_size    = 16
    img_size      = [224, 224]
    epochs        = 5
    lr            = 1e-2
    scheduler     = 'CosineAnnealingLR'
    min_lr        = 1e-6
    T_max         = int(30000/batch_size*epochs)+50
    T_0           = 25
    warmup_epochs = 0
    wd            = 1e-6
    n_accumulate  = 1
    n_fold        = 5
    folds         = [0]
    device        = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [4]:
df = pd.read_csv('../input/ultra-sound-covid-detection/covid_data.csv')

def correct_path(idx):
    path = '../input/ultra-sound-covid-detection/data/'+idx[6::]
    return path

df['path'] = df['path'].apply(correct_path)

In [5]:
df['fold'] = -1
skf = StratifiedGroupKFold(n_splits=5,shuffle=True,random_state=42)
for fold,(train_idx,test_idx) in enumerate(skf.split(df['path'],df['label'],groups=df['video_id'])):
    df.loc[test_idx,'fold'] = fold

In [6]:
df

Unnamed: 0,path,label,video_id,fold
0,../input/ultra-sound-covid-detection/data//Vid...,1,Video_0,1
1,../input/ultra-sound-covid-detection/data//Vid...,1,Video_0,1
2,../input/ultra-sound-covid-detection/data//Vid...,1,Video_0,1
3,../input/ultra-sound-covid-detection/data//Vid...,1,Video_0,1
4,../input/ultra-sound-covid-detection/data//Vid...,1,Video_0,1
...,...,...,...,...
19969,../input/ultra-sound-covid-detection/data//Vid...,1,Video_131,1
19970,../input/ultra-sound-covid-detection/data//Vid...,1,Video_131,1
19971,../input/ultra-sound-covid-detection/data//Vid...,1,Video_131,1
19972,../input/ultra-sound-covid-detection/data//Vid...,1,Video_131,1


In [7]:
class CovidDataset(Dataset):    
    def __init__(self,df,transforms=None,is_valid=False):
        self.df = df
        self.transforms = transforms
        self.is_valid = is_valid
    def __len__(self):
        return len(self.df)
    
    def __getitem__(self,idx):
        img = cv2.imread(self.df['path'].iloc[idx])
        label = self.df['label'].iloc[idx]
        
        if self.transforms:
            img = self.transforms(image=img)['image']
        img = torch.tensor(img,dtype=torch.float32)
        label = torch.tensor(label)
        
        if self.is_valid:
            return img,label,self.df['video_id'].iloc[idx]
        return img,label

In [8]:
def get_transforms(data):
    
    if data == 'train':
        return A.Compose([
            A.Resize(*CFG.img_size,interpolation=cv2.INTER_NEAREST),
            ToTensorV2(),
        ],p=1.0)

    elif data == 'valid':
        return A.Compose([
            A.Resize(*CFG.img_size,interpolation=cv2.INTER_NEAREST),
            ToTensorV2(),
        ],p=1.0)

In [9]:
class BaseModel(nn.Module):
    def __init__(self,cfg,pretrained=False):
        super().__init__()
        self.cfg = cfg
        self.model = timm.create_model(cfg.model,pretrained=pretrained)
        self.model.head = nn.Linear(self.model.head.in_features,1)
        
    def forward(self,x):
        output = self.model(x)
        return output

In [10]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    model.train()
    scaler = amp.GradScaler()
    
    dataset_size = 0
    running_loss = 0.0
    epoch_loss = 0
    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc='Train ')
    for step, (images, labels) in pbar:         
        images = images.to(device, dtype=torch.float)
        labels  = labels.to(device, dtype=torch.float)
        
        batch_size = images.size(0)
        
        with amp.autocast(enabled=True):
            y_pred = model(images)
            loss   = criterion(y_pred.view(-1), labels)
            loss   = loss / CFG.n_accumulate
            
        scaler.scale(loss).backward()
    
        if (step + 1) % CFG.n_accumulate == 0:
            scaler.step(optimizer)
            scaler.update()
            # zero the parameter gradients
            optimizer.zero_grad()
            if scheduler is not None:
                scheduler.step()
                
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        
        epoch_loss = running_loss / dataset_size
        #print(epoch_loss)
        mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
        current_lr = optimizer.param_groups[0]['lr']
        pbar.set_postfix(train_loss=f'{epoch_loss:0.4f}',
                        lr=f'{current_lr:0.5f}',
                        gpu_mem=f'{mem:0.2f} GB')
        torch.cuda.empty_cache()
        gc.collect()
    
    return epoch_loss

In [11]:
@torch.no_grad()
def valid_one_epoch(model, dataloader, device, epoch,df,fold):
    model.eval()
    
    dataset_size = 0
    running_loss = 0.0
    
    val_scores = []
    video_ids_list = list(df[df['fold'] == fold]['video_id'].unique())
    video_ids_score = {}
    video_ids_count = {}
    for i in video_ids_list:
        video_ids_score[i] = 0
        
    for i in video_ids_list:
        df_1 = df[df['fold'] == fold].copy()
        video_ids_count[i] = len(df_1[df['video_id']==i])
        
    pbar = tqdm(enumerate(dataloader), total=len(dataloader), desc='Valid ')
    for step, (images, labels, video_ids) in pbar:        
        images  = images.to(device, dtype=torch.float)
        labels   = labels.to(device, dtype=torch.float)
        
        batch_size = images.size(0)
        
        y_pred  = model(images)
        loss    = criterion(y_pred.view(-1), labels)
       
            
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        
        epoch_loss = running_loss / dataset_size
        
        y_pred = nn.Sigmoid()(y_pred)
        
        for i in range(len(video_ids)):
            video_ids_score[video_ids[i]] += y_pred[i].cpu().detach().numpy()
            
        #val_dice = dice_coef(masks, y_pred).cpu().detach().numpy()
        #val_jaccard = iou_coef(masks, y_pred).cpu().detach().numpy()
        #val_scores.append([val_dice, val_jaccard])
        
        mem = torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0
        current_lr = optimizer.param_groups[0]['lr']
        pbar.set_postfix(valid_loss=f'{epoch_loss:0.4f}',
                        lr=f'{current_lr:0.5f}',
                        gpu_memory=f'{mem:0.2f} GB')
    scores = []
    labels = []
    for ids in video_ids_score:
        video_ids_score[ids] = video_ids_score[ids]/video_ids_count[ids]
        
    for ids in video_ids_score:
        labels.append(df[df['video_id']==ids]['label'].iloc[0])
        scores.append(float(video_ids_score[ids]))
    scores = np.array(scores)
    scores = scores > 0.5
    scores = list(scores)
    val_scores  = f1_score(labels,scores)
    torch.cuda.empty_cache()
    gc.collect()
    
    return epoch_loss, val_scores

In [12]:
def run_training(model, optimizer, scheduler, device, num_epochs,df,fold):
    
    if torch.cuda.is_available():
        print("cuda: {}\n".format(torch.cuda.get_device_name()))
    
    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_score      = -np.inf
    best_epoch     = -1
    history = defaultdict(list)
    
    for epoch in range(1, num_epochs + 1): 
        gc.collect()
        print(f'Epoch {epoch}/{num_epochs}', end='')
        train_loss = train_one_epoch(model, optimizer, scheduler, 
                                           dataloader=train_loader,
                                     device=CFG.device, epoch=epoch)
        val_loss, val_scores = valid_one_epoch(model, valid_loader, 
                                                 device=CFG.device, 
                                                 epoch=epoch,df=df,fold=fold)
    
        history['Train Loss'].append(train_loss)
        history['Valid Loss'].append(val_loss)
        history['Valid Score'].append(val_scores)
        
        
        
        print(f'Valid Score: {val_scores:0.4f}')
        
        # deep copy the model
        if val_scores >= best_score:
            print(f"Valid Score Improved ({best_score:0.4f} ---> {val_scores:0.4f})")
            best_score    = val_scores
            best_epoch   = epoch
            best_model_wts = copy.deepcopy(model.state_dict())
            PATH = f"best_epoch-{fold:02d}.bin"
            torch.save(model.state_dict(), PATH)
            # Save a model file from the current directory
            print(f"Model Saved")
            
        last_model_wts = copy.deepcopy(model.state_dict())
        PATH = f"last_epoch-{fold:02d}.bin"
        torch.save(model.state_dict(), PATH)
            
        print(); print()
    
    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    print("Best Score: {:.4f}".format(best_score))
    
    # load best model weights
    model.load_state_dict(best_model_wts)
    
    return model, history

In [13]:
def fetch_scheduler(optimizer):
    if CFG.scheduler == 'CosineAnnealingLR':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,T_max=CFG.T_max, 
                                                   eta_min=CFG.min_lr)
    elif CFG.scheduler == 'CosineAnnealingWarmRestarts':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer,T_0=CFG.T_0, 
                                                             eta_min=CFG.min_lr)
    elif CFG.scheduler == 'ReduceLROnPlateau':
        scheduler = lr_scheduler.ReduceLROnPlateau(optimizer,
                                                   mode='min',
                                                   factor=0.1,
                                                   patience=7,
                                                   threshold=0.0001,
                                                   min_lr=CFG.min_lr,)
    elif CFG.scheduer == 'ExponentialLR':
        scheduler = lr_scheduler.ExponentialLR(optimizer, gamma=0.85)
    elif CFG.scheduler == None:
        return None
        
    return scheduler

In [14]:
criterion = nn.BCEWithLogitsLoss()

In [15]:
for fold in CFG.folds:
    print(f'#'*15)
    print(f'### Fold: {fold}')
    print(f'#'*15)
    train_df = df[df['fold'] !=fold].reset_index(drop=True)
    valid_df = df[df['fold'] ==fold].reset_index(drop=True)
    
    if CFG.debug:
        train_df = train_df.head()
        valid_df = valid_df.head()
    
    train_dataset = CovidDataset(train_df,transforms=get_transforms(data='train'),is_valid=False)
    valid_dataset = CovidDataset(valid_df,transforms=get_transforms(data='valid'),is_valid=True)
    
    train_loader = DataLoader(train_dataset,batch_size=CFG.batch_size,shuffle=True,num_workers=2,drop_last=True)
    valid_loader = DataLoader(valid_dataset,batch_size=CFG.batch_size,shuffle=False,num_workers=2,drop_last=False)
    
    cfg = CFG()
    
    model     = BaseModel(cfg,pretrained=True)
    model.to(CFG.device)
    optimizer = optim.Adam(model.parameters(), lr=CFG.lr, weight_decay=CFG.wd)
    scheduler = fetch_scheduler(optimizer)
    model, history = run_training(model, optimizer, scheduler,
                                  device=CFG.device,
                                  num_epochs=CFG.epochs,
                                 df=df,
                                 fold=fold)

###############
### Fold: 0
###############
cuda: Tesla P100-PCIE-16GB

Epoch 1/5

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
Train : 100%|██████████| 958/958 [04:10<00:00,  3.83it/s, gpu_mem=0.59 GB, lr=0.00975, train_loss=nan]
  app.launch_new_instance()
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
Valid : 100%|██████████| 290/290 [00:24<00:00, 11.65it/s, gpu_memory=0.18 GB, lr=0.00975, valid_loss=0.2310]


Valid Score: 0.7692
Valid Score Improved (-inf ---> 0.7692)
Model Saved


Epoch 2/5

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
Train : 100%|██████████| 958/958 [03:43<00:00,  4.29it/s, gpu_mem=0.61 GB, lr=0.00901, train_loss=nan]
  app.launch_new_instance()
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
Valid : 100%|██████████| 290/290 [00:18<00:00, 15.84it/s, gpu_memory=0.20 GB, lr=0.00901, valid_loss=0.2310]


Valid Score: 0.7692
Valid Score Improved (0.7692 ---> 0.7692)
Model Saved


Epoch 3/5

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
Train : 100%|██████████| 958/958 [03:41<00:00,  4.32it/s, gpu_mem=0.62 GB, lr=0.00788, train_loss=nan]
  app.launch_new_instance()
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
Valid : 100%|██████████| 290/290 [00:17<00:00, 16.18it/s, gpu_memory=0.22 GB, lr=0.00788, valid_loss=0.2310]


Valid Score: 0.7692
Valid Score Improved (0.7692 ---> 0.7692)
Model Saved


Epoch 4/5

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
Train : 100%|██████████| 958/958 [03:42<00:00,  4.31it/s, gpu_mem=0.61 GB, lr=0.00645, train_loss=nan]
  app.launch_new_instance()
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
Valid : 100%|██████████| 290/290 [00:18<00:00, 15.75it/s, gpu_memory=0.21 GB, lr=0.00645, valid_loss=0.2310]


Valid Score: 0.7692
Valid Score Improved (0.7692 ---> 0.7692)
Model Saved


Epoch 5/5

  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
Train : 100%|██████████| 958/958 [03:43<00:00,  4.28it/s, gpu_mem=0.61 GB, lr=0.00487, train_loss=nan]
  app.launch_new_instance()
  from ipykernel import kernelapp as app
  from ipykernel import kernelapp as app
Valid : 100%|██████████| 290/290 [00:18<00:00, 15.81it/s, gpu_memory=0.20 GB, lr=0.00487, valid_loss=0.2310]


Valid Score: 0.7692
Valid Score Improved (0.7692 ---> 0.7692)
Model Saved


Training complete in 0h 20m 43s
Best Score: 0.7692
