In [1]:
import glob
import sys
import os
import time
import random
import math

# DATALOADER
import cv2
from PIL import Image
import numpy as np
import albumentations as A
import torchvision.transforms as T
from PIL import Image
import pandas as pd

# BUILDING MODEL
import torch
import torch as th
import torch.nn as nn
import torch.nn.functional as F

# TRAINING
from torch.utils.data import DataLoader, Dataset
from tqdm import tqdm_notebook as tqdm

# OTHER STUFF
import timm
from transformers import (get_linear_schedule_with_warmup, 
                          get_constant_schedule,
                          get_cosine_schedule_with_warmup, 
                          get_cosine_with_hard_restarts_schedule_with_warmup,
                          get_constant_schedule_with_warmup)
import gc
import transformers
from transformers import CLIPProcessor, CLIPVisionModel,  CLIPVisionConfig
from pytorch_metric_learning import losses
import open_clip
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import PIL


# UTILS
import utilities

%load_ext autoreload
%autoreload 2

2023-09-20 21:16:25.591691: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
PIL.Image.MAX_IMAGE_PIXELS = 108000001

In [3]:
torch.cuda.is_available()

True

In [4]:
open_clip.list_pretrained()

[('RN50', 'openai'),
 ('RN50', 'yfcc15m'),
 ('RN50', 'cc12m'),
 ('RN50-quickgelu', 'openai'),
 ('RN50-quickgelu', 'yfcc15m'),
 ('RN50-quickgelu', 'cc12m'),
 ('RN101', 'openai'),
 ('RN101', 'yfcc15m'),
 ('RN101-quickgelu', 'openai'),
 ('RN101-quickgelu', 'yfcc15m'),
 ('RN50x4', 'openai'),
 ('RN50x16', 'openai'),
 ('RN50x64', 'openai'),
 ('ViT-B-32', 'openai'),
 ('ViT-B-32', 'laion400m_e31'),
 ('ViT-B-32', 'laion400m_e32'),
 ('ViT-B-32', 'laion2b_e16'),
 ('ViT-B-32', 'laion2b_s34b_b79k'),
 ('ViT-B-32', 'datacomp_m_s128m_b4k'),
 ('ViT-B-32', 'commonpool_m_clip_s128m_b4k'),
 ('ViT-B-32', 'commonpool_m_laion_s128m_b4k'),
 ('ViT-B-32', 'commonpool_m_image_s128m_b4k'),
 ('ViT-B-32', 'commonpool_m_text_s128m_b4k'),
 ('ViT-B-32', 'commonpool_m_basic_s128m_b4k'),
 ('ViT-B-32', 'commonpool_m_s128m_b4k'),
 ('ViT-B-32', 'datacomp_s_s13m_b4k'),
 ('ViT-B-32', 'commonpool_s_clip_s13m_b4k'),
 ('ViT-B-32', 'commonpool_s_laion_s13m_b4k'),
 ('ViT-B-32', 'commonpool_s_image_s13m_b4k'),
 ('ViT-B-32', 'commo

In [5]:
class CFG:
    model_name = 'ViT-L-14' 
    model_data = 'datacomp_xl_s13b_b90k'
    samples_per_class = 5
    n_classes = 6
    min_samples = 1
    image_size = 224 
    hidden_layer = 768
    seed = 5
    workers = 12
    train_batch_size = 16
    valid_batch_size = 32 
    emb_size = 768
    vit_bb_lr = {"8": 1.25e-6, "16": 2.5e-6, "20": 5e-6, "24": 10e-6}
    vit_bb_wd = 1e-3
    hd_lr = 3e-4
    hd_wd = 1e-5
    autocast = True
    n_warmup_steps = 1000
    n_epochs = 20
    device = torch.device('cuda')
    s=30.
    m=.45
    m_min=.05
    acc_steps = 1
    global_step = 0
    crit = 'ce'

In [6]:
CFG.device

device(type='cuda')

In [7]:
utilities.set_seed(CFG.seed)

## Dataset

In [8]:
img_dir = "../../data_round_2/final"
annotations_csv = "../../data_round_2/phase2_train_v0.csv"


labels = [
    "albopictus",
    "culex",
    "japonicus/koreicus",
    "culiseta",
    "anopheles",
    "aegypti"
]

In [9]:
# used for training
training_samples = []
validation_samples = []
values_counts = []
validation_values_counts = []
num_classes = 6


annotations_df = pd.read_csv(annotations_csv)
train_df, val_df = train_test_split(
    annotations_df,
    test_size=0.2,
    stratify=annotations_df["class_label"],
    random_state=200,
)

for _, row in train_df.iterrows():
    img_path = row['img_fName']
    class_label = row['class_label']
    training_samples.append((labels.index(class_label), os.path.join(img_dir, img_path)))

name_counts = train_df['class_label'].value_counts()
values_counts = [name_counts[label] for label in labels]


for _, row in val_df.iterrows():
    img_path = row['img_fName']
    class_label = row['class_label']
    validation_samples.append((labels.index(class_label), os.path.join(img_dir, img_path)))

name_counts = val_df['class_label'].value_counts()
validation_values_counts = [name_counts[label] for label in labels]


In [10]:
data_train = training_samples 
data_val = validation_samples
value_counts = np.array(values_counts)
CFG.n_classes = num_classes

In [11]:
len(data_train), len(data_val), CFG.n_classes 

(8285, 2072, 6)

## CLIP Model

In [12]:
class Head(nn.Module):
    def __init__(self, hidden_size, k=3):
        super(Head, self).__init__()
        self.emb = nn.Linear(hidden_size, CFG.emb_size, bias=False)
        self.dropout = utilities.Multisample_Dropout()
        self.arc = utilities.ArcMarginProduct_subcenter(CFG.emb_size, CFG.n_classes, k)
        
    def forward(self, x):
        embeddings = self.dropout(x, self.emb)
        output = self.arc(embeddings)
        return output, F.normalize(embeddings)
    
class HeadV2(nn.Module):
    def __init__(self, hidden_size, k=3):
        super(HeadV2, self).__init__()
        self.arc = utilities.ArcMarginProduct_subcenter(hidden_size, CFG.n_classes, k)
        
    def forward(self, x):
        output = self.arc(x)
        return output, F.normalize(x)
    
class HeadV3(nn.Module):
    def __init__(self, hidden_size, k=3):
        super(HeadV3, self).__init__()        
        self.emb = nn.Linear(hidden_size, CFG.emb_size, bias=False)
        self.dropout = nn.Dropout1d(0.2)
        self.arc = utilities.ArcMarginProduct_subcenter(CFG.emb_size, CFG.n_classes, k)
        
    def forward(self, x):
        x = self.dropout(x)
        x = self.emb(x)
        output = self.arc(x)
        return output, F.normalize(x)

In [13]:
class Model(nn.Module):
    def __init__(self, vit_backbone, head_size, version='v1', k=3):
        super(Model, self).__init__()
        if version == 'v1':
            self.head = Head(head_size, k)
        elif version == 'v2':
            self.head = HeadV2(head_size, k)
        elif version == 'v3':
            self.head = HeadV3(head_size, k)
        else:
            self.head = Head(head_size, k)
        
        self.encoder = vit_backbone.visual
    def forward(self, x):
        x = self.encoder(x)
        return self.head(x)

    def get_parameters(self):

        parameter_settings = [] 
        parameter_settings.extend(
            self.get_parameter_section(
                [(n, p) for n, p in self.encoder.named_parameters()], 
                lr=CFG.vit_bb_lr, 
                wd=CFG.vit_bb_wd
            )
        ) 

        parameter_settings.extend(
            self.get_parameter_section(
                [(n, p) for n, p in self.head.named_parameters()], 
                lr=CFG.hd_lr, 
                wd=CFG.hd_wd
            )
        ) 

        return parameter_settings

    def get_parameter_section(self, parameters, lr=None, wd=None): 
        parameter_settings = []


        lr_is_dict = isinstance(lr, dict)
        wd_is_dict = isinstance(wd, dict)

        layer_no = None
        for no, (n,p) in enumerate(parameters):
            
            for split in n.split('.'):
                if split.isnumeric():
                    layer_no = int(split)
            
            if not layer_no:
                layer_no = 0
            
            if lr_is_dict:
                for k,v in lr.items():
                    if layer_no < int(k):
                        temp_lr = v
                        break
            else:
                temp_lr = lr

            if wd_is_dict:
                for k,v in wd.items():
                    if layer_no < int(k):
                        temp_wd = v
                        break
            else:
                temp_wd = wd

            weight_decay = 0.0 if 'bias' in n else temp_wd

            parameter_setting = {"params" : p, "lr" : temp_lr, "weight_decay" : temp_wd}

            parameter_settings.append(parameter_setting)

            #print(f'no {no} | params {n} | lr {temp_lr} | weight_decay {weight_decay} | requires_grad {p.requires_grad}')

        return parameter_settings

In [14]:
def ArcFace_criterion(logits_m, target, margins):
    arc = utilities.ArcFaceLossAdaptiveMargin(margins=margins, s=CFG.s, crit=CFG.crit)
    loss_m = arc(logits_m, target, CFG.n_classes)
    return loss_m

## Training and Validation

In [15]:
def train(model, train_loader, optimizer, scaler, scheduler, epoch):
    model.train()
    loss_metrics = utilities.AverageMeter()
    criterion = ArcFace_criterion

    tmp = np.sqrt(1 / np.sqrt(value_counts))
    margins = (tmp - tmp.min()) / (tmp.max() - tmp.min()) * CFG.m + CFG.m_min
        
    bar = tqdm(train_loader)
    for step, data in enumerate(bar):
        step += 1
        images = data['images'].to(CFG.device, dtype=torch.float)
        labels = data['labels'].to(CFG.device)
        batch_size = labels.size(0)

        with torch.cuda.amp.autocast(enabled=CFG.autocast):
            outputs, _ = model(images)

        loss = criterion(outputs, labels, margins)
        loss_metrics.update(loss.item(), batch_size)
        loss = loss / CFG.acc_steps
        scaler.scale(loss).backward()

        if step % CFG.acc_steps == 0 or step == len(bar):
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()
            scheduler.step()
            CFG.global_step += 1
                        
        lrs = utilities.get_lr_groups(optimizer.param_groups)

        loss_avg = loss_metrics.avg

        bar.set_postfix(loss=loss_avg, epoch=epoch, lrs=lrs, step=CFG.global_step)
    
@torch.no_grad()
def val(model, valid_loader):
    model.eval() 

    all_outputs = []
    all_labels = [] 

    for data in tqdm(valid_loader):
        images = data['images'].to(CFG.device, dtype=torch.float)
        labels = data['labels'].to(CFG.device)

        outputs, _ = model(images)

        all_outputs.append(outputs.argmax(dim=1).detach().cpu().numpy())
        all_labels.append(labels.detach().cpu().numpy())


    all_outputs = np.concatenate(all_outputs, axis=0)
    all_labels = np.concatenate(all_labels, axis=0)

    return all_outputs, all_labels

def training(train_loader, 
             val_loader, 
             experiment_folder, 
             version='v1', 
             k=3, 
             use_rampup=True):
    
    os.makedirs(experiment_folder, exist_ok=True)
    
    backbone, _, _ = open_clip.create_model_and_transforms(CFG.model_name, CFG.model_data)

    model = Model(backbone, CFG.hidden_layer, version, k).to(CFG.device)
    
    optimizer = torch.optim.AdamW(model.get_parameters())
 
    scaler = torch.cuda.amp.GradScaler(enabled=CFG.autocast)
    
    if use_rampup:
        scheduler = get_linear_schedule_with_warmup(optimizer,
                                                    num_training_steps=12800,
                                                    num_warmup_steps=CFG.n_warmup_steps)  
    else:
        scheduler = get_constant_schedule(optimizer)
        
    best_score = 0
    best_updated_ = 0
    CFG.global_step = 0                   
    for epoch in range(math.ceil(CFG.n_epochs)):
        print(f'starting epoch {epoch}')

        train(model, train_loader, optimizer, scaler, scheduler, epoch)

        val_preds, val_labels = val(model, val_loader)
        print('validation step is finished')
        # idk why it is needed
        gc.collect()
        torch.cuda.empty_cache() 
        print('calcualting the f1 score')
        score = f1_score(val_labels, val_preds, average='macro')

        print('validation score', score)


        if score > best_score:
            best_updated_ = 0
            best_score = score
            torch.save({
                    'model_state_dict': model.state_dict(),
                }, f'{experiment_folder}/model_epoch_{epoch+1}_f1_score_{score:.2f}.pt')

        
        # early stopping
        if best_updated_ > 3:
            print('no improvement done training....')
            break


        best_updated_ += 1

            

## Data Loaders for Training and Validation

In [16]:
# data loader

def read_img(img_path, is_gray=False):
    mode = cv2.IMREAD_COLOR if not is_gray else cv2.IMREAD_GRAYSCALE
    img = cv2.imread(img_path, mode)
    if not is_gray:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img

def get_final_transform():  
    final_transform = T.Compose([
            T.Resize(
                size=(CFG.image_size, CFG.image_size), 
                interpolation=T.InterpolationMode.BICUBIC,
                antialias=True),
            T.ToTensor(), 
            T.Normalize(
                mean=(0.48145466, 0.4578275, 0.40821073), 
                std=(0.26862954, 0.26130258, 0.27577711)
            )
        ])
    return final_transform

class ProductDataset(Dataset):
    def __init__(self, 
                 data, 
                 transform=None, 
                 final_transform=None):
        self.data = data
        self.transform = transform
        self.final_transform = final_transform
            
    def __len__(self):
        return len(self.data)
    
    def __getitem__(self, idx):
       
        img = read_img(self.data[idx][1])            
        
        if self.transform is not None:
            if isinstance(self.transform, A.Compose):
                img = self.transform(image=img)['image']
            else:
                img = self.transform(img)
        
        if self.final_transform is not None:
            if isinstance(img, np.ndarray):
                img =  Image.fromarray(img)
            img = self.final_transform(img)
            
        product_id = self.data[idx][0]
        return {"images": img, "labels": product_id}
    
def get_train_dataloader(data_train, data_aug='image_net'):
    
    transform = None
    if data_aug == 'image_net':
        transform = T.Compose([
            T.ToPILImage(),
            T.AutoAugment(T.AutoAugmentPolicy.IMAGENET)
        ])
        
    elif data_aug == 'aug_mix':
        transform = T.Compose([
            T.ToPILImage(),
            T.AugMix()
        ])
    elif data_aug == 'happy_whale':
        aug8p3 = A.OneOf([
            A.Sharpen(p=0.3),
            A.ToGray(p=0.3),
            A.CLAHE(p=0.3),
        ], p=0.5)

        transform = A.Compose([
            A.ShiftScaleRotate(rotate_limit=15, scale_limit=0.1, border_mode=cv2.BORDER_REFLECT, p=0.5),
            A.Resize(CFG.image_size, CFG.image_size),
            aug8p3,
            A.HorizontalFlip(p=0.5),
            A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1)
        ])
    
    elif data_aug == 'cut_out':        
        transform = A.Compose([
            A.HorizontalFlip(p=0.5),
            A.ImageCompression(quality_lower=99, quality_upper=100),
            A.ShiftScaleRotate(shift_limit=0.2, scale_limit=0.2, rotate_limit=10, border_mode=0, p=0.7),
            A.Resize(CFG.image_size, CFG.image_size),
            A.Cutout(max_h_size=int(CFG.image_size * 0.4), 
                     max_w_size=int(CFG.image_size * 0.4), 
                     num_holes=1, p=0.5),
        ])
    elif data_aug == "hca":
        aug8p3 = A.OneOf(
            [
                A.Sharpen(p=0.3),
                A.ToGray(p=0.3),
                A.CLAHE(p=0.3),
            ],
            p=0.5,
        )

        blur = A.OneOf(
            [
                A.GaussianBlur(p=0.3),
                A.MotionBlur(p=0.3),
            ],
            p=0.5,
        )

        transform = A.Compose(
            [
                A.ShiftScaleRotate(
                    rotate_limit=45,
                    scale_limit=0.1,
                    border_mode=cv2.BORDER_REFLECT,
                    interpolation=cv2.INTER_CUBIC,
                    p=0.5,
                ),
                A.Resize(224, 224, cv2.INTER_CUBIC),
                aug8p3,
                blur,
                A.HorizontalFlip(p=0.5),
                A.VerticalFlip(p=0.5),
                A.ElasticTransform(p=0.5),
                A.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1),
            ]
        )
    elif data_aug == 'clip':
        transform = T.Compose([
            T.ToPILImage(),
            T.RandomResizedCrop(
                size=(224, 224), 
                scale=(0.9, 1.0), 
                ratio=(0.75, 1.3333), 
                interpolation=T.InterpolationMode.BICUBIC,
                antialias=True
            )
        ])
    elif data_aug == 'clip+image_net':
        transform = T.Compose([
            T.ToPILImage(),
            T.AutoAugment(T.AutoAugmentPolicy.IMAGENET),
            T.RandomResizedCrop(
                size=(224, 224), 
                scale=(0.9, 1.0), 
                ratio=(0.75, 1.3333), 
                interpolation=T.InterpolationMode.BICUBIC,
                antialias=True
            )
        ])
    
    final_transform = get_final_transform()
    train_dataset = ProductDataset(data_train, 
                                   transform, 
                                   final_transform)
    train_loader = DataLoader(train_dataset, 
                              batch_size = CFG.train_batch_size, 
                              num_workers=CFG.workers, 
                              shuffle=True, 
                              drop_last=True)
    print(f'Training Data -> Dataset Length ({len(train_dataset)})')
    return train_loader

def get_val_dataloader(data_val):
    final_transform = get_final_transform()
    val_dataset = ProductDataset(data_val, None, final_transform)
    val_loader = DataLoader(val_dataset, 
                              batch_size = CFG.valid_batch_size, 
                              num_workers=CFG.workers, 
                              shuffle=False, 
                              drop_last=False)
    print(f'Validation Data -> Dataset Length ({len(val_dataset)})')
    return val_loader

## Training

In [17]:
k = 3  
version = 'v2'
data_aug = 'hca'
train_loader = get_train_dataloader(data_train, data_aug)
val_loader = get_val_dataloader(data_val)
experiment_folder = f'my_experiments/{CFG.model_name}-{CFG.model_data}-{str(data_aug)}-{str(version)}-mosquito-All-Epoch({str(CFG.n_epochs)})'
training(train_loader, 
         val_loader, 
         experiment_folder, 
         version=version,
         k=k)
# idk why it is needed
gc.collect()
torch.cuda.empty_cache() 

Training Data -> Dataset Length (8285)
Validation Data -> Dataset Length (2072)
starting epoch 0


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.3496760828508318
starting epoch 1


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.4794597692335812
starting epoch 2


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.479115605454078
starting epoch 3


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.5058643072369026
starting epoch 4


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.5191241824331815
starting epoch 5


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.5101830050233198
starting epoch 6


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.5976729423894495
starting epoch 7


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.5768423361712025
starting epoch 8


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.6316959660564053
starting epoch 9


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.6331101211797913
starting epoch 10


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.6195044085416291
starting epoch 11


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.6307186691561179
starting epoch 12


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.6030090882136939
starting epoch 13


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.6382454783496475
starting epoch 14


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.6265142315413926
starting epoch 15


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.6222702756889728
starting epoch 16


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.6358672721535621
starting epoch 17


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.6408725267115699
starting epoch 18


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.6341053980708694
starting epoch 19


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  bar = tqdm(train_loader)


  0%|          | 0/517 [00:00<?, ?it/s]

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for data in tqdm(valid_loader):


  0%|          | 0/65 [00:00<?, ?it/s]

validation step is finished
calcualting the f1 score
validation score 0.644091046400568
