# [Pytorch] Birdclef starter

In [1]:
!pip install timm
!pip install --upgrade wandb



In [7]:
!pip install opencv-python

Looking in indexes: http://ftp.daumkakao.com/pypi/simple
Collecting opencv-python
  Downloading http://mirror.kakao.com/pypi/packages/cf/09/b24c266cd61ddeed101b90c92a26f54d060b06f4a1b102eb891576d6e9e2/opencv_python-4.6.0.66-cp36-abi3-win_amd64.whl (35.6 MB)
Installing collected packages: opencv-python
Successfully installed opencv-python-4.6.0.66


You should consider upgrading via the 'c:\users\ftmlab\documents\hyoon\project_new\kaggle\birdclef2022\venv\scripts\python.exe -m pip install --upgrade pip' command.


In [10]:
!pip install timm

Looking in indexes: http://ftp.daumkakao.com/pypi/simple
Collecting timm
  Downloading http://mirror.kakao.com/pypi/packages/49/65/a83208746dc9c0d70feff7874b49780ff110810feb528df4b0ecadcbee60/timm-0.5.4-py3-none-any.whl (431 kB)
Installing collected packages: timm
Successfully installed timm-0.5.4


You should consider upgrading via the 'c:\users\ftmlab\documents\hyoon\project_new\kaggle\birdclef2022\venv\scripts\python.exe -m pip install --upgrade pip' command.


In [11]:
import os
import gc
import cv2
import math
import copy
import time
import random

# For data manipulation
import numpy as np
import pandas as pd

# Pytorch Imports
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp

# Audio 
import torchaudio
from torchaudio.transforms import MelSpectrogram, Resample

# Utils
import joblib
from tqdm import tqdm
from collections import defaultdict

# Sklearn Imports
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import f1_score

# For Image Models
import timm

# For colored terminal text
from colorama import Fore, Back, Style
b_ = Fore.BLUE
sr_ = Style.RESET_ALL

import warnings
warnings.filterwarnings("ignore")

# For descriptive error messages
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"

**import wandb and log in**     
You will need a unique API key to log in to Weights & Biases.

If you don't have a Weights & Biases account, you can go to https://wandb.ai/site and create a FREE account.
Access your API key: https://wandb.ai/authorize.

In [12]:
import wandb

wandb.login()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33mhgyoon0928[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [13]:
CONFIG = {"seed": 2022,
          "epochs": 10,
          "model_name": "tf_efficientnet_b0_ns",
          "embedding_size": 768,
          "num_classes": 152,
          "train_batch_size": 32,
          "valid_batch_size": 64,
          "learning_rate": 1e-4,
          "scheduler": 'CosineAnnealingLR',
          "min_lr": 1e-6,
          "T_max": 500,
          "weight_decay": 1e-6,
          "n_fold": 5,
          "n_accumulate": 1,
          "device": torch.device("cuda:0" if torch.cuda.is_available() else "cpu"),
          "competition": "birdclef2022",
          "_wandb_kernel": "deb",
          # Audio Specific
          "sample_rate": 32_000,
          "max_time": 5,
          "n_mels": 224,
          "n_fft": 1024,
          }

In [14]:
def set_seed(seed=42):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    
set_seed(CONFIG['seed'])

In [15]:
ROOT_DIR = '../input/birdclef-2022'
TRAIN_DIR = '../input/birdclef-2022/train_audio'
TEST_DIR = '../input/birdclef-2022/test_soundscapes'

In [16]:
def get_train_file_path(filename):
    return f"{TRAIN_DIR}/{filename}"

# Read data

In [17]:
df = pd.read_csv(f"{ROOT_DIR}/train_metadata.csv")
df['file_path'] = df['filename'].apply(get_train_file_path)
df.head()

Unnamed: 0,primary_label,secondary_labels,type,latitude,longitude,scientific_name,common_name,author,license,rating,time,url,filename,file_path
0,afrsil1,[],"['call', 'flight call']",12.391,-1.493,Euodice cantans,African Silverbill,Bram Piot,Creative Commons Attribution-NonCommercial-Sha...,2.5,08:00,https://www.xeno-canto.org/125458,afrsil1/XC125458.ogg,../input/birdclef-2022/train_audio/afrsil1/XC1...
1,afrsil1,"['houspa', 'redava', 'zebdov']",['call'],19.8801,-155.7254,Euodice cantans,African Silverbill,Dan Lane,Creative Commons Attribution-NonCommercial-Sha...,3.5,08:30,https://www.xeno-canto.org/175522,afrsil1/XC175522.ogg,../input/birdclef-2022/train_audio/afrsil1/XC1...
2,afrsil1,[],"['call', 'song']",16.2901,-16.0321,Euodice cantans,African Silverbill,Bram Piot,Creative Commons Attribution-NonCommercial-Sha...,4.0,11:30,https://www.xeno-canto.org/177993,afrsil1/XC177993.ogg,../input/birdclef-2022/train_audio/afrsil1/XC1...
3,afrsil1,[],"['alarm call', 'call']",17.0922,54.2958,Euodice cantans,African Silverbill,Oscar Campbell,Creative Commons Attribution-NonCommercial-Sha...,4.0,11:00,https://www.xeno-canto.org/205893,afrsil1/XC205893.ogg,../input/birdclef-2022/train_audio/afrsil1/XC2...
4,afrsil1,[],['flight call'],21.4581,-157.7252,Euodice cantans,African Silverbill,Ross Gallardy,Creative Commons Attribution-NonCommercial-Sha...,3.0,16:30,https://www.xeno-canto.org/207431,afrsil1/XC207431.ogg,../input/birdclef-2022/train_audio/afrsil1/XC2...


## Visualize images

In [18]:
run = wandb.init(project=CONFIG['competition'],
                 job_type='Visualization',
                 name='Audio Visualization',
                 )

In [19]:
preview_table = wandb.Table(columns=['Audio', 'Label', 'Rating', 'Time'])

temp_df = df.sample(5000).reset_index(drop=True)

for i in tqdm(range(len(temp_df))):
    row = temp_df.loc[i]
    audio = wandb.Audio(row.file_path, sample_rate=CONFIG['sample_rate'])
    preview_table.add_data(audio,
                           row.primary_label,
                           row.rating,
                           row.time)

wandb.log({'Visualization': preview_table})
run.finish()

100%|██████████| 5000/5000 [00:47<00:00, 105.74it/s]


VBox(children=(Label(value='2009.074 MB of 2009.241 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=0.…

인코더는 사용할 때마다 다르게 인코딩을 할 수 있으므로 재현성을 위하여 저장해둔다.

In [11]:
encoder = LabelEncoder()
df['primary_label'] = encoder.fit_transform(df['primary_label'])

with open('le.pkl', "wb") as fp:
    joblib.dump(encoder, fp)

# Create folds

각 새 종류가 train, val에 적절한 비율로 들어가도록 하기 위해 `StratifiedKFold`를 이용한다.

In [12]:
skf = StratifiedKFold(n_splits=CONFIG['n_fold'])

for fold, ( _, val_) in enumerate(skf.split(X=df, y=df.primary_label)):
      df.loc[val_ , "kfold"] = fold

# Dataset Class

In [13]:
df['file_path']

0        ../input/birdclef-2022/train_audio/afrsil1/XC1...
1        ../input/birdclef-2022/train_audio/afrsil1/XC1...
2        ../input/birdclef-2022/train_audio/afrsil1/XC1...
3        ../input/birdclef-2022/train_audio/afrsil1/XC2...
4        ../input/birdclef-2022/train_audio/afrsil1/XC2...
                               ...                        
14847    ../input/birdclef-2022/train_audio/zebdov/XC62...
14848    ../input/birdclef-2022/train_audio/zebdov/XC64...
14849    ../input/birdclef-2022/train_audio/zebdov/XC66...
14850    ../input/birdclef-2022/train_audio/zebdov/XC66...
14851    ../input/birdclef-2022/train_audio/zebdov/XC66...
Name: file_path, Length: 14852, dtype: object

In [14]:
class BirdCLEFDataset(Dataset):
    def __init__(self, df, target_sample_rate, max_time, image_transforms=None):
        self.file_paths = df['file_path'].values # 오디오 파일 경로
        self.labels = df['primary_label'].values # 오디오 파일의 label(target)
        self.target_sample_rate = target_sample_rate # 모델에 input으로 들어갈 파일의 sampling rate (오디오 파일마다 다를 수 있으므로 모델에 들어가기 전 맞춰준다.)
        num_samples = target_sample_rate * max_time # 샘플의 전체 데이터 포인트 개수 (sr (data / time) * time(time))
        self.num_samples = num_samples 
        self.image_transforms = image_transforms 
        
        
    def __len__(self):
        return len(self.file_paths)
    
    
    def __getitem__(self, index):
        filepath = self.file_paths[index] 
        audio, sample_rate = torchaudio.load(filepath) # torchaudio의 반환 결과 (sample, sample_rate)
        audio = self.to_mono(audio) # 채널 수가 여러 개일 수 있으므로 평균을 통해 하나로 맞춰준다.
        
        if sample_rate != self.target_sample_rate:
            resample = Resample(sample_rate, self.target_sample_rate) # 원본 데이터의 sampling rate가 모델의 것과 맞지 않을 경우 바꾸어준다.
            audio = resample(audio)
        
        if audio.shape[0] > self.num_samples: # 오디오의 샘플 수가 max보다 클 경우 자른다. 
            audio = self.crop_audio(audio)
            
        if audio.shape[0] < self.num_samples: # 오디오가 짧을 경우 padding을 넣어준다.
            audio = self.pad_audio(audio)
            
        mel_spectogram = MelSpectrogram(sample_rate=self.target_sample_rate, # mel스펙트로그램으로 변환
                                        n_mels=CONFIG['n_mels'], 
                                        n_fft=CONFIG['n_fft'])
        mel = mel_spectogram(audio)
        label = torch.tensor(self.labels[index])
        
        # Convert to Image
        image = torch.stack([mel, mel, mel]) # 사용하려는 사전 훈련 모델의 input차원이 3차원이므로 차원수를 3개로 늘린다.
        
        # Normalize Image
        max_val = torch.abs(image).max() # 각 샘플의 max값을 나눠줌으로써 normalize
        image = image / max_val 
        
        return {
            "image": image, 
            "label": label
        }
    
    
    def pad_audio(self, audio):
        """_summary_
        길이가 짧은 오디오 샘플을 패딩(뒤쪽에 필요한 만큼 0을 추가한다.)

        Args:
            audio (torch.Tensor): 오디오 샘플 (1D, [length_of_samples])

        Returns:
            torch.Tensor: 패딩된 오디오 샘플 (1D, [self.num_samples])
        """
        pad_length = self.num_samples - audio.shape[0]
        last_dim_padding = (0, pad_length) # (pad_left, pad_right)
        audio = F.pad(audio, last_dim_padding)
        return audio
    
    
    def crop_audio(self, audio):
        """_summary_
        길이가 긴 audio sample을 자른다.(뒷부분을 자른다.)
        Args:
            audio (torch.Tensor): 오디오 샘플 (1D, [length_of_samples])

        Returns:
            torch.Tensor: 잘린 오디오 샘플 (1D, [self.num_samples])
        """
        return audio[:self.num_samples]
        
    
    def to_mono(self, audio):
        return torch.mean(audio, axis=0)

# GeM Pooling

In [15]:
class GeM(nn.Module):
    def __init__(self, p=3, eps=1e-6):
        super(GeM, self).__init__()
        self.p = nn.Parameter(torch.ones(1)*p) # p를 학습 가능한 파라미터로 설정
        self.eps = eps

    def forward(self, x):
        return self.gem(x, p=self.p, eps=self.eps)
        
    def gem(self, x, p=3, eps=1e-6):
        return F.avg_pool2d(x.clamp(min=eps).pow(p), (x.size(-2), x.size(-1))).pow(1./p)
        
    def __repr__(self):
        return self.__class__.__name__ + \
                '(' + 'p=' + '{:.4f}'.format(self.p.data.tolist()[0]) + \
                ', ' + 'eps=' + str(self.eps) + ')'

# Create model

In [16]:
class BirdCLEFModel(nn.Module):
    def __init__(self, model_name, embedding_size, pretrained=True):
        super(BirdCLEFModel, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Identity()
        self.model.global_pool = nn.Identity()
        self.pooling = GeM()
        self.embedding = nn.Linear(in_features, embedding_size)
        self.fc = nn.Linear(embedding_size, CONFIG['num_classes'])


In [17]:
class BirdCLEFModel(nn.Module):
    def __init__(self, model_name, embedding_size, pretrained=True):
        super(BirdCLEFModel, self).__init__()
        self.model = timm.create_model(model_name, pretrained=pretrained)
        in_features = self.model.classifier.in_features
        self.model.classifier = nn.Identity() # 끝 레이어를 삭제하는 대신 입력과 같은 출력을 내는 Identity 레이어 사용
        self.model.global_pool = nn.Identity() # pooling전 레이어를 image의 feature 정보로 이용가능
        self.pooling = GeM()
        self.embedding = nn.Linear(in_features, embedding_size)
        self.fc = nn.Linear(embedding_size, CONFIG['num_classes'])

    def forward(self, images):
        features = self.model(images)
        pooled_features = self.pooling(features).flatten(1)
        embedding = self.embedding(pooled_features)
        output = self.fc(embedding)
        return output
    
model = BirdCLEFModel(CONFIG['model_name'], CONFIG['embedding_size'])
model.to(CONFIG['device'])

BirdCLEFModel(
  (model): EfficientNet(
    (conv_stem): Conv2dSame(3, 32, kernel_size=(3, 3), stride=(2, 2), bias=False)
    (bn1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
    (act1): SiLU(inplace=True)
    (blocks): Sequential(
      (0): Sequential(
        (0): DepthwiseSeparableConv(
          (conv_dw): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
          (bn1): BatchNorm2d(32, eps=0.001, momentum=0.1, affine=True, track_running_stats=True)
          (act1): SiLU(inplace=True)
          (se): SqueezeExcite(
            (conv_reduce): Conv2d(32, 8, kernel_size=(1, 1), stride=(1, 1))
            (act1): SiLU(inplace=True)
            (conv_expand): Conv2d(8, 32, kernel_size=(1, 1), stride=(1, 1))
            (gate): Sigmoid()
          )
          (conv_pw): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
          (bn2): BatchNorm2d(16, eps=0.001, momentum=0.1, affine=True, track_ru

# Loss Function

In [18]:
def criterion(outputs, labels):
    return nn.CrossEntropyLoss()(outputs, labels)

# Training Function

In [19]:
def train_one_epoch(model, optimizer, scheduler, dataloader, device, epoch):
    model.train() # 모델 훈련 모드로 변환
    
    dataset_size = 0
    running_loss = 0.0
    
    bar = tqdm(enumerate(dataloader), total=len(dataloader))
    for step, data in bar:
        images = data['image'].to(device, dtype=torch.float) # device로 input 이동
        labels = data['label'].to(device, dtype=torch.long)
        
        batch_size = images.size(0)
        
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss = loss / CONFIG['n_accumulate']
            
        loss.backward()
    
        if (step + 1) % CONFIG['n_accumulate'] == 0:
            optimizer.step()

            # zero the parameter gradients
            optimizer.zero_grad()

            if scheduler is not None:
                scheduler.step()
                
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        
        epoch_loss = running_loss / dataset_size
        
        bar.set_postfix(Epoch=epoch, Train_Loss=epoch_loss,
                        LR=optimizer.param_groups[0]['lr'])
    gc.collect()
    
    return epoch_loss

# Validation Function

In [20]:
@torch.no_grad()
def valid_one_epoch(model, dataloader, device, epoch):
    model.eval()
    
    dataset_size = 0
    running_loss = 0.0
    
    LABELS = []
    PREDS = []
    
    bar = tqdm(enumerate(dataloader), total=len(dataloader))
    for step, data in bar:        
        images = data['image'].to(device, dtype=torch.float)
        labels = data['label'].to(device, dtype=torch.long)
        
        batch_size = images.size(0)

        outputs = model(images)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)
        
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        
        epoch_loss = running_loss / dataset_size
        
        PREDS.append(preds.view(-1).cpu().detach().numpy())
        LABELS.append(labels.view(-1).cpu().detach().numpy())
        
        bar.set_postfix(Epoch=epoch, Valid_Loss=epoch_loss,
                        LR=optimizer.param_groups[0]['lr'])   
    
    LABELS = np.concatenate(LABELS)
    PREDS = np.concatenate(PREDS)
    val_f1 = f1_score(LABELS, PREDS, average='macro')
    gc.collect()
    
    return epoch_loss, val_f1

# Run training

In [21]:
def run_training(model, optimizer, scheduler, device, num_epochs):
    # To automatically log gradients
    wandb.watch(model, log_freq=100)
    
    if torch.cuda.is_available():
        print("[INFO] Using GPU: {}\n".format(torch.cuda.get_device_name()))
    
    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_epoch_f1 = 0
    history = defaultdict(list)
    
    for epoch in range(1, num_epochs + 1): 
        gc.collect()
        train_epoch_loss = train_one_epoch(model, optimizer, scheduler, 
                                           dataloader=train_loader, 
                                           device=CONFIG['device'], epoch=epoch)
        
        val_epoch_loss, val_epoch_f1 = valid_one_epoch(model, valid_loader, 
                                                       device=CONFIG['device'], 
                                                       epoch=epoch)
    
        history['Train Loss'].append(train_epoch_loss)
        history['Valid Loss'].append(val_epoch_loss)
        history['Valid F1'].append(val_epoch_f1)
        
        # Log the metrics
        wandb.log({"Train Loss": train_epoch_loss})
        wandb.log({"Valid Loss": val_epoch_loss})
        wandb.log({"Valid F1": val_epoch_f1})
        
        # deep copy the model
        if val_epoch_f1 >= best_epoch_f1:
            print(f"{b_}Validation F1 Improved ({best_epoch_f1} ---> {val_epoch_f1})")
            best_epoch_f1 = val_epoch_f1
            run.summary["Best F1 Score"] = best_epoch_f1
            best_model_wts = copy.deepcopy(model.state_dict())
            PATH = "F1{:.4f}_epoch{:.0f}.bin".format(best_epoch_f1, epoch)
            torch.save(model.state_dict(), PATH)
            # Save a model file from the current directory
            print(f"Model Saved{sr_}")
            
        print()
    
    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    print("Best F1: {:.4f}".format(best_epoch_f1))
    
    # load best model weights
    model.load_state_dict(best_model_wts)
    
    return model, history

In [22]:
def fetch_scheduler(optimizer):
    if CONFIG['scheduler'] == 'CosineAnnealingLR':
        scheduler = lr_scheduler.CosineAnnealingLR(optimizer,T_max=CONFIG['T_max'], 
                                                   eta_min=CONFIG['min_lr'])
    elif CONFIG['scheduler'] == 'CosineAnnealingWarmRestarts':
        scheduler = lr_scheduler.CosineAnnealingWarmRestarts(optimizer,T_0=CONFIG['T_0'], 
                                                             eta_min=CONFIG['min_lr'])
    elif CONFIG['scheduler'] == None:
        return None
        
    return scheduler

In [23]:
def prepare_loaders(df, fold):
    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)
    
    train_dataset = BirdCLEFDataset(df_train, target_sample_rate=CONFIG['sample_rate'], max_time=CONFIG['max_time'])
    valid_dataset = BirdCLEFDataset(df_valid, target_sample_rate=CONFIG['sample_rate'], max_time=CONFIG['max_time'])

    train_loader = DataLoader(train_dataset, batch_size=CONFIG['train_batch_size'], 
                              num_workers=2, shuffle=True, pin_memory=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset, batch_size=CONFIG['valid_batch_size'], 
                              num_workers=2, shuffle=False, pin_memory=True)
    
    return train_loader, valid_loader

# Prepare dataloader

In [24]:
train_loader, valid_loader = prepare_loaders(df, fold=0)

# Define optimizer and schedular

In [25]:
optimizer = optim.Adam(model.parameters(), lr=CONFIG['learning_rate'], 
                       weight_decay=CONFIG['weight_decay'])
scheduler = fetch_scheduler(optimizer)

# Start training

In [26]:
run = wandb.init(project=CONFIG['competition'], 
                 config=CONFIG,
                 job_type='Train',
                 tags=['gem-pooling', CONFIG['model_name']],
                 anonymous='must')

In [27]:
model, history = run_training(model, optimizer, scheduler,
                              device=CONFIG['device'],
                              num_epochs=CONFIG['epochs'])

[INFO] Using GPU: Quadro P2000



100%|█| 371/371 [11:14<00:00,  1.82s/it, Epoch=1
100%|█| 47/47 [01:51<00:00,  2.38s/it, Epoch=1, LR=


[34mValidation F1 Improved (0 ---> 0.05600983144349195)
Model Saved[0m



100%|█| 371/371 [10:07<00:00,  1.64s/it, Epoch=2, LR=4.8e-
100%|█| 47/47 [01:49<00:00,  2.33s/it, Epoch=2, LR=4.8e-5,


[34mValidation F1 Improved (0.05600983144349195 ---> 0.07018584519172984)
Model Saved[0m



100%|█| 371/371 [10:13<00:00,  1.65s/it, Epoch=3, LR=8.8e-
100%|█| 47/47 [01:50<00:00,  2.34s/it, Epoch=3, LR=8.8e-5,


[34mValidation F1 Improved (0.07018584519172984 ---> 0.13970427166341445)
Model Saved[0m



100%|█| 371/371 [09:28<00:00,  1.53s/it, Epoch=4, LR=1.25e
100%|█| 47/47 [01:48<00:00,  2.32s/it, Epoch=4, LR=1.25e-6


[34mValidation F1 Improved (0.13970427166341445 ---> 0.1942389139159001)
Model Saved[0m



100%|█| 371/371 [09:21<00:00,  1.51s/it, Epoch=5, LR=8.08e
100%|█| 47/47 [01:50<00:00,  2.35s/it, Epoch=5, LR=8.08e-5


[34mValidation F1 Improved (0.1942389139159001 ---> 0.1998091264455871)
Model Saved[0m



100%|█| 371/371 [09:28<00:00,  1.53s/it, Epoch=6, LR=5.79e
100%|█| 47/47 [01:52<00:00,  2.39s/it, Epoch=6, LR=5.79e-5


[34mValidation F1 Improved (0.1998091264455871 ---> 0.23686745376458151)
Model Saved[0m



100%|█| 371/371 [09:23<00:00,  1.52s/it, Epoch=7, LR=9.91e
100%|█| 47/47 [01:48<00:00,  2.31s/it, Epoch=7, LR=9.91e-6


[34mValidation F1 Improved (0.23686745376458151 ---> 0.24713160332833878)
Model Saved[0m



100%|█| 371/371 [09:12<00:00,  1.49s/it, Epoch=8, LR=9.9e-
100%|█| 47/47 [01:56<00:00,  2.47s/it, Epoch=8, LR=9.9e-5,





100%|█| 371/371 [12:44<00:00,  2.06s/it, Epoch=9, LR=2.42e
100%|█| 47/47 [02:06<00:00,  2.70s/it, Epoch=9, LR=2.42e-5


[34mValidation F1 Improved (0.24713160332833878 ---> 0.2770673789711036)
Model Saved[0m



100%|█| 371/371 [13:29<00:00,  2.18s/it, Epoch=10, LR=3.82
100%|█| 47/47 [02:44<00:00,  3.49s/it, Epoch=10, LR=3.82e-



Training complete in 2h 4m 29s
Best F1: 0.2771


In [28]:
run.finish()

VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
Train Loss,█▆▆▄▃▃▂▂▂▁
Valid F1,▁▁▄▅▆▇▇▆██
Valid Loss,█▇▃▂▂▁▁▃▂▂

0,1
Best F1 Score,0.27707
Train Loss,0.53769
Valid F1,0.263
Valid Loss,2.55077


In [32]:
del model

NameError: name 'model' is not defined

In [None]:
torch.cuda.empty_cache

<function torch.cuda.memory.empty_cache() -> None>