In [1]:
# !pip install -q nlpretext loguru

In [2]:
import os
import gc
import copy
import time
import numpy as np
import pandas as pd
import plotly.graph_objects as go

import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
from torch.utils.data import Dataset, DataLoader
from torch.cuda import amp
import scipy as sp

import transformers
from transformers import T5Tokenizer, T5EncoderModel
from transformers import AutoTokenizer, AutoModel, AutoConfig

from transformers import AdamW, get_linear_schedule_with_warmup

from tqdm import tqdm
from collections import defaultdict

from loguru import logger

from sklearn.metrics import mean_squared_error
from sklearn.model_selection import StratifiedKFold, KFold

from nlpretext import Preprocessor
from nlpretext.basic.preprocess import (normalize_whitespace, remove_punct, 
                                        remove_eol_characters, remove_stopwords, 
                                        lower_text, unpack_english_contractions)

from colorama import Fore
b_ = Fore.BLUE

import warnings
warnings.filterwarnings("ignore")

In [3]:
class CONFIG:
    seed = 42
    max_len = 205
    model = ""
    size = "small"
    model_name = f"{model}-{size}"
    if size=="small":
        hidden_state = hiddden_state_fix = 512
    elif size=="base":
        hidden_state = hiddden_state_fix = 768
    elif size == "large"
        hidden_state = hiddden_state_fix = 1024
    # model_name = 't5-base'
    # model_name = "t5-large"
    # model_name ="t5-large"
    # hidden_state = 1024#768
    # hidden_state_fixed = 1024# 768 # ONLY CHANGE WHEN CHANGING THE MODEL
                             # 512 for t5-small, 768 for t5-base, 1024 for t5-large   
    train_batch_size =8
    valid_batch_size = 8
    epochs = 10
    learning_rate = 2e-5
    n_accumulate = 1
    folds = 4
    tokenizer = T5Tokenizer.from_pretrained(model_name)
    tokenizer.save_pretrained('./tokenizer')
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [4]:
INPUT_DIR = '../../data/us-patent-phrase-to-phrase-matching/'
OUTPUT_DIR = "./output/t5/"
train = pd.read_csv(f"{INPUT_DIR}train.csv")
train.head()

Unnamed: 0,id,anchor,target,context,score
0,37d61fd2272659b1,abatement,abatement of pollution,A47,0.5
1,7b9652b17b68b7a4,abatement,act of abating,A47,0.75
2,36d72442aefd8232,abatement,active catalyst,A47,0.25
3,5296b0c19e1ce60e,abatement,eliminating process,A47,0.5
4,54c1e3b9184cb5b6,abatement,forest region,A47,0.0


In [5]:
test = pd.read_csv(f"{INPUT_DIR}test.csv")
test.head()

Unnamed: 0,id,anchor,target,context
0,4112d61851461f60,opc drum,inorganic photoconductor drum,G02
1,09e418c93a776564,adjust gas flow,altering gas flow,F23
2,36baf228038e314b,lower trunnion,lower locating,B60
3,1f37ead645e7f0c8,cap component,upper portion,D06
4,71a5b6ad068d531f,neural stimulation,artificial neural network,H04


In [6]:
preprocessor = Preprocessor()
preprocessor.pipe(unpack_english_contractions)
preprocessor.pipe(remove_eol_characters)
preprocessor.pipe(lower_text)
preprocessor.pipe(normalize_whitespace)

In [7]:
# ====================================================
# CPC Data
# ====================================================
def get_cpc_texts():
    contexts = []
    pattern = '[A-Z]\d+'
    for file_name in os.listdir(INPUT_DIR+'CPCSchemeXML202105'):
        result = re.findall(pattern, file_name)
        if result:
            contexts.append(result)
    contexts = sorted(set(sum(contexts, [])))
    results = {}
    for cpc in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'Y']:
        with open(f'{INPUT_DIR}CPCTitleList202202/cpc-section-{cpc}_20220201.txt') as f:
            s = f.read()
        pattern = f'{cpc}\t\t.+'
        result = re.findall(pattern, s)
        cpc_result = result[0].lstrip(pattern)
        for context in [c for c in contexts if c[0] == cpc]:
            pattern = f'{context}\t\t.+'
            result = re.findall(pattern, s)
            results[context] = cpc_result + ". " + result[0].lstrip(pattern)
    return results


# cpc_texts = get_cpc_texts()
# torch.save(cpc_texts, OUTPUT_DIR+"cpc_texts.pth")
cpc_texts = torch.load(INPUT_DIR+"cpc_texts.pth")
train['context_text'] = train['context'].map(cpc_texts)
test['context_text'] = test['context'].map(cpc_texts)
display(train.head())
display(test.head())

CFG = CONFIG
tokenizer = CFG.tokenizer
# ====================================================
# Define max_len
# ====================================================
lengths_dict = {}

lengths = []
tk0 = tqdm(cpc_texts.values(), total=len(cpc_texts))
for text in tk0:
    length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
    lengths.append(length)
lengths_dict['context_text'] = lengths

for text_col in ['anchor', 'target']:
    lengths = []
    tk0 = tqdm(train[text_col].fillna("").values, total=len(train))
    for text in tk0:
        length = len(tokenizer(text, add_special_tokens=False)['input_ids'])
        lengths.append(length)
    lengths_dict[text_col] = lengths
    
CFG.max_len = max(lengths_dict['anchor']) + max(lengths_dict['target'])\
                + max(lengths_dict['context_text']) + 4 # CLS + SEP + SEP + SEP


train['text'] = train['anchor'] + '[SEP]' + train['target'] + '[SEP]'  + train['context_text']
test['text'] = test['anchor'] + '[SEP]' + test['target'] + '[SEP]'  + test['context_text']
display(train.head())
display(test.head())

Unnamed: 0,id,anchor,target,context,score,context_text
0,37d61fd2272659b1,abatement,abatement of pollution,A47,0.5,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...
1,7b9652b17b68b7a4,abatement,act of abating,A47,0.75,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...
2,36d72442aefd8232,abatement,active catalyst,A47,0.25,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...
3,5296b0c19e1ce60e,abatement,eliminating process,A47,0.5,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...
4,54c1e3b9184cb5b6,abatement,forest region,A47,0.0,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...


Unnamed: 0,id,anchor,target,context,context_text
0,4112d61851461f60,opc drum,inorganic photoconductor drum,G02,PHYSICS. OPTICS
1,09e418c93a776564,adjust gas flow,altering gas flow,F23,MECHANICAL ENGINEERING; LIGHTING; HEATING; WEA...
2,36baf228038e314b,lower trunnion,lower locating,B60,PERFORMING OPERATIONS; TRANSPORTING. VEHICLES ...
3,1f37ead645e7f0c8,cap component,upper portion,D06,TEXTILES; PAPER. TREATMENT OF TEXTILES OR THE ...
4,71a5b6ad068d531f,neural stimulation,artificial neural network,H04,ELECTRICITY. ELECTRIC COMMUNICATION TECHNIQUE


100%|███████████████████████████████████████████████████████████████████████████████| 136/136 [00:00<00:00, 5932.91it/s]
100%|██████████████████████████████████████████████████████████████████████████| 36473/36473 [00:02<00:00, 14504.07it/s]
100%|██████████████████████████████████████████████████████████████████████████| 36473/36473 [00:02<00:00, 13227.15it/s]


Unnamed: 0,id,anchor,target,context,score,context_text,text
0,37d61fd2272659b1,abatement,abatement of pollution,A47,0.5,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]abatement of pollution[SEP]HUMAN...
1,7b9652b17b68b7a4,abatement,act of abating,A47,0.75,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]act of abating[SEP]HUMAN NECESSI...
2,36d72442aefd8232,abatement,active catalyst,A47,0.25,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]active catalyst[SEP]HUMAN NECESS...
3,5296b0c19e1ce60e,abatement,eliminating process,A47,0.5,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]eliminating process[SEP]HUMAN NE...
4,54c1e3b9184cb5b6,abatement,forest region,A47,0.0,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[SEP]forest region[SEP]HUMAN NECESSIT...


Unnamed: 0,id,anchor,target,context,context_text,text
0,4112d61851461f60,opc drum,inorganic photoconductor drum,G02,PHYSICS. OPTICS,opc drum[SEP]inorganic photoconductor drum[SEP...
1,09e418c93a776564,adjust gas flow,altering gas flow,F23,MECHANICAL ENGINEERING; LIGHTING; HEATING; WEA...,adjust gas flow[SEP]altering gas flow[SEP]MECH...
2,36baf228038e314b,lower trunnion,lower locating,B60,PERFORMING OPERATIONS; TRANSPORTING. VEHICLES ...,lower trunnion[SEP]lower locating[SEP]PERFORMI...
3,1f37ead645e7f0c8,cap component,upper portion,D06,TEXTILES; PAPER. TREATMENT OF TEXTILES OR THE ...,cap component[SEP]upper portion[SEP]TEXTILES; ...
4,71a5b6ad068d531f,neural stimulation,artificial neural network,H04,ELECTRICITY. ELECTRIC COMMUNICATION TECHNIQUE,neural stimulation[SEP]artificial neural netwo...


In [8]:
train['text'] = train['text'].apply(preprocessor.run)

<span style="color: #000508; font-family: Segoe UI; font-size: 1.5em; font-weight: 300;">Maximum Length of Text present in the Dataset</span>

In [9]:
excerpt_lenghts = train['text'].apply(lambda x: len(x.split()))
max(excerpt_lenghts)

51

In [10]:
def set_seed(seed = 42):
    '''Sets the seed of the entire notebook so results are the same every time we run.
    This is for REPRODUCIBILITY.'''
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    # When running on the CuDNN backend, two further options must be set
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    # Set a fixed value for the hash seed
    os.environ['PYTHONHASHSEED'] = str(seed)
    
set_seed(CONFIG.seed)

In [11]:
def create_folds(df, n_s=5, n_grp=None):
    df['kfold'] = -1
    
    if n_grp is None:
        skf = KFold(n_splits=n_s, random_state=CONFIG.seed)
        target = df.score
    else:
        skf = StratifiedKFold(n_splits=n_s, shuffle=True, random_state=CONFIG.seed)
        df['grp'] = pd.cut(df.score, n_grp, labels=False)
        target = df.grp
    
    for fold_no, (t, v) in enumerate(skf.split(target, target)):
        df.loc[v, 'kfold'] = fold_no
    return df

In [12]:
df = create_folds(train, n_s=CONFIG.folds, n_grp=12)
df.head()

Unnamed: 0,id,anchor,target,context,score,context_text,text,kfold,grp
0,37d61fd2272659b1,abatement,abatement of pollution,A47,0.5,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[sep]abatement of pollution[sep]human...,1,5
1,7b9652b17b68b7a4,abatement,act of abating,A47,0.75,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[sep]act of abating[sep]human necessi...,2,8
2,36d72442aefd8232,abatement,active catalyst,A47,0.25,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[sep]active catalyst[sep]human necess...,1,2
3,5296b0c19e1ce60e,abatement,eliminating process,A47,0.5,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[sep]eliminating process[sep]human ne...,2,5
4,54c1e3b9184cb5b6,abatement,forest region,A47,0.0,HUMAN NECESSITIES. FURNITURE; DOMESTIC ARTICLE...,abatement[sep]forest region[sep]human necessit...,0,0


<h1 style = "font-family: garamond; font-size: 40px; font-style: normal; letter-spcaing: 3px; background-color: #f6f5f5; color :#fe346e; border-radius: 100px 100px; text-align:center">Dataset Class</h1>

In [13]:
class T5Dataset(Dataset):
    def __init__(self, df, tokenizer, max_len):
        self.text = df['text'].values
        self.score = df['score'].values
        self.max_len = max_len
        self.tokenizer = tokenizer
        
    def __len__(self):
        return len(self.text)
    
    def __getitem__(self, index):
        text = self.text[index]
        inputs = self.tokenizer.encode_plus(
            text,
            truncation=True,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length'
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']
        
        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'score': torch.tensor(self.score[index], dtype=torch.float)
        }

<h1 style = "font-family: garamond; font-size: 40px; font-style: normal; letter-spcaing: 3px; background-color: #f6f5f5; color :#fe346e; border-radius: 100px 100px; text-align:center">Loss Function</h1>

In [14]:
# def criterion(outputs, targets):
#     return torch.sqrt(nn.MSELoss()(outputs.view(-1), targets.view(-1)))

criterion = nn.BCEWithLogitsLoss(reduction="mean")

<h1 style = "font-family: garamond; font-size: 40px; font-style: normal; letter-spcaing: 3px; background-color: #f6f5f5; color :#fe346e; border-radius: 100px 100px; text-align:center">Create Model</h1>

<span style="color: #000508; font-family: Segoe UI; font-size: 1.5em; font-weight: 300;">We need <code>T5Pooler</code> to pool the outputs of the model</span><br>
<span style="color: #000508; font-family: Segoe UI; font-size: 1.5em; font-weight: 300;">This is simply done by taking the mean of the hidden states </span>

In [15]:
class T5Pooler(nn.Module):
    def __init__(self, hidden_size, activation=nn.Tanh()):
        super().__init__()
        self.dense = nn.Linear(CONFIG.hidden_state_fixed, hidden_size)
        self.activation = activation
        
    def forward(self, hidden_states):
        # We simply take the mean of the hidden states
        mean_tensor = torch.mean(hidden_states, dim=1)
        pooled_output = self.dense(mean_tensor)
        pooled_output = self.activation(pooled_output)
        return pooled_output

In [16]:
class T5Model(nn.Module):
    def __init__(self):
        super(T5Model, self).__init__()
        self.t5 = T5EncoderModel.from_pretrained(CONFIG.model_name)
        # self.t5 = AutoModel.from_pretrained(CFG.model_name, config=CFG)
        self.pooler = T5Pooler(CONFIG.hidden_state, nn.LeakyReLU())
        self.fc = nn.Linear(CONFIG.hidden_state, 1)
    
    def forward(self, ids, mask):
        outputs = self.t5(ids, attention_mask=mask)
        pooled_outputs = self.pooler(outputs.last_hidden_state)
        outputs = self.fc(pooled_outputs)
        return outputs

model = T5Model()
model.to(CONFIG.device);

Some weights of the model checkpoint at t5-large were not used when initializing T5EncoderModel: ['decoder.block.2.layer.1.EncDecAttention.o.weight', 'decoder.block.7.layer.0.layer_norm.weight', 'decoder.block.19.layer.2.DenseReluDense.wo.weight', 'decoder.block.23.layer.2.layer_norm.weight', 'decoder.block.23.layer.0.SelfAttention.v.weight', 'decoder.block.16.layer.2.DenseReluDense.wo.weight', 'decoder.block.17.layer.0.SelfAttention.v.weight', 'decoder.block.16.layer.0.SelfAttention.q.weight', 'decoder.block.15.layer.1.EncDecAttention.q.weight', 'decoder.block.20.layer.2.layer_norm.weight', 'decoder.block.20.layer.1.EncDecAttention.o.weight', 'decoder.block.16.layer.1.EncDecAttention.o.weight', 'decoder.block.7.layer.1.EncDecAttention.k.weight', 'decoder.block.9.layer.1.EncDecAttention.v.weight', 'decoder.block.14.layer.1.EncDecAttention.v.weight', 'decoder.block.4.layer.2.DenseReluDense.wi.weight', 'decoder.block.8.layer.1.EncDecAttention.v.weight', 'decoder.block.4.layer.1.EncDecAtt

<h1 style = "font-family: garamond; font-size: 40px; font-style: normal; letter-spcaing: 3px; background-color: #f6f5f5; color :#fe346e; border-radius: 100px 100px; text-align:center">Training Function</h1>

In [17]:
def train_one_epoch(model, optimizer, dataloader, device, epoch):
    model.train()
    scaler = amp.GradScaler()
    
    dataset_size = 0
    running_loss = 0.0
    
    bar = tqdm(enumerate(dataloader), total=len(dataloader))
    for step, data in bar:        
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        score = data['score'].to(device, dtype = torch.float)
        
        batch_size = ids.size(0)
        
        with amp.autocast(enabled=True):
            outputs = model(ids, mask)
            
            loss = criterion(outputs, torch.tensor(score).unsqueeze(1))
            loss = loss / CONFIG.n_accumulate
            
        scaler.scale(loss).backward()
        
        if (step + 1) % CONFIG.n_accumulate == 0:
            scaler.step(optimizer)
            scaler.update()
            
            # zero the parameter gradients
            optimizer.zero_grad()
                
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        
        epoch_loss = running_loss/dataset_size
        
        bar.set_postfix(Epoch=epoch, Train_Loss=epoch_loss,
                        LR=optimizer.param_groups[0]['lr'])
    gc.collect()
    
    return epoch_loss

<h1 style = "font-family: garamond; font-size: 40px; font-style: normal; letter-spcaing: 3px; background-color: #f6f5f5; color :#fe346e; border-radius: 100px 100px; text-align:center">Validation Function</h1>

In [18]:
def get_score(y_true, y_pred):
    score = sp.stats.pearsonr(y_true, y_pred)[0]
    return score

@torch.no_grad()
def valid_one_epoch(model, optimizer, dataloader, device, epoch):
    model.eval()
    
    dataset_size = 0
    running_loss = 0.0
    
    SCORES = []
    PREDS = []
    
    bar = tqdm(enumerate(dataloader), total=len(dataloader))
    for step, data in bar:        
        ids = data['ids'].to(device, dtype = torch.long)
        mask = data['mask'].to(device, dtype = torch.long)
        scores = data['score'].to(device, dtype = torch.float)
        
        batch_size = ids.size(0)
        
        outputs = model(ids, mask)
        loss = criterion(outputs, torch.tensor(scores).unsqueeze(1))
        
        running_loss += (loss.item() * batch_size)
        dataset_size += batch_size
        
        epoch_loss = running_loss/dataset_size
        
        PREDS.extend(outputs.cpu().detach().numpy().tolist())
        SCORES.extend(scores.cpu().detach().numpy().tolist())
        
        bar.set_postfix(Epoch=epoch, Valid_Loss=epoch_loss,
                        LR=optimizer.param_groups[0]['lr'])   
        
    val_score = get_score(SCORES, PREDS)
    print(f"valid score  {val_score}")
    gc.collect()
    
    return epoch_loss, val_score

<h1 style = "font-family: garamond; font-size: 40px; font-style: normal; letter-spcaing: 3px; background-color: #f6f5f5; color :#fe346e; border-radius: 100px 100px; text-align:center">Run</h1>

In [19]:
@logger.catch
def run(model, optimizer, scheduler, device, num_epochs, fold):    
    start = time.time()
    best_model_wts = copy.deepcopy(model.state_dict())
    best_score = -np.inf
    history = defaultdict(list)
    
    for epoch in range(1, num_epochs + 1): 
        gc.collect()
        train_epoch_loss = train_one_epoch(model, optimizer, dataloader=train_loader, 
                                           device=CONFIG.device, epoch=epoch)
        
        valid_epoch_loss, valid_epoch_score = valid_one_epoch(model, optimizer,
                                                       dataloader=valid_loader, 
                                                       device=CONFIG.device, epoch=epoch)
    
        history['Train Loss'].append(train_epoch_loss)
        history['Valid Loss'].append(valid_epoch_loss)
        history['Valid Score'].append(valid_epoch_score)
        
        print(f'Valid Score: {valid_epoch_score}')
        
        if scheduler is not None:
            scheduler.step()
        
        # deep copy the model
        if valid_epoch_score >= best_score:
            print(f"Validation score Improved ({best_score} ---> {valid_epoch_score})")
            best_score = valid_epoch_score
            best_model_wts = copy.deepcopy(model.state_dict())
            PATH = f"{OUTPUT_DIR}t5_fold{fold}.pth".format(best_score, epoch)
            torch.save(model.state_dict(), PATH)
            print("Model Saved")
            
        print()
    
    end = time.time()
    time_elapsed = end - start
    print('Training complete in {:.0f}h {:.0f}m {:.0f}s'.format(
        time_elapsed // 3600, (time_elapsed % 3600) // 60, (time_elapsed % 3600) % 60))
    print(f"Best Loss: {best_score}")
    
    # load best model weights
    model.load_state_dict(best_model_wts)
    
    return model, history

In [20]:
def prepare_data(fold):
    df_train = df[df.kfold != fold].reset_index(drop=True)
    df_valid = df[df.kfold == fold].reset_index(drop=True)
    
    train_dataset = T5Dataset(df_train, CONFIG.tokenizer, CONFIG.max_len)
    valid_dataset = T5Dataset(df_valid, CONFIG.tokenizer, CONFIG.max_len)

    train_loader = DataLoader(train_dataset, batch_size=CONFIG.train_batch_size, 
                              num_workers=4, shuffle=True, pin_memory=True)
    valid_loader = DataLoader(valid_dataset, batch_size=CONFIG.valid_batch_size, 
                              num_workers=4, shuffle=False, pin_memory=True)
    
    return train_loader, valid_loader

In [21]:
# Defining Optimizer with weight decay to params other than bias and layer norms
param_optimizer = list(model.named_parameters())
no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
optimizer_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)], 
     'weight_decay': 0.0001},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)], 
     'weight_decay': 0.0}
    ]

optimizer = AdamW(optimizer_parameters, lr=CONFIG.learning_rate)

# Defining LR Scheduler
scheduler = get_linear_schedule_with_warmup(
    optimizer, 
    num_warmup_steps=0, 
    num_training_steps=CONFIG.epochs
)

<h1 style = "font-family: garamond; font-size: 40px; font-style: normal; letter-spcaing: 3px; background-color: #f6f5f5; color :#fe346e; border-radius: 100px 100px; text-align:center">Train Fold: 0</h1>

In [None]:
for fold in range(CONFIG.folds):
    print(f"\033[38;5;40m========== fold: {fold} training ==========\033[0m");
    train_loader, valid_loader = prepare_data(fold=fold)
    model, history = run(model, optimizer, scheduler, device=CONFIG.device, num_epochs=CONFIG.epochs, fold=fold)



100%|█████████████████████████████████████████████| 3420/3420 [18:00<00:00,  3.16it/s, Epoch=1, LR=2e-5, Train_Loss=nan]
100%|███████████████████████████████████████████| 1140/1140 [02:35<00:00,  7.35it/s, Epoch=1, LR=2e-5, Valid_Loss=0.691]


valid score  [-0.00767485672543637]
Valid Score: [-0.00767485672543637]
Validation score Improved (-inf ---> [-0.00767485672543637])
Model Saved



100%|███████████████████████████████████████████| 3420/3420 [17:54<00:00,  3.18it/s, Epoch=2, LR=1.8e-5, Train_Loss=nan]
100%|█████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.47it/s, Epoch=2, LR=1.8e-5, Valid_Loss=0.691]


valid score  [-0.00767485672543637]
Valid Score: [-0.00767485672543637]
Validation score Improved ([-0.00767485672543637] ---> [-0.00767485672543637])
Model Saved



100%|███████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=3, LR=1.6e-5, Train_Loss=nan]
100%|█████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.48it/s, Epoch=3, LR=1.6e-5, Valid_Loss=0.691]


valid score  [-0.00767485672543637]
Valid Score: [-0.00767485672543637]
Validation score Improved ([-0.00767485672543637] ---> [-0.00767485672543637])
Model Saved



100%|███████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=4, LR=1.4e-5, Train_Loss=nan]
100%|█████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.48it/s, Epoch=4, LR=1.4e-5, Valid_Loss=0.691]


valid score  [-0.00767485672543637]
Valid Score: [-0.00767485672543637]
Validation score Improved ([-0.00767485672543637] ---> [-0.00767485672543637])
Model Saved



100%|███████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=5, LR=1.2e-5, Train_Loss=nan]
100%|█████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.48it/s, Epoch=5, LR=1.2e-5, Valid_Loss=0.691]


valid score  [-0.00767485672543637]
Valid Score: [-0.00767485672543637]
Validation score Improved ([-0.00767485672543637] ---> [-0.00767485672543637])
Model Saved



100%|█████████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=6, LR=1e-5, Train_Loss=nan]
100%|███████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.48it/s, Epoch=6, LR=1e-5, Valid_Loss=0.691]


valid score  [-0.00767485672543637]
Valid Score: [-0.00767485672543637]
Validation score Improved ([-0.00767485672543637] ---> [-0.00767485672543637])
Model Saved



100%|█████████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=7, LR=8e-6, Train_Loss=nan]
100%|███████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.48it/s, Epoch=7, LR=8e-6, Valid_Loss=0.691]


valid score  [-0.00767485672543637]
Valid Score: [-0.00767485672543637]
Validation score Improved ([-0.00767485672543637] ---> [-0.00767485672543637])
Model Saved



100%|█████████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=8, LR=6e-6, Train_Loss=nan]
100%|███████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.47it/s, Epoch=8, LR=6e-6, Valid_Loss=0.691]


valid score  [-0.00767485672543637]
Valid Score: [-0.00767485672543637]
Validation score Improved ([-0.00767485672543637] ---> [-0.00767485672543637])
Model Saved



100%|█████████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=9, LR=4e-6, Train_Loss=nan]
100%|███████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.48it/s, Epoch=9, LR=4e-6, Valid_Loss=0.691]


valid score  [-0.00767485672543637]
Valid Score: [-0.00767485672543637]
Validation score Improved ([-0.00767485672543637] ---> [-0.00767485672543637])
Model Saved



100%|████████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=10, LR=2e-6, Train_Loss=nan]
100%|██████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.47it/s, Epoch=10, LR=2e-6, Valid_Loss=0.691]


valid score  [-0.00767485672543637]
Valid Score: [-0.00767485672543637]
Validation score Improved ([-0.00767485672543637] ---> [-0.00767485672543637])
Model Saved

Training complete in 3h 24m 56s
Best Loss: [-0.00767485672543637]


100%|████████████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=1, LR=0, Train_Loss=nan]
100%|██████████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.48it/s, Epoch=1, LR=0, Valid_Loss=0.691]


valid score  [-0.007460085420272789]
Valid Score: [-0.007460085420272789]
Validation score Improved (-inf ---> [-0.007460085420272789])
Model Saved



100%|████████████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=2, LR=0, Train_Loss=nan]
100%|██████████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.47it/s, Epoch=2, LR=0, Valid_Loss=0.691]


valid score  [-0.007460085420272789]
Valid Score: [-0.007460085420272789]
Validation score Improved ([-0.007460085420272789] ---> [-0.007460085420272789])
Model Saved



100%|████████████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=3, LR=0, Train_Loss=nan]
100%|██████████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.47it/s, Epoch=3, LR=0, Valid_Loss=0.691]


valid score  [-0.007460085420272789]
Valid Score: [-0.007460085420272789]
Validation score Improved ([-0.007460085420272789] ---> [-0.007460085420272789])
Model Saved



100%|████████████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=4, LR=0, Train_Loss=nan]
100%|██████████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.47it/s, Epoch=4, LR=0, Valid_Loss=0.691]


valid score  [-0.007460085420272789]
Valid Score: [-0.007460085420272789]
Validation score Improved ([-0.007460085420272789] ---> [-0.007460085420272789])
Model Saved



100%|████████████████████████████████████████████████| 3420/3420 [17:50<00:00,  3.20it/s, Epoch=5, LR=0, Train_Loss=nan]
100%|██████████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.47it/s, Epoch=5, LR=0, Valid_Loss=0.691]


valid score  [-0.007460085420272789]
Valid Score: [-0.007460085420272789]
Validation score Improved ([-0.007460085420272789] ---> [-0.007460085420272789])
Model Saved



100%|████████████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=6, LR=0, Train_Loss=nan]
100%|██████████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.47it/s, Epoch=6, LR=0, Valid_Loss=0.691]


valid score  [-0.007460085420272789]
Valid Score: [-0.007460085420272789]
Validation score Improved ([-0.007460085420272789] ---> [-0.007460085420272789])
Model Saved



100%|████████████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=7, LR=0, Train_Loss=nan]
100%|██████████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.46it/s, Epoch=7, LR=0, Valid_Loss=0.691]


valid score  [-0.007460085420272789]
Valid Score: [-0.007460085420272789]
Validation score Improved ([-0.007460085420272789] ---> [-0.007460085420272789])
Model Saved



100%|████████████████████████████████████████████████| 3420/3420 [17:49<00:00,  3.20it/s, Epoch=8, LR=0, Train_Loss=nan]
100%|██████████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.46it/s, Epoch=8, LR=0, Valid_Loss=0.691]


valid score  [-0.007460085420272789]
Valid Score: [-0.007460085420272789]
Validation score Improved ([-0.007460085420272789] ---> [-0.007460085420272789])
Model Saved



100%|████████████████████████████████████████████████| 3420/3420 [17:50<00:00,  3.20it/s, Epoch=9, LR=0, Train_Loss=nan]
100%|██████████████████████████████████████████████| 1140/1140 [02:32<00:00,  7.46it/s, Epoch=9, LR=0, Valid_Loss=0.691]


valid score  [-0.007460085420272789]
Valid Score: [-0.007460085420272789]
Validation score Improved ([-0.007460085420272789] ---> [-0.007460085420272789])
Model Saved



100%|███████████████████████████████████████████████| 3420/3420 [17:52<00:00,  3.19it/s, Epoch=10, LR=0, Train_Loss=nan]
100%|█████████████████████████████████████████████| 1140/1140 [02:35<00:00,  7.33it/s, Epoch=10, LR=0, Valid_Loss=0.691]


valid score  [-0.007460085420272789]
Valid Score: [-0.007460085420272789]
Validation score Improved ([-0.007460085420272789] ---> [-0.007460085420272789])
Model Saved

Training complete in 3h 24m 45s
Best Loss: [-0.007460085420272789]


100%|████████████████████████████████████████████████| 3420/3420 [18:19<00:00,  3.11it/s, Epoch=1, LR=0, Train_Loss=nan]
100%|██████████████████████████████████████████████| 1140/1140 [02:40<00:00,  7.11it/s, Epoch=1, LR=0, Valid_Loss=0.691]


valid score  [-0.029093232294545624]
Valid Score: [-0.029093232294545624]
Validation score Improved (-inf ---> [-0.029093232294545624])
Model Saved



  5%|██▍                                              | 172/3420 [00:56<17:45,  3.05it/s, Epoch=2, LR=0, Train_Loss=nan]

<h1 style = "font-family: garamond; font-size: 40px; font-style: normal; letter-spcaing: 3px; background-color: #f6f5f5; color :#fe346e; border-radius: 100px 100px; text-align:center">Visualizations 📉</h1>

In [None]:
epochs = list(range(1, CONFIG.epochs + 1))
fig = go.Figure()
trace1 = go.Scatter(x=epochs, y=history['Train Loss'],
                    mode='lines+markers',
                    name='Train Loss')
trace2 = go.Scatter(x=epochs, y=history['Valid Loss'],
                    mode='lines+markers',
                    name='Valid Loss')
layout = go.Layout(template="plotly_dark", title='Loss Curve', 
                   xaxis=dict(title='Epochs'), yaxis=dict(title='Loss'))
fig = go.Figure(data = [trace1, trace2], layout = layout)
fig.show()

In [None]:
!touch 