In [1]:
from config import model_config
from model import VedioRecommender
from dataset import ViewDataSet
import utils

In [2]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from adamp import AdamP

In [3]:
import torch
from torch import nn

In [4]:
from sklearn import metrics
from tqdm import tqdm
import numpy as np
import pandas as pd
import os
import json

### 1) Load Model

In [5]:
model = VedioRecommender(model_config)

### 2) Read Data 

In [6]:
df_agg_dataset = utils.open_object("./artifacts/df_agg_dataset.pkl")

In [7]:
df_train,df_test = train_test_split(df_agg_dataset,test_size=0.3,random_state=33,shuffle = True )

In [8]:
train_dataset = ViewDataSet(df_train)
test_dataset = ViewDataSet(df_test)

In [9]:
batch_size = 32

In [10]:
train_loader = DataLoader(train_dataset, batch_size=12, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=12, shuffle=True)

In [11]:
for inputs in train_loader:
    break

### 3) Training

In [12]:
scores = model(inputs)

In [13]:
BCELoss = nn.BCELoss()

In [14]:
labels = inputs['label']

In [15]:
BCELoss(scores,labels.view(-1,1))

tensor(0.6819, grad_fn=<BinaryCrossEntropyBackward0>)

In [16]:
optimizer = AdamP(model.parameters(),lr=4e-4,
                  betas=(0.9, 0.999), weight_decay=1e-1)

In [17]:
class train_config:
    epoches = 5
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_save_dir = "./artifacts/models"
    train_batch_size = 12
    val_batch_size = int(train_batch_size*1.5)
    eval_steps = (len(train_dataset)//train_batch_size)//3

In [18]:
total_pbar = tqdm(total = len(train_loader)*train_config.epoches,desc = "Model Training",position=0, leave=True)

Model Training:   0%|          | 0/490 [00:00<?, ?it/s]

In [19]:
def evaluate_full_metrics(model,dataset_loader):

    model.eval()

    loss_list = []
    labels_list = []
    pred_list = []
    prob_list = []

    pbar = tqdm(total = len(dataset_loader),desc = "Model Evaluating",position=0, leave=True)


    for inputs in dataset_loader:

        with torch.no_grad():
            
            inputs = utils.to_device(inputs,train_config.device)
            labels = inputs['label'].view(-1,1)
            
            probs = model(inputs)
            
            loss = BCELoss(probs,labels).item()
            loss_list.append(loss)
            
            labels  = labels.detach().cpu().numpy()
            labels_list.extend(labels.flatten())

            probs = probs.detach().cpu().numpy()
            prob_list.extend(probs.flatten())
            pbar.update(1)

    pbar.close()
    
    auc = metrics.roc_auc_score(labels_list, prob_list)
    recall, precision, thres = metrics.precision_recall_curve(labels_list, prob_list)
    
    
    f1 = recall*precision*2 / (recall + precision)
    f1_temp = f1
    f1 = np.nan_to_num(f1,nan = -1)

    arg = f1.argmax()
    
    best_thres = thres[arg]
    best_f1 = f1[arg]
    best_recall = recall[arg]
    best_precision = precision[arg]
    
    pred_list = [1 if prob>=best_thres else 0 for prob in prob_list]
    accuracy = metrics.accuracy_score(labels_list,pred_list)
    
    avg_loss = np.mean(loss_list)
    
    result = {"threshold":best_thres,
              "accuracy":accuracy,
              "recall":best_recall,
              "precision":best_precision,
              "f1":best_f1,'auc':auc,
              'eval_loss':avg_loss} 

    return result,prob_list

In [20]:
result,prob_list = evaluate_full_metrics(model,test_loader)

Model Evaluating:  14%|█▍        | 6/42 [00:14<01:25,  2.39s/it]

In [22]:
result

{'threshold': 0.49956766,
 'accuracy': 0.1536926147704591,
 'recall': 0.152,
 'precision': 1.0,
 'f1': 0.2638888888888889,
 'auc': 0.27975232198142413,
 'eval_loss': 0.694588398649579}

In [23]:
def save_model(model, model_save_dir,step,model_metrics):
    model_save_dir = os.path.join(model_save_dir,f"checkpoint-{step}")
    model_name = "pytorch_model.bin"
    train_state_name = "training_state.json"
    os.makedirs(model_save_dir,exist_ok=True)
    
    model_path = os.path.join(model_save_dir,model_name)
    train_state_path = os.path.join(model_save_dir,train_state_name)

    torch.save(model,model_path)
    
    if model_metrics is not None:
        with open(train_state_path,mode = 'w',encoding = 'utf-8-sig') as f:
            model_metrics = {str(k):str(v) for k,v in model_metrics.items()} 
            json.dump(model_metrics,f,indent=4)

In [25]:
total_pbar = tqdm(total = len(train_loader)*train_config.epoches,desc = "Model Training",position=0, leave=True)

total_batch = 0 
for epoch in range(train_config.epoches):
    print("*"*50 + f"epoch: {epoch + 1}" + "*"*50)
    
    train_losses = []
    
    for inputs in train_loader:
        model = model.train()
        inputs = utils.to_device(inputs,train_config.device)
        labels = inputs['label'].view(-1,1)
        
        
        optimizer.zero_grad()
        
        probs = model(inputs)
        
        loss = BCELoss(probs,labels)
        
        loss.backward()
        optimizer.step()
        
        train_losses.append(loss.item())
        
        if (total_batch+1) % train_config.eval_steps ==0:
            model_metrics,_ = evaluate_full_metrics(model,test_loader)
            train_loss = np.mean(train_losses)
            model_metrics['train_loss'] = train_loss
            model_metrics["steps"] = total_batch+1
        
            save_model(model,train_config.model_save_dir,total_batch+1,model_metrics)
            df_metrics_temp = pd.DataFrame([model_metrics])
            display(df_metrics_temp)
            
            model = model.train()
            
    total_batch +=1
    total_pbar.update(1)
        
total_pbar.close()

**************************************************epoch: 1**************************************************


: 

: 