# Multilabel Classification (multiple tags)

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import torch
import ast
import nltk
from nltk.stem.porter import PorterStemmer

In [None]:
basepath= '/content/drive/MyDrive/Colab_Notebooks/embeddings_NN'
folder= Path(basepath)
folder

In [None]:
multi_label = folder / 'multilabel_hw.csv'

In [None]:
# Load the multilabel CSV. header=0 discards the file's own header row in
# favour of `names`; the two leading columns (named '1' and '2' here) are then
# dropped because they are not listed in `usecols`.
query_csv = pd.read_csv(
    multi_label,
    encoding='ISO-8859-1',
    header=0,
    names=['1', '2', 'Id', 'Title', 'Body', 'Tags', 'Tag_Number'],
    usecols=['Id', 'Title', 'Body', 'Tags', 'Tag_Number'],
)
query_df = pd.DataFrame(query_csv)
query_df.head()

Unnamed: 0,Id,Title,Body,Tags,Tag_Number
0,3589945,ASP Query String From DropDown,<p>I have a webpage: <strong>Menu.aspx</strong...,c# asp.net,"[0, 9]"
1,5756415,How can I run JavaScript code at server side J...,<p>I want to run JavaScript code at the server...,java javascript,"[1, 3]"
2,2358597,linq to sql throwing an exception row not foun...,<p>Hi I am linq to sql and i am getting the er...,c# asp.net,"[0, 9]"
3,4332882,Running a Python script on a PHP server,"<p>I am running a nginx web server, along with...",php python,"[2, 7]"
4,5922133,some advice on how to write a window.resize fu...,<p>Im trying to write a function that resizes ...,javascript jquery,"[3, 5]"


In [None]:
!pip install swifter



In [None]:

import swifter
import re
# Tag_Number is read from the CSV as the string form of a list (e.g. "[0, 9]");
# parse it into a real Python list. The isinstance guard keeps the cell safe to
# re-run: values that are already parsed are passed through unchanged instead
# of making ast.literal_eval raise on a non-string.
query_df['Tag_Number'] = query_df['Tag_Number'].swifter.apply(
    lambda tags: ast.literal_eval(tags) if isinstance(tags, str) else tags)

Pandas Apply:   0%|          | 0/47427 [00:00<?, ?it/s]

In [None]:
import sklearn
from sklearn.preprocessing import MultiLabelBinarizer
Binarizer= MultiLabelBinarizer()

In [None]:
query_df['Tag_Number'][0]

[0, 9]

In [None]:
!pip install spacy



Spacy Preprocessing:

In [None]:
# load spacy model
spacy_folder = Path('/content/drive/MyDrive/Colab_Notebooks/nlpAssignment/HW_4_embeddings_NN')
spacy_model = spacy_folder /'en_core_web_lg-2.2.5'/'en_core_web_lg'/'en_core_web_lg-2.2.5'
#nlp = spacy.load(model)

In [None]:
import re
from bs4 import BeautifulSoup
import random
import spacy
from spacy.matcher import Matcher
from spacy.tokens import Token

from scipy.sparse import hstack
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline, FeatureUnion
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer, TfidfTransformer 
from sklearn.base import BaseEstimator, TransformerMixin

I am commenting out the following code after unzipping the 'en_core_web_lg-2.2.5' file.

In [None]:
#import tarfile

In [None]:
#tar_folder= Path('/content/drive/MyDrive/Colab_Notebooks/nlpAssignment/HW_4_embeddings_NN')
#file= tar_folder / 'en_core_web_lg-2.2.5.tar.gz'

In [None]:
# with tarfile.open(file, 'r') as tar:
  # tar.extractall(path= tar_folder)

In [None]:
# !python -m spacy download en_core_web_lg

# # load spacy model


In [None]:
# Import random function
import random

# Seed every random number generator used in the notebook (Python, NumPy,
# PyTorch CPU and CUDA) with the same value, and ask cuDNN for deterministic
# kernels.
SEED = 42
for seed_everything in (random.seed, np.random.seed,
                        torch.manual_seed, torch.cuda.manual_seed):
    seed_everything(SEED)
torch.backends.cudnn.deterministic = True

In [None]:
# Preprocessor Function
class SpacyPreprocessor(BaseEstimator, TransformerMixin):
    """Scikit-learn style transformer that strips HTML and normalises text with spaCy.

    ``transform`` takes a list or NumPy array of raw strings and returns a list
    of cleaned strings, one per input document.

    Parameters
    ----------
    lammetize : bool, default=True
        Replace every kept token with its spaCy lemma. Takes precedence over
        ``stemming``. (The name is a misspelling of "lemmatize"; it is kept
        unchanged for backward compatibility.)
    lower : bool, default=True
        Lower-case the cleaned text.
    remove_stop, remove_punct, remove_email, remove_url, remove_num : bool
        Drop tokens that spaCy flags as stop words, punctuation, e-mail-like,
        URL-like or number-like respectively.
    stemming : bool, default=False
        Apply the Porter stemmer to every kept token. Only used when
        ``lammetize`` is False.
    add_user_mention_prefix : bool, default=True
        Add ``@`` to the tokenizer prefixes so it is split from the word after it.
    remove_hashtag_prefix : bool, default=False
        Remove ``#`` from the tokenizer prefixes so hashtags stay one token.
    """

    # NOTE(review): this runs once, when the class body is executed, and resets
    # NumPy's global RNG to seed 0. Kept so that defining the class has the same
    # side effect as before; consider removing it in favour of the notebook-level
    # seeding.
    np.random.seed(0)

    def __init__(self, lammetize=True, lower=True, remove_stop=True,
                 remove_punct=True, remove_email=True, remove_url=True,
                 remove_num=False, stemming=False,
                 add_user_mention_prefix=True, remove_hashtag_prefix=False):
        self.remove_stop = remove_stop
        self.remove_punct = remove_punct
        self.remove_num = remove_num
        self.remove_url = remove_url
        self.remove_email = remove_email
        self.lammetize = lammetize
        self.lower = lower
        self.stemming = stemming
        self.add_user_mention_prefix = add_user_mention_prefix
        self.remove_hashtag_prefix = remove_hashtag_prefix

    # ---- helper functions for basic cleaning ----

    def basic_clean(self, text):
        """Strip HTML markup from ``text`` and replace line breaks with spaces.

        The markup is parsed once; the text is only replaced by the parser's
        ``get_text()`` output when at least one tag was found.
        """
        soup = BeautifulSoup(text, "html.parser")
        if soup.find() is not None:
            text = soup.get_text()
        return re.sub(r'[\n\r]', ' ', text)

    def basic_clean_array(self, X):
        """Apply :meth:`basic_clean` to every text in ``X`` and return a list."""
        return [self.basic_clean(text) for text in X]

    # ---- spaCy / Porter stemmer pre-processing ----

    def spacy_preprocessor(self, texts):
        """Tokenise ``texts`` with spaCy, drop unwanted tokens and normalise the rest.

        Loads the model found at the module-level ``spacy_model`` path on every
        call, marks tokens matching the enabled ``remove_*`` rules, then joins
        the remaining tokens (lemmatised, stemmed or verbatim) with single
        spaces.

        Returns
        -------
        list of str
            One cleaned string per input text, in input order.
        """
        nlp = spacy.load(spacy_model, disable=['parser', 'ner'])

        # Add @ as a prefix so that we can separate the word from its token
        prefixes = list(nlp.Defaults.prefixes)
        if self.add_user_mention_prefix:
            prefixes += ['@']

        # Remove # as a prefix so that we can keep hashtags and words together.
        # Guarded so a model without '#' in its prefixes does not raise ValueError.
        if self.remove_hashtag_prefix and r'#' in prefixes:
            prefixes.remove(r'#')

        prefix_regex = spacy.util.compile_prefix_regex(prefixes)
        nlp.tokenizer.prefix_search = prefix_regex.search

        # One single-token pattern per enabled removal rule.
        matcher = Matcher(nlp.vocab)
        if self.remove_stop:
            matcher.add("stop_words", [[{"is_stop": True}]])
        if self.remove_punct:
            matcher.add("punctuation", [[{"is_punct": True}]])
        if self.remove_num:
            matcher.add("numbers", [[{"like_num": True}]])
        if self.remove_url:
            matcher.add("urls", [[{"like_url": True}]])
        if self.remove_email:
            matcher.add("emails", [[{"like_email": True}]])

        # Custom per-token flag; force=True lets this method be called repeatedly.
        Token.set_extension('is_remove', default=False, force=True)

        # Build the stemmer once instead of once per token.
        stemmer = PorterStemmer()

        cleaned_text = []
        for doc in nlp.pipe(texts, batch_size=1000, disable=['parser', 'ner'],
                            n_process=-1):
            for _, start, end in matcher(doc):
                for token in doc[start:end]:
                    token._.is_remove = True

            kept_tokens = [token for token in doc if not token._.is_remove]
            if self.lammetize:
                words = [token.lemma_ for token in kept_tokens]
            elif self.stemming:
                words = [stemmer.stem(token.text) for token in kept_tokens]
            else:
                words = [token.text for token in kept_tokens]

            text = ' '.join(words)
            if self.lower:
                text = text.lower()
            cleaned_text.append(text)
        return cleaned_text

    def fit(self, X, y=None):
        """No-op: the transformer is stateless. Returns ``self``."""
        return self

    def transform(self, X, y=None):
        """Clean every document in ``X``.

        Parameters
        ----------
        X : list or numpy.ndarray
            Raw text documents.
        y : ignored

        Returns
        -------
        list of str
            Cleaned documents, in input order.

        Raises
        ------
        TypeError
            If ``X`` is neither a list nor a NumPy array. Errors raised during
            cleaning propagate to the caller; they are no longer printed and
            replaced by a silent ``None`` result, which would otherwise be
            written into the caller's data unnoticed.
        """
        if not isinstance(X, (list, np.ndarray)):
            raise TypeError('Expected list or numpy array got {}'.format(type(X)))
        return self.spacy_preprocessor(self.basic_clean_array(X))

In [None]:
pre_processor = SpacyPreprocessor()
query_df['Body'] = pre_processor.fit_transform(np.array(query_df['Body']))
query_df['Body'].head()

0    webpage menu.aspx follow control relevance    ...
1    want run javascript code server want manipulat...
2    hi linq sql get error row find change update t...
3    run nginx web server php cgi like know possibl...
4    be try write function resize css width element...
Name: Body, dtype: object

In [None]:
query_df['Title'] = pre_processor.fit_transform(np.array(query_df['Title']))
query_df['Title'].head()

0                     asp query stre dropdown
1        run javascript code server java code
2    linq sql throw exception row find change
3                run python script php server
4         advice write window.resize function
Name: Title, dtype: object

In [None]:
query_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47427 entries, 0 to 47426
Data columns (total 5 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Id          47427 non-null  int64 
 1   Title       47427 non-null  object
 2   Body        47427 non-null  object
 3   Tags        47427 non-null  object
 4   Tag_Number  47427 non-null  object
dtypes: int64(1), object(4)
memory usage: 1.8+ MB


In [None]:
#save querydf after cleaning. 
query_df.to_csv('/content/drive/MyDrive/Colab_Notebooks/embeddings_NN/cleaned_query.csv')


In [None]:
# Reload the cleaned data from Drive. The first CSV column is named '1' here and
# dropped because it is not listed in `usecols`; header=0 discards the stored
# header row in favour of `names`.
cleaned_csv_path = '/content/drive/MyDrive/Colab_Notebooks/embeddings_NN/cleaned_query.csv'
query_clean_csv = pd.read_csv(
    cleaned_csv_path,
    encoding='ISO-8859-1',
    header=0,
    names=['1', 'Id', 'Title', 'Body', 'Tags', 'Tag_Number'],
    usecols=['Id', 'Title', 'Body', 'Tags', 'Tag_Number'],
)
query_df = pd.DataFrame(query_clean_csv)

In [None]:
query_df.head()

Unnamed: 0,Id,Title,Body,Tags,Tag_Number
0,3589945,asp query stre dropdown,webpage menu.aspx follow control relevance ...,c# asp.net,"[0, 9]"
1,5756415,run javascript code server java code,want run javascript code server want manipulat...,java javascript,"[1, 3]"
2,2358597,linq sql throw exception row find change,hi linq sql get error row find change update t...,c# asp.net,"[0, 9]"
3,4332882,run python script php server,run nginx web server php cgi like know possibl...,php python,"[2, 7]"
4,5922133,advice write window.resize function,be try write function resize css width element...,javascript jquery,"[3, 5]"


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
import swifter
import re
# Tag_Number comes back from the CSV as the string form of a list; parse it.
# The isinstance guard lets the cell be re-run without ast.literal_eval raising
# on values that are already lists.
query_df['Tag_Number'] = query_df['Tag_Number'].swifter.apply(
    lambda tags: ast.literal_eval(tags) if isinstance(tags, str) else tags)

Pandas Apply:   0%|          | 0/47427 [00:00<?, ?it/s]

The Train set and Validation set are 20,000 rows each and the test set contains the remaining rows = 7,427.


In [None]:
# One document per question: cleaned title followed by cleaned body.
# fillna('') protects against fields that are NaN after the CSV round trip
# (presumably a title/body that became empty during cleaning -- verify against
# the data). Without it, a NaN in either column turns the whole concatenation
# into NaN and the other field's text is lost.
X = query_df['Title'].fillna('') + " " + query_df['Body'].fillna('')
y = query_df['Tag_Number']

# Hold out 7,427 rows for the test set, then split the remainder 50/50 into
# train and validation. Both splits use a fixed random_state for reproducibility.
X_train_valid, X_test, y_train_valid, y_test = train_test_split(
    X, y, test_size = 7427, random_state=42)
X_train , X_valid, y_train, y_valid = train_test_split(
    X_train_valid, y_train_valid, test_size = 0.5, random_state=42)

In [None]:
y[0]

[0, 9]

In [None]:
print(y_train)

23159       [1, 4]
11157       [3, 5]
2701        [2, 5]
23541       [3, 5]
6214        [3, 5]
           ...    
22917       [3, 5]
30364       [0, 9]
26630    [3, 4, 5]
280         [3, 5]
7353        [3, 5]
Name: Tag_Number, Length: 20000, dtype: object


In [None]:
# TF-IDF features: English stop words removed, terms seen in fewer than 4
# training documents ignored, vocabulary capped at 5000 terms.
vectorizer = TfidfVectorizer(stop_words="english", min_df=4, max_features=5000)

# Fit on the training split only (no leakage from valid/test). fit_transform
# learns the vocabulary and vectorises the training texts in one pass instead
# of tokenising the training corpus twice.
# astype('U') forces every entry to a unicode string before vectorising.
X_train_vec = vectorizer.fit_transform(X_train.values.astype('U'))
X_valid_vec = vectorizer.transform(X_valid.values.astype('U'))
X_test_vec = vectorizer.transform(X_test.values.astype('U'))

In [None]:
y_train_bin = Binarizer.fit_transform(y_train)
y_valid_bin = Binarizer.transform(y_valid)
y_test_bin = Binarizer.transform(y_test)


In [None]:
y_train_bin[0]

array([0, 1, 0, 0, 1, 0, 0, 0, 0, 0])

Create train/valid/test datasets:

In [None]:

# Create dense float32 feature tensors.
# The sparse matrices are cast to float32 *before* densifying so that no
# intermediate dense float64 copy (twice the size) is materialised.
X_train_tensor = torch.from_numpy(X_train_vec.astype(np.float32).toarray())
X_valid_tensor = torch.from_numpy(X_valid_vec.astype(np.float32).toarray())
X_test_tensor = torch.from_numpy(X_test_vec.astype(np.float32).toarray())

# Multi-hot label matrices as float tensors.
y_train_tensor = torch.tensor(y_train_bin).float()
y_valid_tensor = torch.tensor(y_valid_bin).float()
y_test_tensor = torch.tensor(y_test_bin).float()

# Pair features with labels so they can be batched by a DataLoader.
train_set = torch.utils.data.TensorDataset(X_train_tensor, y_train_tensor)
valid_set = torch.utils.data.TensorDataset(X_valid_tensor, y_valid_tensor)
test_set = torch.utils.data.TensorDataset(X_test_tensor, y_test_tensor)

In [None]:
torch.mean(y_train_tensor,0)


tensor([0.2208, 0.1745, 0.1190, 0.5750, 0.1597, 0.5284, 0.0152, 0.0124, 0.0070,
        0.2648])

In [None]:
train_set

<torch.utils.data.dataset.TensorDataset at 0x7f454e39a3d0>

In [None]:
from torch.utils.data import Dataset, DataLoader
#check_loader = torch.utils.data.DataLoader(train_set, batch_size = 32, shuffle = True)

In [None]:
# Install wandb and update it to the latest version
%%capture
!pip install wandb --upgrade

In [None]:
# Import wandb
import wandb

# Login to W&B
wandb.login()



True

## MODEL, TRAINING FUNCTIONS:

In [None]:
print(train_set.tensors[0].shape)
import torch.nn as nn

torch.Size([20000, 5000])


Creating a model with two hidden layers:

In [None]:
# Feed-forward network with two hidden layers (512 and 256 units, ReLU).
# The input width is taken from the feature tensor; the final Sigmoid maps each
# of the 10 outputs into (0, 1) independently.
input_dim = train_set.tensors[0].shape[1]
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(input_dim, 512),
    nn.ReLU(),
    nn.Linear(512, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
    nn.Sigmoid(),
)

I am using fbeta_score as the evaluation metric with beta = 2. With beta > 1 the score gives more importance to recall than to precision.

In [None]:
from sklearn.metrics import fbeta_score

## Train

In [None]:
def train(train_loader, model, optimizer, loss_function, log_batch, log_interval,
          grad_clipping, max_norm):
  """Run one training epoch.

  Reads the module-level ``device`` and updates the module-level counters
  ``example_ct_train`` and ``batch_ct_train``.

  Args:
    train_loader: iterable yielding ``(inputs, targets)`` batches.
    model: network to train; switched to training mode.
    optimizer: optimizer used to update ``model``'s parameters.
    loss_function: callable ``loss_function(output, targets)`` returning a
      scalar loss tensor.
    log_batch: when true, log batch-level loss and F-beta to Weights & Biases.
    log_interval: log when ``(batch_ct_train + 1) % log_interval == 0``.
    grad_clipping: when true, clip the gradient L2 norm to ``max_norm``.
    max_norm: maximum gradient norm used when ``grad_clipping`` is true.

  Returns:
    ``(train_loss, train_fbeta)``: the loss and the F-beta score (beta=2),
    each averaged over the batches of the epoch.
  """
  # These counts persist across epochs.
  global example_ct_train
  global batch_ct_train

  # Per-epoch accumulators.
  running_train_loss = 0
  running_train_f2_sum = 0

  # put the model in training mode
  model.train()

  # `inputs` rather than `input`, so the builtin is not shadowed.
  for inputs, targets in train_loader:
    # move inputs and targets to the selected device
    inputs = inputs.to(device)
    targets = targets.to(device)

    # Forward pass
    output = model(inputs)
    loss = loss_function(output, targets)

    # Threshold the outputs at 0.5 to get hard 0/1 label predictions
    y_pred = (output > 0.5).float()

    example_ct_train += len(targets)
    batch_ct_train += 1

    # set gradients to zero
    optimizer.zero_grad()

    # Backward pass
    loss.backward()

    # Gradient Clipping
    if grad_clipping:
      nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_norm, norm_type=2)

    # Update parameters using their gradient
    optimizer.step()

    # Add train loss of a batch
    running_train_loss += loss.item()

    # F-beta of this batch: per-label scores (average=None) averaged over labels.
    # scikit-learn works on host arrays, so the tensors are moved to the CPU
    # first; passing CUDA tensors directly fails.
    batch_f2 = fbeta_score(y_true=targets.detach().cpu().numpy(),
                           y_pred=y_pred.detach().cpu().numpy(),
                           beta=2, average=None, zero_division=0).mean()
    running_train_f2_sum += batch_f2

    # Log the metrics of *this* batch. The running sum is deliberately not
    # logged: it grows with the number of batches and is not a score.
    if log_batch:
      if ((batch_ct_train + 1) % log_interval) == 0:
        wandb.log({"Train Batch Loss  :": loss.item()})
        wandb.log({"Train Batch Acc :": batch_f2})

  # Mean train loss over the batches of this epoch
  train_loss = running_train_loss/len(train_loader)

  # Mean batch F-beta over this epoch
  train_fbeta = running_train_f2_sum/len(train_loader)

  return train_loss, train_fbeta

## Valid/ Test Epoch

In [None]:
#Valid Functions

def valid(loader, model, optimizer, loss_function, log_batch, log_interval):
  """Evaluate ``model`` on ``loader`` for one epoch without updating weights.

  Reads the module-level ``device`` and updates the module-level counters
  ``example_ct_valid`` and ``batch_ct_valid``.

  Args:
    loader: iterable yielding ``(inputs, targets)`` batches.
    model: network to evaluate; switched to evaluation mode.
    optimizer: unused; accepted so the signature mirrors ``train``.
    loss_function: callable ``loss_function(output, targets)`` returning a
      scalar loss tensor.
    log_batch: when true, log batch-level loss and F-beta to Weights & Biases.
    log_interval: log when ``(batch_ct_valid + 1) % log_interval == 0``.

  Returns:
    ``(valid_loss, valid_fbeta)``: the loss and the F-beta score (beta=2),
    each averaged over the batches of ``loader``.
  """
  # These counts persist across epochs.
  global example_ct_valid
  global batch_ct_valid

  # Per-epoch accumulators.
  running_valid_loss = 0
  running_valid_f2_sum = 0

  # put the model in evaluation mode
  model.eval()

  with torch.no_grad():
    # `inputs` rather than `input`, so the builtin is not shadowed.
    for inputs, targets in loader:

      # move inputs and targets to the selected device
      inputs = inputs.to(device)
      targets = targets.to(device)

      # Forward pass
      output = model(inputs)
      loss = loss_function(output, targets)

      # Threshold the outputs at 0.5 to get hard 0/1 label predictions
      y_pred = (output > 0.5).float()

      # count of examples and batches
      example_ct_valid += len(targets)
      batch_ct_valid += 1

      # Add valid loss of a batch
      running_valid_loss += loss.item()

      # F-beta of this batch, averaged over labels. scikit-learn needs host
      # arrays, so move the tensors to the CPU first.
      batch_f2 = fbeta_score(y_true=targets.cpu().numpy(),
                             y_pred=y_pred.cpu().numpy(), beta=2,
                             average=None, zero_division=0).mean()
      running_valid_f2_sum += batch_f2

      # Log the metrics of *this* batch (not the running sum).
      if log_batch:
        if ((batch_ct_valid + 1) % log_interval) == 0:
          wandb.log({"Valid Batch Loss  :": loss.item()})
          wandb.log({"Valid Batch Accuracy :": batch_f2})

    # Average over the batches of the loader that was actually passed in, not
    # over the global `valid_loader`, so the function is also correct for other
    # loaders (e.g. a test loader).
    valid_loss = running_valid_loss/len(loader)
    valid_f2_sum = running_valid_f2_sum/len(loader)

  return valid_loss, valid_f2_sum

In [None]:
from datetime import datetime

## Model Training Loop 

In [None]:
# Model Training Loop

def train_loop(train_loader, valid_loader, model, loss_function, optimizer, 
               epochs, device, patience, early_stopping,
               file_model):
  """Train ``model`` for up to ``epochs`` epochs, checkpointing on validation loss.

  After every epoch the validation loss is compared with the best value seen so
  far. When it has not got worse, the model's ``state_dict`` is saved to
  ``file_model``. With ``early_stopping`` enabled, training stops once the
  validation loss has failed to improve for more than ``patience`` consecutive
  epochs. Batch-logging and gradient-clipping settings are read from
  ``wandb.config``; epoch metrics are logged to Weights & Biases and printed.

  Args:
    train_loader: loader passed to ``train``.
    valid_loader: loader passed to ``valid``.
    model: network to train.
    loss_function: loss callable passed to ``train`` and ``valid``.
    optimizer: optimizer passed to ``train`` and ``valid``.
    epochs: maximum number of epochs.
    device: unused here (``train``/``valid`` read the module-level ``device``);
      kept for signature compatibility.
    patience: number of non-improving epochs tolerated before stopping.
    early_stopping: enable early stopping.
    file_model: path the best model's ``state_dict`` is written to.

  Returns:
    ``(train_loss_history, train_f2_history, valid_loss_history,
    valid_f2_history)``: one entry per completed epoch.
  """
  # Per-epoch histories
  train_loss_history = []
  valid_loss_history = []
  train_f2_history = []
  valid_f2_history = []

  delta = 0                 # minimum change in score that counts as an improvement
  best_score = None         # best (negated) validation loss so far
  valid_loss_min = np.inf   # np.inf: the `np.Inf` alias was removed in NumPy 2.0
  counter_early_stop = 0    # consecutive epochs without improvement

  for epoch in range(epochs):
    t0 = datetime.now()

    # One epoch of training followed by one pass over the validation data
    train_loss, train_f2_mean = train(train_loader, model, optimizer, loss_function,
                                      wandb.config.log_batch, wandb.config.log_interval,
                                      wandb.config.grad_clipping, wandb.config.max_norm)
    valid_loss, valid_f2_mean = valid(valid_loader, model, optimizer, loss_function,
                                      wandb.config.log_batch, wandb.config.log_interval)

    dt = datetime.now() - t0

    # Save history of the losses and F-beta scores
    train_loss_history.append(train_loss)
    train_f2_history.append(train_f2_mean)
    valid_loss_history.append(valid_loss)
    valid_f2_history.append(valid_f2_mean)

    # Higher score == lower validation loss.
    score = -valid_loss
    is_first_epoch = best_score is None
    early_stop = False

    if is_first_epoch or not (score < best_score + delta):
      # New best (or first) validation loss: checkpoint the model.
      saving_note = 'Saving Model...' if is_first_epoch else 'Saving model...'
      print(f'Validation loss has decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). {saving_note}')
      torch.save(model.state_dict(), file_model)
      best_score = score
      valid_loss_min = valid_loss
      counter_early_stop = 0
    elif early_stopping:
      counter_early_stop += 1
      print(f'Early stoping counter: {counter_early_stop} out of {patience}')
      early_stop = counter_early_stop > patience
    else:
      print(f'Validation loss has not decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Not Saving Model...')

    # Log the train and valid metrics to W&B. This happens before the
    # early-stopping break so that the final epoch, which is already part of
    # the returned histories, is logged and printed as well.
    wandb.log({f"Train epoch Loss :": train_loss, f"Valid epoch Loss :": valid_loss })
    wandb.log({f"Train epoch fbeta :": train_f2_mean, f"Valid epoch fbeta :": valid_f2_mean})

    # Print the epoch summary
    print(f'Epoch : {epoch+1} / {epochs}')
    print(f'Time to complete {epoch+1} is {dt}')
    print(f'Train Loss: {train_loss : .4f} | Train fbeta: {train_f2_mean * 100 : .4f}%')
    print(f'Valid Loss: {valid_loss : .4f} | Valid fbeta: {valid_f2_mean * 100 : .4f}%')
    print()
    torch.cuda.empty_cache()

    if early_stop:
      print('Early Stopping')
      break

  return train_loss_history, train_f2_history, valid_loss_history, valid_f2_history


In [None]:

save_model_folder = Path('/content/drive/MyDrive/Colab_Notebooks/embeddings_NN/Saved_models')

## META DATA

In [None]:
# Run configuration logged to Weights & Biases and read back via wandb.config.
hyperparameters = dict(
    epochs = 75,

    batch_size = 128,
    learning_rate = 1.0,
    # Label of the data actually used in this notebook (multilabel_hw.csv);
    # the previous "IMDB" value was a leftover from another experiment.
    dataset="multilabel_hw",
    architecture="2_hidden_layers_512_256",
    log_interval = 25,
    log_batch = True,
    # NOTE(review): the file name says "512_128" and "imdb" although the
    # architecture is 512/256 on the multilabel data. It is left unchanged
    # because the model-loading cell opens this exact file name.
    file_model = save_model_folder/'2_hidden_layers_512_128.imdb.pt',
    grad_clipping = False,
    max_norm = 0,          # only used when grad_clipping is True
    momentum = 0,
    patience = 10,
    early_stopping = True,
    scheduler_factor = 0,
    scheduler_patience = 0,
    weight_decay = 0
   )

# non_linearity = F.elu 
# Use the first GPU when available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# initialize wandb

wandb.init(name = 'nn_mtl', project = 'NN_Training', config = hyperparameters)


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Train Batch Acc :,▂▅▁▅▁▅▂▆▁▆▁▅▂▇▂▆▂▇▃▇▂▆▂▇▃▇▂▇▃█▃▇▃▇▄█▃▇▃▇
Train Batch Loss :,▅▄▆▆▃▄█▅▅▅▁▅▁▄▄▄▅▇▇▅▄▃▇▃▄▃▅▆▅▄▆▃▅▁▄▄▅▆▄▅
Train epoch Loss :,█▃▂▂▂▂▂▂▁▂▂▁▂▁▁▁▁▁▁
Train epoch fbeta :,▁▄▆▆▆▅▇▇▆▇▇▆▇▇▇▇███
Valid Batch Accuracy :,▂▆▁▁▁▃▂▄▂▄▂▆▂▄▂▆▂▇▃▇▃▇▂█▃▇▃▇▄█▃▇▃▇▄█▄█▃▇
Valid Batch Loss :,█▄██▃▅▅▄▂▃▃▂▆▂▂▂▇▂▂▇▄▂▃▁▂▃▁▂▄▅▄▅█▃▃▅▂▄▃▁
Valid epoch Loss :,▂█▄▃▁▁▁▁▁▂▂▁▂▂▄▃▂▁▂
Valid epoch fbeta :,█▁▅▅▅█▅████████████

0,1
Train Batch Acc :,23.63589
Train Batch Loss :,0.39187
Train epoch Loss :,0.394
Train epoch fbeta :,0.16747
Valid Batch Accuracy :,24.20123
Valid Batch Loss :,0.39801
Valid epoch Loss :,0.3995
Valid epoch fbeta :,0.17161


In [None]:
# wandb.config.non_linearity = non_linearity
wandb.config.device = device
print(wandb.config.device )

cpu


## Specify Dataloader

In [None]:
# Re-seed every RNG right before the loaders are built so that the shuffling
# order of the training batches is reproducible.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Data loaders: only the training data is shuffled.
train_loader = DataLoader(
    train_set, batch_size=wandb.config.batch_size, shuffle=True)
valid_loader = DataLoader(
    valid_set, batch_size=wandb.config.batch_size, shuffle=False)
test_loader = DataLoader(
    test_set, batch_size=wandb.config.batch_size, shuffle=False)

# Binary cross-entropy on probabilities (the model already ends in a Sigmoid).
loss_function = nn.BCELoss()

# Move the model to the configured device.
model.to(wandb.config.device)

# Plain stochastic gradient descent.
optimizer = torch.optim.SGD(
    model.parameters(), lr=wandb.config.learning_rate)

# Record the optimizer in the run configuration.
wandb.config.optimizer = optimizer

## TRAIN MODEL

In [None]:
example_ct_train, batch_ct_train, example_ct_valid, batch_ct_valid = 0, 0, 0, 0
train_loss_history, train_f2_history, valid_loss_history, valid_f2_history = train_loop(
    train_loader, valid_loader, model, loss_function, optimizer, 
    wandb.config.epochs, wandb.config.device, wandb.config.patience, 
    wandb.config.early_stopping, wandb.config.file_model)

Validation loss has decreased (inf --> 0.392123). Saving Model...
Epoch : 1 / 75
Time to complete 1 is 0:00:08.597030
Train Loss:  0.4194 | Train fbeta:  17.1775%
Valid Loss:  0.3921 | Valid fbeta:  17.1613%

Validation loss has decreased (0.392123 --> 0.375178). Saving model...
Epoch : 2 / 75
Time to complete 2 is 0:00:08.565524
Train Loss:  0.3860 | Train fbeta:  17.1977%
Valid Loss:  0.3752 | Valid fbeta:  17.4149%

Validation loss has decreased (0.375178 --> 0.273443). Saving model...
Epoch : 3 / 75
Time to complete 3 is 0:00:08.537946
Train Loss:  0.3204 | Train fbeta:  19.0469%
Valid Loss:  0.2734 | Valid fbeta:  25.5090%

Validation loss has decreased (0.273443 --> 0.254962). Saving model...
Epoch : 4 / 75
Time to complete 4 is 0:00:08.584655
Train Loss:  0.2672 | Train fbeta:  30.1462%
Valid Loss:  0.2550 | Valid fbeta:  32.6016%

Validation loss has decreased (0.254962 --> 0.229659). Saving model...
Epoch : 5 / 75
Time to complete 5 is 0:00:08.588860
Train Loss:  0.2450 | Trai

## Get predictions

In [None]:
# Evaluation Metric used = Fbeta measure where beta = 2.0

def get_fbeta_pred(data_loader, model):
  """Predict labels for every batch in ``data_loader`` and score them with F-beta.

  Reads the module-level ``device``. Model outputs are thresholded at 0.5 to
  obtain 0/1 label predictions.

  Args:
    data_loader: iterable yielding ``(inputs, targets)`` batches.
    model: network to evaluate; switched to evaluation mode.

  Returns:
    ``(predictions, test_f2_sum)``: a float tensor (on ``device``) holding the
    0/1 predictions for the whole dataset, and the F-beta score (beta=2)
    computed per label over the whole dataset and averaged over labels.
  """
  # Collect per-batch results and concatenate once at the end; concatenating
  # inside the loop re-copies everything gathered so far on every batch.
  pred_batches = []
  target_batches = []

  model.eval()
  with torch.no_grad():
    # Iterate over batches from the data set
    for inputs, targets in data_loader:
      # move inputs and targets to the selected device
      inputs = inputs.to(device)
      targets = targets.to(device)

      # Forward pass, then convert probabilities into labels 0 or 1
      output = model(inputs)
      pred_batches.append((output > 0.5).float())
      target_batches.append(targets)

  if pred_batches:
    predictions = torch.cat(pred_batches)
    y = torch.cat(target_batches)
  else:
    # Empty loader: fall back to empty tensors on the device.
    predictions = torch.Tensor().to(device)
    y = torch.Tensor().to(device)

  # scikit-learn works on host arrays, so move the tensors to the CPU first;
  # passing CUDA tensors directly fails.
  test_f2_sum = fbeta_score(y_true=y.cpu().numpy(), y_pred=predictions.cpu().numpy(),
                            beta=2, average=None, zero_division=0).mean()

  # Return the predictions and the F-beta score
  return predictions, test_f2_sum

## Load the saved model

In [None]:
# Rebuild the same architecture as the trained model so that the saved
# state_dict keys and shapes match.
model_load = nn.Sequential(nn.Flatten(),
                    nn.Linear(train_set.tensors[0].shape[1], 512),
                    nn.ReLU(),
                    nn.Linear(512, 256),
                    nn.ReLU(),
                    nn.Linear(256, 10),
                    nn.Sigmoid())
model_load.to(device)
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU runtime can still be loaded on a CPU-only one.
model_loaded_dict = torch.load(save_model_folder/'2_hidden_layers_512_128.imdb.pt',
                               map_location=device)
model_load.load_state_dict(model_loaded_dict)



<All keys matched successfully>

In [None]:
train_set.tensors[0].shape[1]

5000

In [None]:
model_load

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=5000, out_features=512, bias=True)
  (2): ReLU()
  (3): Linear(in_features=512, out_features=256, bias=True)
  (4): ReLU()
  (5): Linear(in_features=256, out_features=10, bias=True)
  (6): Sigmoid()
)

In [None]:
predictions, fbeta_test = get_fbeta_pred(test_loader, model_load)

In [None]:
fbeta_test

0.6821460861804313

F2 test score is 0.68

# EmbeddingBag

In [None]:
import torch.nn as nn
import torch.nn.functional as F
from torchtext.vocab import Vocab, vocab
from torchtext.vocab import build_vocab_from_iterator
from collections import Counter, OrderedDict
from spacy.matcher import Matcher
from spacy.tokens import Token

Creating Custom Dataset:

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Dataset pairing texts ``X`` with labels ``y``.

    Indexing returns ``(labels, text)`` -- labels first -- with both values
    converted to NumPy arrays.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        # Number of samples is taken from the text container.
        return len(self.X)

    def __getitem__(self, idx):
        # Tensor indices are converted to plain Python ints / lists first.
        position = idx.tolist() if torch.is_tensor(idx) else idx

        features = np.array(self.X[position])
        targets = np.array(self.y[position])
        return (targets, features)

In [None]:
# Replace the shuffled row labels left over from train_test_split with a fresh
# 0..n-1 index on every split, in place.
for split_series in (X_train, X_valid, X_test, y_train, y_valid, y_test):
    split_series.reset_index(drop=True, inplace=True)

In [None]:
X_train

0        get info user connect tether get datum connect...
1        change input upper case js   < script type="te...
2        receive firebug console response stuff ajax ex...
3        use .size work project help work follow code p...
4        youtube player play ff 7.0 run problem ff 7.0 ...
                               ...                        
19995    add active class main tab list part(2 okay nee...
19996    string recognize valid date time c try possibl...
19997    flip counter jquery android jquery display fli...
19998    work dom ajax function $ body').on('click .kat...
19999    jquery extend access superclass way access sup...
Length: 20000, dtype: object

In [None]:
trainset = CustomDataset(X_train,y_train_tensor)
validset = CustomDataset(X_valid,y_valid_tensor)
testset = CustomDataset(X_test,y_test_tensor)


In [None]:
y_train

0           [1, 4]
1           [3, 5]
2           [2, 5]
3           [3, 5]
4           [3, 5]
           ...    
19995       [3, 5]
19996       [0, 9]
19997    [3, 4, 5]
19998       [3, 5]
19999       [3, 5]
Name: Tag_Number, Length: 20000, dtype: object

In [None]:
trainset.__getitem__([0,10])

(array([[0., 1., 0., 0., 1., 0., 0., 0., 0., 0.],
        [1., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32),
 array(['get info user connect tether get datum connected user tether user datum dnsmasq.lease file find user download datum regard',
        'want use default configuration smtp   configuration section like encrypt password   configuration section decryption function smtpclient instantiate configuration information run decryption function password thank tip'],
       dtype=object))

In [None]:
trainset.__len__()

20000

Create Vocabulary

In [None]:
# Count how often each whitespace-separated token occurs in the training texts.
counter = Counter()
for index in range(len(trainset)):
  labels, text = trainset[index]
  counter.update(str(text).split())

In [None]:
counter

Counter({'get': 3474,
         'info': 528,
         'user': 6435,
         'connect': 279,
         'tether': 5,
         'datum': 4270,
         'connected': 11,
         'dnsmasq.lease': 1,
         'file': 6711,
         'find': 2923,
         'download': 632,
         'regard': 161,
         'change': 4109,
         'input': 5858,
         'upper': 40,
         'case': 1841,
         'js': 1895,
         '<': 69922,
         'script': 6071,
         'type="text': 3503,
         'css': 1008,
         '>': 76529,
         '$': 38583,
         'function': 15104,
         "upper').keyup(function": 2,
         'this.value': 80,
         '=': 68076,
         'this.value.touppercase': 2,
         '/script': 1866,
         'html': 4286,
         'div': 10472,
         'id="search': 14,
         'type="radio': 447,
         'name="table': 7,
         'class="table': 20,
         'value="professor': 3,
         'tabindex="1': 26,
         'professor': 11,
         'value="department': 2,
  

In [None]:
# Sort the tokens by descending frequency and keep that order in an OrderedDict.
sorted_by_freq_tuples = sorted(counter.items(), key=lambda x: x[1], reverse=True)
ordered_dict = OrderedDict(sorted_by_freq_tuples)
# Build the vocabulary from the frequency-sorted mapping (not from the raw
# counter) so that token indices follow frequency rank. Tokens seen fewer than
# 4 times are left out.
vocab_dict = vocab(ordered_dict, min_freq=4)

In [None]:
vocab_dict.insert_token('<unk>', 0)
vocab_dict.set_default_index(0)

In [None]:
len(vocab_dict.get_stoi())

22962

## Collate function

In [None]:
# Helpers used by the collate function to turn raw samples into model inputs.
def text_pipeline(x):
    """Split ``str(x)`` on whitespace and map each token to its index in ``vocab_dict``."""
    return [vocab_dict[token] for token in str(x).split()]


def label_pipeline(x):
    """Return the label unchanged."""
    return x

In [None]:
def collate_batch(batch):
    """Collate (label, text) pairs into flat inputs for an offsets-based embedding layer.

    Args:
        batch: iterable of (label, text) pairs as produced by the dataset.

    Returns:
        Tuple ``(text_list, label_list, offsets)``:
        text_list  -- 1-D int64 tensor with every example's token indices concatenated;
        label_list -- int64 tensor built from the stacked labels;
        offsets    -- 1-D tensor holding the start position of each example in text_list.
    """
    labels = [label_pipeline(label) for label, _ in batch]
    token_tensors = [
        torch.tensor(text_pipeline(text), dtype=torch.int64) for _, text in batch
    ]

    # Example i starts where examples 0..i-1 end: shift the lengths right by one
    # (leading 0, last length dropped) and take the running sum.
    shifted_lengths = [0] + [tokens.size(0) for tokens in token_tensors]

    label_tensor = torch.tensor(np.array(labels), dtype=torch.int64)
    start_positions = torch.tensor(shifted_lengths[:-1]).cumsum(dim=0)
    flat_tokens = torch.cat(token_tensors)
    return flat_tokens, label_tensor, start_positions

In [None]:
# Sanity-check loader: tiny shuffled batches make the collate output easy to eyeball.
batch_size = 3
check_loader = torch.utils.data.DataLoader(
    trainset,
    batch_size=batch_size,
    shuffle=True,
    num_workers=2,
    collate_fn=collate_batch,
)

In [None]:
# Print the first collated batch only (labels, flattened token ids, offsets) and stop.
for text, label, offsets in check_loader:
  print(label, text, offsets)
  break

tensor([[0, 0, 0, 1, 0, 0, 0, 0, 0, 1],
        [1, 0, 0, 1, 0, 0, 0, 0, 0, 1],
        [0, 0, 0, 1, 0, 1, 0, 0, 0, 0]]) tensor([  301,   315,     8,   285,  5106,     3,   256,    79,  1368,   350,
         6693, 11227,   147,    18,  1368,  1411,  1406,  1127,   588,   252,
         4783,   354,  7534,  1368, 11227,   207,    55,   166,   368,   166,
          233,  1360,   166,   901,     3,  7347,  7348,   869,  7349,  7350,
         4524,  7351, 13945,  7353,  2080,  7354,  7355,  2080,  7354,     0,
         2080,  7354,  7356,     0,   437,     0,  4148,  9358,   689,  4762,
         4164,     0,  6255,   644,   186,   166,   677,   211,  1115,   130,
           56,    89,   545,     0,   644,   186,   166,   677,   211,  1115,
          130,    56,    89,   545,     0,  1179, 14096,  1486,    93,   285,
            0,   315,     3,  1731,  1788,   314,   526,  2078,   875,  1038,
            0,     0,     8,   354,    18,  1368,   233,   256,   632,  2595,
          365,   632,

## MLP Custom

In [None]:
class MLPCustom(nn.Module):
  """Bag-of-embeddings classifier: EmbeddingBag -> two hidden layers -> sigmoid outputs.

  Args:
    input_dim: embedding dimension (also the input width of the first hidden layer).
    vocab_size: number of rows in the embedding table.
    hidden_dim1: width of the first hidden layer.
    hidden_dim2: width of the second hidden layer.
    output_dim: number of output units (one per label).
    non_linearity: activation applied after each hidden layer; may be a plain
      callable (e.g. ``F.relu``) or an ``nn.Module`` (e.g. ``nn.ReLU()``).
  """

  def __init__(self, input_dim, vocab_size, hidden_dim1, hidden_dim2, output_dim,
               non_linearity):
    # nn.Module must be initialised before any attribute is assigned; otherwise
    # passing an nn.Module as `non_linearity` raises
    # "cannot assign module before Module.__init__() call".
    super().__init__()

    self.input_dim = input_dim
    self.hidden_dim1 = hidden_dim1
    self.hidden_dim2 = hidden_dim2
    self.output_dim = output_dim
    self.vocab_size = vocab_size

    self.non_linearity = non_linearity

    # Pools each example's token embeddings into one vector (EmbeddingBag's default mode).
    self.embedding = nn.EmbeddingBag(num_embeddings=vocab_size, embedding_dim=input_dim)

    # hidden layer 1
    self.hidden_layer1 = nn.Linear(self.input_dim, self.hidden_dim1)

    # hidden layer 2
    self.hidden_layer2 = nn.Linear(self.hidden_dim1, self.hidden_dim2)

    # output layer
    self.output_layer = nn.Linear(self.hidden_dim2, self.output_dim)

  def forward(self, input, offsets):
    """Return per-label probabilities for a flattened batch of token indices.

    Args:
      input: 1-D tensor of token indices for the whole batch, concatenated.
      offsets: 1-D tensor with the start position of each example inside `input`.

    Returns:
      Tensor with one row per example and `output_dim` sigmoid activations per row.
    """
    embedOut = self.embedding(input, offsets)
    hout1 = self.non_linearity(self.hidden_layer1(embedOut))
    hout2 = self.non_linearity(self.hidden_layer2(hout1))
    ypred = self.output_layer(hout2)
    # Functional sigmoid avoids constructing a new nn.Sigmoid module on every call.
    return torch.sigmoid(ypred)

## Train 2

In [None]:
def train2(train_loader, model, optimizer, loss_function, log_batch, log_interval,
           grad_clipping, max_norm):
  """Train `model` for one epoch and return the epoch's mean loss and mean F2 score.

  Relies on the notebook-level names `device`, `wandb`, `fbeta_score` and `nn`, and
  updates the global counters `example_ct_train` / `batch_ct_train`.

  Args:
    train_loader: iterable yielding (input, targets, offsets) batches.
    model: module called as ``model(input, offsets)``.
    optimizer: optimizer stepping the model's parameters.
    loss_function: callable applied to (output.float(), targets.float()).
    log_batch: when truthy, log batch metrics to W&B every `log_interval` batches.
    log_interval: number of batches between two batch-level log entries.
    grad_clipping: when truthy, clip the gradient L2 norm to `max_norm`.
    max_norm: clipping threshold; only used if `grad_clipping` is truthy.

  Returns:
    (train_loss, train_fbeta): batch loss and batch F2 averaged over the batches
    of this epoch.
  """
  # These counters live at notebook level and keep growing across epochs.
  global example_ct_train
  global batch_ct_train

  running_train_loss = 0
  running_train_f2_sum = 0

  # put the model in training mode
  model.train()

  for input, targets, offsets in train_loader:
    # move inputs and targets to the compute device
    input = input.to(device)
    targets = targets.to(device)
    offsets = offsets.to(device)

    # Forward pass
    output = model(input, offsets)
    loss = loss_function(output.float(), targets.float())

    # Threshold the outputs at 0.5 to get hard 0/1 predictions
    y_pred = (output > 0.5).float()

    example_ct_train += len(targets)
    batch_ct_train += 1

    # Backward pass and parameter update
    optimizer.zero_grad()
    loss.backward()
    if grad_clipping:
      nn.utils.clip_grad_norm_(model.parameters(), max_norm=max_norm, norm_type=2)
    optimizer.step()

    batch_loss = loss.item()
    running_train_loss += batch_loss

    # scikit-learn needs host-side arrays; CUDA tensors cannot be converted implicitly.
    batch_f2 = fbeta_score(y_true=targets.cpu().numpy(), y_pred=y_pred.cpu().numpy(),
                           beta=2, average=None, zero_division=0).mean()
    running_train_f2_sum += batch_f2

    # Log this batch's own loss and F2 (not the running sum) in a single W&B step.
    # batch_ct_train was already incremented, so no extra +1 is needed here.
    if log_batch and batch_ct_train % log_interval == 0:
      wandb.log({"Train Batch Loss  :": batch_loss,
                 "Train Batch Acc :": batch_f2})

  # Mean loss and mean F2 over the batches of this epoch
  train_loss = running_train_loss / len(train_loader)
  train_fbeta = running_train_f2_sum / len(train_loader)

  return train_loss, train_fbeta

## Valid 2

In [None]:
# Validation function

def valid2(loader, model, optimizer, loss_function, log_batch, log_interval):
  """Evaluate `model` on `loader` without gradients; return mean loss and mean F2.

  Relies on the notebook-level names `device`, `wandb` and `fbeta_score`, and
  updates the global counters `example_ct_valid` / `batch_ct_valid`.

  Args:
    loader: iterable yielding (input, targets, offsets) batches.
    model: module called as ``model(input, offsets)``.
    optimizer: unused; kept so existing callers keep working.
    loss_function: callable applied to (output.float(), targets.float()).
    log_batch: when truthy, log batch metrics to W&B every `log_interval` batches.
    log_interval: number of batches between two batch-level log entries.

  Returns:
    (valid_loss, valid_f2): batch loss and batch F2 averaged over the batches
    of `loader`.
  """
  # These counters live at notebook level and keep growing across epochs.
  global example_ct_valid
  global batch_ct_valid

  running_valid_loss = 0
  running_valid_f2_sum = 0

  # put the model in evaluation mode
  model.eval()

  with torch.no_grad():
    for input, targets, offsets in loader:
      # move inputs and targets to the compute device
      input = input.to(device)
      targets = targets.to(device)
      offsets = offsets.to(device)

      # Forward pass
      output = model(input, offsets)
      loss = loss_function(output.float(), targets.float())

      # Threshold the outputs at 0.5 to get hard 0/1 predictions
      y_pred = (output > 0.5).float()

      example_ct_valid += len(targets)
      batch_ct_valid += 1

      batch_loss = loss.item()
      running_valid_loss += batch_loss

      # scikit-learn needs host-side arrays; CUDA tensors cannot be converted implicitly.
      batch_f2 = fbeta_score(y_true=targets.cpu().numpy(), y_pred=y_pred.cpu().numpy(),
                             beta=2, average=None, zero_division=0).mean()
      running_valid_f2_sum += batch_f2

      # Log this batch's own loss and F2 (not the running sum) in a single W&B step.
      # batch_ct_valid was already incremented, so no extra +1 is needed here.
      if log_batch and batch_ct_valid % log_interval == 0:
        wandb.log({"Valid Batch Loss  :": batch_loss,
                   "Valid Batch Accuracy :": batch_f2})

    # Average over the loader that was actually passed in, not a notebook-level global.
    valid_loss = running_valid_loss / len(loader)
    valid_f2_sum = running_valid_f2_sum / len(loader)

  return valid_loss, valid_f2_sum

## Train loop 2

In [None]:
# Model Training Loop

def train_loop2(train_loader, valid_loader, model, loss_function, optimizer, epochs, device, patience, early_stopping,
               file_model):
  """Train for up to `epochs` epochs, checkpointing the best model by validation loss.

  Each epoch calls `train2` then `valid2`; batch-logging and gradient-clipping
  settings are read from `wandb.config`. The model's state dict is written to
  `file_model` whenever the validation loss does not get worse.

  Args:
    train_loader: loader passed to `train2`.
    valid_loader: loader passed to `valid2`.
    model: model to train.
    loss_function: loss passed to `train2` / `valid2`.
    optimizer: optimizer passed to `train2` / `valid2`.
    epochs: maximum number of epochs.
    device: unused here (`train2` / `valid2` read the notebook-level `device`);
      kept so existing callers keep working.
    patience: with early stopping, training stops once the number of consecutive
      non-improving epochs exceeds this value.
    early_stopping: whether to stop early on stalled validation loss.
    file_model: destination passed to `torch.save` for the best checkpoint.

  Returns:
    (train_loss_history, train_f2_history, valid_loss_history, valid_f2_history),
    one entry per completed epoch.
  """
  train_loss_history = []
  valid_loss_history = []
  train_f2_history = []
  valid_f2_history = []
  delta = 0
  best_score = None
  # float('inf') instead of np.Inf, which was removed in NumPy 2.0
  valid_loss_min = float('inf')
  counter_early_stop = 0
  early_stop = False

  for epoch in range(epochs):
    t0 = datetime.now()

    # One pass over the training data, then one over the validation data
    train_loss, train_f2_mean = train2(train_loader, model, optimizer, loss_function,
                                  wandb.config.log_batch, wandb.config.log_interval,
                                  wandb.config.grad_clipping, wandb.config.max_norm)
    valid_loss, valid_f2_mean = valid2(valid_loader, model, optimizer, loss_function,
                                    wandb.config.log_batch, wandb.config.log_interval)

    dt = datetime.now() - t0

    # Save history of the losses and F2 scores
    train_loss_history.append(train_loss)
    train_f2_history.append(train_f2_mean)
    valid_loss_history.append(valid_loss)
    valid_f2_history.append(valid_f2_mean)

    # Higher score == lower validation loss. A NaN loss compares False here,
    # so it never counts as an improvement and never overwrites the checkpoint.
    score = -valid_loss
    first_epoch = best_score is None
    if first_epoch or score >= best_score + delta:
      saving_label = 'Saving Model...' if first_epoch else 'Saving model...'
      print(f'Validation loss has decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). {saving_label}')
      torch.save(model.state_dict(), file_model)
      best_score = score
      valid_loss_min = valid_loss
      counter_early_stop = 0
    elif early_stopping:
      counter_early_stop += 1
      print(f'Early stoping counter: {counter_early_stop} out of {patience}')
      if counter_early_stop > patience:
        early_stop = True
    else:
      print(f'Validation loss has not decreased ({valid_loss_min:.6f} --> {valid_loss:.6f}). Not Saving Model...')

    # Log all epoch metrics in one W&B step so they share the same x-axis value
    wandb.log({"Train epoch Loss :": train_loss, "Valid epoch Loss :": valid_loss,
               "Train epoch fbeta :": train_f2_mean, "Valid epoch fbeta :": valid_f2_mean})

    # Print the epoch summary
    print(f'Epoch : {epoch+1} / {epochs}')
    print(f'Time to complete {epoch+1} is {dt}')
    print(f'Train Loss: {train_loss : .4f} | Train fbeta: {train_f2_mean * 100 : .4f}%')
    print(f'Valid Loss: {valid_loss : .4f} | Valid fbeta: {valid_f2_mean * 100 : .4f}%')
    print()
    torch.cuda.empty_cache()

    # Stop only after the final epoch has been logged and printed
    if early_stop:
      print('Early Stopping')
      break

  return train_loss_history, train_f2_history, valid_loss_history, valid_f2_history


## META DATA

In [None]:
# Run configuration handed to W&B; the rest of the notebook reads it back via wandb.config.
hyperparameters2 = {
    # model shape
    'embed_dim': 500,
    'vocab_size': len(vocab_dict),
    'output_dim': 10,
    'hidden_dim1': 300,
    'hidden_dim2': 300,
    # optimisation
    'epochs': 50,
    'batch_size': 128,
    'learning_rate': 0.8,
    # run description
    'dataset': "Query",
    'architecture': "embedbag_2_hidden_layer",
    # logging and checkpointing
    'log_interval': 25,
    'log_batch': True,
    'file_model': save_model_folder/'embedbag_2_hidden_layer_500_300.hw2.pt',
    # regularisation and scheduling
    'grad_clipping': False,
    'max_norm': 0,
    'momentum': 0,
    'patience': 10,
    'early_stopping': True,
    'scheduler_factor': 0,
    'scheduler_patience': 0,
    'weight_decay': 0,
}

# Activation for the hidden layers (F.elu is a drop-in alternative).
non_linearity = F.relu
# Use the first GPU when one is available, otherwise fall back to the CPU.
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [None]:
# Start a W&B run; the hyperparameter dict is passed as the run's config.
wandb.init(
    project='NLP_NN_Training',
    name='MLP_embed_500_300',
    config=hyperparameters2,
)

VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
Train Batch Acc :,▂▁▂▃▁▃▄▂▄▆▁▃▅▇▄▆▁▃▅▇▄▆▁▃▅▇▂▇▁▄▆█▂▇▂▄▆█▂▄
Train Batch Loss :,██▇▅▄▄▄▃▂▃▃▃▂▂▃▂▂▂▂▂▂▂▂▂▂▁▁▂▁▂▁▂▁▁▂▁▁▁▁▁
Train epoch Loss :,█▇▆▅▅▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▂▁▁▁▁▁▁▂▁▁▁▁▁▁
Train epoch fbeta :,▁▁▁▃▃▄▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇▇███▇██████
Valid Batch Accuracy :,▂▁▂▄▁▃▅▂▄▆▁▃▅▇▄▆▁▃▅▇▄▇▁▃▅▇▂▇▁▃▆█▂▇▁▄▆█▂▅
Valid Batch Loss :,██▅▅▃▄▂▂▅▃▃▃▂▃▂▂▂▂▂▂▂▁▂▂▆▁▂▁▃▂▂▂▁▂▁▁▂▂▂▂
Valid epoch Loss :,██▅▅▄▃▃▂▄▂▂▂▂▂▂▂▁▁▁▁▂▁▃▁▁▅▁▁▄▁▁▁▁▁▁▁▁▁▁▁
Valid epoch fbeta :,▁▁▂▃▄▅▅▆▆▇▇▆▇▇▇▇▇▇▇▇▇▇▆▇▇▆▇▇▇███████████

0,1
Train Batch Acc :,111.76005
Train Batch Loss :,0.0518
Train epoch Loss :,0.05441
Train epoch fbeta :,0.73444
Valid Batch Accuracy :,105.25118
Valid Batch Loss :,0.16701
Valid epoch Loss :,0.11909
Valid epoch fbeta :,0.68275


In [None]:
# Record the compute device and the activation in the run config, then echo both back.
wandb.config.device = device
wandb.config.non_linearity = non_linearity

print(wandb.config.device)
print(wandb.config.non_linearity)

cpu
torch.nn.functional.relu


## DataLoader, Loss Function, Optimizer

In [None]:
# Seed every random number generator in use so runs are repeatable
SEED = 2345
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

# Data loaders: only the training split is shuffled
train_loader2 = torch.utils.data.DataLoader(
    dataset=trainset,
    batch_size=wandb.config.batch_size,
    shuffle=True,
    collate_fn=collate_batch,
    num_workers=2,
)
valid_loader2 = torch.utils.data.DataLoader(
    dataset=validset,
    batch_size=wandb.config.batch_size,
    shuffle=False,
    collate_fn=collate_batch,
    num_workers=2,
)
test_loader2 = torch.utils.data.DataLoader(
    dataset=testset,
    batch_size=wandb.config.batch_size,
    shuffle=False,
    collate_fn=collate_batch,
    num_workers=2,
)

# Binary cross-entropy over the model's sigmoid outputs (one term per label)
loss_function = nn.BCELoss()

# Model, built from the W&B config and moved to the compute device
model = MLPCustom(
    wandb.config.embed_dim,
    wandb.config.vocab_size,
    wandb.config.hidden_dim1,
    wandb.config.hidden_dim2,
    wandb.config.output_dim,
    non_linearity,
)
model.to(device)

# Plain stochastic gradient descent, also recorded in the run config
optimizer = torch.optim.SGD(params=model.parameters(), lr=wandb.config.learning_rate)
wandb.config.optimizer = optimizer


In [None]:
# Ask W&B to track the model (log='all') every 25 batches and to log its graph.
wandb.watch(
    model,
    log='all',
    log_freq=25,
    log_graph=True,
)

[34m[1mwandb[0m: logging graph, to disable use `wandb.watch(log_graph=False)`


[<wandb.wandb_torch.TorchGraph at 0x7f4550562b10>]

In [None]:
# Reset the global example/batch counters that train2 and valid2 increment.
example_ct_train = batch_ct_train = example_ct_valid = batch_ct_valid = 0

# Note: the *_acc_history variables receive the per-epoch F2 histories.
(train_loss_history, train_acc_history,
 valid_loss_history, valid_acc_history) = train_loop2(
    train_loader=train_loader2,
    valid_loader=valid_loader2,
    model=model,
    loss_function=loss_function,
    optimizer=optimizer,
    epochs=wandb.config.epochs,
    device=wandb.config.device,
    patience=wandb.config.patience,
    early_stopping=wandb.config.early_stopping,
    file_model=wandb.config.file_model,
)

Validation loss has decreased (inf --> 0.255653). Saving Model...
Epoch : 1 / 50
Time to complete 1 is 0:00:20.698581
Train Loss:  0.3391 | Train fbeta:  21.8803%
Valid Loss:  0.2557 | Valid fbeta:  33.7025%

Validation loss has decreased (0.255653 --> 0.229001). Saving model...
Epoch : 2 / 50
Time to complete 2 is 0:00:21.178255
Train Loss:  0.2180 | Train fbeta:  44.7533%
Valid Loss:  0.2290 | Valid fbeta:  45.3354%

Validation loss has decreased (0.229001 --> 0.192592). Saving model...
Epoch : 3 / 50
Time to complete 3 is 0:00:20.818921
Train Loss:  0.1803 | Train fbeta:  52.9653%
Valid Loss:  0.1926 | Valid fbeta:  50.9257%

Validation loss has decreased (0.192592 --> 0.187221). Saving model...
Epoch : 4 / 50
Time to complete 4 is 0:00:21.802790
Train Loss:  0.1619 | Train fbeta:  55.9545%
Valid Loss:  0.1872 | Valid fbeta:  55.9227%

Validation loss has decreased (0.187221 --> 0.157558). Saving model...
Epoch : 5 / 50
Time to complete 5 is 0:00:20.976148
Train Loss:  0.1513 | Trai

## Load model

In [None]:
# Rebuild the architecture and restore the best checkpoint written during training.
model_load2 = MLPCustom(wandb.config.embed_dim, wandb.config.vocab_size, wandb.config.hidden_dim1, wandb.config.hidden_dim2, wandb.config.output_dim, non_linearity)
model_load2.to(device)
# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only runtime (and vice versa).
model_load2.load_state_dict(torch.load(wandb.config.file_model, map_location=device))

<All keys matched successfully>

The evaluation metric is the F-beta score with beta = 2.0 (recall is weighted more heavily than precision), computed per tag and then averaged over the tags.

In [None]:

def get_fbeta_pred2(data_loader, model):
  """Predict labels for every batch in `data_loader` and score them with F2.

  Relies on the notebook-level names `device` and `fbeta_score`.

  Args:
    data_loader: iterable yielding (input, targets, offsets) batches.
    model: module called as ``model(input, offsets)``; it is put in eval mode.

  Returns:
    (predictions, test_f2_sum): the 0/1 predictions for the whole loader as one
    float tensor on `device`, and the F2 score (beta=2) computed per label over
    the whole loader and then averaged across labels.
  """
  model.eval()

  # Collect per-batch results and concatenate once at the end; concatenating
  # inside the loop re-copies everything gathered so far on every batch.
  pred_batches = []
  target_batches = []

  with torch.no_grad():
    for input, targets, offsets in data_loader:
      # move inputs and targets to the compute device
      input = input.to(device)
      targets = targets.to(device)
      offsets = offsets.to(device)

      # Forward pass
      output = model(input, offsets)

      # Convert the outputs into hard labels 0 or 1
      pred_batches.append((output > 0.5).float())
      target_batches.append(targets.float())

  if pred_batches:
    predictions = torch.cat(pred_batches)
    y = torch.cat(target_batches)
  else:
    # Empty loader: fall back to empty tensors, as the accumulate-in-place version did.
    predictions = torch.Tensor().to(device)
    y = torch.Tensor().to(device)

  # scikit-learn needs host-side arrays; CUDA tensors cannot be converted implicitly.
  test_f2_sum = fbeta_score(y_true=y.cpu().numpy(), y_pred=predictions.cpu().numpy(),
                            beta=2, average=None, zero_division=0).mean()

  return predictions, test_f2_sum

In [None]:
# Score the restored checkpoint on the held-out test loader.
predictions2, fbeta_test2 = get_fbeta_pred2(
    data_loader=test_loader2,
    model=model_load2,
)

In [None]:
# Display the test-set F2 score returned by get_fbeta_pred2.
fbeta_test2

0.7411627999746758

The F2 score (F-beta with beta = 2, averaged over tags) on the test set is 0.74.