## Model evaluation

In [40]:
import numpy as np
import pandas as pd
from fastai.text.all import *
import torch
from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score

In [8]:
# Load the exported fastai learner.
# The path uses forward slashes so it resolves on Windows, Linux and macOS alike;
# a backslash-separated raw string is only treated as a nested path on Windows.
# cpu=False is kept unchanged from the original call.
# NOTE(review): load_learner unpickles the file, so only load exports from a trusted source.
learner = load_learner("../model/classifier_model.pkl", cpu=False)

In [4]:
# Read the pickled classifier data object; later cells use its `categorize.vocab`.
# The path uses forward slashes so it resolves on Windows, Linux and macOS alike;
# a backslash-separated raw string is only treated as a nested path on Windows.
# NOTE(review): torch.load unpickles the file, so only load data from a trusted source.
fastai_text_classifier_data = torch.load("../classifier_data.pkl")

In [10]:
# Show the class vocabulary to see which position each label occupies.
# The probabilities returned for each prediction are in this same order.
fastai_text_classifier_data.categorize.vocab

['Extremely Negative', 'Extremely Positive', 'Negative', 'Neutral', 'Positive']

### Training data

In [5]:
# Load the Corona NLP training CSV into a DataFrame; the file is decoded as latin-1
train_csv = "../archive/Corona_NLP_train.csv"
train = pd.read_csv(train_csv, encoding="latin-1")

In [6]:
# List the column names of the training frame
train.columns

Index(['UserName', 'ScreenName', 'Location', 'TweetAt', 'OriginalTweet',
       'Sentiment'],
      dtype='object')

In [15]:
# Sanity-check the learner on one tweet: the row with index label 1
sample_tweet = train.loc[1, 'OriginalTweet']
print(sample_tweet)
# predict returns a 3-tuple: (label, class index, per-class probabilities)
train_pred_class = learner.predict(sample_tweet)
print(train_pred_class)

advice Talk to your neighbours family to exchange phone numbers create contact list with phone numbers of neighbours schools employer chemist GP set up online shopping accounts if poss adequate supplies of regular meds but not over order


('Positive', tensor(4), tensor([0.0077, 0.1782, 0.1028, 0.2581, 0.4532]))


In [26]:
# Score every training tweet in one pass.
# test_dl is given the whole tweet column here. Per the original author's note,
# a single text can be fed the same way, e.g.
# learner.dls.test_dl("this is such a nice day") -- not exercised in this notebook.
tweets = train['OriginalTweet']
train_dl = learner.dls.test_dl(tweets)
# get_preds returns a tuple; element 0 is the per-class probability tensor
train_pred_proba = learner.get_preds(dl=train_dl)

In [27]:
# Display the raw get_preds result: a tuple of (probability tensor, None) for this test DataLoader
train_pred_proba

(tensor([[0.0037, 0.0053, 0.0265, 0.9197, 0.0448],
         [0.0077, 0.1782, 0.1028, 0.2581, 0.4532],
         [0.0010, 0.2235, 0.0425, 0.0481, 0.6850],
         ...,
         [0.0199, 0.3096, 0.1744, 0.0234, 0.4727],
         [0.0219, 0.3463, 0.1588, 0.0264, 0.4466],
         [0.0675, 0.1622, 0.2838, 0.0610, 0.4255]]),
 None)

N.B.: Instead of using `learner.predict`, we can also use the probabilities to predict the class. This is the more flexible approach when we want to manually set a different probability threshold for each class. In this case I am not setting thresholds manually, so I will use argmax to get the index of the highest probability and look up the class with `fastai_text_classifier_data.categorize.vocab[index]`.

In [36]:
# Convert the probability matrix into class labels.
# train_pred_proba[0] has one row of class probabilities per tweet: argmax over
# dim=1 picks the most probable column, and .tolist() yields plain Python ints
# that can index the vocab directly.
# The indices get their own name so train_pred_classes only ever holds labels.
class_vocab = fastai_text_classifier_data.categorize.vocab
train_pred_idx = train_pred_proba[0].argmax(dim=1).tolist()
train_pred_classes = [class_vocab[idx] for idx in train_pred_idx]
# Preview the first ten predicted labels
train_pred_classes[:10]

['Neutral',
 'Positive',
 'Positive',
 'Positive',
 'Extremely Negative',
 'Positive',
 'Neutral',
 'Neutral',
 'Positive',
 'Negative']

#### Training evaluation

In [44]:
# Weighted-average precision, recall and F1 of the predicted labels against the
# true 'Sentiment' column. The printed labels are kept exactly as in the
# recorded output of this cell.
y_true = train['Sentiment']
for label, metric in (
    ("precission", precision_score),
    ("reacall", recall_score),
    ("f1_score", f1_score),
):
    print(f"{label}: {metric(y_true, train_pred_classes, average='weighted')}")

precission: 0.605902180993759
reacall: 0.6062152246276453
f1_score: 0.6054566854753315
