In [1]:
import numpy as np  # for handling multi-dimensional array operation
import pandas as pd  # for reading data from csv 
import statsmodels.api as sm  # for finding the p-value
from sklearn.preprocessing import MinMaxScaler  # for normalization
from sklearn.model_selection import train_test_split as tts
from sklearn.metrics import accuracy_score 
from sklearn.utils import shuffle
import matplotlib.pyplot as plt

In [2]:
def precision(label, confusion_matrix):
    """Return the diagonal entry for ``label`` divided by the sum of column ``label``.

    Parameters
    ----------
    label : int
        Positional index of the class inside ``confusion_matrix``.
    confusion_matrix : array-like
        2-D array supporting NumPy-style ``[i, j]`` / ``[:, j]`` indexing.
        NOTE(review): this is "precision" only if columns hold the predicted
        class -- confirm the orientation at the call site.

    Returns
    -------
    The ratio ``confusion_matrix[label, label] / confusion_matrix[:, label].sum()``.
    """
    hits = confusion_matrix[label, label]
    column_total = confusion_matrix[:, label].sum()
    return hits / column_total
    
def recall(label, confusion_matrix):
    """Return the diagonal entry for ``label`` divided by the sum of row ``label``.

    Parameters
    ----------
    label : int
        Positional index of the class inside ``confusion_matrix``.
    confusion_matrix : array-like
        2-D array supporting NumPy-style ``[i, j]`` / ``[i, :]`` indexing.
        NOTE(review): this is "recall" only if rows hold the actual class --
        confirm the orientation at the call site.

    Returns
    -------
    The ratio ``confusion_matrix[label, label] / confusion_matrix[label, :].sum()``.
    """
    hits = confusion_matrix[label, label]
    row_total = confusion_matrix[label, :].sum()
    return hits / row_total

def precision_macro_average(confusion_matrix):
    """Return the unweighted mean of ``precision`` over all row indices.

    ``confusion_matrix`` must be 2-D (its shape is unpacked into two values);
    one precision value is computed per row index and the values are averaged
    with equal weight.
    """
    n_rows, _ = confusion_matrix.shape
    per_label = (precision(label, confusion_matrix) for label in range(n_rows))
    return sum(per_label) / n_rows

def recall_macro_average(confusion_matrix):
    """Return the unweighted mean of ``recall`` over all column indices.

    ``confusion_matrix`` must be 2-D (its shape is unpacked into two values).
    NOTE(review): labels are enumerated from the column count while ``recall``
    indexes rows, so this presumably expects a square matrix -- confirm.
    """
    _, n_columns = confusion_matrix.shape
    per_label = (recall(label, confusion_matrix) for label in range(n_columns))
    return sum(per_label) / n_columns

In [3]:
# Common path prefix of the result pickles; the loading cell appends
# "<division>.pkl" to it.
# NOTE(review): absolute, user-specific path -- the notebook will not run on
# another machine without editing this line.
parent_path = '/home/tushar/accuracy_final_new_'

In [4]:
# Names of the predicted-tag and actual-tag columns used by every later cell.
pred_col, actual_col = 'pred', 'actual'

In [7]:
# Load the five per-division result pickles and stack them into a single frame.
# The frames are collected first and concatenated once: DataFrame.append was
# removed in pandas 2.0 and re-copied the accumulated data on every iteration.
# NOTE(review): read_pickle can execute arbitrary code while loading -- only
# point parent_path at files you trust.
division_frames = [
    pd.read_pickle(parent_path + str(division) + '.pkl')
    for division in [1, 2, 3, 4, 5]
]
accuracy_df = pd.concat(division_frames)
# Map the source column names onto the short names used below.
# 'predicted_lable' is presumably the spelling stored in the pickles -- verify
# against the files before "correcting" it.
accuracy_df = accuracy_df.rename(columns={'predicted_lable':'pred','true_label':'actual','words':'word'})


In [8]:
# Overall accuracy: share of rows whose predicted tag equals the actual tag.
correct_rows = accuracy_df[accuracy_df[pred_col] == accuracy_df[actual_col]]
print(len(correct_rows) / len(accuracy_df))

0.9463079806936028


In [9]:
# Distinct values of the actual-tag column, in order of first appearance.
unique_tags = accuracy_df.loc[:, actual_col].unique()

In [10]:
# Show the distinct values of the actual-tag column held in `unique_tags`.
unique_tags

array(['DET', 'NOUN', 'VERB', 'ADP', 'ADJ', '.', 'CONJ', 'PRON', 'PRT',
       'ADV', 'NUM', 'X'], dtype=object)

In [11]:
# Per-tag accuracy: for every actual tag, the share of its rows whose
# prediction matches. One summary record is collected per tag and the frame is
# built once at the end; DataFrame.append (used previously) was removed in
# pandas 2.0 and copied the whole frame on every iteration.
per_pos_records = []
for tag in unique_tags:
    temp_accuracy_df = accuracy_df[accuracy_df[actual_col]==tag]
    tag_count = len(temp_accuracy_df)
    n_correct = int((temp_accuracy_df[pred_col]==temp_accuracy_df[actual_col]).sum())
    accuracy = n_correct/tag_count
    print(tag,tag_count,accuracy)
    per_pos_records.append({'tag': tag, 'count': tag_count, 'accuracy': accuracy})
# Explicit column order reproduces the alphabetical layout the append-based
# version produced; 'count' now stays an integer instead of being upcast to float.
per_pos_df = pd.DataFrame(per_pos_records, columns=['accuracy', 'count', 'tag'])

DET 136206 0.9868287740628167
NOUN 273974 0.9134990911546351
VERB 181780 0.9453240180437892
ADP 143873 0.9661437517810847
ADJ 83132 0.8916782947601405
. 146764 0.9997955901992314
CONJ 37873 0.9944023446782668
PRON 49050 0.9840774719673803
PRT 29706 0.9002558405709284
ADV 55878 0.8902788217187444
NUM 14824 0.8888963842417701
X 1376 0.3372093023255814


In [16]:
# Confusion matrix as a DataFrame: rows = predicted tag, columns = actual tag.
# size() counts every row of each (pred, actual) pair; counting the non-null
# entries of 'word' would silently drop rows with a missing word and disagree
# with the len()-based accuracies computed earlier.
pair_counts = accuracy_df.groupby([pred_col, actual_col]).size().unstack(fill_value=0)
# Force a square matrix with identical tag order on both axes, so positional
# index i refers to the same tag in rows and columns even when some tag is
# never predicted (or never occurs as an actual tag).
all_tags = pair_counts.index.union(pair_counts.columns)
confusion_matrix = (
    pair_counts
    .reindex(index=all_tags, columns=all_tags, fill_value=0)
    .rename_axis(index=pred_col, columns=actual_col)
)

In [17]:
# Display the per-tag summary table (columns: accuracy, count, tag).
per_pos_df

Unnamed: 0,accuracy,count,tag
0,0.986829,136206.0,DET
1,0.913499,273974.0,NOUN
2,0.945324,181780.0,VERB
3,0.966144,143873.0,ADP
4,0.891678,83132.0,ADJ
5,0.999796,146764.0,.
6,0.994402,37873.0,CONJ
7,0.984077,49050.0,PRON
8,0.900256,29706.0,PRT
9,0.890279,55878.0,ADV


In [18]:
# Emit the per-tag table, rounded to two decimals, as a LaTeX tabular.
per_pos_latex = per_pos_df.round(2).to_latex()
print(per_pos_latex)

\begin{tabular}{lrrl}
\toprule
{} &  accuracy &     count &   tag \\
\midrule
0  &      0.99 &  136206.0 &   DET \\
1  &      0.91 &  273974.0 &  NOUN \\
2  &      0.95 &  181780.0 &  VERB \\
3  &      0.97 &  143873.0 &   ADP \\
4  &      0.89 &   83132.0 &   ADJ \\
5  &      1.00 &  146764.0 &     . \\
6  &      0.99 &   37873.0 &  CONJ \\
7  &      0.98 &   49050.0 &  PRON \\
8  &      0.90 &   29706.0 &   PRT \\
9  &      0.89 &   55878.0 &   ADV \\
10 &      0.89 &   14824.0 &   NUM \\
11 &      0.34 &    1376.0 &     X \\
\bottomrule
\end{tabular}



In [19]:
# Per-tag recall, precision and F1 derived from the confusion matrix.
# confusion_matrix has rows = predicted and columns = actual; it is transposed
# once here (rows = actual) so that recall() sums over a row of actual
# occurrences and precision() sums over a column of predictions.
# Tags are addressed by position, so both axes must list them in the same order.
assert list(confusion_matrix.index) == list(confusion_matrix.columns), \
    "confusion_matrix must be square with identically ordered tags on both axes"
actual_by_pred = np.array(confusion_matrix).T
prec_recall_records = []
for i, idx in enumerate(confusion_matrix.index):
    rec = recall(i, actual_by_pred)
    prec = precision(i, actual_by_pred)
    pr_sum = rec + prec
    # With no correct prediction for a tag, recall and precision are both 0 and
    # the plain formula yields NaN, which mean() later skips (inflating the
    # macro F1). Report 0 in that case; NaN inputs still propagate as NaN.
    f1 = 2*(rec * prec) / pr_sum if pr_sum != 0 else 0.0
    prec_recall_records.append({'tag': idx, 'recall': rec, 'precision': prec, 'F1 score': f1})
# Build the frame once (DataFrame.append was removed in pandas 2.0); the
# explicit column order reproduces the layout of the append-based version.
prec_recall = pd.DataFrame(prec_recall_records, columns=['F1 score', 'precision', 'recall', 'tag'])
    

In [20]:
# Emit the confusion matrix with integer counts as a LaTeX tabular (index included).
confusion_counts = confusion_matrix.astype(int)
print(confusion_counts.to_latex(index=True))

\begin{tabular}{lrrrrrrrrrrrr}
\toprule
actual &       . &    ADJ &     ADP &    ADV &   CONJ &     DET &    NOUN &    NUM &   PRON &    PRT &    VERB &    X \\
pred &         &        &         &        &        &         &         &        &        &        &         &      \\
\midrule
.    &  146734 &    277 &      25 &    124 &      3 &       0 &    4351 &    171 &      4 &     11 &     475 &  108 \\
ADJ  &       0 &  74127 &      74 &   2204 &      0 &       1 &    4956 &     78 &      2 &     39 &     826 &   68 \\
ADP  &       1 &    250 &  139002 &   1739 &     15 &     646 &    1763 &     53 &    413 &   2310 &    1199 &   62 \\
ADV  &       0 &   2423 &    2060 &  49747 &    115 &      88 &     203 &      0 &      3 &    234 &     132 &    5 \\
CONJ &       0 &      0 &     146 &    107 &  37661 &      26 &       1 &      0 &      0 &      0 &       0 &    4 \\
DET  &       1 &   2017 &     176 &    280 &     75 &  134412 &    4237 &    397 &    301 &     23 &     748 &  122 

In [21]:
# Emit the per-tag precision/recall/F1 table, rounded to two decimals, as LaTeX.
prec_recall_rounded = prec_recall.round(2)
print(prec_recall_rounded.to_latex(index=True))

\begin{tabular}{lrrrl}
\toprule
{} &  F1 score &  precision &  recall &   tag \\
\midrule
0  &      0.98 &       0.96 &    1.00 &     . \\
1  &      0.90 &       0.90 &    0.89 &   ADJ \\
2  &      0.95 &       0.94 &    0.97 &   ADP \\
3  &      0.90 &       0.90 &    0.89 &   ADV \\
4  &      0.99 &       0.99 &    0.99 &  CONJ \\
5  &      0.96 &       0.94 &    0.99 &   DET \\
6  &      0.93 &       0.95 &    0.91 &  NOUN \\
7  &      0.93 &       0.97 &    0.89 &   NUM \\
8  &      0.95 &       0.91 &    0.98 &  PRON \\
9  &      0.90 &       0.90 &    0.90 &   PRT \\
10 &      0.96 &       0.97 &    0.95 &  VERB \\
11 &      0.46 &       0.70 &    0.34 &     X \\
\bottomrule
\end{tabular}



In [22]:
# Macro averages: unweighted mean of each metric over all tags.
# numeric_only=True excludes the string 'tag' column explicitly; without it
# pandas >= 2.0 raises a TypeError instead of silently dropping that column.
prec_recall.mean(numeric_only=True)

F1 score     0.900381
precision    0.920600
recall       0.891532
dtype: float64