In [1]:
import sys
import re
import os
from pathlib import Path
from collections import namedtuple
import numpy as np

### Taking data set from Seeval 2016 - task 5 subset 1 : http://alt.qcri.org/semeval2016/task5/

In [2]:
import os
import sys
import pandas as pd
import numpy as np
import pickle
from collections import Counter, defaultdict
import re
import ast
pd.set_option('display.max_colwidth' , -1)

### English - Restaurant domain training data

In [3]:
eng_multi_aspects = pd.read_csv('../data/English_restaurants.csv')
eng_multi_aspects['aspects'] = eng_multi_aspects['aspects'].apply(lambda x: ast.literal_eval(x))
eng_multi_aspects['polarities'] = eng_multi_aspects['polarities'].apply(lambda x: ast.literal_eval(x))
eng_multi_aspects.head(2)

Unnamed: 0,aspects,polarities,text
0,[RESTAURANT#GENERAL],[negative],"Judging from previous posts this used to be a good place, but not any longer."
1,[SERVICE#GENERAL],[negative],"We, there were four of us, arrived at noon - the place was empty - and the staff acted like we were imposing on them and they were very rude."


In [4]:
eng_multi_aspects.aspects.apply(pd.Series).merge(eng_multi_aspects , right_index = True , left_index = True)\
.drop(['aspects' , 'polarities'] ,axis = 1).melt(id_vars = ['text']).drop(['variable'] , axis = 1).dropna().value.value_counts(normalize = True)

FOOD#QUALITY                0.338652
SERVICE#GENERAL             0.179099
RESTAURANT#GENERAL          0.168329
AMBIENCE#GENERAL            0.101715
FOOD#STYLE_OPTIONS          0.054647
RESTAURANT#MISCELLANEOUS    0.039091
FOOD#PRICES                 0.035899
RESTAURANT#PRICES           0.031911
DRINKS#QUALITY              0.018748
DRINKS#STYLE_OPTIONS        0.012764
LOCATION#GENERAL            0.011169
DRINKS#PRICES               0.007978
Name: value, dtype: float64

### Dutch  - Restaurant domain training data

In [5]:
du_multi_aspects = pd.read_csv('../data/Dutch_restaurants.csv')
du_multi_aspects['aspects'] = du_multi_aspects['aspects'].apply(lambda x: ast.literal_eval(x))
du_multi_aspects['polarities'] = du_multi_aspects['polarities'].apply(lambda x: ast.literal_eval(x))
du_multi_aspects.head(2)

Unnamed: 0,aspects,polarities,text
0,[SERVICE#GENERAL],[negative],Lange wachttijd.
1,[FOOD#STYLE_OPTIONS],[negative],"Zelfde dessert, 2 dagen na mekaar."


### Spanish Restaurant domain training data

In [6]:
spanish_multi_aspects = pd.read_csv('../data/Spanish_restaurants.csv')
spanish_multi_aspects['aspects'] = spanish_multi_aspects['aspects'].apply(lambda x: ast.literal_eval(x))
spanish_multi_aspects['polarities'] = spanish_multi_aspects['polarities'].apply(lambda x: ast.literal_eval(x))
spanish_multi_aspects.head(2)

Unnamed: 0,aspects,polarities,text
0,[RESTAURANT#GENERAL],[positive],Nos sentimos muy a gusto.
1,"[SERVICE#GENERAL, AMBIENCE#GENERAL, FOOD#QUALITY]","[positive, positive, positive]","Buen servicio, ambiente Acogedor y tranquilo, comida bien."


In [7]:
def extract_aspects(lst_aspects):
    res = []
    for x in lst_aspects:
        entity = x.split('#')[0] ; attribute = x.split('#')[1] ; res.append(entity)       
    return res
eng_multi_aspects['aspects2'] = eng_multi_aspects['aspects'].apply(lambda x: extract_aspects(x))
eng_multi_aspects['aspects2'] = eng_multi_aspects['aspects2'].apply(lambda x : list(set(x)))

du_multi_aspects['aspects2'] = du_multi_aspects['aspects'].apply(lambda x: extract_aspects(x))
du_multi_aspects['aspects2'] = du_multi_aspects['aspects2'].apply(lambda x : list(set(x)))

spanish_multi_aspects['aspects2'] = spanish_multi_aspects['aspects'].apply(lambda x: extract_aspects(x))
spanish_multi_aspects['aspects2'] = spanish_multi_aspects['aspects2'].apply(lambda x : list(set(x)))

In [8]:
eng_multi_aspects.aspects2.apply(pd.Series).merge(eng_multi_aspects , right_index = True , left_index = True)\
.drop(['aspects' , 'polarities', 'aspects2'] ,axis = 1).melt(id_vars = ['text']).drop(['variable'] , axis = 1).dropna().value.value_counts(normalize = True).reset_index()[['index']]\
.to_csv('../data/apsect_names.txt' , header = None , index = None , mode = 'w')

### Extract aspect embeddings

In [9]:
LASER_PATH = ".."
sys.path.append(LASER_PATH + '/source')
sys.path.append(LASER_PATH + '/source/lib')

DATA_PATH = Path("../data/")
CACHE_PATH = Path("cache/")
CACHE_PATH.mkdir(exist_ok=True)
MODEL_PATH = Path("../models")

os.environ["LASER"] = LASER_PATH 
SPACE_NORMALIZER = re.compile("\s+")
Batch = namedtuple('Batch', 'srcs tokens lengths')

In [10]:
from indexing import IndexLoad, IndexTextOpen, IndexTextQuery, IndexSearchKNN, IndexCreate, IndexSearchMultiple
from embed import SentenceEncoder, EncodeLoad, EncodeFile
from text_processing import Token, BPEfastApply

#### Following steps from https://medium.com/the-artificial-impostor/multilingual-similarity-search-using-pretrained-bidirectional-lstm-encoder-e34fac5958b0 for tokenization , BPE Fast and Embedding extractions 

In [11]:
encoder = SentenceEncoder(
    str(MODEL_PATH / "bilstm.93langs.2018-12-26.pt"),
    max_sentences=None,
    max_tokens=10000,
    cpu=False)
bpe_codes = str(MODEL_PATH / "93langs.fcodes")

for lang in ("en" ,"nl", 'es'):  ##"zh" for chinese , nl  for dutch and es for spanish
    Token(  #../data/apsect_names.txt
        str(DATA_PATH / f"apsect_names.txt"), ##english_resturant.txt
        str(CACHE_PATH / f"apsect_names.txt"),
        lang=lang,
        romanize=False,
        lower_case=True, gzip=False,
        verbose=True)
    BPEfastApply(
        str(CACHE_PATH / f"apsect_names.txt"),
        str(CACHE_PATH / f"apsect_names.bpe"),
        bpe_codes,
        verbose=True, over_write=True)
    EncodeFile(
        encoder,
        str(CACHE_PATH / f"apsect_names.bpe"),
        str(CACHE_PATH / f"apsect_names.enc"),
        verbose=True, over_write=True)    

 - Tokenizer: apsect_names.txt exists already
 - Tokenizer: apsect_names.txt exists already
 - Tokenizer: apsect_names.txt exists already


In [12]:
data_aspect, index_aspect = IndexCreate(
     str(CACHE_PATH / f"apsect_names.enc"), 'FlatL2', verbose=True, save_index=False)

 - embedding: cache/apsect_names.enc 6 examples of dim 1024
 - creating FAISS index


In [13]:
data_aspect.shape

(6, 1024)

## Extract Setence Embeddings

In [15]:
LASER_PATH = ".."
sys.path.append(LASER_PATH + '/source')
sys.path.append(LASER_PATH + '/source/lib')

DATA_PATH = Path("../data/tatoeba/v1/")
CACHE_PATH = Path("cache/")
CACHE_PATH.mkdir(exist_ok=True)
MODEL_PATH = Path("../models")

os.environ["LASER"] = LASER_PATH 
SPACE_NORMALIZER = re.compile("\s+")
Batch = namedtuple('Batch', 'srcs tokens lengths')

data_en, index_en = IndexCreate(
    str(CACHE_PATH / "en_resturant.enc"), 'FlatL2', verbose=True, save_index=False)
data_du, index_du = IndexCreate(
    str(CACHE_PATH / "nl_resturant.enc"), 'FlatL2', verbose=True, save_index=False)
data_spanish, index_spanish = IndexCreate(
    str(CACHE_PATH / "es_resturant.enc"), 'FlatL2', verbose=True, save_index=False)

 - embedding: cache/en_resturant.enc 1708 examples of dim 1024
 - creating FAISS index
 - embedding: cache/nl_resturant.enc 1317 examples of dim 1024
 - creating FAISS index
 - embedding: cache/es_resturant.enc 1626 examples of dim 1024
 - creating FAISS index


##### Because dataset of semeval is not exact translation of each other , some of the above results are not good. 

### Creating multi label classification task using LASER sentence embedding. 
We can have 6 aspect categories , present for each review. We will train a simple 1 layer Neural Network model using 1024 dimensional sentence embedding as input and 6 categories as output.  
Train the model on 1700 English sentences and Validate on 1300 Dutch sentences . We are getting around 85 % accuracy and 57% f1 score(Macro) 

In [19]:
eng_multi_aspects.head(2)

Unnamed: 0,polarities,text,aspects2
0,[negative],"Judging from previous posts this used to be a good place, but not any longer.",[RESTAURANT]
1,[negative],"We, there were four of us, arrived at noon - the place was empty - and the staff acted like we were imposing on them and they were very rude.",[SERVICE]


In [17]:
eng_multi_aspects.drop(columns=[ 'aspects'] , inplace=True)
du_multi_aspects.drop(columns=[ 'aspects'] , inplace=True)
spanish_multi_aspects.drop(columns=['aspects'] , inplace=True)

Unnamed: 0,polarities,text,aspects2
0,[negative],"Judging from previous posts this used to be a good place, but not any longer.",[RESTAURANT]
1,[negative],"We, there were four of us, arrived at noon - the place was empty - and the staff acted like we were imposing on them and they were very rude.",[SERVICE]
2,[negative],"They never brought us complimentary noodles, ignored repeated requests for sugar, and threw our dishes on the table.",[SERVICE]
3,"[negative, negative]",The food was lousy - too sweet or too salty and the portions tiny.,[FOOD]
4,[negative],"After all that, they complained to me about the small tip.",[SERVICE]


In [15]:
from sklearn.model_selection import train_test_split
train_aspects , val_aspects, train_df , val_df = train_test_split(eng_multi_aspects, data_en , test_size = 0.2 , random_state = 42)

In [16]:
from sklearn.preprocessing import MultiLabelBinarizer
mlb  = MultiLabelBinarizer()
tr_eng = mlb.fit_transform(train_aspects.aspects2)
val_eng = mlb.transform(val_aspects.aspects2)
y_du  = mlb.transform(du_multi_aspects.aspects2)
y_spainish  = mlb.transform(spanish_multi_aspects.aspects2)

In [17]:
train_aspects.reset_index(inplace=True , drop= True)
train_fn = pd.merge(train_aspects , pd.DataFrame(tr_eng , columns=mlb.classes_) , left_index=True , right_index=True)

val_aspects.reset_index(inplace=True , drop= True)
val_fn = pd.merge(val_aspects , pd.DataFrame(val_eng , columns=mlb.classes_) , left_index=True , right_index=True)

train_fn.drop(columns=['aspects2' ] , inplace=True)

# train_fn.to_csv('resturant_train_eng.csv' , index = False)
# val_fn.to_csv('resturant_val_eng.csv' , index = False)

In [18]:
from sklearn.preprocessing import StandardScaler
std_scale = StandardScaler().fit(train_df)
train_std = std_scale.transform(train_df) 
val_std = std_scale.transform(val_df)
dutch_std = std_scale.transform(data_du)
spanish_std = std_scale.transform(data_spanish)

In [19]:
train_std.shape , val_std.shape , dutch_std.shape , spanish_std.shape

((1366, 1024), (342, 1024), (1317, 1024), (1626, 1024))

In [20]:
import torch 
import torch
import torch.nn as nn

x_train,y_train,x_valid,y_valid , x_test , y_test  , x_test_sp , y_test_sp = map(torch.FloatTensor, (train_std,tr_eng,  val_std ,\
                                                                            val_eng, dutch_std,y_du, \
                                                                           spanish_std ,y_spainish ))
n,c = x_train.shape
y_train = y_train.type(torch.FloatTensor)
y_valid = y_valid.type(torch.FloatTensor)
y_test = y_test.type(torch.FloatTensor)
y_test_sp = y_test_sp.type(torch.FloatTensor)

print(y_train.shape , y_valid.shape , y_test.shape)
print(x_train.shape , x_valid.shape , y_test.shape)
batch_size = 64

torch.Size([1366, 6]) torch.Size([342, 6]) torch.Size([1317, 6])
torch.Size([1366, 1024]) torch.Size([342, 1024]) torch.Size([1317, 6])


In [21]:
# model = torch.nn.Sequential(
#     torch.nn.Linear(1024, 512),
#     torch.nn.Dropout(0.25),  # drop 10% of the neuron
#     torch.nn.ReLU(),
#     torch.nn.Linear(512, 384),
#     torch.nn.Dropout(0.25),  # drop 10% of the neuron
#     torch.nn.ReLU(),
#     torch.nn.Linear(384, 6),
# )

# print(model)

class Model(nn.Module):
    def __init__(self , p):
        super().__init__()
        self.hidden = nn.Linear(1024, 512)
        self.hidden2 = nn.Linear(512 , 256)
        self.hidden3 =  nn.Linear(256 , 128)
        self.activation = nn.ReLU()
        self.dropout = nn.Dropout(p)
        self.fc = nn.Linear(128, 6)

    def forward(self, x):
        x = self.activation(self.dropout(self.hidden(x)))
        x = self.activation(self.dropout(self.hidden2(x)))
        x = self.activation(self.dropout(self.hidden3(x)))
        x = self.fc(x)
        return x

In [22]:
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

train_ds = TensorDataset(x_train, y_train)
train_dl = DataLoader(train_ds, batch_size=batch_size , shuffle=True)

valid_ds = TensorDataset(x_valid, y_valid )
valid_dl = DataLoader(valid_ds , batch_size= batch_size)

test_ds = TensorDataset(x_test , y_test)
test_dl = DataLoader(test_ds , batch_size=batch_size)

test_ds2 = TensorDataset(x_test_sp , y_test_sp)
test_dl2 = DataLoader(test_ds2 , batch_size=batch_size)

In [23]:
class WrappedDataLoader():
    def __init__(self, dl, func):
        self.dl = dl
        self.func = func
        
    def __len__(self): return len(self.dl)
    
    def __iter__(self):
        batches = iter(self.dl)
        for b in batches: yield(self.func(*b))

In [24]:
dev = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
def preprocess(x,y): return x.to(dev),y.to(dev)

train_dl = WrappedDataLoader(train_dl, preprocess)
valid_dl = WrappedDataLoader(valid_dl, preprocess)
test_dl = WrappedDataLoader(test_dl , preprocess)
test_dl2 = WrappedDataLoader(test_dl2 , preprocess)

In [25]:
df_data_ratio = train_aspects.aspects2.apply(pd.Series).merge(train_aspects , right_index = True , left_index = True)\
.drop([ 'aspects2'] ,axis = 1).melt(id_vars = ['text']).drop(['variable'] , axis = 1).dropna().value.value_counts(normalize = True).reset_index()

In [26]:
df_data_ratio

Unnamed: 0,index,value
0,FOOD,0.361111
1,RESTAURANT,0.270531
2,SERVICE,0.198671
3,AMBIENCE,0.117754
4,DRINKS,0.038043
5,LOCATION,0.013889


In [27]:

def binary_accuracy(preds, y):
    rounded_preds = torch.round(torch.sigmoid(preds))
    correct = (rounded_preds == y).float() #convert into float for division 
    acc = correct.mean() #/ (len(correct))
    return acc


def fbeta_score(y_true, y_pred, beta, threshold, eps=1e-9):
    beta2 = beta**2

    y_pred = torch.ge(torch.sigmoid(y_pred).float(), threshold).float()
    y_true = y_true.float()

    true_positive = (y_pred * y_true).sum(dim=0)
    precision = true_positive.div(y_pred.sum(dim=0).add(eps))
    recall = true_positive.div(y_true.sum(dim=0).add(eps))
    
    return torch.mean(
        (precision*recall).
        div(precision.mul(beta2) + recall + eps).
        mul(1 + beta2)) , torch.mean(precision) , torch.mean(recall)


def f1_score(y_pred,y_true, threshold=0.5):
    f1 , precision , recall = fbeta_score(y_true, y_pred, 1, threshold) #; print('f1 score' , f1)
    return f1 , precision , recall

In [28]:
def train_model(model, iterator, optimizer, criterion):
    
    epoch_loss = 0
    epoch_acc = 0  
    epoch_f1 = 0 ; epoch_precision = 0 ; epoch_recall = 0
    model.train()
    ct = 0
    for x, y in iterator:
        optimizer.zero_grad()
        predictions = model(x)
        loss = criterion(predictions, y)
        acc = binary_accuracy(predictions, y)
        f1 , precision , recall = f1_score(predictions , y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        epoch_f1 += f1.item() 
        epoch_precision += precision.item()  
        epoch_recall += recall.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator) , epoch_f1/len(iterator), epoch_precision/len(iterator), epoch_recall/len(iterator)

In [29]:
def validate_model(model, iterator, criterion):
    
    epoch_loss = 0
    epoch_acc = 0 
    epoch_f1 = 0; epoch_precision = 0 ; epoch_recall = 0
    model.eval()
    with torch.no_grad():
        for x ,y  in iterator:

            predictions = model(x)#.squeeze(1)
            loss = criterion(predictions,y)
            acc = binary_accuracy(predictions, y) ; f1 , precision , recall = f1_score(predictions , y)
            epoch_loss += loss.item()
            epoch_acc += acc.item()
            epoch_f1 += f1.item()   ; epoch_precision += precision.item()  ; epoch_recall += recall.item()
    return epoch_loss / len(iterator), epoch_acc / len(iterator) , epoch_f1/len(iterator), epoch_precision/len(iterator), epoch_recall/len(iterator)

In [30]:
def init_weights(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_uniform(m.weight)
        m.bias.data.fill_(0.01)

        
weight_list =  [1/df_data_ratio[df_data_ratio['index']==c]['value'].values[0]  for c in mlb.classes_]
weights = torch.tensor( weight_list)
weights =weights.to(dev)

from torch import optim


### Apply grid search on LR , Weight Decay , Dropout parameters , save the parameters with best f1-score on validatation data .

In [31]:
import random 
random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

In [34]:
best_valid_f1 = -float('inf') ; best_valid_loss = float('inf')
loss_func = nn.BCEWithLogitsLoss(weight=weights) 
loss_func = loss_func.to(dev)
for drp in [0.2, 0.3,0.4,0.5,0.6]:
    for wd in [0.1 , 0.05 , 0.01 , 0.005 , 0.001]:
        for learning_rate in [1e-2 , 5e-3 , 1e-3]:
            model = Model(drp); model.apply(init_weights)
            model = model.to(dev)
            optimizer = optim.Adam(model.parameters() , lr = learning_rate, weight_decay=wd) #[a+'_pred' for a in aspects]
            model = model.to(dev)
            epochs = 10
            for epoch in range(1, epochs + 1):
                train_loss , train_acc , train_f1 , train_precision , train_recall = train_model(model, train_dl, optimizer, loss_func)
                valid_loss , valid_acc , valid_f1 , valid_precision , valid_recall  = validate_model(model, valid_dl, loss_func)
                if (valid_loss < best_valid_loss) & (valid_f1 > best_valid_f1)  & (abs(train_f1- valid_f1) <= 0.05):
                    best_valid_f1 = valid_f1 ; best_valid_loss = valid_loss
                    print('train data' , train_acc , train_f1 , train_precision , train_recall)
                    print('valid data' , valid_acc ,  valid_f1 , valid_precision , valid_recall)


                    print("Parameters: " ,'Dropout: ' ,  drp , 'weight decay: ' ,wd ,' learning rate : ' ,learning_rate )
                    if os.path.isfile('utils/multi_label_problem.pt'):
                        os.remove('utils/multi_label_problem.pt') ; print('chk')
                                           
                    torch.save(model.state_dict(), 'utils/multi_label_problem.pt')

  This is separate from the ipykernel package so we can avoid doing imports until


train data 0.8562973656437614 0.3714965026487004 0.4546071710911664 0.3500112132592635
valid data 0.8745659987131754 0.3745238035917282 0.4289509505033493 0.37096420923868817
Parameters:  Dropout:  0.2 weight decay:  0.1  learning rate :  0.01
chk
train data 0.8555118116465482 0.38598748635162006 0.4850203611633994 0.3531473285772584
valid data 0.8849431872367859 0.41120830178260803 0.5105532705783844 0.35407356917858124
Parameters:  Dropout:  0.2 weight decay:  0.1  learning rate :  0.01
chk
train data 0.8760976439172571 0.478346202861179 0.6261945150115273 0.42765871503136377
valid data 0.8880208631356558 0.47735116879145306 0.5990719298521677 0.4179966151714325
Parameters:  Dropout:  0.2 weight decay:  0.1  learning rate :  0.005
chk
train data 0.882005436853929 0.48358452455563977 0.6040721914984963 0.4380798786878586
valid data 0.8914930721124014 0.49457883834838867 0.5916983187198639 0.46525020400683087
Parameters:  Dropout:  0.2 weight decay:  0.05  learning rate :  0.01
chk
tra

In [35]:
model.load_state_dict(torch.load('utils/multi_label_problem.pt'))
model = model.to(dev)
validate_model(model, valid_dl, loss_func)

(1.7336950500806172,
 0.9144570827484131,
 0.5815974275271097,
 0.6596053044001261,
 0.5424553056557974)

NameError: name 'model' is not defined

In [38]:
val_preds = []
val_label = []
with torch.no_grad():
    for x ,y  in valid_dl:
        predictions = model(x)
        rounded_preds = torch.round(torch.sigmoid(predictions))  #torch.round
        preds = rounded_preds.data.cpu().numpy()
        val_preds.append(preds)
        val_label.append(y.data.cpu().numpy())

from sklearn.metrics import f1_score , confusion_matrix , accuracy_score , precision_score , recall_score , roc_auc_score
print("F1 score",f1_score(np.vstack(val_label)  , np.vstack(val_preds) , average='macro' ))
print("Precision score",precision_score( np.vstack(val_label)  , np.vstack(val_preds) , average='macro' ))
print("Recall score",recall_score(np.vstack(val_label)  , np.vstack(val_preds) , average='macro' ))
print("Accuracy score" , np.mean( np.vstack(val_preds) == np.vstack(val_label)))

F1 score 0.6245610777652689
Precision score 0.7023696749724148
Recall score 0.5666044906639803
Accuracy score 0.9132553606237817


In [47]:
test_preds = []
true_label = []
with torch.no_grad():
    for x ,y  in test_dl:
        predictions = model(x)#.squeeze(1)
        rounded_preds = torch.round(torch.sigmoid(predictions))  #torch.round
        preds = rounded_preds.data.cpu().numpy()
        test_preds.append(preds)
        true_label.append(y.data.cpu().numpy())

In [48]:
aspects = mlb.classes_.tolist()
"""
Merging prediction value with original test data and observe the metrics on overall level
"""
dutch_pred = pd.DataFrame(np.vstack(test_preds) ,index=du_multi_aspects.index , columns= [a+'_pred' for a in aspects])
dutch_pred2 = pd.merge(du_multi_aspects, dutch_pred , left_index=True ,right_index = True)

from sklearn.metrics import f1_score , confusion_matrix , accuracy_score , precision_score , recall_score , roc_auc_score

print("F1 score",f1_score(y_du , dutch_pred2[[a+'_pred' for a in aspects]].as_matrix() , average='macro' ))
print("Accuracy score" , np.mean(y_du == dutch_pred2[[a+'_pred' for a in aspects]].as_matrix()))
print("Precision score",precision_score(y_du , dutch_pred2[[a+'_pred' for a in aspects]].as_matrix() , average='macro' ))
print("Recall score",recall_score(y_du , dutch_pred2[[a+'_pred' for a in aspects]].as_matrix() , average='macro' ))


F1 score 0.5464933540468624
Accuracy score 0.8764869653252341
Precision score 0.6988147708882922
Recall score 0.48241459832236133


  # Remove the CWD from sys.path while we load stuff.
  # This is added back by InteractiveShellApp.init_path()
  if sys.path[0] == '':
  del sys.path[0]


In [49]:
test_preds = []
true_label = []
with torch.no_grad():
    for x ,y  in test_dl2:
        predictions = model(x)#.squeeze(1)
        rounded_preds = torch.round(torch.sigmoid(predictions))  #torch.round
        preds = rounded_preds.data.cpu().numpy()
        test_preds.append(preds)
        true_label.append(y.data.cpu().numpy())
        
        
aspects = mlb.classes_.tolist()
"""
Merging prediction value with original test data and observe the metrics on overall level
"""
spanish_pred = pd.DataFrame(np.vstack(test_preds) ,index=spanish_multi_aspects.index , columns= [a+'_pred' for a in aspects])
spanish_pred2 = pd.merge(spanish_multi_aspects, spanish_pred , left_index=True ,right_index = True)

from sklearn.metrics import f1_score , confusion_matrix , accuracy_score , precision_score , recall_score , roc_auc_score

print("F1 score",f1_score(y_spainish , spanish_pred2[[a+'_pred' for a in aspects]].as_matrix() , average='macro' ))
print("Accuracy score" , np.mean(y_spainish == spanish_pred2[[a+'_pred' for a in aspects]].as_matrix()))
print("Precision score",precision_score(y_spainish , spanish_pred2[[a+'_pred' for a in aspects]].as_matrix() , average='macro' ))
print("Recall score",recall_score(y_spainish , spanish_pred2[[a+'_pred' for a in aspects]].as_matrix() , average='macro' ))

F1 score 0.5536945586868443
Accuracy score 0.8819188191881919
Precision score 0.7265248425337454
Recall score 0.47608903361016713




In [107]:
all_pred = dutch_pred2[dutch_pred2.columns[-6:]].as_matrix()
all_true = y_du
true_pos = (all_pred * all_true).sum(axis = 0)
precision = true_pos/all_pred.sum(axis = 0)
recall = true_pos/all_true.sum(axis = 0)
accuracy = all_pred.sum(axis = 0)/ all_true.sum(axis =0)

  """Entry point for launching an IPython kernel.


In [108]:
precision , recall

(array([0.68085106, 0.8490566 , 0.76570048, 0.20930233, 0.66349206,
        0.86646884]),
 array([0.35955056, 0.48913043, 0.57952468, 0.33333333, 0.59206799,
        0.6952381 ]))

In [109]:
(2*precision*recall / (precision + recall))

array([0.47058824, 0.62068966, 0.65972945, 0.25714286, 0.6257485 ,
       0.77146631])