# Evaluation pipeline: classifier baselines

In [None]:
import torch
from pytorch_pretrained_bert import BertTokenizer,BertForMaskedLM
from sentence_transformers import SentenceTransformer
from itertools import chain
import pandas as pd
import numpy as np
import spacy
import operator
from pathlib import Path
from scipy import stats
from gensim.models.wrappers import FastText
from gensim.models import Word2Vec
from tools import animacy_evaluation,processing
import unidecode
from collections import Counter
import sklearn
import pathlib
import pickle
from tools import processing

In [None]:
from tools import classifiers

### Select classifying options

In [None]:
# Training corpus: the dataset the classifier was trained on.
# Options: "stories" or "combined"
training_corpus = "stories"

# Testing corpus: the dataset the classifier is applied to. Its training
# split is used to tune parameters, and the optimal parameters are then
# applied to its test split.
# Options: "stories" or "machines19thC_animacy"
testing_corpus = "stories"

# Absolute path (with trailing slash) of the parent of the current working
# directory, which is expected to be the root of the github repository.
abspath = f"{Path('..').resolve()}/"

### Load data and classify

In [None]:
# ----------------------------------------
# Both splits are read from the pickled dataframes stored under
# `data/<testing_corpus>/`.
# NOTE(review): `read_pickle` unpickles arbitrary objects, so these files
# should only come from a trusted source.

# Validation dataset (the training split of the testing corpus):
dataset_validdf = pd.read_pickle(f"{abspath}data/{testing_corpus}/train.pkl")

# Test dataset:
dataset_testdf = pd.read_pickle(f"{abspath}data/{testing_corpus}/test.pkl")

In [None]:
# ----------------------------------------
# Classify with three different classifiers:
# * tfidf_svm: trained in `train_svm_classifiers.ipynb`
# * wemb_svm: trained in `train_svm_classifiers.ipynb`
# * bert: trained in `train_bert_classifier.ipynb`
#
# For every (classifier, input type) scenario, the validation set is used to
# pick the animacy threshold with the highest f-score, the per-threshold
# results are stored as a TSV file, and the best threshold is then applied to
# the test set.

# All scenarios write to the same directory, so it is created only once:
exp_path = abspath + "experiments/" + testing_corpus + "/"
Path(exp_path).mkdir(parents=True, exist_ok=True)

# Candidate animacy thresholds: 0.0, 0.1, ..., 1.0 (rounded to remove the
# floating point noise introduced by `np.arange`):
threshold_list = [float(round(th, 2)) for th in np.arange(0, 1.05, 0.1)]

for classifier in ["tfidf_svm", "wemb_svm", "bert"]:

    # Classify according to the input data:
    # * targetExpression: the target expression itself, i.e. the expression
    #                     we are interested in knowing its animacy.
    # * context3wmasked: the **masked** target expression and three words
    #                    to the left and to the right.
    # * context3w: the target expression and three words to the left and right.
    for type_of_training_data in ["targetExpression", "context3wmasked", "context3w"]:

        # File in which the results-per-threshold of this scenario are stored:
        results_path = exp_path + "classifier_" + training_corpus + "_" + classifier + "_" + type_of_training_data + ".tsv"

        # -------------------------------------------------
        # 1) Use validation set to find optimal animacy threshold per dataset:

        # Use classifier to return an animacy score for the sentences in the validation set:
        y_pred = classifiers.classify(abspath, classifier, dataset_validdf[type_of_training_data], type_of_training_data, training_corpus)
        y_true = dataset_validdf['animated'].tolist()

        # Evaluate every threshold on the validation set. Rows are collected in
        # a list and the dataframe is built once: `DataFrame.append` was
        # removed in pandas 2.0 and copied the whole frame on every call.
        rows = []
        for th in threshold_list:
            precision, recall, fscore, micro_fscore, map_ = animacy_evaluation.results(y_true, y_pred, th)
            rows.append({
                'threshold': th,
                'precision': round(precision, 3),
                'recall': round(recall, 3),
                'fscore': round(fscore, 3),
                'micro_fscore': round(micro_fscore, 3),
                'map': round(map_, 3),
            })

        # Each result-per-threshold is stored in the `df_results` dataframe:
        df_results = pd.DataFrame(rows, columns=['threshold', 'precision', 'recall', 'fscore', 'micro_fscore', 'map'])

        # Sort validation set results-per-threshold according to highest f1-score, and store.
        # A stable sort keeps tied f-scores in ascending threshold order, so the
        # selected threshold is deterministic (the lowest one among ties):
        df_results.sort_values(by='fscore', ascending=False, kind='mergesort').to_csv(results_path, sep="\t")

        # -------------------------------------------------
        # 2) Apply classifier and best animacy threshold to test set:

        # Read stored best parameters (first row = highest f-score):
        parameters_best = pd.read_csv(results_path, sep="\t").iloc[0]
        obs_threshold = parameters_best['threshold']

        # Apply classifier:
        y_pred = classifiers.classify(abspath, classifier, dataset_testdf[type_of_training_data], type_of_training_data, training_corpus)
        y_true = dataset_testdf['animated'].tolist()

        # Evaluate:
        precision, recall, fscore, micro_fscore, map_ = animacy_evaluation.results(y_true, y_pred, obs_threshold)

        # Print scenario:
        print("\nType of training data:", type_of_training_data)
        print("Classifier:", classifier)
        print("Corpus used to train the classifier:", training_corpus)
        print("Corpus used for parameter tuning and testing:", testing_corpus)
        print("Results:")
        # Row of scores separated by " & " (micro f-score is not reported):
        scores = " & ".join(str(round(score, 3)) for score in (precision, recall, fscore, map_))
        print(type_of_training_data, classifier, "(t=" + str(round(obs_threshold, 2)) + ") & " + scores)