# Reproducibility

In [18]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import regex as re
import torch
import tqdm
from pandas.api.types import CategoricalDtype
from sklearn.metrics import accuracy_score

In [9]:
# Read the English sentence-level splits (tab-separated files) into DataFrames.
sentences_en_tr = pd.read_csv(
    './../reflection-classification/data/sentences/en/train/sentences.tsv',
    sep='\t',
)
sentences_en_val = pd.read_csv(
    './../reflection-classification/data/sentences/en/val/sentences.tsv',
    sep='\t',
)
sentences_en_te = pd.read_csv(
    './../reflection-classification/data/sentences/en/test/sentences.tsv',
    sep='\t',
)

# Report the number of rows in each split.
print(f'In English\nLenght training set : {sentences_en_tr.shape[0]}')
print(f'Lenght validation set : {sentences_en_val.shape[0]}')
print(f'Lenght testing set : {sentences_en_te.shape[0]}')

# Preview the first rows of the test split as the cell output.
sentences_en_te.head()

In English
Lenght training set : 6096
Lenght validation set : 339
Lenght testing set : 339


Unnamed: 0,idx,context,sentence,y,confidence,y_requires_context
0,4196,"In the math class, examples usually count for ...","In the second, general class, the new material...",Reflection,"[5, 4]",
1,5933,The didactic circle proceeded in a similar way...,"In the second diary, they start with a problem.",Difficulty,"[2, 3]",
2,6672,I like this teacher's approach to children. He...,"In the second semester, I chose a secondary me...",Experience,"[3, 5]",
3,2832,I was very surprised to find out that at least...,"In French, therefore, he has to translate ever...",Other,"[5, 5]",
4,1040,I didn't know at all if I had days when I prac...,No one in the choir room offered me a chair.,Experience,"[4, 2]",


In [2]:
from transformers import AutoConfig, AutoModelForSequenceClassification, AutoTokenizer

# Class names returned by the classifier; the predicted class index is used
# to look up the name in this list.
# NOTE(review): the order is assumed to match the checkpoint's output classes — confirm.
LABELS = ["Other", "Belief", "Perspective", "Feeling", "Experience",
          "Reflection", "Difficulty", "Intention", "Learning"]


class NeuralClassifier:
    """Sentence classifier backed by a pretrained sequence-classification model.

    Loads the config, model and tokenizer from ``model_path`` and exposes
    :meth:`predict_sentence`, which maps one sentence to one entry of ``LABELS``.
    """

    def __init__(self, model_path: str, uses_context: bool, device: str):
        """Load the model and tokenizer and move the model to ``device``.

        Args:
            model_path: Name or path passed to the ``from_pretrained`` loaders.
            uses_context: If True, ``predict_sentence`` requires a ``context``.
            device: Device the model is moved to and inputs are sent to.
        """
        self.config = AutoConfig.from_pretrained(model_path)
        self.device = device
        self.model = AutoModelForSequenceClassification.from_pretrained(model_path, config=self.config).to(device)
        # This class is inference-only: make sure dropout etc. are disabled.
        self.model.eval()
        self.tokenizer = AutoTokenizer.from_pretrained(model_path)
        self.uses_context = uses_context

    def predict_sentence(self, sentence: str, context: str = None):
        """Return the predicted label (an entry of ``LABELS``) for ``sentence``.

        Args:
            sentence: The sentence to classify.
            context: Optional second text segment passed to the tokenizer as
                ``text_pair``. Mandatory when the classifier was created with
                ``uses_context=True``.

        Returns:
            The label name whose logit is the largest.

        Raises:
            ValueError: If the classifier uses context and none was given.
        """
        if context is None and self.uses_context:
            raise ValueError("You need to pass in context argument, including the sentence")

        features = self.tokenizer(sentence, text_pair=context,
                                  padding="max_length", truncation=True, return_tensors='pt')
        # No gradients are needed for prediction; disabling autograd avoids
        # building a graph for every forward pass (less memory, faster).
        with torch.no_grad():
            outputs = self.model(**features.to(self.device), return_dict=True)
        # A single sentence is passed in, so only the first row of the batch is read.
        argmax = outputs.logits.argmax(dim=-1)[0].item()
        labels = LABELS[argmax]

        return labels

  from .autonotebook import tqdm as notebook_tqdm


In [28]:
# Run on a GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cpu


## Load the pretrained model from Hugging Face

In [5]:
# Build the sentence classifier from the "MU-NLPC/XLM-R-large-reflective-conf4"
# checkpoint; it is used without context here (uses_context=False).
classifier = NeuralClassifier(
    model_path="MU-NLPC/XLM-R-large-reflective-conf4",
    uses_context=False,
    device=device,  # pass "cpu" instead to force CPU inference
)

# Optional sanity check (disabled):
# test_sentences = ["And one day I will be a real teacher and I will try to do the best I can for the children.",
#                   "I felt really well!",
#                   "gfagdhj gjfdjgh dg"]
#
# y_pred = [classifier.predict_sentence(sentence) for sentence in test_sentences]
#
# print(y_pred)
#
# >>> ['Intention', 'Feeling', 'Other']

['Intention', 'Feeling', 'Other']


In [27]:
# Evaluation on the test dataset (~ 1 min on GPU, > 20 min on CPU for the full set).
# Number of test sentences to label; set to None to run on the whole test set.
N_EVAL = 20

labels = sentences_en_te['y'].values
# Slice once, positionally, instead of re-slicing inside the loop.
eval_sentences = sentences_en_te['sentence'].iloc[:N_EVAL]
n_eval = len(eval_sentences)
# Report progress roughly every 10 % of the evaluated subset, whatever its size
# (a fixed interval of 30 would print only once for a 20-sentence subset).
progress_step = max(1, n_eval // 10)

predictions = []
for idx, sentence in enumerate(eval_sentences):
    predictions.append(classifier.predict_sentence(sentence))
    if idx % progress_step == 0:
        print(f'{np.round(100*(idx+1)/n_eval,3)} % labeled')

print(predictions)

5.0 % labeled
15.0 % labeled
25.0 % labeled
35.0 % labeled
45.0 % labeled
55.0 % labeled
65.0 % labeled
75.0 % labeled
85.0 % labeled
95.0 % labeled
['Experience', 'Other', 'Experience', 'Other', 'Experience', 'Experience', 'Other', 'Reflection', 'Difficulty', 'Reflection', 'Experience', 'Reflection', 'Experience', 'Other', 'Feeling', 'Experience', 'Other', 'Other', 'Experience', 'Difficulty']


In [26]:
# Score only the sentences that were actually labelled: `predictions` may cover
# just a prefix of the test set, so slice the gold labels to the same length
# instead of relying on a hard-coded subset size.
print(f'Accuracy : {accuracy_score(labels[:len(predictions)],predictions)}')

Accuracy : 0.7142857142857143
