In [16]:
import os, re
import torch
import pandas as pd
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from sklearn import metrics

In [17]:
class BinaryPredictionEvaluation:
    def __init__(self, model_name):
        self.model_name = model_name
        self.model_folder = f"./models/{model_name}"
        self.checkpoint = self.get_latest_checkpoint(self.model_folder)

        self.tokenizer = AutoTokenizer.from_pretrained(self.checkpoint)
        self.model = AutoModelForSequenceClassification.from_pretrained(self.checkpoint)

    def get_latest_checkpoint(self, base_folder):
        checkpoints = [d for d in os.listdir(base_folder) if os.path.isdir(os.path.join(base_folder, d)) and "checkpoint-" in d]
        checkpoint_numbers = [int(re.search(r'\d+', cp).group()) for cp in checkpoints]

        if not checkpoint_numbers:
            raise ValueError("No checkpoints found in the specified folder.")

        latest_checkpoint_number = max(checkpoint_numbers)
        latest_checkpoint = f"checkpoint-{latest_checkpoint_number}"
        return os.path.join(base_folder, latest_checkpoint)

    def evaluate(self):
        print("Evaluating", self.model_name + "!")
        df = pd.read_csv(self.model_folder + '/validation_data.csv')

        y_true = []
        y_pred = []

        for index, row in df.iterrows():
            text = row['Response']
            label = row['Level']
            inputs = self.tokenizer(text, return_tensors="pt")
            with torch.no_grad():
                logits = self.model(**inputs).logits
                predicted_class_id = logits.argmax().item()
                prediction = self.model.config.id2label[predicted_class_id]

                y_true.append(label)
                y_pred.append(prediction)
        
        encoded_y_true = [0 if label == "Low" else 1 for label in y_true]
        encoded_y_pred = [0 if label == "Low" else 1 for label in y_pred]

        # Mean accuracy
        print("Mean Accuracy:\n\t", metrics.accuracy_score(y_true, y_pred))

        # Confusion matrix
        print("Confusion Matrix:\n\t", metrics.confusion_matrix(y_true, y_pred))

        # F1 Score
        print("F1 Score:\n\t", metrics.f1_score(encoded_y_true, encoded_y_pred))

        # Precision
        print("Precision:\n\t", metrics.precision_score(encoded_y_true, encoded_y_pred))

        # Recall
        print("Recall:\n\t", metrics.recall_score(encoded_y_true, encoded_y_pred))

        # ROC AUC Score
        print("ROC AUC:\n\t", metrics.roc_auc_score(encoded_y_true, encoded_y_pred))
        
        # Cohen's Kappa Score
        print("Cohen's Kappa:\n\t", metrics.cohen_kappa_score(y_true, y_pred))

        print("\n\n")

In [18]:
# Initialize a TextClassifier instance for each dataset
Binary_evaluator = BinaryPredictionEvaluation("Binary")
Emotion_evaluator = BinaryPredictionEvaluation("Emotion")
Social_evaluator = BinaryPredictionEvaluation("Social")
Motivation_evaluator = BinaryPredictionEvaluation("Motivation")

# Train each model
Binary_evaluator.evaluate()
Emotion_evaluator.evaluate()
Social_evaluator.evaluate()
Motivation_evaluator.evaluate()


Evaluating Binary!
Mean Accuracy:
	 0.9238095238095239
Confusion Matrix:
	 [[48  3]
 [ 5 49]]
F1 Score:
	 0.923076923076923
Precision:
	 0.9056603773584906
Recall:
	 0.9411764705882353
ROC AUC:
	 0.9242919389978214
Cohen's Kappa:
	 0.8476605005440696



Evaluating Emotion!
Mean Accuracy:
	 0.8333333333333334
Confusion Matrix:
	 [[16  2]
 [ 4 14]]
F1 Score:
	 0.8421052631578948
Precision:
	 0.8
Recall:
	 0.8888888888888888
ROC AUC:
	 0.8333333333333334
Cohen's Kappa:
	 0.6666666666666667



Evaluating Social!
Mean Accuracy:
	 0.7142857142857143
Confusion Matrix:
	 [[19  0]
 [10  6]]
F1 Score:
	 0.7916666666666666
Precision:
	 0.6551724137931034
Recall:
	 1.0
ROC AUC:
	 0.6875
Cohen's Kappa:
	 0.39446366782006925



Evaluating Motivation!
Mean Accuracy:
	 0.4857142857142857
Confusion Matrix:
	 [[ 4 18]
 [ 0 13]]
F1 Score:
	 0.3076923076923077
Precision:
	 1.0
Recall:
	 0.18181818181818182
ROC AUC:
	 0.5909090909090909
Cohen's Kappa:
	 0.1416893732970026



