In [1]:
%load_ext autoreload
%autoreload 2

# TODO: Think about plotting subset sizes against predicted probability? (confidence)

from pprint import pprint

from sklearn.metrics import classification_report
from utils.inference import compute_metrics


DATASET_NAME = "esnli"
LABEL_SPACE = ["entailment", "neutral", "contradiction"]
MODEL_NAME = "deberta_large"
SEED = 42
POOLER = "mean_with_attention"
LAYER = 24

def evaluate(y_true, y_pred, is_multiclass: bool, prefix: str='test'):
    # Print some metrics
    testset_perfm = compute_metrics(
        y_true=y_true, y_pred=y_pred, is_multiclass=is_multiclass, prefix=prefix
    )
    pprint(testset_perfm)
    print(classification_report(y_true=y_true, y_pred=y_pred))


In [2]:
## Load Embeddings
from utils.io import (
    load_dataset_from_hf,
    load_labels_at_split,
    load_embeddings,
)
import numpy as np
train_embeddings = load_embeddings(
    dataset=DATASET_NAME,
    model=MODEL_NAME,
    seed=SEED,
    split="train",
    pooler=POOLER,
    layer=LAYER
)

eval_embeddings = load_embeddings(
    dataset=DATASET_NAME,
    model=MODEL_NAME,
    seed=SEED,
    split="eval",
    pooler=POOLER,
    layer=LAYER
)

test_embeddings = load_embeddings(
    dataset=DATASET_NAME,
    model=MODEL_NAME,
    seed=SEED,
    split="test",
    pooler=POOLER,
    layer=LAYER
)

train_eval_embeddings = np.vstack([train_embeddings, eval_embeddings])

## Load Datasets and Labels
dataset = load_dataset_from_hf(dataset=DATASET_NAME)
train_labels = load_labels_at_split(dataset, "train")
eval_labels = load_labels_at_split(dataset, "eval")
train_eval_labels = np.concatenate([train_labels, eval_labels])
test_labels = load_labels_at_split(dataset, "test")

from datasets import DatasetDict, concatenate_datasets
train_eval_dataset = concatenate_datasets([dataset["train"], dataset["eval"]])
dataset_dict = DatasetDict(
    {"train": train_eval_dataset, "test": dataset["test"]}
)

In [7]:
import pickle
from sklearn.linear_model import LogisticRegression


l2 = 500
logit_clf = LogisticRegression(penalty="l2", C= 1 / l2)
logit_clf.fit(train_eval_embeddings, train_eval_labels)
predictions = logit_clf.predict(test_embeddings)


evaluate(
    y_pred=predictions, 
    y_true=test_labels, 
    is_multiclass=np.unique(test_labels).size > 2
)

# Path to save the model
model_path = 'LogisticRegression.pkl'

# Save the trained model to a .pkl file
with open(model_path, 'wb') as file:
    pickle.dump(logit_clf, file)

print(f"Model saved to {model_path}")

{'test_0_f1': 0.9241727259237276,
 'test_0_precision': 0.92376149510531,
 'test_0_recall': 0.9245843230403801,
 'test_1_f1': 0.8861586314152411,
 'test_1_precision': 0.8872625350358144,
 'test_1_recall': 0.8850574712643678,
 'test_2_f1': 0.9498379379533879,
 'test_2_precision': 0.9491054904380012,
 'test_2_recall': 0.950571516836577,
 'test_accuracy': 0.9201954397394136,
 'test_f1': 0.9200564317641189,
 'test_macro_f1': 0.9200564317641189,
 'test_macro_precision': 0.9200431735263752,
 'test_macro_recall': 0.920071103713775,
 'test_micro_f1': 0.9201954397394136,
 'test_micro_precision': 0.9201954397394136,
 'test_micro_recall': 0.9201954397394136,
 'test_precision': 0.9200431735263752,
 'test_recall': 0.920071103713775,
 'test_weighted_f1': 0.9201734304348423,
 'test_weighted_precision': 0.9201528184388009,
 'test_weighted_recall': 0.9201954397394136}
              precision    recall  f1-score   support

           0       0.92      0.92      0.92      3368
           1       0.89     

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [None]:
from classifiers.RandomClassifier import RandomClassifier


random_clf = RandomClassifier()
random_clf.fit(train_eval_embeddings, train_eval_labels)
predictions = random_clf.predict(test_embeddings)

evaluate(
    y_pred=predictions, 
    y_true=test_labels, 
    is_multiclass=np.unique(test_labels).size > 2
)