In [1]:
%load_ext autoreload
%autoreload 2

from pprint import pprint
from sklearn.metrics import classification_report
from utils.inference import compute_metrics


# Experiment configuration shared by the cells below.
DATASET_NAME = "esnli"  # passed as `dataset=` to load_embeddings and load_dataset_from_hf
# NOTE(review): presumably ordered so that the list index equals the integer
# label id (0/1/2 in the recorded reports) — confirm. Not referenced by the
# cells visible in this part of the notebook.
LABEL_SPACE = ["entailment", "neutral", "contradiction"]
MODEL_NAME = "deberta_large"  # passed as `model=` to load_embeddings
SEED = 42  # passed as `seed=` to load_embeddings
POOLER = "mean_with_attention"  # passed as `pooler=` to load_embeddings
LAYER = 24  # passed as `layer=` to load_embeddings

def evaluate(y_true, y_pred, is_multiclass: bool, prefix: str = 'test') -> None:
    """Print summary metrics and a per-class report for a set of predictions.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels, aligned with ``y_true``.
        is_multiclass: Forwarded unchanged to ``compute_metrics``.
        prefix: Forwarded unchanged to ``compute_metrics``; the recorded
            outputs in this notebook show metric keys starting with it
            (e.g. ``test_accuracy``).

    Returns:
        None. Everything is written to stdout.
    """
    metric_kwargs = dict(
        y_true=y_true,
        y_pred=y_pred,
        is_multiclass=is_multiclass,
        prefix=prefix,
    )
    metrics = compute_metrics(**metric_kwargs)
    pprint(metrics)

    # scikit-learn's plain-text per-class precision / recall / F1 table.
    report = classification_report(y_true=y_true, y_pred=y_pred)
    print(report)


In [2]:
## Load Embeddings
from utils.io import (
    load_dataset_from_hf,
    load_labels_at_split,
    load_embeddings,
)
import numpy as np
from datasets import DatasetDict, concatenate_datasets


def _load_split_embeddings(split: str):
    """Load the embeddings of one split using the notebook-level configuration.

    Only ``split`` varies between calls; dataset, model, seed, pooler and layer
    come from the constants defined in the first cell.

    Args:
        split: Split name understood by ``load_embeddings``
            ("train", "eval" or "test" in this notebook).

    Returns:
        Whatever ``load_embeddings`` returns for that split.
    """
    return load_embeddings(
        dataset=DATASET_NAME,
        model=MODEL_NAME,
        seed=SEED,
        split=split,
        pooler=POOLER,
        layer=LAYER,
    )


train_embeddings = _load_split_embeddings("train")
eval_embeddings = _load_split_embeddings("eval")
test_embeddings = _load_split_embeddings("test")

# Train and eval are merged into one fitting set: the classifier cells below
# fit on train+eval and score on test.
train_eval_embeddings = np.vstack([train_embeddings, eval_embeddings])

## Load Datasets and Labels
dataset = load_dataset_from_hf(dataset=DATASET_NAME)
train_labels = load_labels_at_split(dataset, "train")
eval_labels = load_labels_at_split(dataset, "eval")
# Concatenated in the same train-then-eval order as the embeddings above so
# that rows and labels stay aligned.
train_eval_labels = np.concatenate([train_labels, eval_labels])
test_labels = load_labels_at_split(dataset, "test")

# Dataset-level counterpart of the train+eval merge.
train_eval_dataset = concatenate_datasets([dataset["train"], dataset["eval"]])
dataset_dict = DatasetDict(
    {"train": train_eval_dataset, "test": dataset["test"]}
)

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/samsoup/.cache/huggingface/token
Login successful




In [4]:
import pickle
from sklearn.linear_model import LogisticRegression

# L2-regularised logistic regression fitted on the merged train+eval
# embeddings. `l2` is the penalty strength; scikit-learn expects its inverse
# as `C`.
l2 = 1
logit_clf = LogisticRegression(
    penalty="l2",
    C=1 / l2,
    max_iter=1000,
).fit(train_eval_embeddings, train_eval_labels)  # fit() returns the estimator itself

# Score on the held-out test split.
predictions = logit_clf.predict(test_embeddings)
evaluate(
    y_true=test_labels,
    y_pred=predictions,
    is_multiclass=np.unique(test_labels).size > 2,  # more than two distinct labels
)

# Persist the fitted estimator as a pickle in the current working directory.
model_path = 'LogisticRegression.pkl'
with open(model_path, 'wb') as file:
    pickle.dump(logit_clf, file)

print(f"Model saved to {model_path}")

{'test_0_f1': 0.9247216035634744,
 'test_0_precision': 0.9248589248589248,
 'test_0_recall': 0.9245843230403801,
 'test_1_f1': 0.8873065015479876,
 'test_1_precision': 0.8842949706880593,
 'test_1_recall': 0.8903386144765455,
 'test_2_f1': 0.9499457616612428,
 'test_2_precision': 0.9530472636815921,
 'test_2_recall': 0.9468643805993203,
 'test_accuracy': 0.9207043973941368,
 'test_f1': 0.9206579555909015,
 'test_macro_f1': 0.9206579555909015,
 'test_macro_precision': 0.9207337197428588,
 'test_macro_recall': 0.9205957727054153,
 'test_micro_f1': 0.9207043973941368,
 'test_micro_precision': 0.9207043973941368,
 'test_micro_recall': 0.9207043973941368,
 'test_precision': 0.9207337197428588,
 'test_recall': 0.9205957727054153,
 'test_weighted_f1': 0.920773251199328,
 'test_weighted_precision': 0.9208554928854882,
 'test_weighted_recall': 0.9207043973941368}
              precision    recall  f1-score   support

           0       0.92      0.92      0.92      3368
           1       0.88 

In [13]:
# NOTE(review): this probe raises AttributeError — the recorded output below
# shows that the fitted LogisticRegression has no `thresh` attribute. Left
# as-is it stops a "Restart & Run All" at this cell; remove it or replace it
# with an attribute the estimator actually exposes.
logit_clf.thresh

AttributeError: 'LogisticRegression' object has no attribute 'thresh'

In [3]:
from classifiers.RandomClassifier import RandomClassifier

# Baseline run with the project's RandomClassifier, using the same
# fit -> predict -> evaluate flow as the other classifier cells.
# NOTE(review): presumably predicts labels at random (recorded accuracy is
# ~0.34 over three classes) — confirm in classifiers/RandomClassifier.
random_clf = RandomClassifier()
random_clf.fit(train_eval_embeddings, train_eval_labels)
predictions = random_clf.predict(test_embeddings)

evaluate(
    y_true=test_labels,
    y_pred=predictions,
    is_multiclass=np.unique(test_labels).size > 2,  # more than two distinct labels
)

{'test_0_f1': 0.34888922021768304,
 'test_0_precision': 0.3504043126684636,
 'test_0_recall': 0.3473871733966746,
 'test_1_f1': 0.3348751156336725,
 'test_1_precision': 0.33241505968778695,
 'test_1_recall': 0.33737185461323393,
 'test_2_f1': 0.32285050348567,
 'test_2_precision': 0.3238036047234307,
 'test_2_recall': 0.3219029966017918,
 'test_accuracy': 0.3357084690553746,
 'test_f1': 0.3355382797790085,
 'test_macro_f1': 0.3355382797790085,
 'test_macro_precision': 0.33554099235989376,
 'test_macro_recall': 0.3355540082039001,
 'test_micro_f1': 0.3357084690553746,
 'test_micro_precision': 0.3357084690553746,
 'test_micro_recall': 0.3357084690553746,
 'test_precision': 0.33554099235989376,
 'test_recall': 0.3355540082039001,
 'test_weighted_f1': 0.3357175255192449,
 'test_weighted_precision': 0.3357449176193115,
 'test_weighted_recall': 0.3357084690553746}
              precision    recall  f1-score   support

           0       0.35      0.35      0.35      3368
           1       0

In [None]:
import pickle

from lightgbm import LGBMModel

# Choose the LightGBM objective from the number of distinct test labels.
num_labels = np.unique(test_labels).size
objective = 'binary' if num_labels == 2 else 'multiclass'
clf = LGBMModel(
    objective=objective,
    num_classes=1 if objective == "binary" else num_labels,
    learning_rate=1.0,      # no shrinkage, since there is only one boosting round
    n_estimators=1,         # one boosting round (multiclass grows one tree per class)
    min_child_samples=20,   # minimum samples in leaf
    n_jobs=-1,
    random_state=42
)
clf.fit(train_eval_embeddings, train_eval_labels)

# The generic LGBMModel returns probabilities rather than labels, so convert
# them: a 0.5 threshold for binary, argmax over the class axis otherwise.
prediction_probas = clf.predict(test_embeddings)
if objective == "binary":
    predictions = (prediction_probas >= 0.5).astype(int)  # threshold using 0.5
else:
    predictions = np.argmax(prediction_probas, axis=1)

# Report through the shared `evaluate` helper defined in the first cell, like
# the other classifier cells; it prints the same metrics dict and
# classification report.
evaluate(
    y_true=test_labels,
    y_pred=predictions,
    is_multiclass=objective == "multiclass",
    prefix="test",
)

# Save model to file
model_filename = 'LGBM.pkl'
with open(model_filename, 'wb') as f:
    pickle.dump(clf, f)


[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 2.165868 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 261120
[LightGBM] [Info] Number of data points in the train set: 559203, number of used features: 1024
[LightGBM] [Info] Start training from score -1.096779
[LightGBM] [Info] Start training from score -1.100782
[LightGBM] [Info] Start training from score -1.098280
{'test_0_f1': 0.9010137149672034,
 'test_0_precision': 0.9047904191616767,
 'test_0_recall': 0.8972684085510689,
 'test_1_f1': 0.8536133415688697,
 'test_1_precision': 0.8486337120049124,
 'test_1_recall': 0.8586517552034794,
 'test_2_f1': 0.9260519801980198,
 'test_2_precision': 0.9274868298729471,
 'test_2_recall': 0.92462156317578,
 'test_accuracy': 0.8936278501628665,
 'test_f1': 0.8935596789113642,
 'test_macro_f1': 0.8935596789113642,
 'test_macro_precision': 0.8936369870131786,
 'test_macro_recall': 0.8935139089767761,
 'test_micro

In [5]:
import pickle

from classifiers.KMeansClassifier import KMeansClassifier

# Project KMeansClassifier with three clusters, fitted on the merged
# train+eval embeddings and scored on the test split.
clf = KMeansClassifier(
    algorithm='elkan',
    init='k-means++',
    n_clusters=3,
    random_state=42,
)
clf.fit(train_eval_embeddings, train_eval_labels)
predictions = clf.predict(test_embeddings)

evaluate(
    y_true=test_labels,
    y_pred=predictions,
    is_multiclass=np.unique(test_labels).size > 2,  # more than two distinct labels
)

# Save model to file
# NOTE(review): 'LMeans.pkl' looks like a typo for 'KMeans.pkl'. The path is
# left unchanged in case something else loads it — confirm before renaming.
model_filename = 'LMeans.pkl'
with open(model_filename, 'wb') as f:
    pickle.dump(clf, f)

{'test_0_f1': 0.9159077109896782,
 'test_0_precision': 0.9369565217391305,
 'test_0_recall': 0.8957838479809976,
 'test_1_f1': 0.8747252747252747,
 'test_1_precision': 0.8277870216306157,
 'test_1_recall': 0.9273066169617894,
 'test_2_f1': 0.9395348837209302,
 'test_2_precision': 0.9769846564376251,
 'test_2_recall': 0.9048501699104109,
 'test_accuracy': 0.9091001628664495,
 'test_f1': 0.9100559564786277,
 'test_macro_f1': 0.9100559564786277,
 'test_macro_precision': 0.9139093999357905,
 'test_macro_recall': 0.909313544951066,
 'test_micro_f1': 0.9091001628664495,
 'test_micro_precision': 0.9091001628664495,
 'test_micro_recall': 0.9091001628664495,
 'test_precision': 0.9139093999357905,
 'test_recall': 0.909313544951066,
 'test_weighted_f1': 0.910198722369559,
 'test_weighted_precision': 0.9143745236904454,
 'test_weighted_recall': 0.9091001628664495}
              precision    recall  f1-score   support

           0       0.94      0.90      0.92      3368
           1       0.83   