This notebook checks a few results whose validity is below 100%.

In [39]:
import pandas as pd

def create_classifier_df(do_CE=False):
    """Build an empty results table with one row per (classifier, approach).

    Args:
        do_CE: When truthy, every base approach is followed by an extra
            "<approach> + CE fallback" row for the same classifier.

    Returns:
        A DataFrame with the columns "Classifier", "Approach", "Coverage",
        "Validity" and "Median Size". The three metric columns are filled
        with empty-string placeholders.
    """
    # Insertion order of this mapping fixes the row order of the table.
    approaches_by_classifier = {
        "Logistic": ["Fast", "Slow"],
        "KNN": ["Greedy"],
        "SVM": ["Greedy"],
        "DT": ["Greedy"],
        "LMeans": ["Greedy"],
    }
    metric_columns = ["Coverage", "Validity", "Median Size"]

    rows = []
    for name, approaches in approaches_by_classifier.items():
        for base in approaches:
            # The base approach always comes first, optionally followed by
            # its CE-fallback variant.
            variants = [base]
            if do_CE:
                variants.append(f"{base} + CE fallback")
            rows.extend([name, variant, "", "", ""] for variant in variants)

    return pd.DataFrame(rows, columns=["Classifier", "Approach", *metric_columns])

# Build the empty results table without the "+ CE fallback" rows and display it.
df = create_classifier_df(do_CE=False)
df

Unnamed: 0,Classifier,Approach,Coverage,Validity,Median Size
0,Logistic,Fast,,,
1,Logistic,Slow,,,
2,KNN,Greedy,,,
3,SVM,Greedy,,,
4,DT,Greedy,,,
5,LMeans,Greedy,,,


In [2]:
# Experiment configuration shared by the loading cells below.
DATASET_NAME = "esnli"  # passed as `dataset=` to the loaders; also used to build result filenames
LABEL_SPACE = ["entailment", "neutral", "contradiction"]  # presumably list position == integer class id — TODO confirm
MODEL_NAME = "deberta_large"  # passed as `model=` to the loaders; also used to build result filenames
SEED = 42  # passed as `seed=` to load_embeddings / load_wrapperbox
POOLER = "mean_with_attention"  # passed as `pooler=` to load_embeddings / load_wrapperbox
LAYER = 24  # passed as `layer=` to load_embeddings


In [9]:
## Load Embeddings
from utils.io import (
    load_dataset_from_hf,
    load_labels_at_split,
    load_embeddings,
    load_wrapperbox
)
import numpy as np


# Arguments shared by every embedding load; only the split differs.
_embedding_kwargs = dict(
    dataset=DATASET_NAME,
    model=MODEL_NAME,
    seed=SEED,
    pooler=POOLER,
    layer=LAYER,
)

train_embeddings = load_embeddings(split="train", **_embedding_kwargs)
eval_embeddings = load_embeddings(split="eval", **_embedding_kwargs)
test_embeddings = load_embeddings(split="test", **_embedding_kwargs)

# Train rows first, then eval rows.
train_eval_embeddings = np.vstack((train_embeddings, eval_embeddings))


## Load Datasets and Labels
dataset = load_dataset_from_hf(dataset=DATASET_NAME)
train_labels = load_labels_at_split(dataset, "train")
eval_labels = load_labels_at_split(dataset, "eval")
# Concatenate in (train, eval) order — the same order used to stack
# train_eval_embeddings — so embeddings and labels presumably stay row-aligned.
train_eval_labels = np.concatenate([train_labels, eval_labels])
test_labels = load_labels_at_split(dataset, "test")

from datasets import DatasetDict, concatenate_datasets
# Merge the "train" and "eval" splits (train first again) into a single
# "train" split; the "test" split is carried over unchanged.
train_eval_dataset = concatenate_datasets([dataset["train"], dataset["eval"]])
dataset_dict = DatasetDict(
    {"train": train_eval_dataset, "test": dataset["test"]}
)

Token has not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /home/samsoup/.cache/huggingface/token
Login successful




In [3]:
from utils.io import load_pickle


# Number of entries in the compiled Yang-fast `is_valid` pickle
# (presumably one entry per test example — TODO confirm).
# load_pickle("evaluate_yang_fast_compiled/esnli_deberta_large_yang_fast_0to77_is_valid_subsets.pickle")
len(load_pickle("evaluate_yang_fast_compiled/esnli_deberta_large_yang2023_fast_is_valid.pickle"))

9824

In [None]:
## Format results for Yang fast

# Template for filling one (classifier, approach) row of the results table.
# `classifier`, `approach`, `new_coverage`, `new_validity` and
# `new_median_size` must be assigned before this cell is run.
#
# The target labels have to match the columns created by
# create_classifier_df ("Coverage", "Validity", "Median Size"): assigning
# through .loc to a label that does not exist adds a brand-new column instead
# of filling the existing one.
row_mask = (df['Classifier'] == classifier) & (df['Approach'] == approach)
df.loc[row_mask, 'Coverage'] = new_coverage
df.loc[row_mask, 'Validity'] = new_validity
df.loc[row_mask, 'Median Size'] = new_median_size

In [4]:
## Format metrics for KNN
from utils.io import load_json, load_pickle, load_wrapperbox
import numpy as np

wrapper_name = "KNN"
# Common stem of the result files loaded below.
result_stem = f"{DATASET_NAME}_{MODEL_NAME}_{wrapper_name}"

clf = load_wrapperbox(
    wrapperbox=wrapper_name,
    dataset=DATASET_NAME,
    model=MODEL_NAME,
    seed=SEED,
    pooler=POOLER,
)

# Per-test-example subsets, their validity flags, and the greedy metrics.
filename = f"{result_stem}.pickle"
flip_list = load_pickle(filename)
is_valid = load_pickle(f"{result_stem}_is_valid.pickle")
metrics = load_json(f"{result_stem}_greedy_metrics.json")

# Positions whose validity flag equals False.
# Previously observed: (array([7035, 8141]),), pred: array([0, 0])
invalid_flags = np.array(is_valid) == False
np.where(invalid_flags)


https://scikit-learn.org/stable/model_persistence.html#security-maintainability-limitations


(array([7035, 8141]),)

In [19]:
# Last index stored in the subset for test example 7035 (the subset is used
# below as the set of training indices to exclude).
flip_list[7035][-1]

298664

In [36]:
# Rank every training point by distance to test examples 7035 and 8141
# (n_neighbors is set to the full size of train_eval_labels).
# NOTE: despite its name, `n_neighbors` receives the second return value of
# kneighbors — the neighbor indices — while `neigh_dists` holds the distances.
neigh_dists, n_neighbors = clf.kneighbors(test_embeddings[[7035, 8141]], return_distance=True, n_neighbors=train_eval_labels.size)
n_neighbors, neigh_dists

(array([[231273, 542352,  69907, ..., 154552, 522585,   3251],
        [418123, 465440, 215959, ..., 154552, 522585,   3251]]),
 array([[ 3.49112333,  3.57319655,  3.70255389, ..., 43.81287158,
         43.84857804, 43.88398069],
        [ 2.56989404,  2.5710627 ,  2.5827666 , ..., 42.79257519,
         42.8970961 , 42.91349945]]))

In [37]:
# Neighbor ranks 4493-4497 of test example 7035: training indices, labels and
# distances. 4493 equals len(flip_list[7035]), so these are presumably the five
# closest points left once that subset is removed — TODO confirm the subset is
# exactly the first 4493 ranks.
remain_neighbors = n_neighbors[0][4493:4498]
remain_neighbors, train_eval_labels[remain_neighbors], neigh_dists[0][4493:4498]

(array([123723, 180407, 523520, 109213, 141267]),
 array([0, 0, 1, 1, 1]),
 array([6.24151053, 6.24163567, 6.24172639, 6.24200721, 6.24232326]))

In [30]:
from sklearn import clone


# Test example 7035 as a single-row batch, reused for both predictions.
query_point = test_embeddings[7035].reshape(1, -1)

# Keep every training row except the ones listed in flip_list[7035].
n_train_rows = train_eval_embeddings.shape[0]
train_mask = np.ones(n_train_rows, dtype=bool)
train_mask[flip_list[7035]] = False
reduced_embeddings, reduced_labels = (
    train_eval_embeddings[train_mask],
    train_eval_labels[train_mask],
)

# Prediction of the loaded classifier vs. an unfitted copy refit on the
# reduced training set.
old_pred = clf.predict(query_point)[0]
new_clf = clone(clf)
new_clf.fit(reduced_embeddings, reduced_labels)
new_pred = new_clf.predict(query_point)[0]


In [38]:
# Nearest neighbors of test example 7035 under the retrained classifier.
# The returned indices refer to rows of reduced_embeddings (what new_clf was
# fit on), so they are not directly comparable with train_eval indices.
new_clf.kneighbors(test_embeddings[7035].reshape(1, -1), return_distance=True)

(array([[6.24151053, 6.24163567, 6.24172639, 6.24200721, 6.24232326]]),
 array([[122723, 178963, 519331, 108361, 140136]]))

In [20]:
# Position of training index 298664 in test example 7035's neighbor ordering.
np.where(n_neighbors[0] == 298664)

(array([4492]),)

In [26]:
from MinimalSubsetToFlipPredictions.evaluate import retrain_and_evaluate_validity

# Re-run the project's validity check for test example 7035 with its subset
# excluded. NOTE(review): judging by the name and arguments, the helper
# retrains `clf` without `indices_to_exclude` and reports whether the
# prediction flips — confirm against MinimalSubsetToFlipPredictions.evaluate.
old_pred, new_pred, is_valid_subset = retrain_and_evaluate_validity(
    clf=clf,
    train_embeddings=train_eval_embeddings,
    train_labels=train_eval_labels,
    x_test=test_embeddings[7035],
    indices_to_exclude=flip_list[7035]
)

# Display the three returned values together.
old_pred, new_pred, is_valid_subset

(0, 0, False)

In [8]:
# Number of training indices in the subset found for test example 7035.
len(flip_list[7035])

4493

In [4]:
from utils.io import load_pickle
import numpy as np

# Entries of the Yang "slow" baseline pickle (named after its filename).
l = load_pickle("esnli_deberta_large_yang_slow_baseline.pickle")

# One flag per entry: True when the entry is neither None nor empty.
sums = []
for entry in l:
    sums.append(entry is not None and len(entry) > 0)

# Count of entries flagged True.
np.array(sums).sum()

9824