In [1]:
import os

os.environ["OMP_NUM_THREADS"] = "10"  # has to be done before any package is imported

import glob
import pickle
import numpy as np
from sklearn.linear_model import LogisticRegression
import matplotlib.pyplot as plt
from tqdm import tqdm
import pandas as pd
import pandas as pd
from pathlib import Path

In [2]:
DATASET = "GoEmo"
print("## USING THE GOEMOTION DATASET ##")

# VECTOR_TYPE = "training_based"
VECTOR_TYPE = "activations"

# For a fair comparison of the ROC curves between the activation and the steering vectors we need to only use the activation vectors, where we have found steering vectors
COMPARISON_TYPE = "all"
# COMPARISON_TYPE = "fair"

## USING THE GOEMOTION DATASET ##


In [3]:
TRAINED_STEERING_VECTOR_PATH = os.getenv("TRAINED_VECTORS_PATH_GoEmo")
TRAINED_STEERING_VECTOR_FILES = glob.glob(f"./store_activations/GoEmo/*")
TRAINED_STEERING_VEC_MIN_LOSS = 5

if VECTOR_TYPE == "training_based":
    print("## LOADING TRAINED STEERING VECTORS ##")
elif VECTOR_TYPE == "activations":
    if COMPARISON_TYPE == "fair":
        print("## LOADING ACTIVATION VECTORS in the fair setting##")
    else:
        print("## LOADING ACTIVATION VECTORS ##")
else:
    print("Options for VECTOR_TYPE are -training_based- or -activations-")
    exit(-1)

## LOADING ACTIVATION VECTORS ##


In [4]:
### LOADING ACTIVATION VECTORS for train and test set
go_emo_train = pd.read_pickle("./store_activation/GoEmo/GoEmo_activations_train.pkl")
go_emo_test = pd.read_pickle("./store_activation/GoEmo/GoEmo_activations_test.pkl")

# we dont have activations for all entries
go_emo_train = [entry for entry in go_emo_train if len(entry) == 3]
go_emo_test = [entry for entry in go_emo_test if len(entry) == 3]

go_emo_train_tmp = np.array(go_emo_train, dtype=object)
go_emo_train_tmp_dic = list(go_emo_train_tmp[:, 1])
df_train_tmp = pd.DataFrame(go_emo_train_tmp_dic, columns=["text", "labels", "id"])

go_emo_test_tmp = np.array(go_emo_test, dtype=object)
go_emo_test_tmp_dic = list(go_emo_test_tmp[:, 1])
df_test_tmp = pd.DataFrame(go_emo_test_tmp_dic, columns=["text", "labels", "id"])

In [5]:
### LOADING TRAINED STEERING VECTORS
labels = [25, 17, 14, 2, 26, 11]
means = []
total_mean = []

df_goemo = pd.read_pickle("./datasets/pkl/go_emotions.pkl")
go_emo_train_steering = []
go_emo_test_steering = []

go_emo_train_actis_fair = []
go_emo_test_actis_fair = []

In [6]:


for file in tqdm(TRAINED_STEERING_VECTOR_FILES, desc="Loading trained steering vecs"):
    with open(file, "rb") as f:
        a = pickle.load(f)

        for key, value in a.items():
            target_sentence = key
            steering_vector = value[0]
            for vec_i, vec in enumerate(
                steering_vector
            ):  # the vectors were saved as tensors with device=cuda. shape is 1,4096 and therefore squeeze
                steering_vector[vec_i] = (
                    steering_vector[vec_i].detach().cpu().numpy().squeeze()
                )
            # activations = value[1]
            loss = value[2].detach().cpu().numpy().item()
            epoch = value[3]
            # gen_text = value[4]
            label = value[5]

            dsl_entry = df_goemo[df_goemo["text"] == target_sentence]

            if loss < TRAINED_STEERING_VEC_MIN_LOSS:

                if not (df_train_tmp[df_train_tmp["text"] == target_sentence]).empty:
                    found = df_train_tmp[df_train_tmp["text"] == target_sentence]
                    go_emo_train_actis_fair.append(go_emo_train[found.index[0]])
                    go_emo_train_steering.append(
                        [
                            label.item(),
                            dsl_entry.to_dict(orient="list"),
                            steering_vector,
                            loss,
                        ]
                    )

                elif not (df_test_tmp[df_test_tmp["text"] == target_sentence]).empty:
                    found = df_test_tmp[df_test_tmp["text"] == target_sentence]
                    go_emo_test_actis_fair.append(go_emo_test[found.index[0]])
                    go_emo_test_steering.append(
                        [
                            label.item(),
                            dsl_entry.to_dict(orient="list"),
                            steering_vector,
                            loss,
                        ]
                    )






Loading trained steering vecs: 0it [00:00, ?it/s]


In [18]:
# taken from https://scikit-learn.org/stable/auto_examples/model_selection/plot_roc.html
def plot_classification(
    y_train, y_test, y_score, n_classes, target_names, layer_indices
):
    from itertools import cycle
    from sklearn.preprocessing import LabelBinarizer
    from sklearn.metrics import roc_curve, auc
    from sklearn.metrics import RocCurveDisplay

    label_binarizer = LabelBinarizer().fit(y_train)
    y_onehot_test = label_binarizer.transform(y_test)
    y_onehot_test.shape  # (n_samples, n_classes)
    fig, ax = plt.subplots(figsize=(6, 6))

    # store the fpr, tpr, and roc_auc for all averaging strategies
    fpr, tpr, roc_auc = dict(), dict(), dict()
    # Compute micro-average ROC curve and ROC area
    fpr["micro"], tpr["micro"], _ = roc_curve(y_onehot_test.ravel(), y_score.ravel())
    roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])

    print(f"Micro-averaged One-vs-Rest ROC AUC score:\n{roc_auc['micro']:.2f}")

    plt.plot(
        fpr["micro"],
        tpr["micro"],
        # label=f"micro-average ROC curve (AUC = {roc_auc['micro']:.2f})",
        label=f"micro-average (AUC = {roc_auc['micro']:.2f})",
        color="deeppink",
        linestyle=":",
        linewidth=4,
    )

    colors = cycle(["aqua", "darkorange", "cornflowerblue", "red", "purple", "green"])
    for class_id, color in zip(range(n_classes), colors):
        RocCurveDisplay.from_predictions(
            y_onehot_test[:, class_id],
            y_score[:, class_id],
            # name=f"ROC curve for {target_names[class_id]}",
            name=f"{target_names[class_id]}",
            color=color,
            ax=ax,
        )

    plt.plot([0, 1], [0, 1], "k--")  # , label="ROC curve for chance level (AUC = 0.5)")
    plt.axis("square")
    plt.grid(color="lightgray", linestyle="-", linewidth=1)
    plt.xlabel("False Positive Rate", fontsize=15)
    plt.ylabel("True Positive Rate", fontsize=15)
    # plt.title("Extension of Receiver Operating Characteristic\nto One-vs-Rest multiclass")
    plt.legend(loc="lower right", fontsize=13)
    fig_name_indices = ""
    for layer_idx in layer_indices:
        fig_name_indices += f"{layer_idx}_"
    fig_name = (
        f"ROC_goemo_{fig_name_indices}steering.pdf"
        if VECTOR_TYPE == "training_based"
        else f"ROC_goemo_{fig_name_indices}actis_{COMPARISON_TYPE}.pdf"
    )
    # fig_name = f"test.pdf"
    plt.savefig(f"./images/{fig_name}")
    plt.clf()

In [19]:
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

In [20]:
# logistic regression iterating over all layers
def single_layer_classification(layer_index):
    """Training a logistic regression classifier with single layers as input.

    :param int layer_index: Which layer should be used.
    """

    if (VECTOR_TYPE == "activations") and (COMPARISON_TYPE == "all"):
        Y_train = []
        X_train = []
        for entry in go_emo_train:
            Y_train.append(labels.index(entry[1]["labels"][0]))
            X_train.append(entry[2][layer_index])

        Y_test = []
        X_test = []
        for entry in go_emo_test:
            Y_test.append(labels.index(entry[1]["labels"][0]))
            X_test.append(entry[2][layer_index])

    else:
        Y_25, Y_17, Y_14, Y_2, Y_26, Y_11 = [], [], [], [], [], []
        X_25, X_17, X_14, X_2, X_26, X_11 = [], [], [], [], [], []

        entry_list = (
            go_emo_train_steering
            if VECTOR_TYPE == "training_based"
            else go_emo_train_actis_fair
        )

        for entry in entry_list:
            class_label = entry[1]["labels"][0]

            if class_label == 25:
                Y_25.append(labels.index(entry[1]["labels"][0]))
                X_25.append(entry[2][layer_index - 18])
            elif class_label == 17:
                Y_17.append(labels.index(entry[1]["labels"][0]))
                X_17.append(entry[2][layer_index - 18])
            elif class_label == 14:
                Y_14.append(labels.index(entry[1]["labels"][0]))
                X_14.append(entry[2][layer_index - 18])
            elif class_label == 2:
                Y_2.append(labels.index(entry[1]["labels"][0]))
                X_2.append(entry[2][layer_index - 18])
            elif class_label == 26:
                Y_26.append(labels.index(entry[1]["labels"][0]))
                X_26.append(entry[2][layer_index - 18])
            elif class_label == 11:
                Y_11.append(labels.index(entry[1]["labels"][0]))
                X_11.append(entry[2][layer_index - 18])
            else:
                print(f"Didn't find {class_label}")

        X_train, X_test = [], []
        Y_train, Y_test = [], []
        split_ratio = 0.5
        for tup in [
            (X_25, Y_25),
            (X_17, Y_17),
            (X_14, Y_14),
            (X_2, Y_2),
            (X_26, Y_26),
            (X_11, Y_11),
        ]:
            end_train_idx = int(split_ratio * len(tup[0])) + 1
            X_train.extend(tup[0][0:end_train_idx])
            Y_train.extend(tup[1][0:end_train_idx])
            X_test.extend(tup[0][end_train_idx:-1])
            Y_test.extend(tup[1][end_train_idx:-1])
            
    clf = make_pipeline(StandardScaler(), 
                    LogisticRegression(multi_class="multinomial", max_iter=50000, class_weight="balanced"))
    
    # Fit the model
    clf.fit(X_train, Y_train)
    print(f"Layer {layer_index} classification score: {clf.score(X_test, Y_test)}")
    
    plot_classification(
        Y_train,
        Y_test,
        clf.predict_proba(X_test),
        6,
        ["sadness", "joy", "fear", "anger", "surprise", "disgust"],
        [layer_index],
    )

In [21]:
# logistic regression with concatenated layers, sliding window
def multi_layer_classification(num_layers=3, specific_layers=None):
    """Training a logistic regression classifier with multiple layers as input.
    Currently it only works for the activation-based vectors.

    :param int num_layers: How many layers per classifier, defaults to 3
    :param array specific_layers: Which layers should be used , defaults to None
    """

    layer_indices_list = []
    if specific_layers is not None:
        layer_indices_list = [specific_layers]
    else:
        for i in range(0, 33):
            layer_indices_list.append(np.arange(i, i + num_layers))

    for layer_indices in layer_indices_list:

        Y_train = []
        X_train = []
        for entry in go_emo_train:
            Y_train.append(labels.index(entry[1]["labels"][0]))
            entries = []
            for layer_index in layer_indices:
                entries.append(entry[2][layer_index])
            X_train.append(np.concatenate(entries))

        Y_test = []
        X_test = []
        for entry in go_emo_test:
            Y_test.append(labels.index(entry[1]["labels"][0]))
            entries = []
            for layer_index in layer_indices:
                entries.append(entry[2][layer_index])
            X_test.append(np.concatenate(entries))

        clf = LogisticRegression(
            multi_class="multinomial", max_iter=10000, class_weight="balanced"
        ).fit(X_train, Y_train)
        print(
            f"Layer {layer_indices[0]} classification score: {clf.score(X_test,Y_test)}"
        )
        plot_classification(
            Y_train,
            Y_test,
            clf.predict_proba(X_test),
            6,
            ["sadness", "joy", "fear", "anger", "surprise", "disgust"],
            layer_indices,
        )

In [22]:
#import warnings
#warnings.filterwarnings("ignore")

In [24]:
# layers = [18,19,20] # commented for tests
# 
layers = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32] 
for layer in layers:
    print(layer)
    try:
        single_layer_classification(layer)
    except Exception as e:
        print(f"Error in layer {layer}: {e}")

1




Layer 1 classification score: 0.5018050541516246
Micro-averaged One-vs-Rest ROC AUC score:
0.75
2




Layer 2 classification score: 0.4963898916967509
Micro-averaged One-vs-Rest ROC AUC score:
0.76
3




Layer 3 classification score: 0.5180505415162455
Micro-averaged One-vs-Rest ROC AUC score:
0.79
4




Layer 4 classification score: 0.5252707581227437
Micro-averaged One-vs-Rest ROC AUC score:
0.80
5




Layer 5 classification score: 0.5595667870036101
Micro-averaged One-vs-Rest ROC AUC score:
0.83
6




Layer 6 classification score: 0.5703971119133574
Micro-averaged One-vs-Rest ROC AUC score:
0.83
7




Layer 7 classification score: 0.5884476534296029
Micro-averaged One-vs-Rest ROC AUC score:
0.83
8




Layer 8 classification score: 0.5830324909747292
Micro-averaged One-vs-Rest ROC AUC score:
0.84
9




Layer 9 classification score: 0.5830324909747292
Micro-averaged One-vs-Rest ROC AUC score:
0.84
10




Layer 10 classification score: 0.5974729241877257
Micro-averaged One-vs-Rest ROC AUC score:
0.85
11




Layer 11 classification score: 0.592057761732852
Micro-averaged One-vs-Rest ROC AUC score:
0.84
12




Layer 12 classification score: 0.5649819494584838
Micro-averaged One-vs-Rest ROC AUC score:
0.83
13




Layer 13 classification score: 0.6028880866425993
Micro-averaged One-vs-Rest ROC AUC score:
0.83
14




Layer 14 classification score: 0.5884476534296029
Micro-averaged One-vs-Rest ROC AUC score:
0.84
15




Layer 15 classification score: 0.5848375451263538
Micro-averaged One-vs-Rest ROC AUC score:
0.83
16




Layer 16 classification score: 0.5794223826714802
Micro-averaged One-vs-Rest ROC AUC score:
0.82
17




Layer 17 classification score: 0.592057761732852
Micro-averaged One-vs-Rest ROC AUC score:
0.83
18




Layer 18 classification score: 0.6227436823104693
Micro-averaged One-vs-Rest ROC AUC score:
0.85
19




Layer 19 classification score: 0.6407942238267148
Micro-averaged One-vs-Rest ROC AUC score:
0.86
20




Layer 20 classification score: 0.6389891696750902
Micro-averaged One-vs-Rest ROC AUC score:
0.87
21




Layer 21 classification score: 0.6552346570397112
Micro-averaged One-vs-Rest ROC AUC score:
0.88
22


  fig, ax = plt.subplots(figsize=(6, 6))


Layer 22 classification score: 0.6570397111913358
Micro-averaged One-vs-Rest ROC AUC score:
0.89
23




Layer 23 classification score: 0.6407942238267148
Micro-averaged One-vs-Rest ROC AUC score:
0.88
24




Layer 24 classification score: 0.6570397111913358
Micro-averaged One-vs-Rest ROC AUC score:
0.89
25
Error in layer 25: list index out of range
26
Error in layer 26: list index out of range
27
Error in layer 27: list index out of range
28
Error in layer 28: list index out of range
29
Error in layer 29: list index out of range
30
Error in layer 30: list index out of range
31
Error in layer 31: list index out of range
32
Error in layer 32: list index out of range


<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>

<Figure size 600x600 with 0 Axes>