## Import libraries

In [None]:
import os
import json

import numpy as np
from sklearn.decomposition import PCA

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [None]:
def read_memmap(filepath):
    """Open a read-only NumPy memmap described by a sidecar JSON config.

    The config file lives next to ``filepath`` and has the same name with the
    final extension replaced by ``.conf``. It must be a JSON object with a
    ``"shape"`` entry (sequence of ints) and a ``"dtype"`` entry.

    Args:
        filepath: Path to the raw data file (e.g. ``.../states.dat``).

    Returns:
        An ``np.memmap`` opened with ``mode="r"`` using the configured shape
        and dtype.

    Raises:
        OSError: If the config or data file cannot be opened.
        ValueError: If the config is not valid JSON.
        KeyError: If ``"shape"`` or ``"dtype"`` is missing from the config.
    """
    # Swap only the final extension. A plain str.replace(".dat", ".conf")
    # would also rewrite any ".dat" occurring earlier in the path, e.g. in a
    # directory called "run.data".
    config_path = os.path.splitext(filepath)[0] + ".conf"
    with open(config_path, "r", encoding="utf-8") as fin_config:
        memmap_configs = json.load(fin_config)

    return np.memmap(
        filepath,
        mode="r",
        shape=tuple(memmap_configs["shape"]),
        dtype=memmap_configs["dtype"],
    )

In [None]:
# Root directory of saved results, given relative to the current working
# directory. The probing cell below looks for hidden-state files under
# {result_dir}/{dataset_name}/{model_name}/.
result_dir = "../../../results"

## Linear probing

In [None]:
# Linear probing: for each (dataset, model, prompting strategy) whose
# layer-wise hidden states are available, project the selected layer's hidden
# states with PCA and report the 2-fold accuracy of a logistic-regression
# probe that predicts the format index (second axis of the hidden states).

# Number of principal components kept before probing.
pca_dim = 4
# Layers to probe; indices outside a model's layer range are skipped.
probe_layers = [31]

for dataset_name in ["100TFQA", "CommonsenseQA", "QASC", "GSM8K"]:
    for model_name in ["Llama-3.1-8B-Instruct", "Phi-3.5-mini-instruct"]:
        for prompting_strategy in ["zero-shot", "zero-shot-cot", "few-shot", "few-shot-cot"]:
            output_dir = f"{result_dir}/{dataset_name}/{model_name}"
            layer_wise_path = os.path.join(output_dir, f"{prompting_strategy}_layer_wise_hidden_states.dat")
            # NOTE(review): the three paths below are not used by the probing
            # in this cell; they are kept in case later cells read them.
            head_wise_path = os.path.join(output_dir, f"{prompting_strategy}_head_wise_hidden_states.dat")

            input_dir = "../../../preprocessed_datasets"
            input_path = os.path.join(input_dir, f"{dataset_name}_test_4_shot.jsonl")

            # Combinations whose hidden states cannot be loaded are skipped
            # on purpose. Only file-access and config-parsing errors are
            # caught, so KeyboardInterrupt and genuine bugs still surface
            # instead of being swallowed.
            try:
                layer_wise_hidden_states = read_memmap(layer_wise_path)
            except (OSError, ValueError, KeyError, TypeError):
                continue

            num_samples, num_formats, num_layers, hidden_size = layer_wise_hidden_states.shape
            for layer_idx in probe_layers:
                if not 0 <= layer_idx < num_layers:
                    continue

                # Step 1: Prepare input.
                # Rows are flattened sample-major, so the format index cycles
                # 0..num_formats-1 within each sample; np.tile reproduces
                # exactly that label order.
                X = layer_wise_hidden_states[:, :, layer_idx, :].reshape(-1, hidden_size)
                Y = np.tile(np.arange(num_formats), num_samples)

                # Step 2: PCA projection
                # NOTE(review): PCA is fit on all rows before the split, so
                # the projection has seen the held-out fold. Fit it per fold
                # if strict train/test separation is required.
                pca = PCA(n_components=pca_dim)
                X_pca = pca.fit_transform(X)
                # print(np.sum(pca.explained_variance_ratio_))

                # Step 3: Train-test split (2-fold)
                X_fold1, X_fold2, y_fold1, y_fold2 = train_test_split(X_pca, Y, test_size=0.5, random_state=0)

                # Each half serves once as the training set and once as the
                # test set.
                folds = [
                    (X_fold1, y_fold1, X_fold2, y_fold2),
                    (X_fold2, y_fold2, X_fold1, y_fold1),
                ]

                layer_accuracy_scores = []
                for X_train_raw, y_train, X_test_raw, y_test in folds:
                    # Step 4: Standardization (statistics come from the
                    # training fold only)
                    scaler = StandardScaler()
                    X_train = scaler.fit_transform(X_train_raw)
                    X_test = scaler.transform(X_test_raw)

                    # Step 5: Train a linear probing model
                    clf = LogisticRegression(solver="lbfgs", max_iter=1000)
                    clf.fit(X_train, y_train)

                    # Step 6: Evaluate the model
                    y_pred = clf.predict(X_test)
                    accuracy = accuracy_score(y_test, y_pred)
                    layer_accuracy_scores.append(accuracy)
                    # Uncomment for per-class diagnostics:
                    # print(classification_report(y_test, y_pred))
                    # print(confusion_matrix(y_test, y_pred))
                print(f"Layer {layer_idx}: {np.mean(layer_accuracy_scores):.4f}")
                print(f"{dataset_name} / {model_name} / {prompting_strategy} (above)")
                print()