In [None]:
import torch
import numpy as np
import pandas as pd
from datasets import load_dataset, Dataset
from tqdm.auto import tqdm
import os
import pickle
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from transformers import (
    DistilBertModel,
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification
)

print(" Step 1: Data Preparation for Probing Fine Tuned Model ")

try:
    print("Loading MRC Psycholinguistic Database...")
    mrc_dataset = load_dataset("StephanAkkerman/MRC-psycholinguistic-database")['train']

    # extracting features 
    words = mrc_dataset['Word']
    img = np.array(mrc_dataset['Imageability'], dtype=float)
    conc = np.array(mrc_dataset['Concreteness'], dtype=float)
    nsyl = np.array(mrc_dataset['Number of Syllables'], dtype=float)

    valid = ~np.isnan(img) & ~np.isnan(conc) & ~np.isnan(nsyl)
    words = [words[i] for i in range(len(words)) if valid[i]]
    img = img[valid]
    conc = conc[valid]
    nsyl = nsyl[valid]
    
    # limiting  dataset to subset for faster processing
    SUBSET_SIZE = 5000  
    if len(words) > SUBSET_SIZE:
        indices = np.arange(len(words))
        np.random.seed(42)
        np.random.shuffle(indices)
        words = [words[i] for i in indices[:SUBSET_SIZE]]
        img = img[indices[:SUBSET_SIZE]]
        conc = conc[indices[:SUBSET_SIZE]]
        nsyl = nsyl[indices[:SUBSET_SIZE]]

    print(f"Total words in probing subset: {len(words)}")
    
    # creating a dataset object
    probing_dataset = Dataset.from_dict({
        'words': words,
        'img': img.tolist(),
        'conc': conc.tolist(),
        'nsyl': nsyl.tolist()
    })

except Exception as e:
    print(f"Error preparing dataset: {e}")
    exit()


  from .autonotebook import tqdm as notebook_tqdm


 Step 1: Data Preparation for Probing Fine Tuned Model 
Loading MRC Psycholinguistic Database...
Total words in probing subset: 5000


In [None]:
print("\n Step 2: Probing Setup ")
model_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# path to fine tuned models
MODEL_DIR_FINE_TUNED = "/home/sharmajidotdev/manish/models/readability"
PROBE_RESULTS_DIR = "/home/sharmajidotdev/manish/probing_results_mrc"
os.makedirs(PROBE_RESULTS_DIR, exist_ok=True)

def get_single_word_embedding_per_layer(word, model, tokenizer, device):
    try:
        base_model = model.distilbert
        base_model.eval()

        encoded = tokenizer(word, return_tensors='pt').to(device)
        with torch.no_grad():
            output = base_model(**encoded, output_hidden_states=True)
        hidden_states = output.hidden_states
        return [layer[0, 0, :].cpu().numpy() for layer in hidden_states]
    except Exception as e:
        return None

def get_embeddings_for_set(word_list, label_list, desc):
    all_embeddings = [[] for _ in range(7)]
    all_labels = []
    for i in tqdm(range(len(word_list)), desc=f"{desc} Embeddings"):
        pass



 Step 2: Probing Setup 
Using device: cuda


In [None]:
print("\n Step 3: Probing Fine Tuned Models ---")

probe_results_img = {f"layer_{i}": {'mae': [], 'r2': []} for i in range(7)}
probe_results_conc = {f"layer_{i}": {'mae': [], 'r2': []} for i in range(7)}
probe_results_nsyl = {f"layer_{i}": {'mae': [], 'r2': []} for i in range(7)}

N_SPLITS = 5
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

for fold_idx in range(N_SPLITS):
    print(f"\n--- Probing Fine-Tuned Model from Fold {fold_idx + 1} ---")

   
    ft_model_fkgl = DistilBertForSequenceClassification.from_pretrained(os.path.join(MODEL_DIR_FINE_TUNED, "Fkgl", f"best_fold_{fold_idx}"))
    ft_model_fkgl.to(device)
    ft_model_fre = DistilBertForSequenceClassification.from_pretrained(os.path.join(MODEL_DIR_FINE_TUNED, "Fre", f"best_fold_{fold_idx}"))
    ft_model_fre.to(device)
    
    # probing for imageability
    print("\nProbing for Imageability (IMG) on FKGL-tuned model...")
    all_embs_img = [[] for _ in range(7)]
    all_labels_img = []
    for i in tqdm(range(len(probing_dataset['words'])), desc="IMG Probing"):
        embs = get_single_word_embedding_per_layer(probing_dataset['words'][i], ft_model_fkgl, tokenizer, device)
        if embs:
            for l_idx, emb in enumerate(embs):
                all_embs_img[l_idx].append(emb)
            all_labels_img.append(probing_dataset['img'][i])

    concatenated_embs_img = [np.vstack(embs) for embs in all_embs_img]
    y_img = np.array(all_labels_img)
    for l_idx in range(7):
        X_train, X_val, y_train, y_val = train_test_split(concatenated_embs_img[l_idx], y_img, test_size=0.2, random_state=42)
        probe = LinearRegression(n_jobs=-1).fit(X_train, y_train)
        preds = probe.predict(X_val)
        probe_results_img[f"layer_{l_idx}"]['mae'].append(mean_absolute_error(y_val, preds))
        probe_results_img[f"layer_{l_idx}"]['r2'].append(r2_score(y_val, preds))

    # probing for concreteness
    print("\nProbing for Concreteness (CONC) on FKGL-tuned model...")
    all_embs_conc = [[] for _ in range(7)]
    all_labels_conc = []
    for i in tqdm(range(len(probing_dataset['words'])), desc="CONC Probing"):
        embs = get_single_word_embedding_per_layer(probing_dataset['words'][i], ft_model_fkgl, tokenizer, device)
        if embs:
            for l_idx, emb in enumerate(embs):
                all_embs_conc[l_idx].append(emb)
            all_labels_conc.append(probing_dataset['conc'][i])

    concatenated_embs_conc = [np.vstack(embs) for embs in all_embs_conc]
    y_conc = np.array(all_labels_conc)
    for l_idx in range(7):
        X_train, X_val, y_train, y_val = train_test_split(concatenated_embs_conc[l_idx], y_conc, test_size=0.2, random_state=42)
        probe = LinearRegression(n_jobs=-1).fit(X_train, y_train)
        preds = probe.predict(X_val)
        probe_results_conc[f"layer_{l_idx}"]['mae'].append(mean_absolute_error(y_val, preds))
        probe_results_conc[f"layer_{l_idx}"]['r2'].append(r2_score(y_val, preds))

    # extra step
    # probing for number of syllables
    print("\nProbing for Number of Syllables (nSyl) on FRE-tuned model...")
    all_embs_nsyl = [[] for _ in range(7)]
    all_labels_nsyl = []
    for i in tqdm(range(len(probing_dataset['words'])), desc="NSYL Probing"):
        embs = get_single_word_embedding_per_layer(probing_dataset['words'][i], ft_model_fre, tokenizer, device)
        if embs:
            for l_idx, emb in enumerate(embs):
                all_embs_nsyl[l_idx].append(emb)
            all_labels_nsyl.append(probing_dataset['nsyl'][i])

    concatenated_embs_nsyl = [np.vstack(embs) for embs in all_embs_nsyl]
    y_nsyl = np.array(all_labels_nsyl)
    for l_idx in range(7):
        X_train, X_val, y_train, y_val = train_test_split(concatenated_embs_nsyl[l_idx], y_nsyl, test_size=0.2, random_state=42)
        probe = LinearRegression(n_jobs=-1).fit(X_train, y_train)
        preds = probe.predict(X_val)
        probe_results_nsyl[f"layer_{l_idx}"]['mae'].append(mean_absolute_error(y_val, preds))
        probe_results_nsyl[f"layer_{l_idx}"]['r2'].append(r2_score(y_val, preds))
        
    # saving probing results for this fold
    current_fold_results = {
        'img': probe_results_img,
        'conc': probe_results_conc,
        'nsyl': probe_results_nsyl
    }
    results_file_path = os.path.join(PROBE_RESULTS_DIR, f"fold_{fold_idx}_probing_results.pkl")
    with open(results_file_path, 'wb') as f:
        pickle.dump(current_fold_results, f)
    print(f"Saved probing results for Fold {fold_idx+1} to {results_file_path}")



 Step 3: Probing Fine Tuned Models ---

--- Probing Fine-Tuned Model from Fold 1 ---

Probing for Imageability (IMG) on FKGL-tuned model...


IMG Probing: 100%|██████████| 5000/5000 [00:26<00:00, 185.43it/s]



Probing for Concreteness (CONC) on FKGL-tuned model...


CONC Probing: 100%|██████████| 5000/5000 [00:26<00:00, 192.00it/s]



Probing for Number of Syllables (nSyl) on FRE-tuned model...


NSYL Probing: 100%|██████████| 5000/5000 [00:26<00:00, 192.05it/s]


Saved probing results for Fold 1 to //home/sharmajidotdev/manish/probing_results_mrc/fold_0_probing_results.pkl

--- Probing Fine-Tuned Model from Fold 2 ---

Probing for Imageability (IMG) on FKGL-tuned model...


IMG Probing: 100%|██████████| 5000/5000 [00:26<00:00, 188.26it/s]



Probing for Concreteness (CONC) on FKGL-tuned model...


CONC Probing: 100%|██████████| 5000/5000 [00:25<00:00, 194.74it/s]



Probing for Number of Syllables (nSyl) on FRE-tuned model...


NSYL Probing: 100%|██████████| 5000/5000 [00:25<00:00, 195.55it/s]


Saved probing results for Fold 2 to //home/sharmajidotdev/manish/probing_results_mrc/fold_1_probing_results.pkl

--- Probing Fine-Tuned Model from Fold 3 ---

Probing for Imageability (IMG) on FKGL-tuned model...


IMG Probing: 100%|██████████| 5000/5000 [00:25<00:00, 199.49it/s]



Probing for Concreteness (CONC) on FKGL-tuned model...


CONC Probing: 100%|██████████| 5000/5000 [00:25<00:00, 197.28it/s]



Probing for Number of Syllables (nSyl) on FRE-tuned model...


NSYL Probing: 100%|██████████| 5000/5000 [00:25<00:00, 198.31it/s]


Saved probing results for Fold 3 to //home/sharmajidotdev/manish/probing_results_mrc/fold_2_probing_results.pkl

--- Probing Fine-Tuned Model from Fold 4 ---

Probing for Imageability (IMG) on FKGL-tuned model...


IMG Probing: 100%|██████████| 5000/5000 [00:25<00:00, 198.13it/s]



Probing for Concreteness (CONC) on FKGL-tuned model...


CONC Probing: 100%|██████████| 5000/5000 [00:26<00:00, 189.96it/s]



Probing for Number of Syllables (nSyl) on FRE-tuned model...


NSYL Probing: 100%|██████████| 5000/5000 [00:25<00:00, 199.83it/s]


Saved probing results for Fold 4 to //home/sharmajidotdev/manish/probing_results_mrc/fold_3_probing_results.pkl

--- Probing Fine-Tuned Model from Fold 5 ---

Probing for Imageability (IMG) on FKGL-tuned model...


IMG Probing: 100%|██████████| 5000/5000 [00:25<00:00, 198.20it/s]



Probing for Concreteness (CONC) on FKGL-tuned model...


CONC Probing: 100%|██████████| 5000/5000 [00:25<00:00, 198.23it/s]



Probing for Number of Syllables (nSyl) on FRE-tuned model...


NSYL Probing: 100%|██████████| 5000/5000 [00:25<00:00, 199.98it/s]


Saved probing results for Fold 5 to //home/sharmajidotdev/manish/probing_results_mrc/fold_4_probing_results.pkl


In [6]:
import pickle
import os


file_path = os.path.join("probing_results_mrc/fold_0_probing_results.pkl") 

try:
    with open(file_path, 'rb') as f:
        loaded_results = pickle.load(f)

    # printing contents of dictionary
    print("\n Successfully loaded results from pickle file.")
    print(loaded_results)

except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found.")
except Exception as e:
    print(f"An error occurred while loading the pickle file: {e}")


 Successfully loaded results from pickle file.
{'img': {'layer_0': {'mae': [57.68554726028442], 'r2': [-0.0018860426594220492]}, 'layer_1': {'mae': [74.18200390625], 'r2': [0.1552451113455685]}, 'layer_2': {'mae': [69.49881713867188], 'r2': [0.2351678415354158]}, 'layer_3': {'mae': [63.87974560546875], 'r2': [0.31318013776715203]}, 'layer_4': {'mae': [63.10501547241211], 'r2': [0.320938944600652]}, 'layer_5': {'mae': [65.35499395751953], 'r2': [0.28954012101569915]}, 'layer_6': {'mae': [65.99402333068848], 'r2': [0.2674454719934056]}}, 'conc': {'layer_0': {'mae': [49.7856877746582], 'r2': [-0.0016943585725441856]}, 'layer_1': {'mae': [68.77723388671875], 'r2': [0.10103681510127627]}, 'layer_2': {'mae': [64.50417651367188], 'r2': [0.17565636306282628]}, 'layer_3': {'mae': [59.574143432617184], 'r2': [0.26162629065377074]}, 'layer_4': {'mae': [58.585062652587894], 'r2': [0.2750231572140036]}, 'layer_5': {'mae': [60.961970581054686], 'r2': [0.22989584855368095]}, 'layer_6': {'mae': [61.1

In [7]:
import pickle
import os


file_path = os.path.join("probing_results_mrc/fold_1_probing_results.pkl") 

try:
    with open(file_path, 'rb') as f:
        loaded_results = pickle.load(f)

    # printing contents of dictionary
    print("\n Successfully loaded results from pickle file.")
    print(loaded_results)

except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found.")
except Exception as e:
    print(f"An error occurred while loading the pickle file: {e}")


 Successfully loaded results from pickle file.
{'img': {'layer_0': {'mae': [57.68554726028442, 57.68547592544556], 'r2': [-0.0018860426594220492, -0.0018861026978154172]}, 'layer_1': {'mae': [74.18200390625, 74.2060703125], 'r2': [0.1552451113455685, 0.1549061114151037]}, 'layer_2': {'mae': [69.49881713867188, 69.75066194152832], 'r2': [0.2351678415354158, 0.2401214027187688]}, 'layer_3': {'mae': [63.87974560546875, 63.90389703369141], 'r2': [0.31318013776715203, 0.32284884427422966]}, 'layer_4': {'mae': [63.10501547241211, 62.762173881530764], 'r2': [0.320938944600652, 0.3487336124458039]}, 'layer_5': {'mae': [65.35499395751953, 67.21907653808594], 'r2': [0.28954012101569915, 0.26327397881747094]}, 'layer_6': {'mae': [65.99402333068848, 67.22021569824219], 'r2': [0.2674454719934056, 0.2638259858205416]}}, 'conc': {'layer_0': {'mae': [49.7856877746582, 49.785639873504635], 'r2': [-0.0016943585725441856, -0.0016943994983706911]}, 'layer_1': {'mae': [68.77723388671875, 68.49631005859375

In [8]:
import pickle
import os


file_path = os.path.join("probing_results_mrc/fold_2_probing_results.pkl") 

try:
    with open(file_path, 'rb') as f:
        loaded_results = pickle.load(f)

    # printing contents of dictionary
    print("\n Successfully loaded results from pickle file.")
    print(loaded_results)

except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found.")
except Exception as e:
    print(f"An error occurred while loading the pickle file: {e}")


 Successfully loaded results from pickle file.
{'img': {'layer_0': {'mae': [57.68554726028442, 57.68547592544556, 57.6855634727478], 'r2': [-0.0018860426594220492, -0.0018861026978154172, -0.0018860290144655512]}, 'layer_1': {'mae': [74.18200390625, 74.2060703125, 74.223466796875], 'r2': [0.1552451113455685, 0.1549061114151037, 0.15610773788642207]}, 'layer_2': {'mae': [69.49881713867188, 69.75066194152832, 69.33259375], 'r2': [0.2351678415354158, 0.2401214027187688, 0.2337276223191972]}, 'layer_3': {'mae': [63.87974560546875, 63.90389703369141, 63.97260827636719], 'r2': [0.31318013776715203, 0.32284884427422966, 0.32595952717555754]}, 'layer_4': {'mae': [63.10501547241211, 62.762173881530764, 63.309162658691406], 'r2': [0.320938944600652, 0.3487336124458039, 0.3487606959404186]}, 'layer_5': {'mae': [65.35499395751953, 67.21907653808594, 65.1996748046875], 'r2': [0.28954012101569915, 0.26327397881747094, 0.31024247696870866]}, 'layer_6': {'mae': [65.99402333068848, 67.22021569824219, 

In [9]:
import pickle
import os


file_path = os.path.join("probing_results_mrc/fold_3_probing_results.pkl") 

try:
    with open(file_path, 'rb') as f:
        loaded_results = pickle.load(f)

    # printing contents of dictionary
    print("\n Successfully loaded results from pickle file.")
    print(loaded_results)

except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found.")
except Exception as e:
    print(f"An error occurred while loading the pickle file: {e}")


 Successfully loaded results from pickle file.
{'img': {'layer_0': {'mae': [57.68554726028442, 57.68547592544556, 57.6855634727478, 57.68550997161865], 'r2': [-0.0018860426594220492, -0.0018861026978154172, -0.0018860290144655512, -0.0018860740430082235]}, 'layer_1': {'mae': [74.18200390625, 74.2060703125, 74.223466796875, 74.446306640625], 'r2': [0.1552451113455685, 0.1549061114151037, 0.15610773788642207, 0.15532090364375462]}, 'layer_2': {'mae': [69.49881713867188, 69.75066194152832, 69.33259375, 69.43986096191406], 'r2': [0.2351678415354158, 0.2401214027187688, 0.2337276223191972, 0.2345976574478842]}, 'layer_3': {'mae': [63.87974560546875, 63.90389703369141, 63.97260827636719, 64.3610574951172], 'r2': [0.31318013776715203, 0.32284884427422966, 0.32595952717555754, 0.3183182963407478]}, 'layer_4': {'mae': [63.10501547241211, 62.762173881530764, 63.309162658691406, 63.267041595458984], 'r2': [0.320938944600652, 0.3487336124458039, 0.3487606959404186, 0.34793138689444436]}, 'layer_5

In [10]:
import pickle
import os


file_path = os.path.join("probing_results_mrc/fold_4_probing_results.pkl") 

try:
    with open(file_path, 'rb') as f:
        loaded_results = pickle.load(f)

    # printing contents of dictionary
    print("\n Successfully loaded results from pickle file.")
    print(loaded_results)

except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found.")
except Exception as e:
    print(f"An error occurred while loading the pickle file: {e}")


 Successfully loaded results from pickle file.
{'img': {'layer_0': {'mae': [57.68554726028442, 57.68547592544556, 57.6855634727478, 57.68550997161865, 57.685471061706544], 'r2': [-0.0018860426594220492, -0.0018861026978154172, -0.0018860290144655512, -0.0018860740430082235, -0.001886106791376907]}, 'layer_1': {'mae': [74.18200390625, 74.2060703125, 74.223466796875, 74.446306640625, 74.4329287109375], 'r2': [0.1552451113455685, 0.1549061114151037, 0.15610773788642207, 0.15532090364375462, 0.15597929881050865]}, 'layer_2': {'mae': [69.49881713867188, 69.75066194152832, 69.33259375, 69.43986096191406, 69.30650720214844], 'r2': [0.2351678415354158, 0.2401214027187688, 0.2337276223191972, 0.2345976574478842, 0.23479133568307764]}, 'layer_3': {'mae': [63.87974560546875, 63.90389703369141, 63.97260827636719, 64.3610574951172, 64.33193334960937], 'r2': [0.31318013776715203, 0.32284884427422966, 0.32595952717555754, 0.3183182963407478, 0.32081942250822604]}, 'layer_4': {'mae': [63.105015472412

In [4]:
print("\n Step 4: Final Summary ")
avg_probe_mae_img = [np.mean(probe_results_img[f"layer_{i}"]['mae']) for i in range(7)]
avg_probe_r2_img = [np.mean(probe_results_img[f"layer_{i}"]['r2']) for i in range(7)]
avg_probe_mae_conc = [np.mean(probe_results_conc[f"layer_{i}"]['mae']) for i in range(7)]
avg_probe_r2_conc = [np.mean(probe_results_conc[f"layer_{i}"]['r2']) for i in range(7)]
avg_probe_mae_nsyl = [np.mean(probe_results_nsyl[f"layer_{i}"]['mae']) for i in range(7)]
avg_probe_r2_nsyl = [np.mean(probe_results_nsyl[f"layer_{i}"]['r2']) for i in range(7)]

print("Final Probing Results on Fine-Tuned Models (Average across 5 folds)")
print("------------------------------------------------------------------")
print("Imageability Probe MAE:", avg_probe_mae_img)
print("Imageability Probe R2 Score:", avg_probe_r2_img)
print("\nConcreteness Probe MAE:", avg_probe_mae_conc)
print("Concreteness Probe R2 Score:", avg_probe_r2_conc)
print("\nNumber of Syllables Probe MAE:", avg_probe_mae_nsyl)
print("Number of Syllables Probe R2 Score:", avg_probe_r2_nsyl)


 Step 4: Final Summary 
Final Probing Results on Fine-Tuned Models (Average across 5 folds)
------------------------------------------------------------------
Imageability Probe MAE: [np.float64(57.68551353836059), np.float64(74.2981552734375), np.float64(69.46568819885255), np.float64(64.08984835205078), np.float64(63.011207717895516), np.float64(65.79935848388672), np.float64(67.12717821350097)]
Imageability Probe R2 Score: [np.float64(-0.0018860710412176296), np.float64(0.15551183262027152), np.float64(0.23568117194086874), np.float64(0.3202252456131826), np.float64(0.3441544032192376), np.float64(0.29678534986399957), np.float64(0.2637191228624701)]

Concreteness Probe MAE: [np.float64(49.78567456054687), np.float64(68.76251777343751), np.float64(64.57841984863282), np.float64(59.7552654296875), np.float64(58.74308222961427), np.float64(60.122992564392085), np.float64(62.1957493347168)]
Concreteness Probe R2 Score: [np.float64(-0.001694369862513767), np.float64(0.10273677165256075