In [11]:
import pandas as pd
import numpy as np
import os
import pickle
import textstat
import conllu
import requests
import torch
from datasets import Dataset
from tqdm.auto import tqdm
from sklearn.model_selection import KFold
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.linear_model import LinearRegression
from transformers import (
    DistilBertTokenizerFast,
    DistilBertForSequenceClassification,
    DistilBertModel
)
from sklearn.model_selection import train_test_split

print("Step 1: Data Preparation for Probing ")
file_path = "en_ewt-ud-train.conllu"

try:
    print(f"Loading data from local file: {file_path}...")
    with open(file_path, "r", encoding="utf-8") as f:
        conllu_text = f.read()
    
    sentences = []
    fkgl_scores = []
    fre_scores = []
    
    for line in conllu_text.splitlines():
        if line.startswith("# text = "):
            sentences.append(line.replace("# text = ", "").strip())
    
    print(f"Total sentences extracted: {len(sentences)}")

    SUBSET_SIZE = 2000
    if len(sentences) > SUBSET_SIZE:
        # shuffling indices and selecting subset
        indices = np.arange(len(sentences))
        np.random.seed(42)
        np.random.shuffle(indices)
        sentences = [sentences[i] for i in indices[:SUBSET_SIZE]]
    
    for sent in tqdm(sentences, desc="Processing sentences"):
        if sent and sent.strip():
            fkgl = textstat.flesch_kincaid_grade(sent)
            fre = textstat.flesch_reading_ease(sent)
        else:
            fkgl = 0.0
            fre = 0.0
        fkgl_scores.append(fkgl)
        fre_scores.append(fre)
        
    print(f"\nExample sentence: {sentences[0]}")
    print(f"FKGL: {fkgl_scores[0]}")
    print(f"FRE: {fre_scores[0]}")

    probing_dataset = Dataset.from_dict({
        'text': sentences,
        'fkgl_score': fkgl_scores,
        'fre_score': fre_scores
    })

except Exception as e:
    print(f"Error preparing dataset: {e}")
    exit()


Step 1: Data Preparation for Probing 
Loading data from local file: en_ewt-ud-train.conllu...
Total sentences extracted: 12544


Processing sentences: 100%|██████████| 2000/2000 [00:00<00:00, 10182.75it/s]


Example sentence: How was the play?
FKGL: -2.2299999999999986
FRE: 118.17500000000001





In [12]:
print("\n Step 2: Probing Setup")
model_name = 'distilbert-base-uncased'
tokenizer = DistilBertTokenizerFast.from_pretrained(model_name)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

MODEL_DIR_FINE_TUNED = "/media/manisha/DATA/dissertation_project/models/readability"
PROBE_RESULTS_DIR = "/media/manisha/DATA/dissertation_project/probing_readability_results"
os.makedirs(PROBE_RESULTS_DIR, exist_ok=True)


def get_sentence_embedding(sentence, model):
    encoded = tokenizer(
        sentence,
        return_tensors="pt",
        truncation=True,
        max_length=256,
        padding="max_length"
    )
    input_ids = encoded["input_ids"].to(device)
    attention_mask = encoded["attention_mask"].to(device)
    
    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask, output_hidden_states=True)
        hidden_states = outputs.hidden_states
    
    all_layer_embeddings = []
    for layer in hidden_states:
        token_embeddings = layer[0]  # remove batch dim
        sentence_embedding = token_embeddings.mean(dim=0).cpu().numpy()
        all_layer_embeddings.append(sentence_embedding)
    return all_layer_embeddings




 Step 2: Probing Setup
Using device: cpu


In [None]:
print("\n Step 3: Probing Fine-Tuned Models ")

probe_results_fkgl = {'mae': [], 'r2': []}
probe_results_fre = {'mae': [], 'r2': []}

N_SPLITS = 5
kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=42)

for fold_idx in range(N_SPLITS):
    print(f"\n Probing Fine-Tuned Model from Fold {fold_idx + 1} ")

    # loading fine-tuned FKGL and FRE models for this fold
    ft_model_fkgl = DistilBertForSequenceClassification.from_pretrained(os.path.join(MODEL_DIR_FINE_TUNED, "Fkgl", f"best_fold_{fold_idx}"))
    ft_model_fre = DistilBertForSequenceClassification.from_pretrained(os.path.join(MODEL_DIR_FINE_TUNED, "Fre", f"best_fold_{fold_idx}"))

    base_fkgl_model = ft_model_fkgl.distilbert
    base_fkgl_model.eval()
    base_fre_model = ft_model_fre.distilbert
    base_fre_model.eval()

    # probing for FKGL on FKGL-tuned model 
    print("\nExtracting embeddings from FKGL-tuned model and training probes for FKGL...")
    all_embs_fkgl_ft = [[] for _ in range(7)]
    all_labels_fkgl_ft = []
    for sent_idx in tqdm(range(len(probing_dataset)), desc="FKGL Probing"):
        sentence_embs = get_sentence_embedding(probing_dataset['text'][sent_idx], base_fkgl_model)
        for layer_idx, emb in enumerate(sentence_embs):
            all_embs_fkgl_ft[layer_idx].append(emb)
        all_labels_fkgl_ft.append(probing_dataset['fkgl_score'][sent_idx])

    concatenated_embs_fkgl_ft = [np.vstack(embs) for embs in all_embs_fkgl_ft]
    y_fkgl_ft = np.array(all_labels_fkgl_ft)

    for l_idx in range(7):
        X = concatenated_embs_fkgl_ft[l_idx]
        y = y_fkgl_ft
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
        probe = LinearRegression(n_jobs=-1).fit(X_train, y_train)
        y_pred = probe.predict(X_val)
        probe_results_fkgl['mae'].append(mean_absolute_error(y_val, y_pred))
        probe_results_fkgl['r2'].append(r2_score(y_val, y_pred))

    # probing for FRE on FRE-tuned model 
    print("\nExtracting embeddings from FRE-tuned model and training probes for FRE...")
    all_embs_fre_ft = [[] for _ in range(7)]
    all_labels_fre_ft = []
    for sent_idx in tqdm(range(len(probing_dataset)), desc="FRE Probing"):
        sentence_embs = get_sentence_embedding(probing_dataset['text'][sent_idx], base_fre_model)
        for layer_idx, emb in enumerate(sentence_embs):
            all_embs_fre_ft[layer_idx].append(emb)
        all_labels_fre_ft.append(probing_dataset['fre_score'][sent_idx])

    concatenated_embs_fre_ft = [np.vstack(embs) for embs in all_embs_fre_ft]
    y_fre_ft = np.array(all_labels_fre_ft)

    for l_idx in range(7):
        X = concatenated_embs_fre_ft[l_idx]
        y = y_fre_ft
        X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
        probe = LinearRegression(n_jobs=-1).fit(X_train, y_train)
        y_pred = probe.predict(X_val)
        probe_results_fre['mae'].append(mean_absolute_error(y_val, y_pred))
        probe_results_fre['r2'].append(r2_score(y_val, y_pred))

    # saving probing results for this fold
    current_fold_results = {
        'fkgl': {'mae': [probe_results_fkgl['mae'][-1]], 'r2': [probe_results_fkgl['r2'][-1]]},
        'fre': {'mae': [probe_results_fre['mae'][-1]], 'r2': [probe_results_fre['r2'][-1]]}
    }
    results_file_path = os.path.join(PROBE_RESULTS_DIR, f"fold_{fold_idx}_probing_results.pkl")
    with open(results_file_path, 'wb') as f:
        pickle.dump(current_fold_results, f)
    print(f"Saved probing results for Fold {fold_idx+1} to {results_file_path}")




 Step 3: Probing Fine-Tuned Models 

 Probing Fine-Tuned Model from Fold 1 

Extracting embeddings from FKGL-tuned model and training probes for FKGL...


FKGL Probing: 100%|██████████| 2000/2000 [12:37<00:00,  2.64it/s]



Extracting embeddings from FRE-tuned model and training probes for FRE...


FRE Probing: 100%|██████████| 2000/2000 [13:24<00:00,  2.49it/s]


Saved probing results for Fold 1 to /media/manisha/DATA/dissertation_project/probing_readability_results/fold_0_probing_results.pkl

 Probing Fine-Tuned Model from Fold 2 

Extracting embeddings from FKGL-tuned model and training probes for FKGL...


FKGL Probing: 100%|██████████| 2000/2000 [13:39<00:00,  2.44it/s]



Extracting embeddings from FRE-tuned model and training probes for FRE...


FRE Probing: 100%|██████████| 2000/2000 [13:31<00:00,  2.46it/s]


Saved probing results for Fold 2 to /media/manisha/DATA/dissertation_project/probing_readability_results/fold_1_probing_results.pkl

 Probing Fine-Tuned Model from Fold 3 

Extracting embeddings from FKGL-tuned model and training probes for FKGL...


FKGL Probing: 100%|██████████| 2000/2000 [13:06<00:00,  2.54it/s]



Extracting embeddings from FRE-tuned model and training probes for FRE...


FRE Probing: 100%|██████████| 2000/2000 [13:09<00:00,  2.53it/s]


Saved probing results for Fold 3 to /media/manisha/DATA/dissertation_project/probing_readability_results/fold_2_probing_results.pkl

 Probing Fine-Tuned Model from Fold 4 

Extracting embeddings from FKGL-tuned model and training probes for FKGL...


FKGL Probing: 100%|██████████| 2000/2000 [13:04<00:00,  2.55it/s]



Extracting embeddings from FRE-tuned model and training probes for FRE...


FRE Probing: 100%|██████████| 2000/2000 [14:02<00:00,  2.37it/s]


Saved probing results for Fold 4 to /media/manisha/DATA/dissertation_project/probing_readability_results/fold_3_probing_results.pkl

 Probing Fine-Tuned Model from Fold 5 

Extracting embeddings from FKGL-tuned model and training probes for FKGL...


FKGL Probing: 100%|██████████| 2000/2000 [14:52<00:00,  2.24it/s]



Extracting embeddings from FRE-tuned model and training probes for FRE...


FRE Probing: 100%|██████████| 2000/2000 [14:59<00:00,  2.22it/s]


Saved probing results for Fold 5 to /media/manisha/DATA/dissertation_project/probing_readability_results/fold_4_probing_results.pkl


In [None]:
import pickle
import os

file_path = os.path.join("probing_readability_results/fold_0_probing_results.pkl") 

try:
    with open(file_path, 'rb') as f:
        loaded_results = pickle.load(f)

    print("Successfully loaded results from the pickle file.")
    print(loaded_results)

except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found.")
except Exception as e:
    print(f"An error occurred while loading the pickle file: {e}")

Successfully loaded results from the pickle file.
{'fkgl': {'mae': [4.9219338785962465], 'r2': [0.5057721681406596]}, 'fre': {'mae': [35.00506759337448], 'r2': [0.45969566579847876]}}


In [None]:
import pickle
import os

file_path = os.path.join("probing_readability_results/fold_1_probing_results.pkl") 

try:
    with open(file_path, 'rb') as f:
        loaded_results = pickle.load(f)

    print("Successfully loaded results from the pickle file.")
    print(loaded_results)

except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found.")
except Exception as e:
    print(f"An error occurred while loading the pickle file: {e}")

Successfully loaded results from the pickle file.
{'fkgl': {'mae': [4.966901423163486], 'r2': [0.5166838429786087]}, 'fre': {'mae': [35.946583663366155], 'r2': [0.4462363840378545]}}


In [None]:
import pickle
import os

file_path = os.path.join("probing_readability_results/fold_2_probing_results.pkl") 

try:
    with open(file_path, 'rb') as f:
        loaded_results = pickle.load(f)

    print("Successfully loaded results from the pickle file.")
    print(loaded_results)

except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found.")
except Exception as e:
    print(f"An error occurred while loading the pickle file: {e}")

Successfully loaded results from the pickle file.
{'fkgl': {'mae': [4.986419403011531], 'r2': [0.4864674028827063]}, 'fre': {'mae': [35.78134271946314], 'r2': [0.4544334249441604]}}


In [None]:
import pickle
import os

file_path = os.path.join("probing_readability_results/fold_3_probing_results.pkl") 

try:
    with open(file_path, 'rb') as f:
        loaded_results = pickle.load(f)

    print("Successfully loaded results from the pickle file.")
    print(loaded_results)

except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found.")
except Exception as e:
    print(f"An error occurred while loading the pickle file: {e}")

Successfully loaded results from the pickle file.
{'fkgl': {'mae': [4.868628751805681], 'r2': [0.5061155317752646]}, 'fre': {'mae': [35.45475915371132], 'r2': [0.4523035083888456]}}


In [None]:
import pickle
import os

file_path = os.path.join("probing_readability_results/fold_4_probing_results.pkl") 

try:
    with open(file_path, 'rb') as f:
        loaded_results = pickle.load(f)

    print("Successfully loaded results from the pickle file.")
    print(loaded_results)

except FileNotFoundError:
    print(f"Error: The file '{file_path}' was not found.")
except Exception as e:
    print(f"An error occurred while loading the pickle file: {e}")

Successfully loaded results from the pickle file.
{'fkgl': {'mae': [4.851790768455783], 'r2': [0.5235318279700298]}, 'fre': {'mae': [35.04876806737689], 'r2': [0.4904187483331319]}}


In [19]:
print("\n Step 4: Final Summary ")

avg_probe_mae_fkgl_ft = [np.mean([probe_results_fkgl['mae'][fold_idx * 7 + l_idx] for fold_idx in range(5)]) for l_idx in range(7)]
avg_probe_r2_fkgl_ft = [np.mean([probe_results_fkgl['r2'][fold_idx * 7 + l_idx] for fold_idx in range(5)]) for l_idx in range(7)]
avg_probe_mae_fre_ft = [np.mean([probe_results_fre['mae'][fold_idx * 7 + l_idx] for fold_idx in range(5)]) for l_idx in range(7)]
avg_probe_r2_fre_ft = [np.mean([probe_results_fre['r2'][fold_idx * 7 + l_idx] for fold_idx in range(5)]) for l_idx in range(7)]

print("Final Probing Results on Fine-Tuned Models (Average across 5 folds)")
print("------------------------------------------------------------------")
print("FKGL Probe MAE:", avg_probe_mae_fkgl_ft)
print("FKGL Probe R2 Score:", avg_probe_r2_fkgl_ft)
print("\nFRE Probe MAE:", avg_probe_mae_fre_ft)
print("FRE Probe R2 Score:", avg_probe_r2_fre_ft)


 Step 4: Final Summary 
Final Probing Results on Fine-Tuned Models (Average across 5 folds)
------------------------------------------------------------------
FKGL Probe MAE: [np.float64(5.555387296536177), np.float64(4.700048560150932), np.float64(4.900910309580529), np.float64(4.7309735988843205), np.float64(4.685972081031418), np.float64(4.7612972925276065), np.float64(4.919134845006545)]
FKGL Probe R2 Score: [np.float64(0.34255324253564673), np.float64(0.46941276145043087), np.float64(0.4834561656992068), np.float64(0.511117092371004), np.float64(0.5823825657010069), np.float64(0.5339884442720914), np.float64(0.5077141547494538)]

FRE Probe MAE: [np.float64(39.39733959275429), np.float64(34.30090285071914), np.float64(34.624123772559145), np.float64(33.71717724920616), np.float64(34.083621528788335), np.float64(33.56014000628933), np.float64(35.447304239458404)]
FRE Probe R2 Score: [np.float64(0.29676646393810946), np.float64(0.4238114001969482), np.float64(0.45071553410782983), n