In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras import layers
from transformers import AutoTokenizer, TFAutoModel
import os
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, roc_curve, f1_score
from PIL import Image
from keras.callbacks import EarlyStopping

# Directory that the CSV's 'file_path' entries are joined onto
IMG_DIR = "../../.."

# CSV file the samples are read from
CSV_FILE_PATH = "dyslexia_data_with_text_duplicates_removed.csv"

# ResNet50 input geometry: height x width in pixels, and 3 (RGB) channels
IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS = 224, 224, 3

# Training hyperparameters
BATCH_SIZE = 16         # small because this dataset is small - only 95 unique samples
EPOCHS = 50             # epochs for training the classifier head
LEARNING_RATE = 0.0001  # learning rate for the classifier head

# random_state handed to both train_test_split calls
seed = 11
# --- Load and Prepare Data ---
print("Loading data from CSV...")
try:
    df = pd.read_csv(CSV_FILE_PATH)
except FileNotFoundError:
    # `exit()` is the interactive helper installed by `site` and ends the run
    # with status 0. Raising SystemExit with a message reports the failure on
    # stderr and yields a non-zero exit status instead.
    raise SystemExit(
        f"Error: CSV file not found at {CSV_FILE_PATH}. Please check the path."
    ) from None

# Every later step reads these three columns, so fail early and name what is missing.
required_columns = ('file_path', 'text', 'presence_of_dyslexia')
missing_columns = [column for column in required_columns if column not in df.columns]
if missing_columns:
    raise SystemExit(
        "Error: CSV must contain 'file_path', 'text', and 'presence_of_dyslexia' columns. "
        f"Missing: {missing_columns}"
    )

# Fill NaN with empty string, then convert to str so the tokenizer only ever sees strings
df['text'] = df['text'].fillna('').astype(str)

# Convert labels to integer type
df['presence_of_dyslexia'] = df['presence_of_dyslexia'].astype(int)

print(f"Total samples: {len(df)}")

# --- Image Preprocessing (Pre-loading all images) ---
print("Loading and preprocessing all images...")
all_images = []
failed_image_paths = []  # paths that were replaced by a black placeholder

# Only the path column is needed, so iterate it directly rather than paying
# for a per-row Series from DataFrame.iterrows().
for image_path in df['file_path']:
    full_image_path = os.path.join(IMG_DIR, image_path)
    try:
        img = tf.io.read_file(full_image_path)
        # Decode image
        img = tf.image.decode_jpeg(img, channels=IMG_CHANNELS)
        # Resize image
        img = tf.image.resize(img, [IMG_HEIGHT, IMG_WIDTH])
        # Apply ResNet50 specific preprocessing
        img = tf.keras.applications.resnet50.preprocess_input(img)
        # Store a NumPy array so the list is homogeneous with the placeholders below
        all_images.append(img.numpy())

    except Exception as e:
        # Deliberate best-effort: keep row/label alignment by inserting a
        # placeholder instead of dropping the sample.
        print(f"Error loading image {full_image_path}: {e}. Appending black image placeholder.")
        all_images.append(np.zeros((IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS), dtype=np.float32))
        failed_image_paths.append(full_image_path)

# Placeholders are still paired with real labels, so make their number visible.
if failed_image_paths:
    print(
        f"Warning: {len(failed_image_paths)} of {len(all_images)} images could not be "
        "loaded and were replaced by black placeholders."
    )

all_images = np.array(all_images)
print(f"All images loaded. Shape: {all_images.shape}")

# --- Text Tokenization (Pre-tokenizing all text) ---
print("Loading BERT tokenizer and tokenizing all text...")
tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")

# Tokenize the whole column in one batched call. With padding='max_length'
# every row has the same length, so the tokenizer returns 2-D arrays directly
# and no per-sample Python loop is needed.
encoded_inputs = tokenizer(
    df['text'].tolist(),
    padding='max_length',
    truncation=True,
    max_length=128,  # Consistent max length
    return_tensors='np',
)

all_input_ids = np.asarray(encoded_inputs['input_ids'])
all_attention_masks = np.asarray(encoded_inputs['attention_mask'])
print(f"All text tokenized. Input IDs shape: {all_input_ids.shape}, Attention Masks shape: {all_attention_masks.shape}")

# --- Split Preprocessed Data into Train, Validation, and Test Sets ---
print("Splitting data into training, validation, and test sets...")

# Stage 1: stratified split that holds out 30% of the samples
# (validation and test combined); the remainder is the training set.
(
    X_images_train, X_images_val_test,
    X_input_ids_train, X_input_ids_val_test,
    X_attention_masks_train, X_attention_masks_val_test,
    y_train, y_val_test,
) = train_test_split(
    all_images,
    all_input_ids,
    all_attention_masks,
    df['presence_of_dyslexia'].values,
    stratify=df['presence_of_dyslexia'].values,
    test_size=0.3,
    random_state=seed,
)

# Stage 2: stratified split of the held-out pool into two halves,
# i.e. validation and test each get 15% of the original data.
(
    X_images_val, X_images_test,
    X_input_ids_val, X_input_ids_test,
    X_attention_masks_val, X_attention_masks_test,
    y_val, y_test,
) = train_test_split(
    X_images_val_test,
    X_input_ids_val_test,
    X_attention_masks_val_test,
    y_val_test,
    stratify=y_val_test,
    test_size=0.5,
    random_state=seed,
)

print(f"Training samples: {len(y_train)}")
print(f"Validation samples: {len(y_val)}")
print(f"Test samples: {len(y_test)}")

# --- Create TensorFlow Datasets from NumPy arrays ---
def create_tf_dataset_from_np(images, input_ids, attention_masks, labels, augment=False):
    """
    Build a batched, prefetched ``tf.data.Dataset`` from in-memory arrays.

    Every element is a ``(features, label)`` pair in which ``features`` is a
    dict with the keys ``'image_input'``, ``'input_ids'`` and
    ``'attention_mask'``. Batches hold ``BATCH_SIZE`` elements.

    Args:
        images: Array sliced along its first axis into ``'image_input'``.
        input_ids: Array sliced along its first axis into ``'input_ids'``.
        attention_masks: Array sliced along its first axis into
            ``'attention_mask'``.
        labels: Array of targets, sliced in step with the features.
        augment: When true, a random left-right flip, brightness shift and
            contrast change are applied to each image before batching.

    Returns:
        The batched and prefetched dataset.
    """

    def _randomly_perturb(features, target):
        # Random image augmentation expressed with TF ops; the token inputs
        # and the label pass through untouched.
        image = features['image_input']
        image = tf.image.random_flip_left_right(image)
        image = tf.image.random_brightness(image, max_delta=0.2)
        image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
        features['image_input'] = image
        return features, target

    feature_arrays = {
        'image_input': images,
        'input_ids': input_ids,
        'attention_mask': attention_masks,
    }
    dataset = tf.data.Dataset.from_tensor_slices((feature_arrays, labels))

    if augment:
        dataset = dataset.map(_randomly_perturb, num_parallel_calls=tf.data.AUTOTUNE)

    return dataset.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

# Build the three datasets in train / validation / test order; augmentation
# is switched off for every split.
train_dataset, val_dataset, test_dataset = (
    create_tf_dataset_from_np(split_images, split_ids, split_masks, split_labels, augment=False)
    for split_images, split_ids, split_masks, split_labels in (
        (X_images_train, X_input_ids_train, X_attention_masks_train, y_train),
        (X_images_val, X_input_ids_val, X_attention_masks_val, y_val),
        (X_images_test, X_input_ids_test, X_attention_masks_test, y_test),
    )
)

print("TensorFlow Datasets created from preprocessed NumPy arrays.")

# --- Custom Keras Layer for BERT Model ---
class BertEmbeddingLayer(tf.keras.layers.Layer):
    """Keras layer wrapping a frozen pretrained transformer as a feature extractor.

    The wrapped model is loaded with ``TFAutoModel.from_pretrained`` and marked
    non-trainable. Calling the layer returns the last hidden state at sequence
    position 0 for every example in the batch.
    """

    def __init__(self, model_name, **kwargs):
        """
        Args:
            model_name: Checkpoint identifier passed to
                ``TFAutoModel.from_pretrained``.
            **kwargs: Forwarded to ``tf.keras.layers.Layer`` (e.g. ``name``).
        """
        super().__init__(**kwargs)
        # Remembered so get_config() reports the checkpoint actually loaded.
        self.model_name = model_name
        self.bert_model = TFAutoModel.from_pretrained(model_name)
        self.bert_model.trainable = False

    def call(self, inputs):
        """Return the position-0 hidden state for each example.

        Args:
            inputs: Dict holding ``'input_ids'`` and ``'attention_mask'``.
        """
        input_ids = inputs['input_ids']
        attention_mask = inputs['attention_mask']
        # The training=False argument is crucial to prevent issues with symbolic tensors
        bert_output = self.bert_model(input_ids, attention_mask=attention_mask, training=False)
        # The [CLS] token output is typically at index 0 of the last_hidden_state
        return bert_output.last_hidden_state[:, 0, :]

    def get_config(self):
        """Return the layer config, including the ``model_name`` given to ``__init__``."""
        config = super().get_config()
        # Report the stored name rather than a hard-coded checkpoint, so a layer
        # built from a different model serializes correctly.
        config.update({"model_name": self.model_name})
        return config

# --- Define the Multi-Modal Model ---
print("Building multi-modal model architecture...")

# ---- Image branch: ImageNet-weighted ResNet50 without its classification top ----
resnet_input = keras.Input(
    name='image_input',
    shape=(IMG_HEIGHT, IMG_WIDTH, IMG_CHANNELS),
)
resnet_model = keras.applications.ResNet50(
    input_tensor=resnet_input,
    weights='imagenet',
    include_top=False,
)

# The backbone is frozen; only the layers added below are trained
resnet_model.trainable = False
print("ResNet50 model loaded and layers frozen.")

# Pool the backbone output into one feature vector per image
image_features = layers.GlobalAveragePooling2D()(resnet_model.output)

# ---- Text branch: BERT wrapped in the custom layer ----
# Both token inputs are int32 sequences of length 128
token_input_spec = {'shape': (128,), 'dtype': tf.int32}
bert_input_ids = keras.Input(name='input_ids', **token_input_spec)
bert_attention_mask = keras.Input(name='attention_mask', **token_input_spec)

bert_embedding_extractor = BertEmbeddingLayer("bert-base-cased", name="bert_feature_extractor")
text_features = bert_embedding_extractor(
    {'input_ids': bert_input_ids, 'attention_mask': bert_attention_mask}
)
print("BERT model wrapped in custom layer and layers frozen.")

# ---- Fusion: join the image and text feature vectors ----
concatenated_features = layers.Concatenate()([image_features, text_features])

# ---- Classifier head (new layers to be trained) ----
classifier_head = layers.Dense(256, activation='relu')(concatenated_features)
classifier_head = layers.Dropout(0.5)(classifier_head)
# ALTERNATIVE is to add in extra layers to make the classifier head more
# complex, to try in future work:
#   classifier_head = layers.Dropout(0.3)(classifier_head)
#   classifier_head = layers.Dense(128, activation='relu')(classifier_head)
#   classifier_head = layers.Dropout(0.3)(classifier_head)
output_layer = layers.Dense(1, activation='sigmoid', name='output')(classifier_head)

# ---- Final multi-modal model, with inputs keyed like the dataset features ----
model_inputs = {
    'image_input': resnet_input,
    'input_ids': bert_input_ids,
    'attention_mask': bert_attention_mask,
}
model = keras.Model(inputs=model_inputs, outputs=output_layer)

# --- Compile and Train the Model ---
print("Compiling model...")
adam_optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
bce_loss = keras.losses.BinaryCrossentropy()
model.compile(optimizer=adam_optimizer, loss=bce_loss, metrics=['accuracy'])

model.summary()

# Stop once val_loss has not improved for 10 epochs and roll back to the
# best weights seen.
early_stopping_callback = EarlyStopping(
    monitor='val_loss', patience=10, restore_best_weights=True, verbose=1
)

print("Training model...")
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
    callbacks=[early_stopping_callback],
)

# --- Evaluation on Validation Set ---
print("\nEvaluating model on validation set...")
val_loss, val_accuracy = model.evaluate(val_dataset)
print(f"Validation Loss: {val_loss:.4f}")
print(f"Validation Accuracy: {val_accuracy:.4f}")

# Generate predictions and classification report for validation set.
# Predictions and labels are collected from the same batch, so they stay
# aligned. predict_on_batch is used because the batches are already formed:
# calling model.predict() per batch would start a fresh predict loop (and
# progress bar) on every iteration.
all_preds_val = []
all_labels_val = []
for inputs, labels in val_dataset:
    predictions = model.predict_on_batch(inputs)
    all_preds_val.extend(predictions.flatten().tolist())
    all_labels_val.extend(labels.numpy().flatten().tolist())

# Threshold the sigmoid outputs at 0.5; cast to int so predictions and labels
# share one dtype in the reports below.
binary_preds_val = (np.array(all_preds_val) > 0.5).astype(int)

print("\nValidation Classification Report:")
print(classification_report(all_labels_val, binary_preds_val))

print("\nValidation Confusion Matrix:")
print(confusion_matrix(all_labels_val, binary_preds_val))

# --- Evaluation on Test Set ---
print("\nEvaluating model on TEST set...")
test_loss, test_accuracy = model.evaluate(test_dataset)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")

# Generate predictions and classification report for test set
all_preds_test = []
all_labels_test = []
for inputs, labels in test_dataset:
    predictions = model.predict(inputs)
    all_preds_test.extend(predictions.flatten().tolist())
    all_labels_test.extend(labels.numpy().flatten().tolist())

binary_preds_test = np.array(all_preds_test) > 0.5

print("\nTest Classification Report:")
print(classification_report(all_labels_test, binary_preds_test))

print("\nTest Confusion Matrix:")
cm = confusion_matrix(all_labels_test, binary_preds_test)
print(cm)

# --- Calculate and Print Additional Metrics for Test Results ---
print("\nAdditional Metrics for Test Results:")

# Extract values from the confusion matrix
# cm = [[TN, FP], [FN, TP]]
tn, fp, fn, tp = cm.ravel()

# Sensitivity (Recall)
sensitivity = tp / (tp + fn)
print(f"Sensitivity (Recall): {sensitivity:.4f}")

# Specificity
specificity = tn / (tn + fp)
print(f"Specificity: {specificity:.4f}")

# AUC-ROC
# roc_auc_score requires probabilities, not binary predictions for `y_score`
# all_preds_test contains the raw probabilities (0-1)
auc_roc = roc_auc_score(all_labels_test, all_preds_test)
print(f"AUC-ROC: {auc_roc:.4f}")

# F1 Score
f1 = f1_score(all_labels_test, binary_preds_test)
print(f"F1 Score: {f1:.4f}")

print("\nModel training and evaluation complete.")

2025-08-21 22:58:05.432886: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-08-21 22:58:05.450002: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX512F AVX512_VNNI AVX512_BF16, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


Loading data from CSV...
Total samples: 95
Loading and preprocessing all images...
All images loaded. Shape: (95, 224, 224, 3)
Loading BERT tokenizer and tokenizing all text...
All text tokenized. Input IDs shape: (95, 128), Attention Masks shape: (95, 128)
Splitting data into training, validation, and test sets...




Training samples: 66
Validation samples: 14
Test samples: 15
TensorFlow Datasets created from preprocessed NumPy arrays.
Building multi-modal model architecture...
ResNet50 model loaded and layers frozen.


Some weights of the PyTorch model were not used when initializing the TF 2.0 model TFBertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing TFBertModel from a PyTorch model trained on another task or with another architecture (e.g. initializing a TFBertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFBertModel from a PyTorch model that you expect to be exactly identical (e.g. initializing a TFBertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFBertModel were initialized from the PyTorch model.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFBertModel for predictions w

BERT model wrapped in custom layer and layers frozen.
Compiling model...


Training model...
Epoch 1/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 965ms/step - accuracy: 0.5863 - loss: 0.7566 - val_accuracy: 0.8571 - val_loss: 0.4926
Epoch 2/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 544ms/step - accuracy: 0.6964 - loss: 0.5027 - val_accuracy: 1.0000 - val_loss: 0.3369
Epoch 3/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 580ms/step - accuracy: 0.8092 - loss: 0.4033 - val_accuracy: 1.0000 - val_loss: 0.2520
Epoch 4/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 552ms/step - accuracy: 0.7953 - loss: 0.4099 - val_accuracy: 1.0000 - val_loss: 0.1900
Epoch 5/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 547ms/step - accuracy: 0.9923 - loss: 0.1776 - val_accuracy: 1.0000 - val_loss: 0.1484
Epoch 6/50
[1m5/5[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 545ms/step - accuracy: 0.8985 - loss: 0.2523 - val_accuracy: 1.0000 - val_loss: 0.1165
Epoch 7/50
[1m5/5[0

2025-08-21 23:00:38.210562: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 514ms/step - accuracy: 0.9333 - loss: 0.0907
Test Loss: 0.0907
Test Accuracy: 0.9333
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step

Test Classification Report:
              precision    recall  f1-score   support

           0       1.00      0.88      0.93         8
           1       0.88      1.00      0.93         7

    accuracy                           0.93        15
   macro avg       0.94      0.94      0.93        15
weighted avg       0.94      0.93      0.93        15


Test Confusion Matrix:
[[7 1]
 [0 7]]

Additional Metrics for Test Results:
Sensitivity (Recall): 1.0000
Specificity: 0.8750
AUC-ROC: 1.0000
F1 Score: 0.9333

Model training and evaluation complete.


2025-08-21 23:00:40.682175: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
