Block 1: Imports and Setup

In [6]:
import os

# Suppress messy TensorFlow warnings.
# This variable must be set BEFORE TensorFlow is imported: the native logging
# layer reads it when the library is first loaded, so assigning it afterwards
# has no effect on the INFO/WARNING lines printed during model loading.
# NOTE: in an already-running kernel where TensorFlow has been imported,
# restart the kernel for this setting to take effect.
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator

print("Libraries loaded.")

Libraries loaded.


Block 2: Configuration
Put your file paths here. This makes it easy to change folders without scrolling through logic code.

In [7]:
# --- CONFIGURATION ---

# Models under evaluation: one (name, saved-model path, square input size in px)
# entry per model. Replace the paths with your actual .keras file locations.
models_config = {
    name: {"path": weights_path, "size": input_size}
    for name, weights_path, input_size in (
        ("Model_AffectNet", "./affectnet_model.keras", 96),
        ("Model_FER2013", "./fer2013_model.keras", 96),
        ("Model_CKPlus", "./ckplus_model.keras", 96),
    )
}

# Test datasets: name -> path to the 'test' or 'valid' folder of each dataset
# (the folder is expected to contain one sub-folder per emotion class).
datasets_config = dict(
    Test_AffectNet="./Sorted_data/test",
    Test_FER2013="./fer2013/test",
    Test_CKPlus="./CK_Plus_Ready/test",
)

print("Configuration defined.")

Configuration defined.


Block 3: Helper Functions
These are the logic functions. You usually don't need to touch these after running them once.

In [8]:
def get_true_and_pred_labels(model, dataset_path, target_size,
                             model_class_names=None, rescale=1./255, batch_size=32):
    """
    Runs prediction and returns lists of True vs Predicted emotion names.

    Args:
        model: Object exposing ``predict(generator, verbose=0)`` that returns one
            row of per-class scores for every image.
        dataset_path: Directory containing one sub-folder per class.
        target_size: Side length in pixels; images are resized to
            ``(target_size, target_size)``.
        model_class_names: Optional sequence of class names ordered by the model's
            OUTPUT index (i.e. the class order used when the model was trained).
            When omitted, predictions are decoded with the test dataset's own
            folder ordering, which is only correct if the model was trained on
            folders with the same names in the same order.
        rescale: Multiplicative pixel scaling forwarded to ``ImageDataGenerator``.
            Pass ``None`` if the model expects raw 0-255 inputs (for example when
            it already contains its own rescaling layer).
        batch_size: Number of images per prediction batch.

    Returns:
        ``(true_labels, pred_labels)``: two equally ordered lists of class-name
        strings. A predicted index with no entry in the label map is reported
        as ``"Unknown"``.

    Raises:
        ValueError: If no images are found under ``dataset_path``.
    """
    # 1. Create Data Generator (input scaling must match what the model saw in training)
    datagen = ImageDataGenerator(rescale=rescale)

    # 2. Flow from directory
    # shuffle=False is CRITICAL to keep predictions aligned with true labels
    test_gen = datagen.flow_from_directory(
        dataset_path,
        target_size=(target_size, target_size),
        batch_size=batch_size,
        class_mode='categorical',
        shuffle=False
    )

    # 3. Get True Labels (Strings)
    # class_indices is {'anger': 0, 'happy': 1...}; invert it to index -> name
    dataset_class_map = {index: name for name, index in test_gen.class_indices.items()}
    true_labels = [dataset_class_map[int(i)] for i in test_gen.classes]

    # Fail early with a readable message instead of a cryptic error from predict()
    if not true_labels:
        raise ValueError(f"No images found under '{dataset_path}'")

    # 4. Predict
    print(f"   Running predictions on {len(true_labels)} images...")
    predictions = model.predict(test_gen, verbose=0)
    pred_indices = np.argmax(predictions, axis=1)

    # 5. Map Predictions to Strings
    # Prefer the model's own training-time class order when the caller supplies it;
    # otherwise fall back to assuming it matches this dataset's folder order.
    if model_class_names is None:
        model_class_map = dataset_class_map
    else:
        model_class_map = dict(enumerate(model_class_names))

    # A width mismatch means some outputs cannot be named (or some names can never
    # be predicted); surface it instead of silently producing "Unknown" labels.
    n_outputs = np.shape(predictions)[-1]
    if n_outputs != len(model_class_map):
        print(f"   [Warning] Model has {n_outputs} outputs but the label map has "
              f"{len(model_class_map)} classes; unmatched predictions become 'Unknown'.")

    pred_labels = [model_class_map.get(int(i), "Unknown") for i in pred_indices]

    return true_labels, pred_labels

def calculate_accuracy(true_labels, pred_labels):
    """
    Return the fraction of positions where the predicted label equals the true label.

    Labels are compared pairwise in order. The denominator is the number of true
    labels; an empty (or otherwise falsy) ``true_labels`` yields 0.0.
    """
    if true_labels:
        hits = 0
        for expected, predicted in zip(true_labels, pred_labels):
            if expected == predicted:
                hits += 1
        return hits / len(true_labels)
    return 0.0

Block 4: Execution Loop (The Heavy Work)
This block loads the models and runs the tests. If it crashes, you only need to rerun this part.

In [9]:
# Initialize empty table: one row per model, one column per test dataset
results_table = pd.DataFrame(index=models_config.keys(), columns=datasets_config.keys())

print("Starting Cross-Dataset Evaluation...\n")

for model_name, m_conf in models_config.items():
    print(f"=== EVALUATING {model_name} ===")

    # Guard ONLY the load step, so the "Failed to load" message is accurate and
    # a model that cannot be loaded is visibly marked in the table instead of
    # leaving its row as NaN.
    try:
        print(f"Loading model from {m_conf['path']}...")
        curr_model = load_model(m_conf['path'])
    except Exception as e:
        print(f"[Critical Error] Failed to load {model_name}: {e}\n")
        results_table.loc[model_name, :] = "Error"
        continue

    try:
        for data_name, data_path in datasets_config.items():
            print(f"Testing on {data_name}...", end="")

            # A failure on one dataset must not abort the remaining datasets
            try:
                true_lbls, pred_lbls = get_true_and_pred_labels(
                    curr_model,
                    data_path,
                    m_conf['size']
                )

                acc = calculate_accuracy(true_lbls, pred_lbls)
                results_table.loc[model_name, data_name] = f"{acc:.1%}"
                print(f" -> Accuracy: {acc:.1%}")

            except Exception as e:
                print(f"\n   [Error] Could not test on {data_name}: {e}")
                results_table.loc[model_name, data_name] = "Error"
    finally:
        # Clear memory to prevent crashing; runs even if the cell is interrupted,
        # so a partially evaluated model does not stay resident.
        tf.keras.backend.clear_session()
        del curr_model

    print("\n")

print("Evaluation Complete.")

Starting Cross-Dataset Evaluation...

=== EVALUATING Model_AffectNet ===
Loading model from ./affectnet_model.keras...


I0000 00:00:1765289174.845900   13188 gpu_device.cc:2020] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 5520 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 4070 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 8.9


Testing on Test_AffectNet...Found 2423 images belonging to 7 classes.
   Running predictions on 2423 images...


2025-12-09 16:06:20.854563: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:473] Loaded cuDNN version 91500


 -> Accuracy: 16.5%
Testing on Test_FER2013...Found 7178 images belonging to 7 classes.
   Running predictions on 7178 images...
 -> Accuracy: 24.7%
Testing on Test_CKPlus...Found 96 images belonging to 7 classes.
   Running predictions on 96 images...
 -> Accuracy: 8.3%


=== EVALUATING Model_FER2013 ===
Loading model from ./fer2013_model.keras...
Testing on Test_AffectNet...Found 2423 images belonging to 7 classes.
   Running predictions on 2423 images...
 -> Accuracy: 15.8%
Testing on Test_FER2013...Found 7178 images belonging to 7 classes.
   Running predictions on 7178 images...
 -> Accuracy: 13.3%
Testing on Test_CKPlus...Found 96 images belonging to 7 classes.
   Running predictions on 96 images...
 -> Accuracy: 5.2%


=== EVALUATING Model_CKPlus ===
Loading model from ./ckplus_model.keras...
Testing on Test_AffectNet...Found 2423 images belonging to 7 classes.
   Running predictions on 2423 images...
 -> Accuracy: 13.4%
Testing on Test_FER2013...Found 7178 images belonging to 7

Block 5: Display Results
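Run this to see your final table. Note that each test set has 7 classes, so chance-level accuracy is about 14.3%; values at or below that level (including a model scored on its own dataset) usually point to a preprocessing or label-order mismatch between training and evaluation rather than to model quality.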

In [10]:
# Show the model-by-dataset accuracy table built by the evaluation cell.
# Cells hold formatted percentage strings (or "Error"), not numeric values.
print("Final Cross-Corpus Accuracy Table:")
# NOTE(review): `display` is not imported in this notebook; presumably it is the
# helper that the IPython/Jupyter kernel makes available — confirm if this code
# is ever run as a plain Python script.
display(results_table)

# Optional: Save to CSV (uncomment to write the table next to the notebook)
# results_table.to_csv("cross_corpus_results.csv")

Final Cross-Corpus Accuracy Table:


Unnamed: 0,Test_AffectNet,Test_FER2013,Test_CKPlus
Model_AffectNet,16.5%,24.7%,8.3%
Model_FER2013,15.8%,13.3%,5.2%
Model_CKPlus,13.4%,2.6%,3.1%
