In [1]:
import os
import shutil
import gc
import numpy as np
import pandas as pd
import tensorflow as tf
import cv2
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import Sequence
from sklearn.preprocessing import LabelEncoder

# --- 1. IMPORT PREPROCESSING ---
from tensorflow.keras.applications.efficientnet import preprocess_input as eff_preprocess
from tensorflow.keras.applications.resnet50 import preprocess_input as res_preprocess
from tensorflow.keras.applications.xception import preprocess_input as xcp_preprocess

# --- 2. CONFIGURATION & DATA LOADING ---
MODEL_DIR = "models_zoo_1"
DATA_DIR = r"C:\Users\User\Multimodel AI\data_1"
BATCH_SIZE = 16

print("--- Loading Metadata ---")
# One metadata table per modality; both CSVs sit directly under DATA_DIR.
test_df, test_aud = (
    pd.read_csv(os.path.join(DATA_DIR, csv_name))
    for csv_name in ("test_face.csv", "test_audio.csv")
)

# --- 3. FIX: ENCODE LABELS ---
# The encoder is fitted on the fixed label vocabulary below rather than on the
# labels found in the CSVs, so the integer codes do not depend on which
# classes happen to be present in the test data.
all_labels = ['angry', 'disgust', 'fear', 'happy', 'neutral', 'sad', 'surprise']
le = LabelEncoder().fit(all_labels)

# Both tables get the same two derived columns: the label as a string and its
# integer code from the shared encoder.
for _frame in (test_df, test_aud):
    _frame['label_str'] = _frame['label'].astype(str)
    _frame['label_encoded'] = le.transform(_frame['label_str'])
del _frame

# Name of the string-label column handed to the image generator as y_col.
label_column = 'label_str'

# --- 4. AUDIO DATA GENERATOR ---
class NpyDataGenerator(Sequence):
    """Keras ``Sequence`` serving batches of arrays stored as ``.npy`` files.

    Every row of ``df`` must provide an ``audio_path`` column (path of a
    ``.npy`` file) and a ``label_encoded`` column (integer class index).

    Args:
        df: DataFrame describing the samples.
        preprocess_f: Callable applied to each float32 array after resizing
            and channel stacking.
        batch_size: Number of samples per batch; the last batch may be
            smaller.
        target_size: ``(height, width)`` each loaded array is resized to.
        shuffle: When true, the sample order is reshuffled by
            ``on_epoch_end`` (which is also called once at construction).
        num_classes: Length of the one-hot label vectors.
    """

    def __init__(self, df, preprocess_f, batch_size=16, target_size=(224, 224),
                 shuffle=False, num_classes=7):
        # Initialise the Keras base class first; skipping this makes Keras
        # emit its "super not called" warning when the generator is used.
        super().__init__()
        self.df = df
        self.preprocess_f = preprocess_f
        self.batch_size = batch_size
        # Stored as a tuple so the comparison with ``ndarray.shape[:2]``
        # behaves the same whether a list or a tuple was passed in.
        self.target_size = tuple(target_size)
        # Previously never stored, which silently disabled shuffling.
        self.shuffle = shuffle
        self.num_classes = num_classes
        self.indices = np.arange(len(self.df))
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        return int(np.ceil(len(self.df) / self.batch_size))

    def on_epoch_end(self):
        """Reshuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __getitem__(self, index):
        """Load, resize and preprocess batch ``index``.

        Returns:
            Tuple ``(X, y)``: the stacked preprocessed arrays and the matching
            one-hot label rows.
        """
        start = index * self.batch_size
        batch_df = self.df.iloc[self.indices[start:start + self.batch_size]]
        height, width = self.target_size

        X, y = [], []
        # Only two columns are needed, so zip them instead of building a
        # Series per row with iterrows().
        for npy_path, label in zip(batch_df['audio_path'],
                                   batch_df['label_encoded']):
            spec = np.load(npy_path)

            # Resize before channel stacking. ``shape[:2]`` is (rows, cols)
            # while cv2.resize expects dsize as (width, height), so the two
            # values are swapped for the call.
            if spec.shape[:2] != (height, width):
                spec = cv2.resize(spec, (width, height))

            # Replicate a single-plane array into three identical channels.
            if spec.ndim == 2:
                spec = np.stack((spec,) * 3, axis=-1)

            X.append(self.preprocess_f(spec.astype(np.float32)))
            y.append(tf.keras.utils.to_categorical(label, self.num_classes))

        return np.array(X), np.array(y)

# --- 5. PREPROCESSING WRAPPERS ---
def eff_audio_wrapper(img):
    """Apply the EfficientNet ``preprocess_input`` to an audio array.

    An array whose maximum value is at most 1.0 is treated as unit-scaled and
    multiplied by 255 first; any other array is passed through unchanged.
    """
    looks_unit_scaled = np.max(img) <= 1.0
    scaled = img * 255.0 if looks_unit_scaled else img
    return eff_preprocess(scaled)

def resnet_audio_wrapper(img):
    """Apply the ResNet50 ``preprocess_input`` to an audio array.

    The ResNet preprocessing is meant for 0-255 inputs, so an array whose
    maximum value is at most 1.0 is multiplied by 255 before being handed
    over; any other array is passed through unchanged.
    """
    looks_unit_scaled = np.max(img) <= 1.0
    scaled = img * 255.0 if looks_unit_scaled else img
    return res_preprocess(scaled)

def get_preprocess_func(model_name, modality):
    """Pick the preprocessing callable for a model from its name and modality.

    Rules are tried in order and the first whose marker is a (case-sensitive)
    substring of ``model_name`` wins. Audio models consult the audio-specific
    wrappers first and then fall through to the image rules; anything that
    matches no rule gets the EfficientNet preprocessing.
    """
    image_rules = (
        ("ResNet", res_preprocess),
        ("Xception", xcp_preprocess),
    )
    audio_rules = (
        ("ResNet", resnet_audio_wrapper),  # e.g. 'Audio_ResNet50_Augmented_Balanced'
        ("EfficientNet", eff_audio_wrapper),
    )

    rules = audio_rules + image_rules if modality == "AUDIO" else image_rules
    for marker, func in rules:
        if marker in model_name:
            return func

    # Default (EfficientNet and others)
    return eff_preprocess

# --- 6. EVALUATION FUNCTION ---
def _infer_target_size(model, default=(224, 224)):
    """Return the model's ``(height, width)`` input size, or ``default``."""
    try:
        height, width = model.input_shape[1:3]
    except (AttributeError, TypeError, ValueError):
        # No usable input_shape, or fewer than two spatial dimensions.
        return default
    # Dynamic (None) dimensions or a multi-input shape list cannot be used as
    # a resize target either.
    if not isinstance(height, int) or not isinstance(width, int):
        return default
    return (height, width)


def evaluate_candidate(name, modality, test_df_data):
    """Evaluate one saved model on the test data and return its accuracy.

    Args:
        name: Model file stem; the model is loaded from
            ``MODEL_DIR/<name>.keras``.
        modality: ``"AUDIO"`` to feed ``.npy`` arrays through
            ``NpyDataGenerator``; any other value uses the image pipeline
            reading the ``face_path`` column.
        test_df_data: DataFrame with the test samples for that modality.

    Returns:
        The accuracy reported by ``model.evaluate``, or ``-1.0`` when the
        model file does not exist or evaluation raises.
    """
    path = os.path.join(MODEL_DIR, f"{name}.keras")
    if not os.path.exists(path):
        print(f"   [Skipped] {name} not found.")
        return -1.0

    print(f"   Testing {name}...", end="")
    model = None
    try:
        model = load_model(path, compile=False)
        model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

        pre_func = get_preprocess_func(name, modality)
        target_size = _infer_target_size(model)

        if modality == "AUDIO":
            gen = NpyDataGenerator(test_df_data, pre_func, BATCH_SIZE, target_size=target_size)
        else:
            datagen = ImageDataGenerator(preprocessing_function=pre_func)
            gen = datagen.flow_from_dataframe(
                test_df_data, x_col='face_path', y_col=label_column,
                # Pin the class -> index mapping to the LabelEncoder order so
                # it matches the audio pipeline and does not depend on which
                # labels happen to occur in the test CSV.
                classes=le.classes_.tolist(),
                target_size=target_size, batch_size=BATCH_SIZE,
                class_mode='categorical', shuffle=False,
            )

        _, acc = model.evaluate(gen, verbose=0)
        print(f" Score: {acc*100:.2f}%")
        return acc
    except Exception as e:
        # Best-effort sweep: report the failure and let the caller move on to
        # the next candidate.
        print(f" Error: {e}")
        return -1.0
    finally:
        # Release the model on the failure path too, so a candidate that
        # errors out does not keep its memory while later ones are loaded.
        model = None
        tf.keras.backend.clear_session()
        gc.collect()

# --- 7. EXECUTION ---
AUDIO_CANDIDATES = [
    "Audio_Baseline_CNN",
    "Audio_ResNet50",
    "Audio_CRNN_LSTM",
    "Audio_EfficientNet_Refined",
    "Audio_ResNet50_Augmented_Balanced",
    "Audio_EfficientNet_Balanced",
]

FACE_CANDIDATES = [
    "Face_Baseline_CNN",
    "Face_Xception",
    "Face_ResNet50",
    "efficientnet_improved",
    "Face_EfficientNet_Balanced",
]


def _pick_best(candidates, modality, frame):
    """Evaluate every candidate in order and return ``(best_name, best_score)``.

    A candidate only replaces the current best when its score is strictly
    higher, so ties keep the earlier model. If nothing scores above -1.0 the
    result is ``(None, -1.0)``.
    """
    winner, top_score = None, -1.0
    for candidate in candidates:
        result = evaluate_candidate(candidate, modality, frame)
        if result > top_score:
            winner, top_score = candidate, result
    return winner, top_score


print("\n--- EVALUATING AUDIO MODELS ---")
best_audio_name, best_audio_score = _pick_best(AUDIO_CANDIDATES, "AUDIO", test_aud)

print("\n--- EVALUATING FACE MODELS ---")
best_face_name, best_face_score = _pick_best(FACE_CANDIDATES, "FACE", test_df)


--- Loading Metadata ---

--- EVALUATING AUDIO MODELS ---
   Testing Audio_Baseline_CNN...

  self._warn_if_super_not_called()


 Score: 40.55%

   Testing Audio_ResNet50... Score: 16.83%
   Testing Audio_CRNN_LSTM... Score: 43.50%
   Testing Audio_EfficientNet_Refined... Score: 55.02%
   Testing Audio_ResNet50_Augmented_Balanced... Score: 52.95%
   Testing Audio_EfficientNet_Balanced... Score: 56.00%

--- EVALUATING FACE MODELS ---
   Testing Face_Baseline_CNN...Found 1016 validated image filenames belonging to 7 classes.
 Score: 1.57%
   Testing Face_Xception...Found 1016 validated image filenames belonging to 7 classes.
 Score: 37.50%
   Testing Face_ResNet50...Found 1016 validated image filenames belonging to 7 classes.
 Error: Graph execution error:

Detected at node StatefulPartitionedCall/Face_ResNet50_1/conv5_block1_0_conv_1/BiasAdd defined at (most recent call last):
<stack traces unavailable>
Operation received an exception:Status: 1, message: could not create a memory object, in file tensorflow/core/kernels/mkl/mkl_conv_ops.cc:1112
	 [[{{node StatefulPartitionedCall/Face_ResNet50_1/conv5_block1_0_conv