# Language Identification from MFCC Images (En/Es/Fr)

This notebook loads MFCC images from `MFCC_images/En|Es|Fr`, builds three models (VGG16, a simple CNN, and an AlexNet-like model), trains them, and reports accuracy and AUC.

Notes:
- Images are expected under `MFCC_images/<ClassName>/*.png` (RGB, 224x224).
- If you don't have `matplotlib`, `scikit-learn`, or `tensorflow` installed, run the install cell first.

In [6]:
# Clean, pinned installation to resolve NumPy ABI mismatch
# IMPORTANT: After running this cell, restart the kernel (Kernel > Restart) before importing TensorFlow.
import sys, subprocess, importlib

PINNED = [
    'numpy==1.26.4',            # last stable 1.x (many wheels built against this)
    'tensorflow==2.15.0',        # works with numpy 1.26 on Windows (Python <=3.11 recommended)
    'matplotlib==3.8.2',
    'pillow==10.2.0'
]

# Force reinstall to overwrite any previously compiled against NumPy 2.x
for spec in PINNED:
    pkg = spec.split('==')[0]
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', '--no-cache-dir', '--force-reinstall', spec])

# Verify NumPy version
import numpy as np
print('NumPy version after pin:', np.__version__)
major = int(np.__version__.split('.')[0])
if major >= 2:
    raise RuntimeError('NumPy is still >=2; downgrade failed. Please create a fresh environment.')

print('\nIf you still get the dtype size error:')
print('1. Restart kernel now.')
print('2. If using Anaconda base with Python 3.12, create a new env with Python 3.10 or 3.11:')
print('   conda create -n langid python=3.11 numpy=1.26.4 tensorflow=2.15.0 matplotlib pillow -y')
print('   conda activate langid')
print('3. Reopen this notebook and re-run from the top.')


CalledProcessError: Command '['c:\\Users\\Lukita\\anaconda3\\python.exe', '-m', 'pip', 'install', '--no-cache-dir', '--force-reinstall', 'tensorflow==2.15.0']' returned non-zero exit status 1.

In [3]:
# Imports and configuration
import os, math, datetime, random
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import matplotlib.pyplot as plt

DATA_DIR = 'MFCC_images'  # expects subfolders En/Es/Fr
IMG_SIZE = (224, 224)
BATCH_SIZE = 16
VAL_SPLIT = 0.2
SEED = 42
EPOCHS = 10
assert os.path.isdir(DATA_DIR), f'Folder not found: {DATA_DIR}'
classes = sorted([d for d in os.listdir(DATA_DIR) if os.path.isdir(os.path.join(DATA_DIR, d))])
print('Classes:', classes)
NUM_CLASSES = len(classes)


ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject

In [None]:
# Datasets (train/val) from directory
train_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATA_DIR,
    validation_split=VAL_SPLIT,
    subset='training',
    seed=SEED,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE)
val_ds = tf.keras.preprocessing.image_dataset_from_directory(
    DATA_DIR,
    validation_split=VAL_SPLIT,
    subset='validation',
    seed=SEED,
    image_size=IMG_SIZE,
    batch_size=BATCH_SIZE)

train_ds = train_ds.prefetch(tf.data.AUTOTUNE)
val_ds = val_ds.prefetch(tf.data.AUTOTUNE)


In [None]:
# Define three models: VGG16-transfer, SimpleCNN, AlexNet-like
def build_vgg16(num_classes):
    base = tf.keras.applications.VGG16(include_top=False, weights='imagenet', input_shape=IMG_SIZE + (3,))
    base.trainable = False
    inputs = keras.Input(shape=IMG_SIZE + (3,))
    x = tf.keras.applications.vgg16.preprocess_input(inputs)
    x = base(x, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    return keras.Model(inputs, outputs)

def build_simple_cnn(num_classes):
    inputs = keras.Input(shape=IMG_SIZE + (3,))
    x = layers.Rescaling(1./255)(inputs)
    for f in [32, 64, 128]:
        x = layers.Conv2D(f, 3, padding='same', activation='relu')(x)
        x = layers.MaxPooling2D()(x)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dense(128, activation='relu')(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    return keras.Model(inputs, outputs)

def build_alexnet_like(num_classes):
    inputs = keras.Input(shape=IMG_SIZE + (3,))
    x = layers.Rescaling(1./255)(inputs)
    x = layers.Conv2D(96, 11, strides=4, activation='relu')(x)
    x = layers.MaxPooling2D(pool_size=3, strides=2)(x)
    x = layers.Conv2D(256, 5, padding='same', activation='relu')(x)
    x = layers.MaxPooling2D(pool_size=3, strides=2)(x)
    x = layers.Conv2D(384, 3, padding='same', activation='relu')(x)
    x = layers.Conv2D(384, 3, padding='same', activation='relu')(x)
    x = layers.Conv2D(256, 3, padding='same', activation='relu')(x)
    x = layers.MaxPooling2D(pool_size=3, strides=2)(x)
    x = layers.Flatten()(x)
    x = layers.Dense(4096, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    x = layers.Dense(4096, activation='relu')(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(num_classes, activation='softmax')(x)
    return keras.Model(inputs, outputs)


In [None]:
# Train/evaluate helper (TensorFlow-only metrics)
def macro_auc_tf(y_true_oh, y_prob):
    # Compute per-class AUC and average (macro)
    auc_vals = []
    for i in range(y_true_oh.shape[1]):
        m = tf.keras.metrics.AUC()
        m.update_state(y_true_oh[:, i], y_prob[:, i])
        auc_vals.append(m.result().numpy())
    return float(np.nanmean(auc_vals))


def train_and_evaluate(model_name, build_fn, train_ds, val_ds, epochs=EPOCHS):
    model = build_fn(NUM_CLASSES)
    model.compile(
        optimizer=keras.optimizers.Adam(1e-4),
        loss='sparse_categorical_crossentropy' if train_ds.element_spec[1].shape.rank == 0 else 'categorical_crossentropy',
        metrics=[keras.metrics.CategoricalAccuracy(name='acc'), keras.metrics.AUC(name='auc_macro', multi_label=True)],
    )
    ckpt_path = f'model_{model_name}_best.keras'
    callbacks = [
        keras.callbacks.EarlyStopping(monitor='val_acc', patience=3, mode='max', restore_best_weights=True),
        keras.callbacks.ModelCheckpoint(ckpt_path, monitor='val_acc', mode='max', save_best_only=True)
    ]
    hist = model.fit(train_ds, validation_data=val_ds, epochs=epochs, callbacks=callbacks)

    # Evaluate on validation
    eval_res = model.evaluate(val_ds, verbose=0)
    metrics = dict(zip(model.metrics_names, eval_res))

    # TensorFlow-based macro AUC over classes
    y_true = []
    y_prob = []
    for xb, yb in val_ds:
        if len(yb.shape) == 1:
            yb = tf.one_hot(yb, depth=NUM_CLASSES)
        y_true.append(yb.numpy())
        y_prob.append(model.predict(xb, verbose=0))
    y_true = np.concatenate(y_true, axis=0)
    y_prob = np.concatenate(y_prob, axis=0)
    auc_ovr = macro_auc_tf(y_true, y_prob)

    return model, hist.history, metrics, auc_ovr

In [None]:
# Run training for all three models
results = {}
for name, builder in [('vgg16', build_vgg16), ('cnn', build_simple_cnn), ('alexnet', build_alexnet_like)]:
    print(f'\n=== Training {name} ===')
    model, history, metrics, auc_ovr = train_and_evaluate(name, builder, train_ds, val_ds, epochs=EPOCHS)
    results[name] = {'history': history, 'metrics': metrics, 'auc_ovr': auc_ovr}
    print(f"{name} val metrics: {metrics}; AUC(OVR, macro)={auc_ovr:.4f}")

results

In [None]:
# Plot history and confusion matrix for best model (TF-only)
import pandas as pd

def plot_history(hist, title):
    plt.figure(figsize=(10,4))
    plt.subplot(1,2,1)
    plt.plot(hist.get('acc', []), label='train acc')
    plt.plot(hist.get('val_acc', []), label='val acc')
    plt.legend(); plt.title(title + ' accuracy')
    plt.subplot(1,2,2)
    plt.plot(hist.get('auc_macro', []), label='train auc')
    plt.plot(hist.get('val_auc_macro', []), label='val auc')
    plt.legend(); plt.title(title + ' AUC')
    plt.tight_layout(); plt.show()

# find best by val_acc
best_name = max(results.keys(), key=lambda n: results[n]['metrics'].get('val_acc', 0))
print('Best by val_acc:', best_name)
plot_history(results[best_name]['history'], best_name)

# Build model again to get predictions for confusion matrix
best_builder = dict(vgg16=build_vgg16, cnn=build_simple_cnn, alexnet=build_alexnet_like)[best_name]
best_model = best_builder(NUM_CLASSES)
best_model.load_weights(f'model_{best_name}_best.keras')

# Collect predictions
y_true_idx = []
y_pred_idx = []
for xb, yb in val_ds:
    if len(yb.shape) == 1:
        y_true_idx.extend(yb.numpy().tolist())
        probs = best_model.predict(xb, verbose=0)
        y_pred_idx.extend(np.argmax(probs, axis=1).tolist())
    else:
        y_true_idx.extend(np.argmax(yb.numpy(), axis=1).tolist())
        probs = best_model.predict(xb, verbose=0)
        y_pred_idx.extend(np.argmax(probs, axis=1).tolist())

# Confusion matrix
cmat = tf.math.confusion_matrix(y_true_idx, y_pred_idx, num_classes=NUM_CLASSES).numpy()
print('Confusion matrix:\n', cmat)

# Overall accuracy
overall_acc = (np.trace(cmat) / np.sum(cmat)) if np.sum(cmat) > 0 else float('nan')
print(f'Overall accuracy (val): {overall_acc:.4f}')
