<a href="https://colab.research.google.com/github/Dewwbe/-Real-Estate-Document-Collection-/blob/main/Suwa_manasa_EmotionD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# =========================================
# Cell 1 — Imports
# =========================================
import os
import re
import glob
import numpy as np
import pandas as pd

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

import librosa
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

print("TF:", tf.__version__)


TF: 2.19.0


In [2]:
# =========================================
# Cell 2 — Reproducibility
# =========================================
# Single seed reused by every stochastic step in the notebook
# (dataset shuffling, train/test split, weight initialisation).
SEED = 42

# keras.utils.set_random_seed seeds Python's `random`, NumPy and TensorFlow in
# one call. The original cell seeded only TensorFlow and NumPy, leaving
# Python's `random` module unseeded.
keras.utils.set_random_seed(SEED)


In [11]:
# =========================================
# Cell 3 — Set dataset paths (EDIT ONLY THESE 3)
# Adjust these to your environment WITHOUT changing dataset internals.
# =========================================

# Dataset1 root. Later cells read "<root>/Images/Emotion/{train,test}" for images
# and "<root>/Texts/Emotion/Emotion_{Train,Test}.csv" for text.
DATASET1_ROOT = "/content/drive/MyDrive/kidoDataset"   # <- change

# Dataset2 root. Later cells join "train" and "test" directly onto this path,
# so it must point AT the folder that holds those two sub-folders.
DATASET2_ROOT = "/content/drive/MyDrive/newart"   # <- change

# Voice root. Later cells read "<root>/Crema", "<root>/Ravdess/audio_speech_actors_01-24",
# "<root>/Savee" and "<root>/Tess".
VOICE_ROOT    = "/content/drive/MyDrive/voicedataset"  # <- change


In [4]:
# =========================================
# Cell 4 — Helpers (label normalization, safe listing)
# =========================================
def norm_emotion_label(x: str) -> str:
    """
    Normalize an emotion label to a lower-case canonical spelling.

    "Hapiness"/"Happiness" (any case, surrounding whitespace ignored) become
    "happy" and "Sadness" becomes "sad". Any other label is returned
    stripped and lower-cased so callers can filter on {"happy", "sad"}.

    Non-string input (e.g. NaN coming out of a pandas column) is converted
    with ``str()`` instead of raising, and ``None`` maps to the empty string;
    neither can ever equal "happy" or "sad", so such rows are simply filtered
    out downstream.
    """
    if x is None:
        return ""
    s = str(x).strip().lower()
    # Substring replacement (not exact match) is kept on purpose so that
    # labels embedding the word, e.g. a folder called "happiness_v2", still map.
    s = s.replace("hapiness", "happy").replace("happiness", "happy")
    s = s.replace("sadness", "sad")
    return s

def safe_listdir(path):
    """
    List the entries of ``path``, or return ``[]`` when it is not a directory.

    ``os.path.isdir`` is used rather than ``os.path.exists`` because an existing
    regular file would otherwise reach ``os.listdir`` and raise
    ``NotADirectoryError`` (e.g. a stray file sitting next to the per-actor
    folders). Entries are sorted because ``os.listdir`` order is arbitrary and
    the seeded train/test split depends on row order.
    """
    if not os.path.isdir(path):
        return []
    return sorted(os.listdir(path))

def count_files_recursive(path, exts=None):
    """
    Count the files found anywhere below ``path``.

    When ``exts`` is empty or ``None`` every file counts; otherwise a file
    counts only if its lower-cased name ends with one of the given suffixes.
    """
    suffixes = exts if exts else []

    def _wanted(filename):
        # No suffix filter means "accept everything".
        if not suffixes:
            return True
        lowered = filename.lower()
        return any(lowered.endswith(suffix) for suffix in suffixes)

    return sum(
        1
        for _dirpath, _dirnames, filenames in os.walk(path)
        for filename in filenames
        if _wanted(filename)
    )



In [5]:
# =========================================
# Cell 5 — Build Image datasets from your folder structures
# Dataset1:
# <DATASET1_ROOT>/Images/Emotion/{train,test}/{Happiness,Sadness}  (the "Hapiness" spelling is also accepted)
#
# Dataset2:
# newart/{train,test}/{Happy,Sad,...} -> we ONLY take Happy/Sad for publishable alignment
# =========================================

# Image resolution requested from the loader and the batch size shared by the
# image pipeline and the text model's fit call.
IMG_SIZE = (224, 224)
BATCH = 32

# Dataset1 keeps its emotion images under <root>/Images/Emotion/<split>.
D1_IMG_EMO_TRAIN, D1_IMG_EMO_TEST = (
    os.path.join(DATASET1_ROOT, "Images", "Emotion", split_name)
    for split_name in ("train", "test")
)

# Dataset2 keeps its splits directly under its root.
D2_TRAIN, D2_TEST = (
    os.path.join(DATASET2_ROOT, split_name) for split_name in ("train", "test")
)

# Quick visibility check that the configured roots resolve to real folders.
print("Dataset1 Emotion train exists:", os.path.exists(D1_IMG_EMO_TRAIN))
print("Dataset2 train exists:", os.path.exists(D2_TRAIN))

def make_image_ds_from_directory(path, subset=None, validation_split=None):
    """
    Build a batched image dataset from a class-per-folder directory.

    Labels are inferred from the sub-folder names and emitted as integers;
    images are resized to ``IMG_SIZE``, batched by ``BATCH`` and shuffled with
    ``SEED``. ``subset`` / ``validation_split`` are forwarded unchanged.
    """
    loader_options = dict(
        labels="inferred",
        label_mode="int",
        image_size=IMG_SIZE,
        batch_size=BATCH,
        shuffle=True,
        seed=SEED,
        validation_split=validation_split,
        subset=subset,
    )
    return tf.keras.utils.image_dataset_from_directory(path, **loader_options)

# --- Load Dataset1 Emotion (train/test)
ds1_img_train = make_image_ds_from_directory(D1_IMG_EMO_TRAIN)
ds1_img_test  = make_image_ds_from_directory(D1_IMG_EMO_TEST)

# --- Load Dataset2 BUT only keep Happy/Sad
# We'll load full, then filter classes by name mapping.
ds2_img_train_full = make_image_ds_from_directory(D2_TRAIN)
ds2_img_test_full  = make_image_ds_from_directory(D2_TEST)

print("D1 classes:", ds1_img_train.class_names)
print("D2 classes:", ds2_img_train_full.class_names)


def _require_same_classes(tag, train_ds, test_ds):
    """Raise if the train and test folders of one dataset list different classes."""
    if list(train_ds.class_names) != list(test_ds.class_names):
        raise ValueError(
            f"{tag}: train classes {train_ds.class_names} differ from test classes "
            f"{test_ds.class_names}; label indices would not line up."
        )


# The index->label mappings are derived from the TRAIN folders only and are
# applied to the TEST datasets as well, so both splits must expose the same
# class folders in the same order; otherwise test labels are silently wrong.
_require_same_classes("Dataset1", ds1_img_train, ds1_img_test)
_require_same_classes("Dataset2", ds2_img_train_full, ds2_img_test_full)

# Build a mapping for Dataset2 indices -> happy/sad or ignore
d2_keep = {"happy", "sad"}
d2_idx_to_label = {i: norm_emotion_label(name) for i, name in enumerate(ds2_img_train_full.class_names)}
d2_keep_indices = [i for i, lab in d2_idx_to_label.items() if lab in d2_keep]
print("D2 keep indices:", d2_keep_indices, "mapping:", d2_idx_to_label)

def filter_image_ds_to_happy_sad(ds, idx_to_label, keep_indices):
    """
    Keep only the happy/sad samples of a batched image dataset and relabel them.

    Args:
        ds: batched dataset yielding ``(images, int_labels)``.
        idx_to_label: dict mapping original class index -> normalized label name.
        keep_indices: original class indices that are candidates to keep.

    Returns:
        A dataset re-batched with ``BATCH`` whose labels are 0 for happy and
        1 for sad.

    Raises:
        ValueError: if ``keep_indices`` does not contain exactly one "happy"
            and exactly one "sad" class.
    """
    # Validate with plain Python first, before any pipeline is built.
    keep_indices = [int(i) for i in keep_indices]
    happy_ids = [i for i in keep_indices if idx_to_label[i] == "happy"]
    sad_ids   = [i for i in keep_indices if idx_to_label[i] == "sad"]
    if len(happy_ids) != 1 or len(sad_ids) != 1:
        raise ValueError(f"Expected exactly one Happy and one Sad folder. Found happy={happy_ids}, sad={sad_ids}")

    # Filter on exactly the two validated ids: the remap below sends every
    # non-happy label to 1, so any other kept index would be mislabelled as sad.
    wanted_ids = tf.constant([happy_ids[0], sad_ids[0]], dtype=tf.int32)
    happy_id = tf.constant(happy_ids[0], dtype=tf.int32)

    def element_filter(image, label):
        return tf.reduce_any(tf.equal(label, wanted_ids))

    def element_remap(image, label):
        return image, tf.where(tf.equal(label, happy_id), 0, 1)

    # Unbatch so the filter sees individual samples, then re-batch.
    return (
        ds.unbatch()
        .filter(element_filter)
        .map(element_remap, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(BATCH)
    )

# Dataset2: drop every class except happy/sad in both splits (train first, then test).
ds2_img_train, ds2_img_test = (
    filter_image_ds_to_happy_sad(full_split, d2_idx_to_label, d2_keep_indices)
    for full_split in (ds2_img_train_full, ds2_img_test_full)
)

# Dataset1: folder order decides the integer ids, so record which id carries
# which normalized label before remapping to {happy:0, sad:1}.
d1_idx_to_label = dict(enumerate(map(norm_emotion_label, ds1_img_train.class_names)))
print("D1 idx->label:", d1_idx_to_label)

def remap_d1(ds, idx_to_label):
    """
    Relabel a Dataset1 image dataset so that happy -> 0 and everything else -> 1.

    ``idx_to_label`` maps the original class index to its normalized name and
    must contain exactly one "happy" and exactly one "sad" entry, otherwise a
    ``ValueError`` is raised.
    """
    happy_matches = [idx for idx in idx_to_label if idx_to_label[idx] == "happy"]
    sad_matches = [idx for idx in idx_to_label if idx_to_label[idx] == "sad"]
    if not (len(happy_matches) == 1 and len(sad_matches) == 1):
        raise ValueError(f"Dataset1 Emotion folders must be exactly Hapiness & Sadness. Found {idx_to_label}")

    happy_class = tf.constant(happy_matches[0], dtype=tf.int32)

    def _to_binary(images, labels):
        # Only the happy id is compared; any other label becomes 1.
        return images, tf.where(tf.equal(labels, happy_class), 0, 1)

    return ds.map(_to_binary, num_parallel_calls=tf.data.AUTOTUNE)

# Collapse Dataset1's folder indices to the shared convention happy=0 / sad=1.
# Both splits use the mapping derived from the train folder's class names.
ds1_img_train = remap_d1(ds1_img_train, d1_idx_to_label)
ds1_img_test  = remap_d1(ds1_img_test,  d1_idx_to_label)

# Combine dataset1+dataset2 for training/testing
# concatenate() appends every Dataset2 batch after every Dataset1 batch, so the
# two sources are never mixed inside one pass over the data.
# NOTE(review): cache() comes after the shuffled loaders, so presumably the order
# produced in the first epoch is replayed on every later epoch — confirm, and
# consider adding a shuffle() after cache() for img_train.
img_train = ds1_img_train.concatenate(ds2_img_train).cache().prefetch(tf.data.AUTOTUNE)
img_test  = ds1_img_test.concatenate(ds2_img_test).cache().prefetch(tf.data.AUTOTUNE)

print("✅ Image datasets ready (happy=0, sad=1)")


Dataset1 Emotion train exists: True
Dataset2 train exists: True
Found 9228 files belonging to 2 classes.
Found 1632 files belonging to 2 classes.
Found 694 files belonging to 4 classes.
Found 401 files belonging to 4 classes.
D1 classes: ['Happiness', 'Sadness']
D2 classes: ['Angry', 'Fear', 'Happy', 'Sad']
D2 keep indices: [2, 3] mapping: {0: 'angry', 1: 'fear', 2: 'happy', 3: 'sad'}
D1 idx->label: {0: 'happy', 1: 'sad'}
✅ Image datasets ready (happy=0, sad=1)


In [15]:
# =========================================
# Cell 6 — Load Text dataset from Dataset1 structure
# Assuming Emotion_Train.csv and Emotion_Test.csv contain text and label columns.
# =========================================

# Both emotion CSVs sit side by side under <DATASET1_ROOT>/Texts/Emotion.
D1_TEXT_EMO_TRAIN_FILE, D1_TEXT_EMO_TEST_FILE = (
    os.path.join(DATASET1_ROOT, "Texts", "Emotion", csv_name)
    for csv_name in ("Emotion_Train.csv", "Emotion_Test.csv")
)

# Show the resolved paths and whether each file is actually there.
print("Text train file:", D1_TEXT_EMO_TRAIN_FILE, "exists:", os.path.exists(D1_TEXT_EMO_TRAIN_FILE))
print("Text test file :", D1_TEXT_EMO_TEST_FILE,  "exists:", os.path.exists(D1_TEXT_EMO_TEST_FILE))

# Filename-based helpers (infer_label_from_filename / load_text_folder) from an
# earlier version do not apply here: the text data lives in two CSV files.
# They are read with pandas WITHOUT a header row, and the text and emotion
# columns are selected by position (see load_text_csv below).

def load_text_csv(file_path):
    """
    Load (texts, labels) for the happy/sad rows of one emotion CSV.

    The file is parsed without a header row; column index 2 is taken as the
    text and column index 3 as the emotion label. Labels are normalized with
    ``norm_emotion_label`` and only "happy"/"sad" rows are kept.

    Rows with a missing text or label are dropped here, so the two returned
    lists always have the same length and stay index-aligned.

    Returns:
        ``(texts, labels)`` as two lists of ``str``. ``([], [])`` is returned,
        with a printed message, when the file is missing, unreadable, or does
        not have the expected columns.
    """
    if not os.path.exists(file_path):
        print(f"File not found: {file_path}")
        return [], []

    # Positional (0-indexed) columns: 3rd column is the text, 4th the emotion.
    text_col_idx = 2
    emotion_col_idx = 3

    try:
        df = pd.read_csv(file_path, header=None)
    except (OSError, ValueError) as e:
        # pandas' ParserError / EmptyDataError and UnicodeDecodeError are all
        # ValueError subclasses. Only read/parse failures are downgraded to an
        # empty result; programming errors are left to surface.
        print(f"Error reading {file_path}: {e}")
        return [], []

    if text_col_idx not in df.columns or emotion_col_idx not in df.columns:
        print(f"Error: Expected columns at index {text_col_idx} (text) and {emotion_col_idx} (emotion) not found in '{file_path}'.")
        print(f"Available columns indices: {df.columns.tolist()}")
        return [], []

    df = df[[text_col_idx, emotion_col_idx]].rename(columns={text_col_idx: 'text', emotion_col_idx: 'emotion'})

    # A NaN label used to make the whole file load as empty (the resulting
    # AttributeError was swallowed), and a NaN text would later become "nan".
    df = df.dropna(subset=['text', 'emotion'])
    df = df.assign(
        text=df['text'].astype(str),
        emotion=df['emotion'].map(norm_emotion_label),
    )

    # A header row, if the file has one, is removed here as well because its
    # label cell is neither "happy" nor "sad".
    df_filtered = df[df['emotion'].isin(['happy', 'sad'])]

    return df_filtered['text'].tolist(), df_filtered['emotion'].tolist()

# Raw texts plus their normalized string labels for each split.
X_text_train, train_text_labels = load_text_csv(D1_TEXT_EMO_TRAIN_FILE)
X_text_test,  test_text_labels  = load_text_csv(D1_TEXT_EMO_TEST_FILE)

# Size and class-balance report.
print("Train texts:", len(X_text_train), "Test texts:", len(X_text_test))
print("Train label counts:", pd.Series(train_text_labels).value_counts().to_dict())
print("Test label counts :", pd.Series(test_text_labels).value_counts().to_dict())

# Integer targets: "happy" -> 0, anything else -> 1.
y_text_train = np.array([int(label != "happy") for label in train_text_labels], dtype=np.int32)
y_text_test  = np.array([int(label != "happy") for label in test_text_labels], dtype=np.int32)

print("✅ Text arrays ready (happy=0, sad=1)")

Text train file: /content/drive/MyDrive/kidoDataset/Texts/Emotion/Emotion_Train.csv exists: True
Text test file : /content/drive/MyDrive/kidoDataset/Texts/Emotion/Emotion_Test.csv exists: True
Train texts: 9228 Test texts: 1632
Train label counts: {'sad': 4614, 'happy': 4614}
Test label counts : {'sad': 816, 'happy': 816}
✅ Text arrays ready (happy=0, sad=1)


In [7]:
# =========================================
# Cell 7 — Load Voice datasets (Crema, Ravdess, Savee, Tess) and keep only happy/sad
# =========================================

# Three corpora live directly under VOICE_ROOT; RAVDESS has one extra level.
CREMA_PATH, SAVEE_PATH, TESS_PATH = (
    os.path.join(VOICE_ROOT, corpus_dir) for corpus_dir in ("Crema", "Savee", "Tess")
)
RAVDESS_PATH = os.path.join(VOICE_ROOT, "Ravdess", "audio_speech_actors_01-24")

# Report which corpus folders are actually present.
print("Crema exists:", os.path.exists(CREMA_PATH))
print("Ravdess exists:", os.path.exists(RAVDESS_PATH))
print("Savee exists:", os.path.exists(SAVEE_PATH))
print("Tess exists:", os.path.exists(TESS_PATH))

def load_crema_df(crema_path):
    """
    Index the happy/sad ``.wav`` files found directly inside ``crema_path``.

    The emotion is read from the third underscore-separated field of the file
    name: "HAP" -> "happy", "SAD" -> "sad" (case-insensitive). Files with fewer
    than three fields or any other code are skipped.

    Returns a DataFrame with columns ``emotion`` and ``path``.
    """
    code_to_emotion = {"HAP": "happy", "SAD": "sad"}
    rows = []
    for filename in safe_listdir(crema_path):
        if not filename.lower().endswith(".wav"):
            continue
        fields = filename.split("_")
        if len(fields) < 3:
            continue
        emotion = code_to_emotion.get(fields[2].upper())
        if emotion is None:
            continue
        rows.append((emotion, os.path.join(crema_path, filename)))
    return pd.DataFrame({
        "emotion": [emotion for emotion, _ in rows],
        "path": [wav_path for _, wav_path in rows],
    })

def load_ravdess_df(ravdess_path):
    """
    Index the happy/sad ``.wav`` files of the per-actor folders in ``ravdess_path``.

    The emotion is the third dash-separated field of the file name
    (3 = happy, 4 = sad). Entries of ``ravdess_path`` that are not directories,
    and files whose third field is missing or not an integer, are skipped
    instead of aborting the whole scan.

    Returns a DataFrame with columns ``emotion`` and ``path``.
    """
    id_to_emotion = {3: "happy", 4: "sad"}
    emotions, paths = [], []
    for actor_dir in safe_listdir(ravdess_path):
        adir = os.path.join(ravdess_path, actor_dir)
        # Stray files beside the actor folders must not be listed as directories.
        if not os.path.isdir(adir):
            continue
        for f in safe_listdir(adir):
            if not f.lower().endswith(".wav"):
                continue
            parts = f.split(".")[0].split("-")
            if len(parts) < 3:
                continue
            try:
                emo_id = int(parts[2])
            except ValueError:
                # File name does not follow the numeric naming scheme.
                continue
            emotion = id_to_emotion.get(emo_id)
            if emotion is None:
                continue
            emotions.append(emotion)
            paths.append(os.path.join(adir, f))
    return pd.DataFrame({"emotion": emotions, "path": paths})

def load_savee_df(savee_path):
    """
    Index the happy/sad ``.wav`` files found directly inside ``savee_path``.

    File names look like 'JE_h09.wav' or 'KL_sa13.wav': the second
    underscore-separated field is an emotion code followed by digits
    ("h" = happy, "sa" = sad). The code is extracted with a regex, so the
    number of digits no longer matters, and names without an underscore are
    skipped instead of raising ``IndexError``.

    Returns a DataFrame with columns ``emotion`` and ``path``.
    """
    code_to_emotion = {"h": "happy", "sa": "sad"}
    emotions, paths = [], []
    for f in safe_listdir(savee_path):
        if not f.lower().endswith(".wav"):
            continue
        fields = os.path.splitext(f)[0].split("_")
        if len(fields) < 2:
            continue
        # Letters = emotion code, trailing digits = utterance number.
        match = re.fullmatch(r"([a-z]+)\d+", fields[1].lower())
        if match is None:
            continue
        emotion = code_to_emotion.get(match.group(1))
        if emotion is None:
            continue
        emotions.append(emotion)
        paths.append(os.path.join(savee_path, f))
    return pd.DataFrame({"emotion": emotions, "path": paths})

def load_tess_df(tess_path):
    """
    Index the happy/sad ``.wav`` files of the sub-folders in ``tess_path``.

    A file is labelled "happy" if the part of its lower-cased name before the
    first dot contains "happy", otherwise "sad" if it contains "sad"; every
    other file is skipped. Non-directory entries of ``tess_path`` are ignored.

    Returns a DataFrame with columns ``emotion`` and ``path``.
    """
    emotions, paths = [], []
    for d in safe_listdir(tess_path):
        subdir = os.path.join(tess_path, d)
        # Stray files beside the emotion folders must not be listed as directories.
        if not os.path.isdir(subdir):
            continue
        for f in safe_listdir(subdir):
            if not f.lower().endswith(".wav"):
                continue
            # filename contains emotion token, typical: ..._happy.wav, ..._sad.wav
            base = f.split(".")[0].lower()
            if "happy" in base:
                emotion = "happy"
            elif "sad" in base:
                # The former `\bsad\b` regex was redundant: any name it matches
                # also satisfies this substring test.
                emotion = "sad"
            else:
                continue
            emotions.append(emotion)
            paths.append(os.path.join(subdir, f))
    return pd.DataFrame({"emotion": emotions, "path": paths})

crema_df   = load_crema_df(CREMA_PATH)
ravdess_df = load_ravdess_df(RAVDESS_PATH)
savee_df   = load_savee_df(SAVEE_PATH)
tess_df    = load_tess_df(TESS_PATH)

voice_df = pd.concat([crema_df, ravdess_df, savee_df, tess_df], ignore_index=True)
if voice_df.empty:
    # Fail here with a readable message rather than inside train_test_split.
    raise ValueError(f"No happy/sad .wav files found under VOICE_ROOT={VOICE_ROOT!r}; check the dataset paths.")

# Directory listing order is filesystem dependent. Sorting by path makes the
# seeded split below select the same files on every machine.
voice_df = voice_df.sort_values("path").reset_index(drop=True)

print("Voice samples:", voice_df.shape)
print(voice_df["emotion"].value_counts())

# Encode labels: happy=0, sad=1
voice_df["y"] = (voice_df["emotion"] == "sad").astype(np.int32)

# Split (stratified so both splits keep the happy/sad ratio)
voice_train_df, voice_test_df = train_test_split(
    voice_df, test_size=0.2, random_state=SEED, stratify=voice_df["y"]
)

print("Train:", len(voice_train_df), "Test:", len(voice_test_df))


Crema exists: True
Ravdess exists: True
Savee exists: True
Tess exists: True
Voice samples: (3846, 2)
emotion
happy    1923
sad      1923
Name: count, dtype: int64
Train: 3076 Test: 770


In [8]:
# =========================================
# Cell 8 — MFCC extraction + dataset arrays for Audio model
# =========================================
# Number of MFCC coefficients per clip; also the sequence length of the audio model input.
N_MFCC = 40

def extract_mfcc(path, n_mfcc=40, duration=3.0, offset=0.5):
    """
    Return one time-averaged MFCC vector for an audio file.

    ``offset`` and ``duration`` are passed straight to ``librosa.load``;
    ``n_mfcc`` coefficients are computed and averaged over the frame axis,
    giving a float32 vector of shape ``(n_mfcc,)``.
    """
    waveform, sample_rate = librosa.load(path, duration=duration, offset=offset)
    coefficients = librosa.feature.mfcc(y=waveform, sr=sample_rate, n_mfcc=n_mfcc)
    # Transpose to (frames, n_mfcc) and average over frames.
    time_averaged = np.mean(coefficients.T, axis=0)
    return time_averaged.astype(np.float32)

def build_audio_arrays(df):
    """
    Turn a voice DataFrame into model-ready arrays.

    Reads the ``path`` column to extract one MFCC vector per file and the
    ``y`` column for the targets. Returns ``(X, y)`` where ``X`` has shape
    (N, N_MFCC, 1) and ``y`` is an int32 vector of length N.
    """
    per_file_features = [extract_mfcc(wav_path, n_mfcc=N_MFCC) for wav_path in df["path"].values]
    features = np.stack(per_file_features, axis=0)  # (N, 40)
    labels = df["y"].values.astype(np.int32)
    # Trailing singleton axis: each coefficient becomes one 1-feature timestep.
    return features[..., np.newaxis], labels

# Extract features for the train split first, then the test split.
(X_a_train, y_a_train), (X_a_test, y_a_test) = (
    build_audio_arrays(split_df) for split_df in (voice_train_df, voice_test_df)
)

print("Audio train:", X_a_train.shape, y_a_train.shape)
print("Audio test :", X_a_test.shape,  y_a_test.shape)


Audio train: (3076, 40, 1) (3076,)
Audio test : (770, 40, 1) (770,)


In [9]:
# =========================================
# Cell 9 — Image model (CNN)
# =========================================
def build_image_model():
    """
    Build and compile the small CNN used for happy/sad image classification.

    Pixels are rescaled by 1/255, passed through three 3x3 conv stages
    (32, 64, 128 filters; max-pooling after the first two), globally
    average-pooled, regularized with dropout 0.3 and classified by a 2-way
    softmax. Compiled with Adam and sparse categorical cross-entropy.
    """
    height, width = IMG_SIZE[0], IMG_SIZE[1]
    inputs = keras.Input(shape=(height, width, 3))
    features = layers.Rescaling(1./255)(inputs)

    # First two conv stages are each followed by a pooling step.
    for n_filters in (32, 64):
        features = layers.Conv2D(n_filters, 3, activation="relu")(features)
        features = layers.MaxPool2D()(features)

    # The last conv stage feeds global pooling directly.
    features = layers.Conv2D(128, 3, activation="relu")(features)
    features = layers.GlobalAveragePooling2D()(features)
    features = layers.Dropout(0.3)(features)
    outputs = layers.Dense(2, activation="softmax")(features)

    model = keras.Model(inputs, outputs, name="image_cnn")
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

# Number of passes over the combined image training set.
IMG_EPOCHS = 8

img_model = build_image_model()
img_model.summary()

# The test split doubles as the validation data reported after each epoch.
hist_img = img_model.fit(
    x=img_train,
    validation_data=img_test,
    epochs=IMG_EPOCHS,
)


Epoch 1/8
    300/Unknown [1m3062s[0m 10s/step - accuracy: 0.5481 - loss: 0.6890



[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3695s[0m 12s/step - accuracy: 0.5482 - loss: 0.6890 - val_accuracy: 0.5983 - val_loss: 0.6767
Epoch 2/8
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 44ms/step - accuracy: 0.5924 - loss: 0.6752 - val_accuracy: 0.5999 - val_loss: 0.6660
Epoch 3/8
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 44ms/step - accuracy: 0.5968 - loss: 0.6736 - val_accuracy: 0.5966 - val_loss: 0.6721
Epoch 4/8
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 44ms/step - accuracy: 0.6010 - loss: 0.6719 - val_accuracy: 0.6260 - val_loss: 0.6693
Epoch 5/8
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 44ms/step - accuracy: 0.6086 - loss: 0.6680 - val_accuracy: 0.6315 - val_loss: 0.6528
Epoch 6/8
[1m300/300[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 45ms/step - accuracy: 0.6112 - loss: 0.6647 - val_ac

In [20]:
# =========================================
# Cell 10 — Text model (TextVectorization + BiLSTM)
# =========================================
# Vocabulary cap and fixed token-sequence length for the text model.
MAX_TOKENS = 20000
SEQ_LEN = 200

vectorizer = layers.TextVectorization(
    max_tokens=MAX_TOKENS,
    output_mode="int",
    output_sequence_length=SEQ_LEN
)

# Make every entry a str WITHOUT dropping any. The previous version removed
# None entries from the texts only, which would shift the texts against
# y_text_train / y_text_test. A None text becomes the empty string instead.
X_text_train = ["" if x is None else str(x) for x in X_text_train]
X_text_test  = ["" if x is None else str(x) for x in X_text_test]

if len(X_text_train) != len(y_text_train) or len(X_text_test) != len(y_text_test):
    raise ValueError(
        "Text/label length mismatch: "
        f"train {len(X_text_train)} vs {len(y_text_train)}, "
        f"test {len(X_text_test)} vs {len(y_text_test)}"
    )

# Vocabulary is learned from the training texts only.
vectorizer.adapt(X_text_train)

def build_text_model():
    """
    Build and compile the BiLSTM text classifier.

    Takes raw strings as input, tokenizes them with the module-level
    ``vectorizer``, embeds the tokens in 128 dimensions, encodes them with a
    bidirectional LSTM (64 units per direction), applies dropout 0.3 and ends
    in a 2-way softmax. Compiled with Adam and sparse categorical cross-entropy.
    """
    raw_text = keras.Input(shape=(), dtype=tf.string)
    token_ids = vectorizer(raw_text)
    embedded = layers.Embedding(MAX_TOKENS, 128)(token_ids)
    encoded = layers.Bidirectional(layers.LSTM(64))(embedded)
    regularized = layers.Dropout(0.3)(encoded)
    class_probs = layers.Dense(2, activation="softmax")(regularized)

    model = keras.Model(raw_text, class_probs, name="text_bilstm")
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

# Number of passes over the text training set.
TEXT_EPOCHS = 6

text_model = build_text_model()
text_model.summary()

# Texts are handed over as object arrays of Python strings; the test split
# doubles as the validation data reported after each epoch.
hist_text = text_model.fit(
    np.array(X_text_train, dtype=object),
    y_text_train,
    batch_size=BATCH,
    epochs=TEXT_EPOCHS,
    validation_data=(np.array(X_text_test, dtype=object), y_text_test),
)

Epoch 1/6
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 20ms/step - accuracy: 0.7903 - loss: 0.4695 - val_accuracy: 0.9510 - val_loss: 0.1282
Epoch 2/6
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 22ms/step - accuracy: 0.9654 - loss: 0.1022 - val_accuracy: 0.9498 - val_loss: 0.1356
Epoch 3/6
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 18ms/step - accuracy: 0.9796 - loss: 0.0663 - val_accuracy: 0.9485 - val_loss: 0.1548
Epoch 4/6
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - accuracy: 0.9854 - loss: 0.0468 - val_accuracy: 0.9357 - val_loss: 0.2236
Epoch 5/6
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 20ms/step - accuracy: 0.9864 - loss: 0.0410 - val_accuracy: 0.9442 - val_loss: 0.2091
Epoch 6/6
[1m289/289[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 18ms/step - accuracy: 0.9875 - loss: 0.0411 - val_accuracy: 0.9442 - val_loss: 0.2262


In [21]:
# =========================================
# Cell 11 — Audio model (LSTM on MFCC)
# =========================================
def build_audio_model():
    """
    Build and compile the LSTM audio classifier.

    Input has shape (N_MFCC, 1). The network is LSTM(128) -> dropout 0.3 ->
    Dense(64, relu) -> dropout 0.3 -> 2-way softmax, compiled with Adam and
    sparse categorical cross-entropy.
    """
    mfcc_input = keras.Input(shape=(N_MFCC, 1))
    hidden = layers.LSTM(128)(mfcc_input)
    hidden = layers.Dropout(0.3)(hidden)
    hidden = layers.Dense(64, activation="relu")(hidden)
    hidden = layers.Dropout(0.3)(hidden)
    class_probs = layers.Dense(2, activation="softmax")(hidden)

    model = keras.Model(mfcc_input, class_probs, name="audio_lstm")
    model.compile(
        optimizer="adam",
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    return model

# Number of passes over the audio training set.
AUDIO_EPOCHS = 12

audio_model = build_audio_model()
audio_model.summary()

# The held-out voice split doubles as the validation data reported after each epoch.
hist_audio = audio_model.fit(
    x=X_a_train,
    y=y_a_train,
    batch_size=64,
    epochs=AUDIO_EPOCHS,
    validation_data=(X_a_test, y_a_test),
)


Epoch 1/12
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 13ms/step - accuracy: 0.5399 - loss: 0.6798 - val_accuracy: 0.6727 - val_loss: 0.5882
Epoch 2/12
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6722 - loss: 0.5886 - val_accuracy: 0.7260 - val_loss: 0.5373
Epoch 3/12
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.7929 - loss: 0.4508 - val_accuracy: 0.7831 - val_loss: 0.4592
Epoch 4/12
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8233 - loss: 0.4106 - val_accuracy: 0.8039 - val_loss: 0.4006
Epoch 5/12
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8170 - loss: 0.4021 - val_accuracy: 0.8156 - val_loss: 0.3953
Epoch 6/12
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.8212 - loss: 0.3834 - val_accuracy: 0.8234 - val_loss: 0.3752
Epoch 7/12
[1m49/49[0m [32m━━━━━━━━━

In [22]:
# =========================================
# Cell 12 — Late Fusion predictor
# =========================================
def predict_fusion(image_batch, text_batch, audio_batch, w=(1.0, 1.0, 1.0)):
    """
    Late fusion: weighted average of the three models' class probabilities.

    image_batch: (B,224,224,3)
    text_batch : list/array of strings length B
    audio_batch: (B,40,1)
    w = weights (wi, wt, wa)

    Returns an array of shape (B,2) with columns [happy, sad].

    Raises:
        ValueError: if the weights sum to zero (the average would be NaN/inf),
            or if the three models return a different number of rows.
    """
    wi, wt, wa = w
    weight_sum = wi + wt + wa
    # Checked before any model is called so a bad weight tuple fails fast.
    if weight_sum == 0:
        raise ValueError(f"Fusion weights must not sum to zero, got w={w!r}")

    p_img  = img_model.predict(image_batch, verbose=0)
    p_text = text_model.predict(np.array(text_batch, dtype=object), verbose=0)
    p_aud  = audio_model.predict(audio_batch, verbose=0)

    # Without this check a batch of size 1 would silently broadcast against
    # the other two modalities.
    if not (p_img.shape[0] == p_text.shape[0] == p_aud.shape[0]):
        raise ValueError(
            "Batch size mismatch between modalities: "
            f"image={p_img.shape[0]}, text={p_text.shape[0]}, audio={p_aud.shape[0]}"
        )

    p = (wi*p_img + wt*p_text + wa*p_aud) / weight_sum
    return p  # (B,2)


In [23]:
# =========================================
# Cell 13 — Quick sanity test for fusion with 1 batch each (if available)
# This is just a demonstration of the fusion pipeline.
# =========================================

# Get one image batch from img_test
for xb_img, yb_img in img_test.take(1):
    sample_img = xb_img.numpy()
    break
else:
    # Without this branch an empty img_test surfaces later as a NameError.
    raise RuntimeError("img_test yielded no batches; cannot run the fusion sanity check.")

# Get one text batch from text test
B = min(16, len(X_text_test))
sample_text = X_text_test[:B]

# Get one audio batch from audio test
sample_audio = X_a_test[:B]

# Make sizes consistent (same B)
B2 = min(sample_img.shape[0], len(sample_text), sample_audio.shape[0])
if B2 == 0:
    raise RuntimeError("At least one modality has no test samples; cannot run the fusion sanity check.")

# The i-th image, text and audio clip come from three independent test sets and
# are not the same subject, so this only checks that the pipeline runs and
# returns the expected shape.
p_fused = predict_fusion(sample_img[:B2], sample_text[:B2], sample_audio[:B2])

print("Fusion probs shape:", p_fused.shape)
print("First 5 probs [happy,sad]:\n", p_fused[:5])
print("Pred labels (0=happy,1=sad):", np.argmax(p_fused, axis=1)[:20])


Fusion probs shape: (16, 2)
First 5 probs [happy,sad]:
 [[0.4746636  0.52533644]
 [0.5741978  0.42580214]
 [0.3136351  0.6863649 ]
 [0.29673654 0.70326346]
 [0.42825612 0.57174385]]
Pred labels (0=happy,1=sad): [1 0 1 1 1 1 1 1 1 1 0 1 0 0 0 1]


In [24]:
# =========================================
# Cell 14 — Save
# =========================================
# Persist each trained model in the native .keras format. The file names are
# relative, so the files are written to the current working directory.
# NOTE(review): on Colab the working directory is presumably not persisted
# across sessions — confirm, and copy the files to Drive if they must survive.
img_model.save("image_emotion_happy_sad.keras")
text_model.save("text_emotion_happy_sad.keras")
audio_model.save("audio_emotion_happy_sad.keras")
print("✅ Saved all 3 models.")


✅ Saved all 3 models.
