In [1]:
!pip -q install kaggle opencv-python gradio scikit-learn

In [2]:
import os
# Create the directory that the Kaggle API token is copied into below.
os.makedirs("/root/.kaggle", exist_ok=True)
# kaggle.json must already be present in the current working directory.
!cp kaggle.json /root/.kaggle/
# Mode 600: the token file is readable/writable by its owner only.
!chmod 600 /root/.kaggle/kaggle.json
print("Kaggle configured ✅")

Kaggle configured ✅


In [3]:
!kaggle datasets download -d shuvoalok/raf-db-dataset -p /content --force
!unzip -q /content/raf-db-dataset.zip -d /content/raf_db

print("Unzipped ✅")
!ls -la /content/raf_db | head -n 50

Dataset URL: https://www.kaggle.com/datasets/shuvoalok/raf-db-dataset
License(s): other
Downloading raf-db-dataset.zip to /content
  0% 0.00/37.7M [00:00<?, ?B/s]
100% 37.7M/37.7M [00:00<00:00, 1.29GB/s]
Unzipped ✅
total 412
drwxr-xr-x 3 root root   4096 Jan  8 17:00 .
drwxr-xr-x 1 root root   4096 Jan  8 16:59 ..
drwxr-xr-x 4 root root   4096 Jan  8 17:00 DATASET
-rw-r--r-- 1 root root  76713 Sep 20  2023 test_labels.csv
-rw-r--r-- 1 root root 331330 Sep 20  2023 train_labels.csv


In [4]:
from pathlib import Path

# Root of the extracted archive and the two splits it ships with.
SRC = Path("/content/raf_db/DATASET")
TRAIN_ROOT = SRC / "train"
TEST_ROOT  = SRC / "test"

# Fail fast if the archive layout is not what the following cells expect.
assert TRAIN_ROOT.exists(), f"Missing: {TRAIN_ROOT}"
assert TEST_ROOT.exists(),  f"Missing: {TEST_ROOT}"

print("SRC       :", SRC)
print("TRAIN_ROOT:", TRAIN_ROOT)
print("TEST_ROOT :", TEST_ROOT)

# List the per-label sub-folders of each split (names only, in iterdir() order).
print("\nTrain folders:", [p.name for p in TRAIN_ROOT.iterdir() if p.is_dir()])
print("Test  folders:", [p.name for p in TEST_ROOT.iterdir() if p.is_dir()])

SRC       : /content/raf_db/DATASET
TRAIN_ROOT: /content/raf_db/DATASET/train
TEST_ROOT : /content/raf_db/DATASET/test

Train folders: ['6', '3', '5', '4', '1', '2', '7']
Test  folders: ['6', '3', '5', '4', '1', '2', '7']


In [5]:
import shutil, random
from pathlib import Path

# Seed Python's global RNG; random.shuffle() later in this cell picks the
# validation subset.
random.seed(42)

# Destination root of the reduced three-class dataset.
OUT = Path("/content/dataset_3class")

# Start from an empty tree so files from a previous run cannot linger.
if OUT.exists():
    shutil.rmtree(OUT)

# Create every <split>/<class> directory up front.
for split in ["train", "val", "test"]:
    for cls in ["happy", "sad", "neutral"]:
        (OUT / split / cls).mkdir(parents=True, exist_ok=True)

# File suffixes (compared lower-cased) that list_images() accepts.
img_exts = {".jpg", ".jpeg", ".png", ".bmp"}

def list_images(folder: Path):
    """Return the regular files directly inside ``folder`` whose suffix is in ``img_exts``.

    The suffix comparison is case-insensitive; sub-directories are not descended into.
    """
    matches = []
    for entry in folder.glob("*"):
        if not entry.is_file():
            continue
        if entry.suffix.lower() in img_exts:
            matches.append(entry)
    return matches

# RAF numeric label folders:
# 4 = happy, 5 = sad, 7 = neutral
# Only the folders listed here are copied by copy_split_numeric(); every other
# numeric folder in the source dataset is ignored.
FOLDER_TO_CLASS = {"4": "happy", "5": "sad", "7": "neutral"}

def copy_split_numeric(split_root: Path, split_name: str):
    """Copy the mapped label folders of one source split into ``OUT / split_name``.

    Args:
        split_root: source directory holding the numeric label folders.
        split_name: name of the destination split directory under ``OUT``.

    Returns:
        Dict mapping each class name to the number of images found for it.

    Raises:
        RuntimeError: if a folder named in ``FOLDER_TO_CLASS`` is missing.
    """
    available = [entry.name for entry in split_root.iterdir() if entry.is_dir()]
    print(f"{split_name} folders found:", available)

    copied = dict.fromkeys(("happy", "sad", "neutral"), 0)
    target_root = OUT / split_name

    for numeric_name, emotion in FOLDER_TO_CLASS.items():
        source_dir = split_root / numeric_name
        if not source_dir.exists():
            raise RuntimeError(f"Missing folder {source_dir}. Found: {available}")

        pictures = list_images(source_dir)
        copied[emotion] = len(pictures)

        # copy2 keeps file metadata; the file name is preserved.
        for picture in pictures:
            shutil.copy2(picture, target_root / emotion / picture.name)

    return copied

# Copy the three mapped classes of each source split into OUT.
train_counts = copy_split_numeric(TRAIN_ROOT, "train")
test_counts  = copy_split_numeric(TEST_ROOT,  "test")

print("✅ Copied numeric folders")
print("Train counts:", train_counts)
print("Test counts :", test_counts)

# Create validation split from train (15% per class)
for cls in ["happy", "sad", "neutral"]:
    train_cls = OUT / "train" / cls
    val_cls   = OUT / "val" / cls

    # glob() yields entries in filesystem order, which is not stable across
    # machines or runs. Sort first so the seeded shuffle below always selects
    # the same validation files.
    imgs = sorted(p for p in train_cls.glob("*") if p.is_file())
    random.shuffle(imgs)

    # At least one image per class goes to validation.
    take = max(1, int(0.15 * len(imgs)))
    for p in imgs[:take]:
        # Move (not copy) so train and val stay disjoint.
        p.rename(val_cls / p.name)

print("\n✅ Final dataset sizes:")
for split in ["train", "val", "test"]:
    print(split, {cls: len(list((OUT/split/cls).glob("*"))) for cls in ["happy","sad","neutral"]})

train folders found: ['6', '3', '5', '4', '1', '2', '7']
test folders found: ['6', '3', '5', '4', '1', '2', '7']
✅ Copied numeric folders
Train counts: {'happy': 4772, 'sad': 1982, 'neutral': 2524}
Test counts : {'happy': 1185, 'sad': 478, 'neutral': 680}

✅ Final dataset sizes:
train {'happy': 4057, 'sad': 1685, 'neutral': 2146}
val {'happy': 715, 'sad': 297, 'neutral': 378}
test {'happy': 1185, 'sad': 478, 'neutral': 680}


In [6]:
import random
random.seed(42)

def balance_train(split_folder=None, classes=("happy", "sad", "neutral"), seed=42):
    """Undersample every class folder to the size of the smallest class.

    Files are deleted from disk. The files to drop are chosen by a seeded
    shuffle of the *sorted* file list, so the surviving subset is a reproducible
    random sample rather than whatever the filesystem happens to list first.

    Args:
        split_folder: directory containing one sub-folder per class. Defaults
            to ``OUT / "train"``.
        classes: names of the class sub-folders to balance.
        seed: seed for the private RNG that selects the files to keep.

    Returns:
        None. Class counts before and after are printed.
    """
    if split_folder is None:
        split_folder = OUT / "train"
    split_folder = Path(split_folder)

    # Private RNG: does not disturb (or depend on) the global random state.
    picker = random.Random(seed)

    files = {}
    for cls in classes:
        members = sorted(p for p in (split_folder / cls).glob("*") if p.is_file())
        picker.shuffle(members)
        files[cls] = members

    counts0 = {k: len(v) for k, v in files.items()}
    m = min(counts0.values())
    print("Before balancing:", counts0, "-> target:", m)

    # Keep the first m files of each shuffled list; remove the rest.
    for lst in files.values():
        for p in lst[m:]:
            p.unlink()

    counts1 = {cls: len(list((split_folder / cls).glob("*"))) for cls in classes}
    print("After balancing:", counts1)

# Balances the files under OUT/"train" on disk; val and test are not touched.
balance_train()

Before balancing: {'happy': 4057, 'sad': 1685, 'neutral': 2146} -> target: 1685
After balancing: {'happy': 1685, 'sad': 1685, 'neutral': 1685}


In [7]:
import tensorflow as tf

SEED = 42
IMG_SIZE = (48, 48)
BATCH = 64
AUTOTUNE = tf.data.AUTOTUNE

# Fixes the label ids: happy -> 0, sad -> 1, neutral -> 2.
CLASS_NAMES = ["happy", "sad", "neutral"]


def _load_split(split_name, shuffle, seed=None):
    """Build the grayscale, integer-labelled image dataset for ``OUT / split_name``."""
    return tf.keras.utils.image_dataset_from_directory(
        OUT / split_name,
        labels="inferred",
        label_mode="int",
        class_names=CLASS_NAMES,
        color_mode="grayscale",
        image_size=IMG_SIZE,
        batch_size=BATCH,
        shuffle=shuffle,
        seed=seed,
    )


# Only the training split is shuffled; val/test keep a fixed order.
train_ds = _load_split("train", shuffle=True, seed=SEED)
val_ds = _load_split("val", shuffle=False)
test_ds = _load_split("test", shuffle=False)

def normalize(x, y):
    """Cast the image batch to float32 and divide by 255; labels pass through unchanged."""
    scaled = tf.cast(x, tf.float32) / 255.0
    return scaled, y

# Apply the /255 scaling to every split and prefetch batches so input
# preparation can overlap with model execution.
train_ds = train_ds.map(normalize, num_parallel_calls=AUTOTUNE).prefetch(AUTOTUNE)
val_ds   = val_ds.map(normalize,   num_parallel_calls=AUTOTUNE).prefetch(AUTOTUNE)
test_ds  = test_ds.map(normalize,  num_parallel_calls=AUTOTUNE).prefetch(AUTOTUNE)

print("Pipelines ready ✅")

Found 5055 files belonging to 3 classes.
Found 1390 files belonging to 3 classes.
Found 2343 files belonging to 3 classes.
Pipelines ready ✅


In [8]:
from tensorflow import keras
from tensorflow.keras import layers

# Augmentation layers placed inside the model (horizontal flip, small rotation,
# small zoom).
data_aug = keras.Sequential([
    layers.RandomFlip("horizontal"),
    layers.RandomRotation(0.05),
    layers.RandomZoom(0.1),
], name="data_aug")

# Small CNN: three Conv -> BatchNorm -> ReLU -> MaxPool stages (32/64/128
# filters), then a 256-unit dense "embedding" layer and a 3-way softmax head.
cnn = keras.Sequential([
    # Single-channel input matching the grayscale datasets.
    layers.Input(shape=(IMG_SIZE[0], IMG_SIZE[1], 1)),
    data_aug,

    layers.Conv2D(32, 3, padding="same"), layers.BatchNormalization(), layers.ReLU(),
    layers.MaxPool2D(),

    layers.Conv2D(64, 3, padding="same"), layers.BatchNormalization(), layers.ReLU(),
    layers.MaxPool2D(),

    layers.Conv2D(128, 3, padding="same"), layers.BatchNormalization(), layers.ReLU(),
    layers.MaxPool2D(),

    layers.Flatten(),
    layers.Dense(256, activation="relu", name="embedding"),
    layers.Dropout(0.4),

    # One output per entry of CLASS_NAMES.
    layers.Dense(3, activation="softmax", name="emotion_head"),
], name="cnn_emotion")

# Sparse loss because the datasets yield integer labels (label_mode="int").
cnn.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

cnn.summary()

In [9]:
import os
from tensorflow import keras

# All artefacts of this project are written under SAVE_DIR.
SAVE_DIR = "/content/raf3_emotion_project"
os.makedirs(SAVE_DIR, exist_ok=True)
BEST_CNN_PATH = os.path.join(SAVE_DIR, "best_cnn.keras")

callbacks = [
    # Write the model to BEST_CNN_PATH only when val_accuracy improves.
    keras.callbacks.ModelCheckpoint(BEST_CNN_PATH, monitor="val_accuracy", save_best_only=True, verbose=1),
    # Stop after 5 epochs without val_accuracy improvement and restore the best weights.
    keras.callbacks.EarlyStopping(monitor="val_accuracy", patience=5, restore_best_weights=True, verbose=1),
    # Halve the learning rate after 2 epochs without val_loss improvement.
    keras.callbacks.ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=2, verbose=1),
]

history = cnn.fit(train_ds, validation_data=val_ds, epochs=30, callbacks=callbacks)
print("Saved best CNN locally ✅:", BEST_CNN_PATH)

Epoch 1/30
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 53ms/step - accuracy: 0.3865 - loss: 2.6359
Epoch 1: val_accuracy improved from -inf to 0.51439, saving model to /content/raf3_emotion_project/best_cnn.keras
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 68ms/step - accuracy: 0.3868 - loss: 2.6223 - val_accuracy: 0.5144 - val_loss: 1.0281 - learning_rate: 0.0010
Epoch 2/30
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 27ms/step - accuracy: 0.4763 - loss: 1.0259
Epoch 2: val_accuracy did not improve from 0.51439
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 36ms/step - accuracy: 0.4764 - loss: 1.0258 - val_accuracy: 0.5144 - val_loss: 1.0129 - learning_rate: 0.0010
Epoch 3/30
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.5261 - loss: 0.9530
Epoch 3: val_accuracy improved from 0.51439 to 0.51511, saving model to /content/raf3_emotion_project/best_cnn.keras
[1m79/79[0

In [10]:
import numpy as np
from tensorflow import keras
from sklearn.metrics import classification_report, confusion_matrix

# Reload the checkpoint written during training.
cnn_best = keras.models.load_model(BEST_CNN_PATH)

loss, acc = cnn_best.evaluate(test_ds, verbose=0)
print("Test accuracy:", acc)

# Collect labels and predictions in the same pass so they stay aligned.
y_true, y_pred = [], []
for xb, yb in test_ds:
    # Call the model directly: Model.predict() is meant for whole datasets and
    # adds per-call overhead when invoked once per batch in a Python loop.
    probs = cnn_best(xb, training=False).numpy()
    y_true.extend(yb.numpy().tolist())
    y_pred.extend(np.argmax(probs, axis=1).tolist())

# Pass the label ids explicitly so rows/columns always line up with
# CLASS_NAMES, even if some class never occurs in y_true or y_pred.
label_ids = list(range(len(CLASS_NAMES)))
print(classification_report(y_true, y_pred, labels=label_ids, target_names=CLASS_NAMES))
print("Confusion matrix:\n", confusion_matrix(y_true, y_pred, labels=label_ids))

Test accuracy: 0.7959880232810974
              precision    recall  f1-score   support

       happy       0.94      0.84      0.89      1185
         sad       0.60      0.75      0.67       478
     neutral       0.75      0.75      0.75       680

    accuracy                           0.80      2343
   macro avg       0.76      0.78      0.77      2343
weighted avg       0.81      0.80      0.80      2343

Confusion matrix:
 [[995  95  95]
 [ 40 359  79]
 [ 26 143 511]]


In [11]:
import numpy as np
import itertools

# Module-level generator used by make_caption(); build_caption_bank() creates
# its own local generator from its `seed` argument and does not use this one.
rng = np.random.default_rng(42)

def build_caption_bank(n_per_emotion=120, seed=42):
    """Build a bank of template captions for each emotion id (0, 1, 2).

    Every caption is "<starter> <verb> <emotion phrase> <facial cue><ending>".
    All combinations are generated, de-duplicated case-insensitively (first
    occurrence wins), and then ``n_per_emotion`` of them are sampled without
    replacement. If fewer unique captions exist than requested, all of them are
    returned in generation order.

    Args:
        n_per_emotion: number of captions to keep per emotion.
        seed: seed for the local NumPy generator used for sampling.

    Returns:
        Dict ``{0: [...], 1: [...], 2: [...]}`` of caption strings.
    """
    rng = np.random.default_rng(seed)

    # Shared sentence openers.
    starters = ["The face", "This person", "The expression", "The subject", "The individual"]
    verbs = ["shows", "displays", "reveals", "has", "presents"]

    # Phrase pools as (emotion phrases, facial cues), indexed by emotion id.
    pools = {
        0: (
            ["happiness", "joy", "a happy emotion", "a cheerful mood", "positive emotion"],
            [
                "with a clear smile", "with a bright smile", "with lifted cheeks",
                "with relaxed eyes", "with a warm smile", "with an upbeat look",
                "with smiling eyes", "with an open, friendly look",
            ],
        ),
        1: (
            ["sadness", "a sad emotion", "a down mood", "negative emotion", "melancholy"],
            [
                "with a downturned mouth", "with heavy eyes", "with a tired gaze",
                "with lowered lips", "with reduced facial energy", "with a tense look",
                "with a pained expression", "with a strained mouth shape",
            ],
        ),
        2: (
            ["a neutral emotion", "no strong emotion", "a calm mood", "a composed look", "a neutral state"],
            [
                "with relaxed facial muscles", "with a steady gaze", "with a calm look",
                "with a composed face", "with a neutral mouth", "with balanced features",
                "with no strong cues", "with a relaxed expression",
            ],
        ),
    }

    # Optional trailing detail (may be empty).
    endings = ["", ".", " overall.", " clearly.", " in the face.", " in the eyes and mouth."]

    bank = {0: [], 1: [], 2: []}

    for emo in (0, 1, 2):
        core_phrases, cue_phrases = pools[emo]

        # Example: "The face shows happiness with a bright smile overall."
        candidates = [
            f"{s} {v} {ec} {c}{e}".replace("..", ".").strip()
            for s, v, ec, c, e in itertools.product(starters, verbs, core_phrases, cue_phrases, endings)
        ]

        # Case-insensitive de-duplication; dict insertion order keeps the first
        # spelling of each caption in its original position.
        first_seen = {}
        for text in candidates:
            first_seen.setdefault(text.lower(), text)
        unique = list(first_seen.values())

        if len(unique) >= n_per_emotion:
            picks = rng.choice(len(unique), size=n_per_emotion, replace=False)
            bank[emo] = [unique[i] for i in picks]
        else:
            bank[emo] = unique

    return bank

# Pre-sample 150 captions per emotion; make_caption() draws from this bank.
caption_bank = build_caption_bank(n_per_emotion=150, seed=42)

def make_caption(label_id):
    """Return one caption for ``label_id``, picked with the module-level ``rng``."""
    candidates = caption_bank[int(label_id)]
    pick = int(rng.integers(0, len(candidates)))
    return candidates[pick]

print("Caption counts:", {emo: len(caps) for emo, caps in caption_bank.items()})
print("Sample happy captions:", caption_bank[0][:5])

Caption counts: {0: 150, 1: 150, 2: 150}
Sample happy captions: ['This person presents happiness with a bright smile.', 'The expression has joy with smiling eyes overall.', 'The expression has positive emotion with an upbeat look in the face.', 'The subject presents joy with an open, friendly look in the face.', 'The subject shows a happy emotion with smiling eyes in the eyes and mouth.']


In [12]:
# Flatten the per-batch integer labels of each split into plain Python lists.
# Only the label ids are used from here on; the images are discarded.
train_labels = [label for _, yb in train_ds for label in yb.numpy().tolist()]
val_labels = [label for _, yb in val_ds for label in yb.numpy().tolist()]

# One randomly drawn caption per label (train first, then val).
train_captions = list(map(make_caption, train_labels))
val_captions   = list(map(make_caption, val_labels))

print("Example captions:", train_captions[:3])

Example captions: ['The individual reveals joy with an upbeat look', 'The subject shows a composed look with a neutral mouth.', 'The individual reveals a calm mood with a neutral mouth']


In [13]:
from tensorflow import keras
from tensorflow.keras import layers
import os, json

MAX_TOKENS = 2000   # upper bound on the vocabulary size
SEQ_LEN = 24        # every caption is vectorized to exactly this many token ids
EMB_DIM = 128       # embedding width shared by token and emotion embeddings

vectorizer = layers.TextVectorization(
    max_tokens=MAX_TOKENS,
    output_mode="int",
    output_sequence_length=SEQ_LEN
)
# The vocabulary is learned from the training captions only.
vectorizer.adapt(train_captions)
vocab_size = len(vectorizer.get_vocabulary())
print("Vocab size:", vocab_size)

def make_lm_arrays(captions, labels):
    """Turn captions and labels into next-token-prediction arrays.

    Returns:
        ``(inputs, targets, emotions)``: ``inputs`` are the token ids without
        the last position, ``targets`` the same ids shifted left by one, and
        ``emotions`` the labels as an int32 array.
    """
    tokens = vectorizer(tf.constant(captions))            # [N, SEQ_LEN]
    shifted_in, shifted_out = tokens[:, :-1], tokens[:, 1:]  # each [N, SEQ_LEN-1]
    emotions = np.array(labels, dtype=np.int32)            # [N]
    return shifted_in.numpy(), shifted_out.numpy(), emotions

# Input tokens, shifted target tokens and emotion ids for each split.
xtr, ytr, etr = make_lm_arrays(train_captions, train_labels)
xva, yva, eva = make_lm_arrays(val_captions,   val_labels)

BATCH_LM = 64
# Each element is ((token_ids, emotion_id), target_ids), matching the model's two inputs.
train_lm = tf.data.Dataset.from_tensor_slices(((xtr, etr), ytr)).shuffle(2000, seed=42).batch(BATCH_LM)
val_lm   = tf.data.Dataset.from_tensor_slices(((xva, eva), yva)).batch(BATCH_LM)

token_in = layers.Input(shape=(SEQ_LEN-1,), dtype=tf.int32, name="token_in")
emo_in   = layers.Input(shape=(), dtype=tf.int32, name="emo_in")

tok_emb = layers.Embedding(vocab_size, EMB_DIM, name="tok_emb")(token_in)
# One embedding vector per emotion, repeated along the time axis so it can be
# added to every token embedding.
emo_emb = layers.Embedding(3, EMB_DIM, name="emo_emb")(emo_in)
emo_rep = layers.RepeatVector(SEQ_LEN-1)(emo_emb)

x = layers.Add()([tok_emb, emo_rep])
x = layers.GRU(256, return_sequences=True)(x)
x = layers.Dropout(0.2)(x)
# Raw scores over the vocabulary at every time step (no softmax here).
logits = layers.Dense(vocab_size)(x)

decoder = keras.Model([token_in, emo_in], logits, name="emotion_text_decoder")
# NOTE(review): no masking is configured, so padded positions of the
# fixed-length targets appear to count towards loss and accuracy — confirm
# whether the reported accuracy is meant to include padding.
decoder.compile(
    optimizer=keras.optimizers.Adam(1e-3),
    loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True),
    metrics=[keras.metrics.SparseCategoricalAccuracy()]
)
decoder.summary()

history_dec = decoder.fit(train_lm, validation_data=val_lm, epochs=10)

Vocab size: 71


Epoch 1/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 13ms/step - loss: 2.3158 - sparse_categorical_accuracy: 0.5506 - val_loss: 0.8897 - val_sparse_categorical_accuracy: 0.7624
Epoch 2/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.6616 - sparse_categorical_accuracy: 0.7958 - val_loss: 0.3766 - val_sparse_categorical_accuracy: 0.8352
Epoch 3/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.3794 - sparse_categorical_accuracy: 0.8327 - val_loss: 0.3534 - val_sparse_categorical_accuracy: 0.8388
Epoch 4/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.3601 - sparse_categorical_accuracy: 0.8362 - val_loss: 0.3462 - val_sparse_categorical_accuracy: 0.8400
Epoch 5/10
[1m79/79[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.3512 - sparse_categorical_accuracy: 0.8391 - val_loss: 0.3438 - val_sparse_categorical_accuracy: 0.8435
Epoch 6/1

In [14]:
import os, json

# Decoder artefacts live in their own sub-directory of SAVE_DIR.
DECODER_DIR = os.path.join(SAVE_DIR, "decoder_text")
os.makedirs(DECODER_DIR, exist_ok=True)

decoder_path = os.path.join(DECODER_DIR, "decoder.keras")
vocab_path = os.path.join(DECODER_DIR, "vocab.json")

decoder.save(decoder_path)

# Store the vocabulary next to the model as a JSON list.
vocab = vectorizer.get_vocabulary()
with open(vocab_path, "w") as vocab_file:
    vocab_file.write(json.dumps(vocab))

print("Saved decoder ✅:", decoder_path)
print("Saved vocab ✅:", vocab_path)


Saved decoder ✅: /content/raf3_emotion_project/decoder_text/decoder.keras
Saved vocab ✅: /content/raf3_emotion_project/decoder_text/vocab.json


In [15]:
def generate_description(emotion_id, temperature=0.8):
    """Sample a short text description for an emotion class from the decoder.

    Generation starts from the token "the" (falling back to id 1 if it is not
    in the vocabulary) and samples one token per step from the
    temperature-scaled softmax of the decoder output. Training targets are
    zero-padded after the last caption word, so a sampled PAD (id 0) is treated
    as end-of-caption and stops generation; otherwise later steps could append
    stray words after the caption had already ended.

    Args:
        emotion_id: integer class id fed to the decoder's ``emo_in`` input.
        temperature: softmax temperature; clamped to at least 1e-6.

    Returns:
        The generated words joined by single spaces.
    """
    vocab = vectorizer.get_vocabulary()
    word_to_id = {w: i for i, w in enumerate(vocab)}

    # seed with a real token (not PAD=0)
    start_id = word_to_id.get("the", None)
    if start_id is None or start_id == 0:
        start_id = 1  # fallback (usually [UNK])

    cur = np.zeros((1, SEQ_LEN-1), dtype=np.int32)
    cur[0, 0] = start_id
    emo = np.array([emotion_id], dtype=np.int32)
    temp = max(temperature, 1e-6)

    for t in range(1, SEQ_LEN-1):
        # Direct call instead of predict(): a single tiny batch per step does
        # not need predict()'s per-call dataset setup.
        logits = decoder([tf.constant(cur), tf.constant(emo)], training=False).numpy()

        # Numerically stable softmax in float64 so the probabilities handed to
        # np.random.choice sum to 1.
        step_logits = logits[0, t-1].astype(np.float64) / temp
        step_logits -= step_logits.max()
        probs = np.exp(step_logits)
        probs /= probs.sum()

        next_id = int(np.random.choice(len(probs), p=probs))
        if next_id == 0:
            break  # PAD sampled: the caption is finished
        cur[0, t] = next_id

    words = [vocab[i] for i in cur[0] if i != 0]  # remove PAD
    return " ".join(words).strip()


In [16]:
import shutil
from google.colab import files

# Same value as the SAVE_DIR assigned in the CNN training cell.
SAVE_DIR = "/content/raf3_emotion_project"

# Zip the whole project directory and hand it to the browser for download.
zip_path = shutil.make_archive("/content/raf3_emotion_project_backup", "zip", SAVE_DIR)
files.download(zip_path)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [17]:
import gradio as gr
import numpy as np
import cv2
import os, random

# Emotion -> YouTube videos
# Keys match the entries of CLASS_NAMES; each value is a list of embed URLs
# (with autoplay enabled) from which youtube_iframe() picks one at random.
YOUTUBE = {
    "happy": [
        "https://www.youtube.com/embed/JnCTOi2QAZk?autoplay=1",
        "https://www.youtube.com/embed/w7JBHSx5ScA?autoplay=1",
        "https://www.youtube.com/embed/KZYqugtbcG0?autoplay=1",
        "https://www.youtube.com/embed/T-LAJ0Y7lsw?autoplay=1",
        "https://www.youtube.com/embed/Tl1WS_PvsF4?autoplay=1",
    ],
    "sad": [
        "https://www.youtube.com/embed/DNZzquw45-k?autoplay=1",
        "https://www.youtube.com/embed/v1rrjiOxiEY?autoplay=1",
        "https://www.youtube.com/embed/cvOgOjRZzr8?autoplay=1",
        "https://www.youtube.com/embed/sfCy1JhCwWg?autoplay=1",
        "https://www.youtube.com/embed/ou-7AboPoXE?autoplay=1",
    ],
    "neutral": [
        "https://www.youtube.com/embed/h--ykVNBUYQ?autoplay=1",
        "https://www.youtube.com/embed/QHC-9PGT5ZQ?autoplay=1",
        "https://www.youtube.com/embed/-HlEAEqps9c?autoplay=1",
        "https://www.youtube.com/embed/_RHIECWv728?autoplay=1",
        "https://www.youtube.com/embed/Im74ME1192E?autoplay=1",
    ],
}

def youtube_iframe(emotion_name: str):
    """Pick a random video for ``emotion_name`` and wrap it in an embed snippet.

    Returns:
        ``(html, url)``. When the emotion has no videos, ``html`` is a short
        notice paragraph and ``url`` is the empty string.
    """
    candidates = YOUTUBE.get(emotion_name, [])
    if not candidates:
        return "<p>No recommendations for this emotion.</p>", ""

    url = random.choice(candidates)
    return f"""
    <div style="width:100%; max-width:760px;">
      <iframe width="100%" height="420"
        src="{url}"
        title="YouTube video player"
        frameborder="0"
        allow="autoplay; encrypted-media"
        allowfullscreen>
      </iframe>
    </div>
    """, url

# Face crop (Haar cascade)
CASCADE_PATH = "/content/haarcascade_frontalface_default.xml"
if not os.path.exists(CASCADE_PATH):
    !wget -q https://raw.githubusercontent.com/opencv/opencv/master/data/haarcascades/haarcascade_frontalface_default.xml -O /content/haarcascade_frontalface_default.xml

face_cascade = cv2.CascadeClassifier(CASCADE_PATH)

def crop_largest_face(gray_img):
    """Crop the largest detected face, padded by 15%, from a grayscale image.

    Returns:
        ``(crop, True)`` when at least one face is detected, otherwise
        ``(None, False)``. The padded box is clipped to the image bounds.
    """
    detections = face_cascade.detectMultiScale(
        gray_img, scaleFactor=1.1, minNeighbors=5, minSize=(40, 40)
    )
    if len(detections) == 0:
        return None, False

    def _area(box):
        return box[2] * box[3]

    left, top, width, height = max(detections, key=_area)
    margin = int(0.15 * max(width, height))

    img_h, img_w = gray_img.shape[0], gray_img.shape[1]
    row_start = max(0, top - margin)
    row_stop = min(img_h, top + height + margin)
    col_start = max(0, left - margin)
    col_stop = min(img_w, left + width + margin)

    return gray_img[row_start:row_stop, col_start:col_stop], True

def preprocess_pil_for_cnn(pil_img):
    """Convert an input image into a ``(1, H, W, 1)`` float32 batch for the CNN.

    The image is converted to grayscale, cropped to the largest detected face
    (or left whole when no face is found), resized to ``IMG_SIZE`` and scaled
    to [0, 1].

    Args:
        pil_img: a PIL image in any mode, or an RGB array-like.

    Returns:
        ``(batch, used_crop)`` where ``used_crop`` tells whether a face crop
        was applied.
    """
    # Normalise the mode first: COLOR_RGB2GRAY rejects single-channel ("L")
    # arrays, and palette/alpha modes would not be interpreted as RGB.
    if hasattr(pil_img, "convert"):
        pil_img = pil_img.convert("RGB")
    img = np.array(pil_img)  # RGB
    gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

    face, used_crop = crop_largest_face(gray)
    if face is None:
        face = gray  # used_crop is already False in this case

    # IMG_SIZE is (height, width) as used by Keras, but cv2.resize expects
    # (width, height).
    target_h, target_w = IMG_SIZE
    face = cv2.resize(face, (target_w, target_h))

    x = face.astype(np.float32) / 255.0
    x = x[np.newaxis, ..., np.newaxis]  # add batch and channel axes
    return x, used_crop

# Main inference for Gradio
def predict_emotion_and_text(pil_img):
    """Run the full demo pipeline for one uploaded image.

    Returns:
        A 6-tuple ``(emotion, probabilities_text, description, info,
        youtube_html, youtube_url)`` matching the Gradio outputs. When no image
        is supplied, placeholder strings are returned instead.
    """
    if pil_img is None:
        return "Please upload an image.", "", "", "No image.", "<p></p>", ""

    batch, cropped = preprocess_pil_for_cnn(pil_img)
    class_probs = cnn_best.predict(batch, verbose=0)[0]
    best = int(np.argmax(class_probs))
    emotion = CLASS_NAMES[best]

    desc = generate_description(best, temperature=0.8)

    # One "name: probability" line per class, in CLASS_NAMES order.
    prob_lines = []
    for i, class_name in enumerate(CLASS_NAMES):
        prob_lines.append(f"{class_name}: {class_probs[i]:.4f}")
    prob_text = "\n".join(prob_lines)

    if cropped:
        info = "Face crop used ✅ (largest face)."
    else:
        info = "No face detected → used full image."

    # YouTube recommendation
    yt_html, yt_url = youtube_iframe(emotion)

    return emotion, prob_text, desc, info, yt_html, yt_url

# The six output components are listed in the same order as the tuple returned
# by predict_emotion_and_text().
demo = gr.Interface(
    fn=predict_emotion_and_text,
    inputs=gr.Image(type="pil", label="Upload a face image"),
    outputs=[
        gr.Textbox(label="Predicted Emotion"),
        gr.Textbox(label="Probabilities"),
        gr.Textbox(label="Generated Emotion Description (Decoder-only)"),
        gr.Textbox(label="Info"),
        gr.HTML(label="Recommended Song (YouTube)"),
        gr.Textbox(label="Chosen YouTube URL"),
    ],
    title="RAF-DB (3 classes) — Face Crop + Emotion + Description + Song",
    description="Upload → face crop → CNN emotion (happy/sad/neutral) → decoder description → YouTube song recommendation."
)

# share=True publishes the app under a public gradio.live URL; anyone with the
# link can reach it while it is running.
demo.launch(share=True, debug=False)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://d76d29b9982cce3b91.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


