# 🎧 ResNet152 Audio Emotion Classification

This notebook trains an emotion classifier on **mel-spectrogram images** of audio clips, using a frozen, ImageNet-pretrained **ResNet152** backbone with a small dense classification head.

In [None]:
# 📦 Step 1: Install required libraries
!pip install librosa matplotlib scikit-learn tensorflow

In [None]:
# 📚 Step 2: Import libraries
import os
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.applications import ResNet152
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from PIL import Image

In [None]:
# ⚙️ Step 3: Configuration
# Dataset root: one sub-folder per emotion label (upload your dataset here).
AUDIO_DIR = "/content/audio_data"

# Audio loading: target sample rate in Hz and the maximum number of seconds
# read from each clip.
SAMPLE_RATE = 22050
DURATION = 2

# Spectrogram resolution (mel bands) and the square image side fed to the CNN.
N_MELS = 128
IMG_SIZE = 224

# Emotion labels; a label's position in this list is its integer class id.
CLASSES = [
    'Sad',
    'Happy',
    'Stress',
    'Restless',
    'Love',
    'Lonely',
    'Tired',
    'Normal',
]
CLASS_TO_IDX = dict(zip(CLASSES, range(len(CLASSES))))

In [None]:
# 🎼 Step 4: Convert audio to spectrogram
def audio_to_spectrogram(file_path):
    """Render an audio file as a square RGB mel-spectrogram image.

    The clip is loaded at ``SAMPLE_RATE`` (at most ``DURATION`` seconds),
    converted to a dB-scaled mel spectrogram with ``N_MELS`` bands,
    colour-mapped with viridis and resized to ``IMG_SIZE`` x ``IMG_SIZE``.

    Args:
        file_path: Path to an audio file readable by ``librosa.load``.

    Returns:
        ``np.ndarray`` of shape ``(IMG_SIZE, IMG_SIZE, 3)`` and dtype ``uint8``.
    """
    signal, sr = librosa.load(file_path, duration=DURATION, sr=SAMPLE_RATE)
    mel = librosa.feature.melspectrogram(y=signal, sr=sr, n_mels=N_MELS)
    mel_db = librosa.power_to_db(mel, ref=np.max)

    # Min-max scale into [0, 1], the range a matplotlib colormap expects for
    # float input. With ref=np.max the dB values are all <= 0, and
    # librosa.util.normalize keeps the sign, so its output sat in [-1, 0];
    # the colormap maps everything below 0 to its lowest colour, which made
    # every spectrogram a near-uniform image.
    lowest = mel_db.min()
    span = mel_db.max() - lowest
    if span > 0:
        scaled = (mel_db - lowest) / span
    else:
        # Constant spectrogram (e.g. pure silence): avoid dividing by zero.
        scaled = np.zeros_like(mel_db)

    # The colormap returns RGBA floats in [0, 1]; convert to 8-bit and drop
    # the alpha channel via the RGB conversion after resizing.
    rgba = np.uint8(plt.cm.viridis(scaled) * 255)
    img = Image.fromarray(rgba).resize((IMG_SIZE, IMG_SIZE)).convert("RGB")
    return np.array(img)

In [None]:
# 📁 Step 5: Load dataset into X, y
X, y = [], []

for label in CLASSES:
    folder = os.path.join(AUDIO_DIR, label)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore the seeded train/validation split) reproducible.
    for file in sorted(os.listdir(folder)):
        try:
            img = audio_to_spectrogram(os.path.join(folder, file))
            X.append(img)
            y.append(CLASS_TO_IDX[label])
        except Exception as e:
            # Best effort: report unreadable / non-audio files and keep going.
            print(f"Skipped {file} due to {e}")

# Fail early with a clear message instead of a confusing shape error later.
if not X:
    raise ValueError(f"No audio files could be loaded from {AUDIO_DIR}")

# The ImageNet ResNet weights expect inputs prepared by
# resnet.preprocess_input (RGB -> BGR plus per-channel mean subtraction on the
# 0-255 scale), not pixels divided by 255. float32 also halves memory compared
# with the float64 array produced by `/ 255.0`.
# NOTE: apply the same preprocess_input call to images at inference time.
X = tf.keras.applications.resnet.preprocess_input(np.asarray(X, dtype=np.float32))
y = to_categorical(y, num_classes=len(CLASSES))

In [None]:
# ✂️ Step 6: Split data
# Hold out 20% of the samples for validation, stratified on the labels and
# with a fixed seed for the shuffle.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# 🧠 Step 7: Create model using ResNet152
# ImageNet-pretrained ResNet152 without its classifier top; frozen so only the
# new head below is trained.
base_model = ResNet152(
    include_top=False,
    weights='imagenet',
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)
base_model.trainable = False

# Head: pooled backbone features -> dropout -> 256-unit ReLU -> dropout,
# followed by a softmax over the emotion classes.
head_layers = [
    GlobalAveragePooling2D(),
    Dropout(0.3),
    Dense(256, activation='relu'),
    Dropout(0.3),
]
x = base_model.output
for layer in head_layers:
    x = layer(x)
output = Dense(len(CLASSES), activation='softmax')(x)

model = Model(inputs=base_model.input, outputs=output)
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)
model.summary()

In [None]:
# 🏋️‍♂️ Step 8: Train model
# Training batches are augmented with random horizontal flips; the validation
# arrays are passed to fit() unchanged.
datagen = ImageDataGenerator(horizontal_flip=True)
train_batches = datagen.flow(X_train, y_train, batch_size=16)
history = model.fit(
    train_batches,
    epochs=15,
    validation_data=(X_val, y_val),
)

In [None]:
# 💾 Step 9: Save model
# Persist the trained model to a single file in the working directory
# (the .h5 suffix selects Keras' HDF5 format).
model.save(filepath="resnet_audio_emotion_model.h5")