In [1]:
# -------------------------
# STEP 0: Import Libraries
# -------------------------
import os
import numpy as np
np.complex = complex  # Fix for librosa compatibility
import librosa
import random
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tqdm import tqdm
from moviepy.editor import VideoFileClip
import soundfile as sf
from sklearn.utils.class_weight import compute_class_weight
from sklearn.metrics import classification_report

# -------------------------
# STEP 1: Define Directories
# -------------------------
SPEECH_DIR = r"C:\Users\shubh\Downloads\Audio_Speech_Actors_01-24"
AUDIO_SONG_DIR = r"C:\Users\shubh\Downloads\Audio_Song_Actor_01-24"
VIDEO_SPEECH_DIRS = [
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_01",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_02",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_03",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_04",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_05",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_06",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_07",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_08",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_09",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_10",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_11",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_12",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_13",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_14",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_15",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_16",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_17",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_18",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_19",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_20",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_21",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_22",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_23",
    r"C:\Users\shubh\Downloads\Video_Speech_Actor_24"
]
VIDEO_SONG_DIRS = [
    r"C:\Users\shubh\Downloads\Video_Song_Actor_01",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_02",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_03",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_04",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_05",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_06",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_07",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_08",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_09",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_10",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_11",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_12",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_13",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_14",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_15",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_16",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_17",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_19",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_20",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_21",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_22",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_23",
    r"C:\Users\shubh\Downloads\Video_Song_Actor_24"
]

ALL_AUDIO_PATHS = [SPEECH_DIR, AUDIO_SONG_DIR] + VIDEO_SPEECH_DIRS + VIDEO_SONG_DIRS
TEMP_AUDIO_PATH = "temp_audio.wav"

# -------------------------
# STEP 2: Data Augmentation Functions
# -------------------------
def add_noise(data, noise_factor=0.005):
    noise = np.random.randn(len(data))
    return data + noise_factor * noise

def shift(data, shift_max=0.2, sr=22050):
    shift = np.random.randint(sr * shift_max)
    return np.roll(data, shift)

def stretch(data, rate=1.1):
    try:
        return librosa.effects.time_stretch(data, rate)
    except:
        return data

def pitch_shift(data, sr, n_steps=2):
    try:
        return librosa.effects.pitch_shift(data, sr, n_steps)
    except:
        return data

# -------------------------
# STEP 3: Extract Emotion from Filename
# -------------------------
def extract_emotion(filename):
    emotion_map = {
        '01': 'neutral', '02': 'calm', '03': 'happy', '04': 'sad',
        '05': 'angry', '06': 'fearful', '07': 'disgust', '08': 'surprised'
    }
    try:
        parts = os.path.basename(filename).split("-")
        if len(parts) >= 3:
            return emotion_map.get(parts[2], "unknown")
    except:
        pass
    return "unknown"

# -------------------------
# STEP 4: Extract Audio Features with Augmentation
# -------------------------
def extract_features(file_path, max_pad_len=174, augment=False):
    try:
        if file_path.endswith(".mp4"):
            video = VideoFileClip(file_path)
            audio = video.audio
            audio.write_audiofile(TEMP_AUDIO_PATH, verbose=False, logger=None)
            x, sr = librosa.load(TEMP_AUDIO_PATH, sr=None)
        else:
            x, sr = librosa.load(file_path, sr=None)
        x = librosa.util.fix_length(x, size=sr*3)

        # Apply augmentation randomly
        if augment:
            aug_choice = np.random.choice(['none', 'noise', 'shift', 'stretch', 'pitch'])
            if aug_choice == 'noise':
                x = add_noise(x)
            elif aug_choice == 'shift':
                x = shift(x, sr=sr)
            elif aug_choice == 'stretch':
                x = stretch(x, rate=np.random.uniform(0.8, 1.2))
            elif aug_choice == 'pitch':
                x = pitch_shift(x, sr=sr, n_steps=np.random.randint(-2, 3))

        mfcc = librosa.feature.mfcc(y=x, sr=sr, n_mfcc=40)
        if mfcc.shape[1] < max_pad_len:
            pad_width = max_pad_len - mfcc.shape[1]
            mfcc = np.pad(mfcc, pad_width=((0,0),(0,pad_width)), mode='constant')
        else:
            mfcc = mfcc[:, :max_pad_len]
        return mfcc
    except Exception as e:
        return None

# -------------------------
# STEP 5: Load All Data with Augmentation
# -------------------------
X, y = [], []
for path in ALL_AUDIO_PATHS:
    for root, dirs, files in os.walk(path):
        for file in tqdm(files):
            if file.endswith((".wav", ".mp4")):
                full_path = os.path.join(root, file)
                emotion = extract_emotion(file)
                # Original features
                features = extract_features(full_path, augment=False)
                if features is not None and emotion != "unknown":
                    X.append(features)
                    y.append(emotion)
                # Augmented features
                features_aug = extract_features(full_path, augment=True)
                if features_aug is not None and emotion != "unknown":
                    X.append(features_aug)
                    y.append(emotion)

X = np.array(X)
y = np.array(y)
X = np.transpose(X, (0, 2, 1))  # shape: (num_samples, 174, 40)

# -------------------------
# STEP 6: Preprocess Labels
# -------------------------
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_cat = to_categorical(y_encoded)

# -------------------------
# STEP 7: Train-Test Split
# -------------------------
SEED = 42
X_train, X_val, y_train, y_val = train_test_split(X, y_cat, test_size=0.2, stratify=y_cat, random_state=SEED)

# -------------------------
# STEP 8: Build 1D CNN Model
# -------------------------
model = Sequential()
model.add(Conv1D(128, 5, activation='relu', input_shape=(X.shape[1], X.shape[2])))
model.add(BatchNormalization())
model.add(MaxPooling1D(2))
model.add(Dropout(0.3))
model.add(Conv1D(64, 5, activation='relu'))
model.add(BatchNormalization())
model.add(MaxPooling1D(2))
model.add(Dropout(0.3))
model.add(Flatten())
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(y_cat.shape[1], activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# -------------------------
# STEP 9: Handle Class Imbalance
# -------------------------
class_weights = compute_class_weight('balanced', classes=np.unique(y_encoded), y=y_encoded)
class_weights = dict(enumerate(class_weights))

# -------------------------
# STEP 10: Train Model
# -------------------------
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=32,
    verbose=1,
    class_weight=class_weights
)

# -------------------------
# STEP 11: Evaluate Model
# -------------------------
scores = model.evaluate(X_val, y_val, verbose=0)
print(f"\n✅ Final Accuracy on Validation Data: {scores[1]*100:.2f}%")

# Additional metrics
y_pred = model.predict(X_val)
print(classification_report(np.argmax(y_val, axis=1), np.argmax(y_pred, axis=1), target_names=le.classes_))

  _warn(("h5py is running against HDF5 {0} when it was built against {1}, "
0it [00:00, ?it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 60/60 [00:06<00:00,  9.23it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 60/60 [00:02<00:00, 27.32it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 60/60 [00:02<00:00, 26.49it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 60/60 [00:02<00:00, 24.70it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 60/60 [00:02<00:00, 24.81it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 60/60 [00:02<00:00, 24.27it/s]
100%|██████████████████████████████████████████████████████████████████████████████████| 60/60 [00:02<00:00, 25.93it/s]
100%|████████████████████████████████████████████████████████████

Epoch 1/50
[1m318/318[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 12ms/step - accuracy: 0.1530 - loss: 2.3086 - val_accuracy: 0.2037 - val_loss: 2.0200
Epoch 2/50
[1m318/318[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.1764 - loss: 1.9517 - val_accuracy: 0.1950 - val_loss: 1.9731
Epoch 3/50
[1m318/318[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 11ms/step - accuracy: 0.2143 - loss: 1.8924 - val_accuracy: 0.2577 - val_loss: 1.8324
Epoch 4/50
[1m318/318[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.2707 - loss: 1.8023 - val_accuracy: 0.2931 - val_loss: 1.7511
Epoch 5/50
[1m318/318[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.2810 - loss: 1.7770 - val_accuracy: 0.3270 - val_loss: 1.6774
Epoch 6/50
[1m318/318[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 11ms/step - accuracy: 0.3147 - loss: 1.6915 - val_accuracy: 0.3290 - val_loss: 1.7001
Epoch 7/50
[1m318/318

In [1]:
model.save("model.h5")


NameError: name 'model' is not defined

In [3]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Create and train your model
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(40,)))
model.add(Dense(8, activation='softmax'))

# (Assume you trained the model here...)
# model.fit(X_train, y_train, ...)

# ✅ Now save it
model.save("model.h5")


  _warn(("h5py is running against HDF5 {0} when it was built against {1}, "
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
