In [None]:
import numpy as np
import pandas as pd
import librosa
import os
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.utils import to_categorical

# Load data
# Every sub-folder of data_dir is named after one emotion label and holds the
# clips for that label; the mapping below assigns each label its class index.
data_dir = '../data/audio/'
X, y = [], []
labels = {"happy":0, "sad":1, "angry":2, "neutral":3}

for label, class_index in labels.items():
    folder = os.path.join(data_dir, label)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order identical across machines and runs.
    for file in sorted(os.listdir(folder)):
        path = os.path.join(folder, file)
        # Hidden files (e.g. .DS_Store) and sub-directories are not audio and
        # would make librosa.load raise, aborting the whole load.
        if file.startswith('.') or not os.path.isfile(path):
            continue
        # Read at most 3 s of audio, starting 0.5 s into the clip.
        y_audio, sr = librosa.load(path, duration=3, offset=0.5)
        # A clip shorter than the offset decodes to an empty signal, which
        # cannot produce meaningful features.
        if y_audio.size == 0:
            print(f"Skipping {path}: no audio after the 0.5 s offset")
            continue
        mfcc = librosa.feature.mfcc(y=y_audio, sr=sr, n_mfcc=40)
        # Average each of the 40 coefficients over time so every clip becomes
        # one fixed-length feature vector.
        X.append(np.mean(mfcc.T, axis=0))
        y.append(class_index)

# One-hot encode the targets; the class count follows the label mapping
# (assumes class indices run contiguously from 0).
X, y = np.array(X), to_categorical(y, num_classes=len(labels))

# Model
# Fully connected classifier: a 40-value feature vector goes in, two ReLU
# hidden layers follow, and a softmax over the 4 classes comes out.
model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(40,)))
model.add(Dense(128, activation='relu'))
model.add(Dense(4, activation='softmax'))

# Categorical cross-entropy matches the one-hot encoded targets.
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Train
# Fit on the complete feature/label set: 20 epochs, mini-batches of 16.
model.fit(
    x=X,
    y=y,
    batch_size=16,
    epochs=20,
)

# Save
model_path = "../models/audio_model.h5"
# Create the output directory up front: saving into a missing directory
# raises, which would discard the model that was just trained.
os.makedirs(os.path.dirname(model_path), exist_ok=True)
model.save(model_path)
