In [1]:
# Importing Libraries
import librosa  
import numpy as np  
import os
from tqdm import tqdm
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


In [2]:
def extract_features(audio_path):
    """Summarise one audio file as a single column of averaged librosa features.

    The clip is read with ``librosa.load`` (``res_type='kaiser_fast'``). Nine
    feature matrices are then computed; each one is transposed and averaged
    along axis 0 (per librosa's layout this is presumably the frame/time axis),
    and the resulting vectors are concatenated in this fixed order:
    zero-crossing rate, chroma STFT, MFCC, RMS, mel spectrogram, spectral
    contrast, tonnetz, spectral roll-off, polynomial features.

    Parameters
    ----------
    audio_path
        Path of the audio file, passed straight to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_values, 1)``: the concatenated means as one column.
    """
    signal, rate = librosa.load(audio_path, res_type='kaiser_fast')

    def averaged(matrix):
        # Collapse a feature matrix to one mean value per feature row
        return np.mean(matrix.T, axis=0)

    feat = librosa.feature

    # Order matters: downstream code relies on a fixed feature layout
    parts = [
        averaged(feat.zero_crossing_rate(y=signal)),
        averaged(feat.chroma_stft(y=signal, sr=rate)),
        averaged(feat.mfcc(y=signal, sr=rate)),
        averaged(feat.rms(y=signal)),
        averaged(feat.melspectrogram(y=signal, sr=rate)),
        averaged(feat.spectral_contrast(y=signal, sr=rate)),
        averaged(feat.tonnetz(y=signal, sr=rate)),
        averaged(feat.spectral_rolloff(y=signal, sr=rate)),
        averaged(feat.poly_features(y=signal, sr=rate)),
    ]

    # Concatenate, then turn the flat vector into a single column
    return np.hstack(parts).reshape(-1, 1)


In [None]:
# Accumulators for the dataset: X receives feature arrays, Y the matching emotion names
X, Y = [], []

# The audio is read from a 'filtered_dataset' folder under the current working directory
base_dir = os.getcwd()
dataset_dir = os.path.join(base_dir, 'filtered_dataset')

# Emotion names; the extraction cell reads one sub-folder of dataset_dir per name
emotion_list = [
    'Angry',
    'Disgusted',
    'Fearful',
    'Happy',
    'Neutral',
    'Sad',
    'Surprised',
]

In [None]:
# Extracting features
# Collect every (path, label) pair first so the progress-bar total equals the number
# of files that are actually processed (only .wav files inside the emotion folders).
# Directory listings are sorted because os.listdir returns entries in arbitrary order,
# which would make the sample order -- and so the seeded train/test split -- vary by machine.
audio_files = [
    (os.path.join(dataset_dir, emotion, file), emotion)
    for emotion in emotion_list
    for file in sorted(os.listdir(os.path.join(dataset_dir, emotion)))
    if file.endswith('.wav')
]

# Start from fresh lists so re-running this cell does not append duplicate samples
X = []
Y = []

# tqdm wraps the iterable, so the bar advances once per file and closes itself at the end
for audio_path, emotion in tqdm(audio_files, total=len(audio_files)):
    X.append(extract_features(audio_path))
    Y.append(emotion)

In [None]:
# Saving Features and Labels
# np.save does not create missing parent folders, so make sure the target exists first
os.makedirs('saved_features', exist_ok=True)
np.save('saved_features/data.npy', X)
np.save('saved_features/labels.npy', Y)

In [None]:

# Importing saved features (uncomment to reuse a previous extraction run instead of X / Y in memory)
# X = np.load('saved_features/data.npy')
# Y = np.load('saved_features/labels.npy')

# Converting to numpy arrays
X = np.array(X)
Y = np.array(Y)

# Label Encoding: emotion names -> integer class ids (the mapping is kept in lr.classes_)
lr = LabelEncoder()
Y_enc = lr.fit_transform(Y)

# Splitting the data 70/30 with a fixed seed.
# stratify=Y_enc keeps each emotion's share the same in both parts, so no class ends up
# over- or under-represented in the held-out set purely by chance.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y_enc, test_size=0.3, random_state=48, stratify=Y_enc
)


In [None]:
# Initialize the scaler
scaler = StandardScaler()

# StandardScaler only accepts 2-D input, but every sample returned by extract_features
# is a column of shape (n_values, 1), so the stacked arrays are 3-D.
# Flatten each sample to one row for scaling ...
X_train_2d = X_train.reshape(len(X_train), -1)
X_test_2d = X_test.reshape(len(X_test), -1)

# ... fit on the training data only, transform both splits, and put back the trailing
# channel axis so the arrays match the model's Input(shape=(n_values, 1)).
X_train_scaled = scaler.fit_transform(X_train_2d)[..., np.newaxis]
X_test_scaled = scaler.transform(X_test_2d)[..., np.newaxis]

In [None]:
# Show which integer id the label encoder assigned to each emotion name
for index in range(len(lr.classes_)):
    label = lr.classes_[index]
    print(f'{index}: {label}')

In [None]:
from tensorflow.keras.models import Sequential 
from tensorflow.keras.layers import Input, Conv1D, MaxPooling1D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import ModelCheckpoint

# Sizes are derived from the data/config instead of being hard-coded
n_values = X_train.shape[1]      # length of each sample's feature column
n_classes = len(emotion_list)    # one softmax unit per configured emotion

# 1-D CNN: two conv/pool stages followed by a small dense classifier head
model = Sequential([
    Input(shape=(n_values, 1)),

    # Stage 1: 32 filters
    Conv1D(32, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),

    # Stage 2: 64 filters
    Conv1D(64, kernel_size=3, activation='relu'),
    BatchNormalization(),
    MaxPooling1D(pool_size=2),
    Dropout(0.2),

    Flatten(),

    # Classifier head
    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(n_classes, activation='softmax')
])

In [None]:
# File that receives the model whenever the monitored validation loss improves
checkpoint_path = 'saved_model/model.keras'

# Compiling the model; labels are integer class ids, hence the sparse cross-entropy loss
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

# Defining the checkpoint callback (it saves models; it does not stop training early).
# With save_best_only=True the file is rewritten only when val_loss reaches a new minimum.
cp_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor='val_loss',
    verbose=1,
    save_best_only=True,
    mode='min',
)

In [None]:

# Train the model: 100 epochs, mini-batches of 32, checkpointing through cp_callback.
# NOTE(review): the test split is also used as validation data here, so the checkpoint
# picked on val_loss has effectively been selected on the test set -- consider carving a
# separate validation split out of the training data.
history = model.fit(
    X_train_scaled,
    Y_train,
    validation_data=(X_test_scaled, Y_test),
    epochs=100,
    batch_size=32,
    callbacks=[cp_callback],
)

In [None]:
# Evaluating the model
# The network was trained on standardized inputs, so it must be evaluated on the
# standardized test set as well; feeding raw X_test would give meaningless metrics.
model.evaluate(X_test_scaled, Y_test)

In [None]:
# Visualizing the training history
import matplotlib.pyplot as plt

# One row, two panels: loss on the left, accuracy on the right
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(6, 4))

# Per panel: axis, training key, validation key, curve labels, y-axis label, title
panels = [
    (ax1, 'loss', 'val_loss',
     'Training Loss', 'Validation Loss',
     'Loss', 'Training and Validation Loss'),
    (ax2, 'accuracy', 'val_accuracy',
     'Training Accuracy', 'Validation Accuracy',
     'Accuracy', 'Training and Validation Accuracy'),
]

# Draw the training and validation curve of each metric against the epoch index
for axis, train_key, val_key, train_label, val_label, y_label, title in panels:
    axis.plot(history.history[train_key], label=train_label)
    axis.plot(history.history[val_key], label=val_label)
    axis.set_xlabel('Epochs')
    axis.set_ylabel(y_label)
    axis.set_title(title)
    axis.legend()

# Adjust spacing between the panels, then render the figure
plt.tight_layout()
plt.show()

In [None]:
# Saving the model
# The ModelCheckpoint callback already keeps the best-val_loss model at
# 'saved_model/model.keras'. Saving the final-epoch model to that same path would
# silently overwrite it, so the last-epoch model goes to its own file.
os.makedirs('saved_model', exist_ok=True)
model.save('saved_model/final_model.keras')