In [1]:
# import os
# import numpy as np
# import librosa
# from sklearn.model_selection import train_test_split
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Conv2D, MaxPooling2D, LSTM, Dense, Dropout, TimeDistributed, Flatten
# from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
# from tensorflow.keras.optimizers import Adam
# from google.colab import drive
# from tensorflow.keras.utils import to_categorical

# # Set the paths to the data files
# drive.mount('/content/drive')

# angry_path = '/content/drive/My Drive/Colab Notebooks/Emotion_Recognization/Dataset/Angry'
# calm_path = '/content/drive/My Drive/Colab Notebooks/Emotion_Recognization/Dataset/Calm'
# neutral_path = '/content/drive/My Drive/Colab Notebooks/Emotion_Recognization/Dataset/Neutral'
# happy_path = '/content/drive/My Drive/Colab Notebooks/Emotion_Recognization/Dataset/Happy'
# sad_path = '/content/drive/My Drive/Colab Notebooks/Emotion_Recognization/Dataset/Sad'

# # Define the number of MFCCs (Mel Frequency Cepstral Coefficients) to extract from each audio file
# num_mfcc = 40

# # Define a fixed length for each feature matrix
# max_len = 500

# # Define a function to extract the MFCCs from an audio file
# def extract_features(file_path):
#     y, sr = librosa.load(file_path, sr=None)
#     mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=num_mfcc)
#     return mfccs

# # Load the data into memory
# X = []
# y = []
# for path, emotion in [(angry_path, 0), (calm_path, 1), (neutral_path, 2), (happy_path, 3), (sad_path, 4)]:
#     for file in os.listdir(path):
#         file_path = os.path.join(path, file)
#         mfccs = extract_features(file_path)
#         if len(mfccs.T) <= max_len:
#             pad_width = max_len - len(mfccs.T)
#             mfccs = np.pad(mfccs.T, pad_width=((0, pad_width), (0, 0)), mode='constant')
#             X.append(mfccs)
#             y.append(emotion)

# X = np.array(X)
# y = np.array(y)

# # Convert the target labels to one-hot encoded vectors
# y = to_categorical(y)

# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Reshape the data to fit the RNN input shape
# X_train = np.expand_dims(X_train, axis=-1)
# X_test = np.expand_dims(X_test, axis=-1)

# # Define the model architecture
# model = Sequential()
# model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(max_len, num_mfcc, 1)))
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
# model.add(MaxPooling2D((2, 2)))
# model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
# model.add(MaxPooling2D((2, 2)))
# model.add(TimeDistributed(Flatten()))
# model.add(Dropout(0.5))
# model.add(Dense(512, activation='relu'))
# model.add(LSTM(128, dropout=0.5))
# model.add(Dense(5, activation='softmax'))

# # Compile the model
# model.compile(optimizer=Adam(learning_rate=1e-4),
#               loss='categorical_crossentropy',
#               metrics=['accuracy'])

# # Define a callback for TensorBoard
# tensorboard_callback = TensorBoard(log_dir='./logs')

# # Define early stopping
# early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.001, patience=10, verbose=1, mode='auto',
#                                baseline=None, restore_best_weights=True)

# # Train the model
# history = model.fit(X_train, y_train,
# validation_data=(X_test, y_test),
# epochs=50, batch_size=32,
# callbacks=[early_stopping, tensorboard_callback])

# #Save the model
# model.save('my_model.h5')

# #Evaluate the model on test data
# test_loss, test_acc = model.evaluate(X_test, y_test)

# print('Test accuracy:', test_acc)

In [2]:
import os
import numpy as np
import librosa
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, LSTM, Dense, Dropout, TimeDistributed, Flatten
from tensorflow.keras.callbacks import EarlyStopping, TensorBoard
from tensorflow.keras.optimizers import Adam
from google.colab import drive
from tensorflow.keras.utils import to_categorical

# Set the paths to the data files
drive.mount('/content/drive')

angry_path = '/content/drive/My Drive/Colab Notebooks/Emotion_Recognization/Dataset/Angry'
calm_path = '/content/drive/My Drive/Colab Notebooks/Emotion_Recognization/Dataset/Calm'
neutral_path = '/content/drive/My Drive/Colab Notebooks/Emotion_Recognization/Dataset/Neutral'
happy_path = '/content/drive/My Drive/Colab Notebooks/Emotion_Recognization/Dataset/Happy'
sad_path = '/content/drive/My Drive/Colab Notebooks/Emotion_Recognization/Dataset/Sad'

Mounted at /content/drive


In [3]:
# Define the number of MFCCs (Mel Frequency Cepstral Coefficients) to extract from each audio file
num_mfcc = 40

# Define a fixed length for each feature matrix
max_len = 500

# Define a function to extract the MFCCs from an audio file
def extract_features(file_path):
    y, sr = librosa.load(file_path, sr=None)
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=num_mfcc)
    return mfccs

# Load the data into memory
X = []
y = []
for path, emotion in [(angry_path, 0), (calm_path, 1), (neutral_path, 2), (happy_path, 3), (sad_path, 4)]:
    for file in os.listdir(path):
        file_path = os.path.join(path, file)
        mfccs = extract_features(file_path)
        if len(mfccs.T) <= max_len:
            pad_width = max_len - len(mfccs.T)
            mfccs = np.pad(mfccs.T, pad_width=((0, pad_width), (0, 0)), mode='constant')
            X.append(mfccs)
            y.append(emotion)

X = np.array(X)
y = np.array(y)

# Convert the target labels to one-hot encoded vectors
y = to_categorical(y)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Reshape the data to fit the RNN input shape
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)


In [4]:
# Define the model architecture
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(max_len, num_mfcc, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
model.add(MaxPooling2D((2, 2)))
model.add(TimeDistributed(Flatten()))
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(LSTM(128, dropout=0.5))
model.add(Dense(5, activation='softmax'))


In [5]:
# Compile the model
model.compile(optimizer=Adam(learning_rate=1e-4),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Define a callback for TensorBoard
tensorboard_callback = TensorBoard(log_dir='./logs')

# Define early stopping
early_stopping = EarlyStopping(monitor='val_accuracy', min_delta=0.001, patience=10, verbose=1, mode='auto',
                               baseline=None, restore_best_weights=True)

In [6]:
# Train the model
history = model.fit(X_train, y_train,
validation_data=(X_test, y_test),
epochs=100, batch_size=32,
callbacks=[early_stopping, tensorboard_callback])


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 44: early stopping


In [7]:
#Save the model
model.save('my_model.h5')

#Evaluate the model on test data
test_loss, test_acc = model.evaluate(X_test, y_test)

print('Test accuracy:', test_acc)

Test accuracy: 0.8068410754203796
