# Create and Train Model

In [2]:
import os

import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models

import config
from data_loading import load_data_from_dir


### Load the Features previously extracted as part of main.py

In [3]:
wav_mfccs_dir = config.TRAIN_WAV_MFCCS
lms_mfccs_dir = config.TRAIN_LMS_MFCCS

wav_mfccs_list = []
wav_labels_list = []

for file_name in os.listdir(wav_mfccs_dir):
    if file_name.endswith('.npy'):
        mfccs = np.load(os.path.join(wav_mfccs_dir, file_name))
        wav_mfccs_list.append(mfccs)
        
        label = file_name.split('_')[0]
        wav_labels_list.append(label)

lms_mfccs_list = []
lms_labels_list = []

for file_name in os.listdir(lms_mfccs_dir):
    if file_name.endswith('.npy'):
        mfccs = np.load(os.path.join(lms_mfccs_dir, file_name))
        lms_mfccs_list.append(mfccs)
        
        label = file_name.split('_')[0]
        lms_labels_list.append(label)



Check shape match

In [4]:
print(wav_mfccs_list[0].shape)
print(lms_mfccs_list[0].shape)


(40, 775)
(40, 775)


In [5]:
wav_labels_list

['03-01-01-01-01-01-01.npy',
 '03-01-01-01-01-01-02.npy',
 '03-01-01-01-01-01-04.npy',
 '03-01-01-01-01-01-05.npy',
 '03-01-01-01-01-01-06.npy',
 '03-01-01-01-01-01-08.npy',
 '03-01-01-01-01-01-09.npy',
 '03-01-01-01-01-01-10.npy',
 '03-01-01-01-01-01-12.npy',
 '03-01-01-01-01-01-14.npy',
 '03-01-01-01-01-01-15.npy',
 '03-01-01-01-01-01-16.npy',
 '03-01-01-01-01-01-18.npy',
 '03-01-01-01-01-01-24.npy',
 '03-01-01-01-01-02-01.npy',
 '03-01-01-01-01-02-02.npy',
 '03-01-01-01-01-02-03.npy',
 '03-01-01-01-01-02-04.npy',
 '03-01-01-01-01-02-06.npy',
 '03-01-01-01-01-02-08.npy',
 '03-01-01-01-01-02-09.npy',
 '03-01-01-01-01-02-10.npy',
 '03-01-01-01-01-02-11.npy',
 '03-01-01-01-01-02-12.npy',
 '03-01-01-01-01-02-17.npy',
 '03-01-01-01-01-02-18.npy',
 '03-01-01-01-01-02-19.npy',
 '03-01-01-01-01-02-20.npy',
 '03-01-01-01-01-02-21.npy',
 '03-01-01-01-01-02-23.npy',
 '03-01-01-01-01-02-24.npy',
 '03-01-01-01-02-01-01.npy',
 '03-01-01-01-02-01-02.npy',
 '03-01-01-01-02-01-03.npy',
 '03-01-01-01-

### Load the Previously Split Directories

Store feature files into tes, val, and train

### Load Training

In [6]:
train_dirs = [config.TRAIN_WAV_MFCCS, config.TRAIN_LMS_MFCCS, config.TRAIN_AUG_WAV_MFCCS, config.TRAIN_AUG_LMS_MFCCS]

X_train = []
y_train = []

for train_dir in train_dirs:
    for file_name in os.listdir(train_dir):
        if file_name.endswith('.npy'):
            file_path = os.path.join(train_dir, file_name)
            mfccs = np.load(file_path)
            X_train.append(mfccs)
            label = file_name.split('.npy')[0]
            y_train.append(label)

X_train = np.array(X_train)
y_train = np.array(y_train)


In [7]:
X_train.shape, y_train.shape

((3433, 40, 775), (3433,))

In [8]:
X_train[0].shape

(40, 775)

In [9]:
y_train

array(['03-01-01-01-01-01-01', '03-01-01-01-01-01-02',
       '03-01-01-01-01-01-04', ..., '03-01-08-02-02-02-20',
       '03-01-08-02-02-02-21', '03-01-08-02-02-02-23'], dtype='<U20')

In [10]:
y_train.shape

(3433,)

In [11]:
unique_labels = np.unique(y_train)
num_unique_labels = len(unique_labels)
print("Number of unique labels:", num_unique_labels)


Number of unique labels: 1323


### Load Testing

In [12]:
test_dirs = [config.TEST_WAV_MFCCS, config.TEST_LMS_MFCCS]

X_test = []
y_test = []

for test_dir in test_dirs:
    for file_name in os.listdir(test_dir):
        if file_name.endswith('.npy'):
            file_path = os.path.join(test_dir, file_name)
            mfccs = np.load(file_path)
            X_test.append(mfccs)
            label = file_name.split('.npy')[0]
            y_test.append(label)

X_test = np.array(X_test)
y_test = np.array(y_test)


In [13]:
X_test.shape, y_test.shape

((610, 40, 775), (610,))

### Load Validation

In [14]:
val_dirs = [config.VAL_WAV_MFCCS, config.VAL_LMS_MFCCS]

X_val = []
y_val = []

for val_dir in val_dirs:
    for file_name in os.listdir(val_dir):
        if file_name.endswith('.npy'):
            file_path = os.path.join(val_dir, file_name)
            mfccs = np.load(file_path)
            X_val.append(mfccs)
            label = file_name.split('.npy')[0]
            y_val.append(label)

X_val = np.array(X_val)
y_val = np.array(y_val)


In [15]:
X_val.shape, y_val.shape

((528, 40, 775), (528,))

### Define the model

Get number of classes from filename

In [16]:
filenames, labels = load_data_from_dir(config.WAV_DIR_PATH)

emotion_labels = [filename.split('-')[2] for filename in filenames] # based on filename identifiers https://www.kaggle.com/datasets/uwrfkaggler/ravdess-emotional-speech-audio

num_classes = len(np.unique(emotion_labels))

print("Number of classes (emotions):", num_classes)

Number of classes (emotions): 8


In [17]:
print(np.unique(emotion_labels))

['01' '02' '03' '04' '05' '06' '07' '08']


In [18]:
# was necessary to reshape the data to fit the input shape of the model
X_train_reshaped = X_train.reshape(X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)

model = models.Sequential([
    layers.Input(shape=X_train_reshaped.shape[1:]),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dense(num_classes, activation='softmax')
])


In [19]:
X_train_reshaped.shape

(3433, 40, 775, 1)

### Compile the Model

In [20]:
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

### Need to Encode the Labels as they are String Data

In [21]:
#from sklearn.preprocessing import OneHotEncoder

# extract emotion labels from filenames
#emotions = [label.split('-')[2] for label in y_train]

#encoder = OneHotEncoder(categories='auto', sparse_output=True)
#y_train_encoded = encoder.fit_transform(np.array(emotions).reshape(-1, 1))

In [28]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

# extract emotion labels from filenames
emotions = [label.split('-')[2] for label in y_train]

label_encoder = LabelEncoder()
emotions_encoded = label_encoder.fit_transform(emotions)
emotions_encoded = emotions_encoded.reshape(-1, 1)

encoder = OneHotEncoder(categories='auto', sparse_output=False)
y_train_encoded = encoder.fit_transform(emotions_encoded)

y_test_encoded = encoder.transform(label_encoder.transform([label.split('-')[2] for label in y_test]).reshape(-1, 1))
y_val_encoded = encoder.transform(label_encoder.transform([label.split('-')[2] for label in y_val]).reshape(-1, 1))


In [23]:
y_train_sparse = np.argmax(y_train_encoded, axis=1)


In [24]:
y_train_sparse.shape

(3433,)

In [None]:
y_train_sparse[4]

0

In [26]:

print(np.unique(emotions))

['01' '02' '03' '04' '05' '06' '07' '08']


Train a model using waveforms and another using spectrgrams and compare

### Train the Model

In [30]:
X_train_reshaped.shape

(3433, 40, 775, 1)

In [31]:
y_train_encoded.shape

(3433, 8)

In [33]:
X_val_reshaped.shape

(528, 40, 775, 1)

In [34]:
y_val_encoded.shape

(3433, 8)

In [29]:
X_val_reshaped = X_val.reshape(X_val.shape[0], X_val.shape[1], X_val.shape[2], 1)

history = model.fit(X_train_reshaped, y_train_encoded,
                    epochs=10, 
                    batch_size=32,
                    validation_data=(X_val_reshaped, y_val_encoded)) 


Epoch 1/10
[1m108/108[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 552ms/step - accuracy: 0.2070 - loss: 2.0067

ValueError: Data cardinality is ambiguous. Make sure all arrays contain the same number of samples.'x' sizes: 528
'y' sizes: 3433


### Evaluate Model

In [None]:
# Evaluate the model
test_loss, test_accuracy = model.evaluate(test_data, test_labels)
print('Test accuracy:', test_accuracy)