In [17]:
# this code implements a deep neural network for feature learning
# this code comes from Vishwa's class on constructing a data loader and creating a model

# Some common system imports
import os
import sys
import importlib
import time

# Numeric computing
import numpy as np

# Sklearn functions are useful for generating train/test splits, and metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score

from scipy.io import wavfile

from torchaudio import transforms

# pytorch
import torch
import torch.utils.data as tdata
import torch.nn.functional as func

# importing our own modules
import audio_datasets as ads


torch.set_printoptions(profile="full")

In [18]:
# loading the raw data for feature learning
training_path = os.path.join(os.getcwd(), "..", "training_data/data")
files = os.listdir(training_path)
wav_files = []
file_type = "wav"
LABELS = {"neutral": 0, "calm": 1, "happy": 2, "sad": 3, "angry": 4, "fearful": 5, "disgust": 6, "surprised": 7}

for file in files:
    curr_path = os.path.join(training_path, file)
    if os.path.isfile(curr_path) and file_type in file:
        for label in LABELS.keys():
            if label in file:
                wav_files.append((file, LABELS[label]))

data_array = []
label_array = []
for data in wav_files: #in the my_datasets code
    data_array.append(wavfile.read(os.path.join(training_path, data[0]))[1])
    label_array.append(data[1])

max_len = max([len(data) for data in data_array])
data_array = [np.resize(data, 48000*2) for data in data_array]

  data_array.append(wavfile.read(os.path.join(training_path, data[0]))[1])


In [19]:
n = 12
mfcc = transforms.MFCC(sample_rate=48000, n_mfcc=n, melkwargs={"n_fft": 200, "n_mels": 13, "center": False})

In [20]:
#2D MFCC data array
# split into training and testing
mfcc_data = []
for data in data_array:
    mfcc_data_array1 = mfcc(torch.tensor(data).float())
    mfcc_data_array1 = mfcc_data_array1.reshape(mfcc_data_array1.shape[0]*mfcc_data_array1.shape[1])
    mfcc_data_array1 = np.array(mfcc_data_array1)
    mfcc_data.append(mfcc_data_array1)
mfcc_data_array = np.array(mfcc_data)   
train_data, test_data, train_labels, test_labels = train_test_split(mfcc_data_array, label_array, train_size=0.75, test_size=0.25)

In [21]:

train_ten, test_ten = torch.tensor(train_data), torch.tensor(test_data)
train_y_ten, test_y_ten = torch.tensor(train_labels), torch.tensor(test_labels)

train_dataset = tdata.TensorDataset(train_ten, train_y_ten)

train_loader = tdata.DataLoader(train_dataset, batch_size=64) #this is the data loader

In [22]:
from sklearn.neural_network import MLPClassifier
y_pred = MLPClassifier(hidden_layer_sizes=(1200,), alpha=.1, max_iter=70, random_state=1).fit(train_data, train_labels).predict(test_data)
print(classification_report(y_pred, test_labels))

              precision    recall  f1-score   support

           0       0.65      0.33      0.44        33
           1       0.57      0.62      0.60        32
           2       0.34      0.52      0.41        27
           3       0.29      0.25      0.27        36
           4       0.54      0.55      0.55        38
           5       0.37      0.33      0.35        40
           6       0.50      0.70      0.58        33
           7       0.45      0.40      0.42        43

    accuracy                           0.45       282
   macro avg       0.46      0.46      0.45       282
weighted avg       0.46      0.45      0.45       282





In [24]:
from sklearn.model_selection import KFold, cross_val_score
#Cross-validation for split of 5
k_folds = KFold(n_splits = 5)
y_model = MLPClassifier(hidden_layer_sizes=(1200,), alpha=.1, max_iter=70, random_state=1)
scores = cross_val_score(y_model, train_data, train_labels, cv = k_folds)



In [27]:
print(scores)
#y_cross = scores.predict(test_data)
#print(classification_report(y_cross, test_labels))

[0.40236686 0.38461538 0.4556213  0.33928571 0.46428571]


In [None]:
#This section is for parameter selection
y_pred = MLPClassifier(max_iter=70)

parameter_space = {
    'hidden_layer_sizes': [(1200, 1200), (1200,)],
    'random_state' : [1, 3]
}

from sklearn.model_selection import GridSearchCV

clf = GridSearchCV(y_pred, parameter_space, n_jobs=-1, cv=5)
clf.fit(train_data, train_labels)

In [None]:
print('Best parameters found:\n', clf.best_params_)

#All results

means = clf.cv_results_['mean_test_score']
stds = clf.cv_results_['std_test_score']
for mean, std, params in zip(means, stds, clf.cv_results_['params']):
    print("%0.3f (+/-%0.03f) for %r" % (mean, std * 2, params))