In [1]:
# this code implements a deep neural network for feature learning
# this code comes from Vishwa's class on constructing a data loader and creating a model

# Some common system imports
import os
import sys
import importlib
import time

# Numeric computing
import numpy as np

# Sklearn functions are useful for generating train/test splits, and metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from scipy.io import wavfile

from torchaudio import transforms

# pytorch
import torch
import torch.utils.data as tdata
import torch.nn.functional as func

# importing our own modules
import audio_datasets as ads


torch.set_printoptions(profile="full")



In [None]:
# The next two cells are to make sure that plotly live plotting works
#fig = go.FigureWidget()
#fig.add_scatter()
#fig.layout.xaxis.title = "Epoch"
#fig.layout.yaxis.title = "Loss"
#fig.layout.height = 500
#fig.layout.width = 500

#fig

In [48]:
print(os.getcwd())

/home/ethandp/301-ml-project/src


In [2]:
# loading the raw data for feature learning
training_path = os.path.join(os.getcwd(), "..", "training_data/data")
files = os.listdir(training_path)
wav_files = []
file_type = "wav"
LABELS = {"neutral": 0, "calm": 1, "happy": 2, "sad": 3, "angry": 4, "fearful": 5, "disgust": 6, "surprised": 7}

for file in files:
    curr_path = os.path.join(training_path, file)
    if os.path.isfile(curr_path) and file_type in file:
        for label in LABELS.keys():
            if label in file:
                wav_files.append((file, LABELS[label]))

data_array = []
label_array = []
for data in wav_files: #in the my_datasets code
    data_array.append(wavfile.read(os.path.join(training_path, data[0]))[1])
    label_array.append(data[1])

max_len = max([len(data) for data in data_array])
data_array = [np.resize(data, 48000*2) for data in data_array]

  data_array.append(wavfile.read(os.path.join(training_path, data[0]))[1])


In [68]:
n = 12
mfcc = transforms.MFCC(sample_rate=48000, n_mfcc=n, melkwargs={"n_fft": 400, "hop_length": 20, "n_mels": 12, "center": False})

In [69]:
# split into training and testing
mfcc_data_array = [np.array(mfcc(torch.tensor(data).float()))for data in data_array]
train_data, test_data, train_labels, test_labels = train_test_split(mfcc_data_array, label_array, train_size=0.75, test_size=0.25)
print(np.shape(data_array), np.shape(train_data), np.shape(test_data))

(1125, 96000) (843, 12, 4781) (282, 12, 4781)


In [70]:

train_ten, test_ten = torch.tensor(train_data), torch.tensor(test_data)
train_y_ten, test_y_ten = torch.tensor(train_labels), torch.tensor(test_labels)

train_dataset = tdata.TensorDataset(train_ten, train_y_ten)

train_loader = tdata.DataLoader(train_dataset, batch_size=64) #this is the data loader

In [71]:
#implementing kernel SVM
class MLP(torch.nn.Module):
  def __init__(self):
    super().__init__()
    self.fc1 = torch.nn.Linear(in_features=4781, out_features=500)
    self.fc2 = torch.nn.Linear(in_features=500, out_features=8) 
    #self.fc3 = torch.nn.Linear(in_features=100, out_features=8)
    #self.classifier = torch.softmax(dim=1)
    # 6 neurons on 2 hidden layers

  def forward(self, x):
    x1 = torch.relu(self.fc1(x))
    #x2 = torch.relu(self.fc2(x1))
    x2 = self.fc2(x1)
    return torch.sigmoid(x2) #softmax doesn't seem to work
    #return self.classifier(x3) #for classification
    

In [72]:
n_epochs = 40 #essentially number of iterations

#instantiating a model
model = MLP()

#loss function
criterion = torch.nn.CrossEntropyLoss() 

#optimizer mechanism
optimizer = torch.optim.Adam(lr=1e-4, params=model.parameters())

loss_array = np.zeros(n_epochs)
#something about test accuracy here - see final code
for epoch_idx in range(n_epochs):
  for batch in train_loader:
    data_batch, labels_batch = batch
    # move to GPU if available - data_batch = data_batch.cuda() etc.
    #predict
    labels_pred = model(data_batch)
    #compute loss
    loss = criterion(labels_pred[:,0,:], labels_batch) #note: cross entropy is not symmetric
    loss_array[epoch_idx] += loss.item()/data_batch.shape[0] #mean of loss
    #backprop
    optimizer.zero_grad()
    loss.backward() #backprop from pytorch
    optimizer.step() #all params optimized

#  if epoch_idx%50 == 0:
#    fig.data[0].y = loss_array[:epoch_idx]
#    fig.update_yaxes(type='log')

In [73]:
with torch.no_grad():
  test_pred = model(test_ten)

final_pred = []
    #Since each sample is now 8, find the index corresponding to the highest value
for i in test_pred:
  final_pred.append(torch.argmax(i))
final_pred = torch.stack(final_pred)
final_pred = final_pred.int()
print(classification_report(test_y_ten, final_pred))
#print(test_y_ten)p

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        15
           1       0.13      1.00      0.23        36
           2       0.00      0.00      0.00        44
           3       0.00      0.00      0.00        38
           4       0.00      0.00      0.00        29
           5       0.00      0.00      0.00        41
           6       0.00      0.00      0.00        39
           7       0.00      0.00      0.00        40

    accuracy                           0.13       282
   macro avg       0.02      0.12      0.03       282
weighted avg       0.02      0.13      0.03       282



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
