In [62]:
# this code implements a deep neural network for feature learning
# this code comes from Vishwa's class on constructing a data loader and creating a model

# Some common system imports
import os
import sys
import importlib
import time

# Numeric computing
import numpy as np

# Sklearn functions are useful for generating train/test splits, and metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from scipy.io import wavfile

# pytorch
import torch
import torch.utils.data as tdata

# Plotting (if we want it)
import matplotlib.pyplot as plt

# importing our own modules
import audio_datasets as ads

In [None]:
# The next two cells are to make sure that plotly live plotting works
#fig = go.FigureWidget()
#fig.add_scatter()
#fig.layout.xaxis.title = "Epoch"
#fig.layout.yaxis.title = "Loss"
#fig.layout.height = 500
#fig.layout.width = 500

#fig

In [48]:
print(os.getcwd())

/home/ethandp/301-ml-project/src


In [55]:
# loading the raw data for feature learning
training_path = os.path.join(os.getcwd(), "..", "training_data")
files = os.listdir(training_path)
wav_files = []
file_type = "wav"
LABELS = {"neutral": 0, "calm": 1, "happy": 2, "sad": 3, "angry": 4, "fearful": 5, "disgust": 6, "surprised": 7}

for file in files:
    curr_path = os.path.join(training_path, file)
    if os.path.isfile(curr_path) and file_type in file:
        for label in LABELS.keys():
            if label in file:
                wav_files.append((file, LABELS[label]))

data_array = []
label_array = []
for data in wav_files: #in the my_datasets code
    data_array.append(wavfile.read(os.path.join(training_path, data[0]))[1])
    label_array.append(data[1])

max_len = max([len(data) for data in data_array])
data_array = [np.resize(data, 48000*2) for data in data_array]

  data_array.append(wavfile.read(os.path.join(training_path, data[0]))[1])


In [86]:
# split into training and testing
train_data, test_data, train_labels, test_labels = train_test_split(data_array, label_array, train_size=0.5, test_size=0.5)
#print(data.shape, train_data.shape, test_data.shape)

In [87]:
train_ten, test_ten = torch.tensor(train_data), torch.tensor(test_data)
train_y_ten, test_y_ten = torch.tensor(train_labels), torch.tensor(test_labels)

train_dataset = tdata.TensorDataset(train_ten, train_y_ten)

train_loader = tdata.DataLoader(train_dataset, batch_size=32) #this is the data loader

In [104]:
#implementing kernel SVM
class MLP(torch.nn.Module):
  def __init__(self):
    super().__init__()

    self.fc1 = torch.nn.Linear(in_features=96000, out_features=8)
    self.fc2 = torch.nn.Linear(in_features=8, out_features=8)
    self.fc3 = torch.nn.Linear(in_features=8, out_features=1)

    self.classifier = torch.softmax(dim=1)
    # 6 neurons on 2 hidden layers

  def forward(self, x):
    x1 = torch.relu(self.fc1(x))
    x2 = torch.relu(self.fc2(x1))
    x3 = self.fc3(x2) #usually left linear
    print(x3)
    return self.classifier(x3) #for classification

In [107]:
n_epochs = 10 #essentially number of iterations

#instantiating a model
model = MLP()

#loss function
criterion = torch.nn.BCELoss() #binary cross-entropy loss, clamps log values to protect against extremes

#optimizer mechanism
optimizer = torch.optim.Adam(lr=1e-2, params=model.parameters())

loss_array = np.zeros(n_epochs)
#something about test accuracy here - see final code
for epoch_idx in range(n_epochs):
  for batch in train_loader:
    data_batch, labels_batch = batch

    # move to GPU if available - data_batch = data_batch.cuda() etc.

    #predict
    labels_pred = model(data_batch.to(torch.float32))

    #compute loss
    loss = criterion(labels_pred.flatten(), labels_batch.to(torch.float32)) #note: cross entropy is not symmetric

    loss_array[epoch_idx] += loss.item()/data_batch.shape[0] #mean of loss
    #plotting the precision
    #with torch.no_grad():
      #test_pred = model(test_ten).flatten()
      #test_loss =
 
    #backprop
    optimizer.zero_grad()
    loss.backward() #backprop from pytorch
    optimizer.step() #all params optimized

  #if epoch_idx%50 == 0:
  #  fig.data[0].y = loss_array[:epoch_idx]
  #  fig.update_yaxes(type='log')

tensor([[  -1.0272],
        [  10.6057],
        [  -7.9476],
        [   3.2453],
        [  -4.5837],
        [ -88.0562],
        [ -25.8645],
        [  -4.5980],
        [  81.6657],
        [ -27.8865],
        [ -28.0219],
        [  -2.4419],
        [  -1.5372],
        [ -10.3942],
        [ -30.1047],
        [   0.1405],
        [ -34.9585],
        [-123.6157],
        [ 100.2664],
        [ -19.2831],
        [ -35.3879],
        [ -20.6436],
        [   8.7236],
        [  -6.8262],
        [ -10.0513],
        [ -32.4376],
        [  -7.1324],
        [ -14.5964],
        [ -33.8618],
        [ -25.6655],
        [ -52.6064],
        [ -22.9612]], grad_fn=<AddmmBackward0>)
tensor([[ -36.0552],
        [  -6.5435],
        [  11.1721],
        [ -67.7044],
        [ -60.1644],
        [-170.5860],
        [   0.8323],
        [ -36.9735],
        [ -78.1228],
        [  10.0849],
        [  -8.0506],
        [ -17.9014],
        [ -11.3470],
        [  -6.5339],
       

In [106]:
with torch.no_grad():
  test_pred = model(test_ten.to(torch.float32)).flatten()

print(classification_report(test_y_ten, test_pred))

tensor([[-1.7226e+01],
        [-2.8167e+01],
        [-8.1368e+00],
        [ 2.5520e+01],
        [ 2.0588e+00],
        [-1.6326e+01],
        [-8.1965e-01],
        [ 5.9950e+01],
        [-7.9297e+00],
        [-7.1984e-01],
        [-4.0538e+01],
        [ 1.3690e+01],
        [-3.3584e+01],
        [ 7.4702e+01],
        [ 5.9603e+00],
        [ 1.3121e+01],
        [-2.4796e+01],
        [-1.2551e-01],
        [-1.1680e+01],
        [ 5.2231e+00],
        [-1.4369e+01],
        [ 2.3314e+02],
        [-9.9820e+00],
        [ 1.2637e+02],
        [ 3.2209e+00],
        [ 6.1711e+01],
        [-2.7453e+01],
        [-7.9741e+00],
        [-5.6872e+00],
        [ 1.4550e+00],
        [ 1.0515e+01],
        [-1.9508e+01],
        [-3.8490e+01],
        [ 4.9043e+00],
        [-1.8754e+00],
        [-1.1567e+01],
        [ 9.2301e+00],
        [ 7.9717e+00],
        [-2.2133e+00],
        [-6.1522e+01],
        [-1.6662e+01],
        [ 3.0397e+01],
        [-2.6792e+00],
        [-2

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
