In [47]:
# This notebook implements a deep neural network for feature learning.
# It is adapted from Vishwa's class on constructing a data loader and creating a model.

# Some common system imports
import os
import sys
import importlib
import time

# Numeric computing
import numpy as np

# Sklearn functions are useful for generating train/test splits, and metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from scipy.io import wavfile

# pytorch
import torch
import torch.utils.data as tdata

# Plotting (if we want it)
import matplotlib.pyplot as plt

# importing our own modules
import audio_datasets as ads

In [None]:
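# The next two cells make sure that plotly live plotting works.
# NOTE: `go` is not imported in the imports cell above; it presumably refers to
# `plotly.graph_objects` (import it as `go` before uncommenting these lines).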
#fig = go.FigureWidget()
#fig.add_scatter()
#fig.layout.xaxis.title = "Epoch"
#fig.layout.yaxis.title = "Loss"
#fig.layout.height = 500
#fig.layout.width = 500

#fig

In [48]:
# Print the kernel's current working directory. The data-loading cell builds its
# training_data path relative to os.getcwd(), so this shows where it will look.
print(os.getcwd())

/home/ethandp/301-ml-project/src


In [55]:
# Load the raw audio clips and their emotion labels for feature learning.
# Every regular file in ../training_data whose name ends in ".wav" and contains one
# of the LABELS keys yields one (samples, label id) pair.
training_path = os.path.join(os.getcwd(), "..", "training_data")
files = os.listdir(training_path)
file_type = "wav"
LABELS = {"neutral": 0, "calm": 1, "happy": 2, "sad": 3, "angry": 4, "fearful": 5, "disgust": 6, "surprised": 7}

# Fixed number of samples every clip is brought to before it is fed to the model.
# NOTE(review): presumably 2 seconds at a 48 kHz sample rate -- confirm against the dataset.
TARGET_LEN = 48000 * 2


def _pad_or_truncate(samples, target_len):
    """Return a 1-D array of exactly ``target_len`` samples.

    Longer input is cut off at ``target_len``; shorter input is right-padded with
    zeros (silence). ``np.resize`` is deliberately not used here because it fills
    the extra space with repeated copies of the clip rather than padding it.

    Parameters
    ----------
    samples : array-like
        Audio samples. Multi-dimensional input is flattened first, which matches
        how ``np.resize`` treated it.
    target_len : int
        Number of samples in the returned array.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(target_len,)`` with the same dtype as ``samples``.
    """
    flat = np.ravel(samples)
    fixed = np.zeros(target_len, dtype=flat.dtype)
    n_keep = min(len(flat), target_len)
    fixed[:n_keep] = flat[:n_keep]
    return fixed


# Collect (file name, label id) pairs. Sorting makes the dataset order independent
# of the arbitrary order in which os.listdir returns directory entries.
wav_files = []
for file_name in sorted(files):
    curr_path = os.path.join(training_path, file_name)
    # Match on the extension rather than on "wav" appearing anywhere in the name.
    if not os.path.isfile(curr_path) or not file_name.endswith("." + file_type):
        continue
    for label, label_id in LABELS.items():
        if label in file_name:
            wav_files.append((file_name, label_id))
            break  # one label per file: the first LABELS key found in the name wins

if not wav_files:
    raise ValueError(f"no labelled .{file_type} files found in {training_path}")

data_array = []
label_array = []
for file_name, label_id in wav_files:
    # wavfile.read returns (sample_rate, samples); only the samples are kept.
    _, samples = wavfile.read(os.path.join(training_path, file_name))
    data_array.append(samples)
    label_array.append(label_id)

# Length of the longest raw clip, measured before clips are brought to TARGET_LEN.
max_len = max(len(samples) for samples in data_array)
data_array = [_pad_or_truncate(samples, TARGET_LEN) for samples in data_array]

  data_array.append(wavfile.read(os.path.join(training_path, data[0]))[1])


In [57]:
# Split into training and testing halves.
# random_state pins the shuffle so that Restart & Run All reproduces the same split;
# stratify keeps the share of each emotion label the same in both halves.
train_data, test_data, train_labels, test_labels = train_test_split(
    data_array,
    label_array,
    train_size=0.5,
    test_size=0.5,
    random_state=0,
    stratify=label_array,
)
# Sanity check: every clip ended up in exactly one of the two halves.
assert len(train_data) + len(test_data) == len(data_array)

[4, 1, 1, 6, 0, 2, 4, 6, 3, 5, 3, 1, 0, 3, 1, 7, 0, 4, 7, 5, 6, 5, 1, 2, 7, 7, 4, 4, 3, 7, 4, 3, 2, 4, 5, 2, 0, 3, 6, 7, 5, 7, 6, 5, 7, 6, 2, 7, 1, 5, 7, 3, 2, 5, 0, 7, 1, 6, 3, 0, 7, 1, 6, 5, 5, 7, 4, 2, 5, 7, 2, 0, 5, 3, 4, 5, 3, 7, 7, 4, 7, 2, 4, 4, 6, 5, 6, 4, 6, 3, 5, 6, 5, 4, 3, 4, 3, 4, 1, 1, 4, 2, 7, 2, 4, 3, 2, 0, 4, 7, 1, 2, 4, 7, 2, 6, 6, 3, 5, 6, 2, 1, 4, 7, 3, 5, 1, 7, 2, 5, 2, 4, 5, 3, 2, 6, 5, 3, 3, 1, 1, 3, 2, 5, 6, 5, 7, 4, 7, 2, 1, 7, 2, 1, 6, 1, 2, 3, 6, 4, 6, 5, 3, 7, 6, 3, 6, 3, 3, 3, 6, 2, 0, 6, 3, 0, 1, 1, 6, 7, 5, 5, 1, 6, 2, 0, 4, 0, 2, 5, 6, 4, 3, 6, 2, 5, 5, 6, 5, 6, 5, 1, 0, 2, 1, 4, 7, 4, 3, 5, 1, 3, 3, 6, 6, 4, 3, 7, 1, 7, 6, 1, 7, 2, 5, 5, 0, 2, 1, 5, 0, 3, 1, 2, 1, 6, 7, 4, 5, 2, 2, 1, 7, 1, 6, 3, 0, 5, 7, 7, 5, 5, 4, 5, 5, 7, 1, 2, 3, 4, 4, 1, 7, 5, 5, 2, 1, 1, 2, 6, 4, 3, 1, 5, 5, 1, 1, 7, 5, 1, 3, 6, 4, 2, 7, 3, 4, 1, 5, 7, 7, 5, 0, 1, 5, 5, 7, 2, 2, 3, 1, 3, 3, 4, 4, 6, 5, 2, 1, 4, 4, 6, 0, 0, 6, 1, 7, 4, 6, 7, 1, 3, 6, 5, 3, 2, 7, 7, 4, 1, 4, 7, 3, 

In [56]:
# Convert the splits to tensors and wrap the training half in a DataLoader.
# The per-clip arrays are stacked into one ndarray first (building a tensor straight
# from a Python list of ndarrays is slow) and cast to float32, the default dtype of
# torch.nn.Linear weights; WAV samples are typically stored as integers.
train_ten = torch.as_tensor(np.asarray(train_data), dtype=torch.float32)
test_ten = torch.as_tensor(np.asarray(test_data), dtype=torch.float32)
# Labels stay integer class ids.
train_y_ten = torch.tensor(train_labels, dtype=torch.long)
test_y_ten = torch.tensor(test_labels, dtype=torch.long)

train_dataset = tdata.TensorDataset(train_ten, train_y_ten)

# shuffle=True draws the batches in a new order every epoch.
train_loader = tdata.DataLoader(train_dataset, batch_size=32, shuffle=True)  # this is the data loader

['disgust', 'sad', 'fearful', 'disgust', 'surprised', 'fearful', 'fearful', 'happy', 'calm', 'angry', 'calm', 'disgust', 'fearful', 'surprised', 'sad', 'disgust', 'calm', 'neutral', 'happy', 'happy', 'fearful', 'calm', 'calm', 'surprised', 'happy', 'surprised', 'happy', 'happy', 'sad', 'calm', 'angry', 'disgust', 'surprised', 'sad', 'calm', 'surprised', 'sad', 'angry', 'happy', 'sad', 'angry', 'angry', 'calm', 'fearful', 'angry', 'sad', 'fearful', 'disgust', 'happy', 'sad', 'calm', 'surprised', 'surprised', 'sad', 'surprised', 'calm', 'fearful', 'sad', 'calm', 'surprised', 'disgust', 'disgust', 'fearful', 'angry', 'disgust', 'fearful', 'fearful', 'neutral', 'calm', 'neutral', 'calm', 'surprised', 'fearful', 'angry', 'happy', 'surprised', 'angry', 'disgust', 'disgust', 'neutral', 'disgust', 'happy', 'calm', 'disgust', 'angry', 'happy', 'disgust', 'surprised', 'fearful', 'sad', 'surprised', 'disgust', 'angry', 'fearful', 'disgust', 'happy', 'fearful', 'sad', 'angry', 'surprised', 'fearfu

ValueError: too many dimensions 'str'

In [None]:
# A small fully connected network (multi-layer perceptron), not a kernel SVM.
class MLP(torch.nn.Module):
  """Multi-layer perceptron with two ReLU hidden layers and a sigmoid output.

  Parameters
  ----------
  in_features : int, default 2
      Size of each input vector.
  hidden_features : int, default 6
      Number of neurons in each of the two hidden layers.
  out_features : int, default 1
      Number of output units.

  The defaults reproduce the original fixed 2 -> 6 -> 6 -> 1 architecture, so
  ``MLP()`` behaves exactly as before.
  """

  def __init__(self, in_features=2, hidden_features=6, out_features=1):
    super().__init__()

    self.fc1 = torch.nn.Linear(in_features=in_features, out_features=hidden_features)
    self.fc2 = torch.nn.Linear(in_features=hidden_features, out_features=hidden_features)
    self.fc3 = torch.nn.Linear(in_features=hidden_features, out_features=out_features)

  def forward(self, x):
    """Map a batch ``x`` of shape (..., in_features) to values in [0, 1] of shape (..., out_features)."""
    x1 = torch.relu(self.fc1(x))
    x2 = torch.relu(self.fc2(x1))
    x3 = self.fc3(x2)  # last layer is left linear ...
    return torch.sigmoid(x3)  # ... and squashed to (0, 1) for classification

In [None]:
n_epochs = 1000  # number of full passes over train_loader

# Instantiate the model.
# NOTE(review): MLP() as defined in this notebook takes 2 input features and emits a
# single sigmoid output, and BCELoss is a binary loss, whereas the loading cell brings
# every clip to 48000*2 samples and LABELS has 8 classes. Confirm and adapt the input
# size, output size and loss before running this cell on the audio data.
model = MLP()

# Loss function: binary cross-entropy. It clamps its log terms to protect against
# extremes and, with the default reduction, returns the MEAN loss over the batch.
criterion = torch.nn.BCELoss()

# Optimizer mechanism.
optimizer = torch.optim.Adam(lr=1e-2, params=model.parameters())

# loss_array[i] holds the mean per-sample training loss of epoch i.
loss_array = np.zeros(n_epochs)
# TODO: track test loss / accuracy on test_ten under torch.no_grad() (see final code).
for epoch_idx in range(n_epochs):
  epoch_loss_sum = 0.0  # sum of per-sample losses seen so far in this epoch
  n_samples_seen = 0
  for data_batch, labels_batch in train_loader:
    batch_size = data_batch.shape[0]

    # move to GPU if available - data_batch = data_batch.cuda() etc.

    # Predict. Linear layers need floating-point input; .float() is a no-op if the
    # batch is already float32.
    labels_pred = model(data_batch.float())

    # Compute loss (note: cross entropy is not symmetric). BCELoss requires the
    # target to be floating point like the prediction.
    loss = criterion(labels_pred.flatten(), labels_batch.float())

    # criterion already averaged over the batch, so weight the batch mean by the
    # batch size here and divide by the sample count once per epoch below. Dividing
    # each batch mean by the batch size again would under-report the loss.
    epoch_loss_sum += loss.item() * batch_size
    n_samples_seen += batch_size

    # backprop
    optimizer.zero_grad()
    loss.backward()  # backprop from pytorch
    optimizer.step()  # all params optimized

  if n_samples_seen:
    loss_array[epoch_idx] = epoch_loss_sum / n_samples_seen

  #if epoch_idx%50 == 0:
  #  fig.data[0].y = loss_array[:epoch_idx]
  #  fig.update_yaxes(type='log')