In [24]:
# this code implements a deep neural network for feature learning
# this code comes from Vishwa's class on constructing a data loader and creating a model

# Some common system imports
import os
import sys
import importlib
import time

# Numeric computing
import numpy as np

# Sklearn functions are useful for generating train/test splits, and metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

from scipy.io import wavfile

# pytorch
import torch
import torch.utils.data as tdata
import torch.nn.functional as func

# Plotting (if we want it)
import matplotlib.pyplot as plt

# importing our own modules
import audio_datasets as ads

torch.set_printoptions(profile="full")

In [None]:
# The next two cells are to make sure that plotly live plotting works
#fig = go.FigureWidget()
#fig.add_scatter()
#fig.layout.xaxis.title = "Epoch"
#fig.layout.yaxis.title = "Loss"
#fig.layout.height = 500
#fig.layout.width = 500

#fig

In [48]:
print(os.getcwd())

/home/ethandp/301-ml-project/src


In [2]:
# loading the raw data for feature learning
training_path = os.path.join(os.getcwd(), "..", "training_data/data")
files = os.listdir(training_path)
wav_files = []
file_type = "wav"
LABELS = {"neutral": 0, "calm": 1, "happy": 2, "sad": 3, "angry": 4, "fearful": 5, "disgust": 6, "surprised": 7}

for file in files:
    curr_path = os.path.join(training_path, file)
    if os.path.isfile(curr_path) and file_type in file:
        for label in LABELS.keys():
            if label in file:
                wav_files.append((file, LABELS[label]))

data_array = []
label_array = []
for data in wav_files: #in the my_datasets code
    data_array.append(wavfile.read(os.path.join(training_path, data[0]))[1])
    label_array.append(data[1])

max_len = max([len(data) for data in data_array])
data_array = [np.resize(data, 48000*2) for data in data_array]

  data_array.append(wavfile.read(os.path.join(training_path, data[0]))[1])


In [3]:
# split into training and testing
train_data, test_data, train_labels, test_labels = train_test_split(data_array, label_array, train_size=0.5, test_size=0.5)
print(np.shape(data_array), np.shape(train_data), np.shape(test_data))

(1125, 96000) (562, 96000) (563, 96000)


In [4]:
train_ten, test_ten = torch.tensor(train_data), torch.tensor(test_data)
train_y_ten, test_y_ten = torch.tensor(train_labels), torch.tensor(test_labels)

train_dataset = tdata.TensorDataset(train_ten, train_y_ten)

train_loader = tdata.DataLoader(train_dataset, batch_size=32) #this is the data loader

  train_ten, test_ten = torch.tensor(train_data), torch.tensor(test_data)


In [22]:
#implementing kernel SVM
class MLP(torch.nn.Module):
  def __init__(self):
    super().__init__()

    self.fc1 = torch.nn.Linear(in_features=96000, out_features=10000)
    self.fc2 = torch.nn.Linear(in_features=10000, out_features=100) #Probably add extra layers
    self.fc3 = torch.nn.Linear(in_features=100, out_features=8)
    #self.fc4 = torch.nn.Linear(in_features=8, out_features=1)
    #self.classifier = torch.softmax(dim=1)
    # 6 neurons on 2 hidden layers

  def forward(self, x):
    x1 = torch.relu(self.fc1(x))
    x2 = torch.relu(self.fc2(x1))
    x3 = self.fc3(x2)
    #x4 = self.fc4(x3) #usually left linear
    return torch.softmax(x3, dim=1) #softmax doesn't seem to work
    #return self.classifier(x3) #for classification
    

In [23]:
n_epochs = 10 #essentially number of iterations

#instantiating a model
model = MLP()

#loss function
criterion = torch.nn.CrossEntropyLoss() 

#optimizer mechanism
optimizer = torch.optim.Adam(lr=1e-3, params=model.parameters())

loss_array = np.zeros(n_epochs)
#something about test accuracy here - see final code
for epoch_idx in range(n_epochs):
  for batch in train_loader:
    data_batch, labels_batch = batch

    # move to GPU if available - data_batch = data_batch.cuda() etc.
    #data_batch = data_batch.cuda()

    #predict
    labels_pred = model(data_batch.to(torch.float32))
    final_label = []
    print(labels_pred)
    #create histogram output
    for i in labels_pred:
      final_label.append(torch.histc(i, bins=8, min=0, max=1, out=None))
    tensor_label = torch.tensor(torch.stack(final_label), requires_grad=True)
    #create one-hot labels
    one_label = func.one_hot(labels_batch, num_classes=8)
    #compute loss
    loss = criterion(tensor_label, one_label.type(torch.float32)) #note: cross entropy is not symmetric
    loss_array[epoch_idx] += loss.item()/data_batch.shape[0] #mean of loss
    #backprop
    optimizer.zero_grad()
    loss.backward() #backprop from pytorch
    optimizer.step() #all params optimized

#  if epoch_idx%50 == 0:
#    fig.data[0].y = loss_array[:epoch_idx]
#    fig.update_yaxes(type='log')

tensor([[0.0000e+00, 1.0000e+00, 0.0000e+00, 0.0000e+00, 3.2230e-44, 0.0000e+00,
         2.0516e-41, 0.0000e+00],
        [7.1683e-20, 8.1246e-16, 2.3453e-24, 1.5687e-09, 3.2871e-14, 2.9730e-24,
         1.0000e+00, 6.9428e-11],
        [6.5129e-32, 1.0000e+00, 0.0000e+00, 1.0554e-27, 1.3680e-10, 0.0000e+00,
         2.4692e-24, 2.2421e-44],
        [2.0046e-12, 1.0000e+00, 0.0000e+00, 2.8026e-45, 4.4367e-20, 0.0000e+00,
         1.7141e-15, 0.0000e+00],
        [6.6713e-39, 3.9284e-08, 5.4140e-34, 2.4623e-30, 1.0000e+00, 0.0000e+00,
         6.9584e-30, 2.0940e-24],
        [5.9052e-31, 9.1276e-11, 2.3375e-41, 9.2576e-01, 8.1361e-05, 1.6650e-32,
         9.7910e-14, 7.4155e-02],
        [1.5013e-31, 9.9978e-01, 1.6769e-25, 2.6598e-06, 3.5037e-19, 0.0000e+00,
         8.4078e-45, 2.2068e-04],
        [5.9273e-32, 1.0000e+00, 0.0000e+00, 1.0659e-17, 7.3815e-17, 0.0000e+00,
         2.2344e-14, 1.2824e-20],
        [0.0000e+00, 3.4238e-08, 0.0000e+00, 1.4460e-16, 1.0000e+00, 0.0000e+00,

  tensor_label = torch.tensor(torch.stack(final_label), requires_grad=True)


tensor([[0.0000e+00, 3.7994e-26, 0.0000e+00, 7.4445e-01, 2.5555e-01, 0.0000e+00,
         3.4142e-18, 0.0000e+00],
        [0.0000e+00, 9.9999e-01, 0.0000e+00, 1.0611e-07, 4.4834e-24, 0.0000e+00,
         1.2705e-05, 0.0000e+00],
        [1.8894e-02, 1.6362e-01, 6.7870e-03, 8.2947e-02, 4.8335e-01, 6.7445e-02,
         6.2498e-02, 1.1447e-01],
        [4.2878e-13, 2.2106e-04, 6.8415e-25, 5.7446e-04, 2.6808e-02, 2.5477e-28,
         9.7240e-01, 1.1736e-19],
        [0.0000e+00, 1.2372e-05, 0.0000e+00, 6.2117e-37, 9.9999e-01, 0.0000e+00,
         2.5850e-24, 0.0000e+00],
        [0.0000e+00, 8.7267e-28, 0.0000e+00, 0.0000e+00, 1.0000e+00, 0.0000e+00,
         0.0000e+00, 0.0000e+00],
        [1.8172e-20, 4.2378e-09, 1.3744e-24, 6.3879e-01, 3.6121e-01, 1.4233e-15,
         1.6417e-06, 8.0586e-21],
        [3.9636e-32, 2.6166e-04, 4.0438e-39, 1.3134e-02, 9.8620e-01, 5.3146e-35,
         4.0849e-04, 7.3019e-18],
        [5.6308e-09, 6.7145e-07, 5.3880e-11, 4.3138e-04, 4.6322e-08, 2.3705e-08,

KeyboardInterrupt: 

In [27]:
with torch.no_grad():
  test_pred = model(test_ten.to(torch.float32))

final_pred = []
    #Since each sample is now 8, find the index corresponding to the highest value
for i in test_pred:
  final_pred.append(torch.argmax(i))
final_pred = torch.stack(final_pred)
final_pred = final_pred.int()
print(classification_report(test_y_ten, final_pred))
#print(test_y_ten)p

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        40
           1       0.15      0.37      0.21        78
           2       0.00      0.00      0.00        66
           3       0.07      0.03      0.04        72
           4       0.12      0.33      0.18        83
           5       0.00      0.00      0.00        71
           6       0.13      0.16      0.14        73
           7       0.00      0.00      0.00        80

    accuracy                           0.12       563
   macro avg       0.06      0.11      0.07       563
weighted avg       0.06      0.12      0.08       563



  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
