In [4]:
# this code implements a deep neural network for feature learning
# this code comes from Vishwa's class on constructing a data loader and creating a model

# Some common system imports
import os
import sys
import importlib
import time
import csv

# Numeric computing
import numpy as np

# Sklearn functions are useful for generating train/test splits, and metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.naive_bayes import GaussianNB

from scipy.io import wavfile

# pytorch
import torch
import torch.utils.data as tdata
from torchaudio import transforms

# Plotting (if we want it)
import matplotlib.pyplot as plt

# importing our own modules
import audio_datasets as ads

In [2]:
# loading the raw data for feature learning
training_path = os.path.join(os.getcwd(), "..", "training_data/data")
files = os.listdir(training_path)
wav_files = []
file_type = "wav"
LABELS = {"neutral": 0, "calm": 1, "happy": 2, "sad": 3, "angry": 4, "fearful": 5, "disgust": 6, "surprised": 7}

for file in files:
    curr_path = os.path.join(training_path, file)
    if os.path.isfile(curr_path) and file_type in file:
        for label in LABELS.keys():
            if label in file:
                wav_files.append((file, LABELS[label]))

data_array = []
label_array = []
for data in wav_files: #in the my_datasets code
    data_array.append(wavfile.read(os.path.join(training_path, data[0]))[1])
    label_array.append(data[1])

max_len = max([len(data) for data in data_array])
data_array = [np.resize(data, 48000*5) for data in data_array]

  data_array.append(wavfile.read(os.path.join(training_path, data[0]))[1])


In [7]:
# loading the raw data for feature learning
test_path = os.path.join(os.getcwd(), "..", "test_data")
test_files = os.listdir(test_path)
test_wav_files = []

for file in test_files:
    curr_path = os.path.join(test_path, file)
    if os.path.isfile(curr_path) and file_type in file:
        test_wav_files.append(file)

test_data_array = []
for data in test_wav_files: #in the my_datasets code
    test_data_array.append(wavfile.read(os.path.join(test_path, data))[1])

test_data_array = [np.resize(data, 48000*5) for data in test_data_array]

  test_data_array.append(wavfile.read(os.path.join(test_path, data))[1])


In [11]:
# split into training and testing
train_data, test_data, train_labels, test_labels = train_test_split(data_array, label_array, train_size=0.5, test_size=0.5)
#print(data.shape, train_data.shape, test_data.shape)

train_ten, test_ten = torch.tensor(train_data), torch.tensor(test_data)
train_y_ten, test_y_ten = torch.tensor(train_labels), torch.tensor(test_labels)

In [12]:
#implementing the Mel-frequency cepstrum coefficients as feature vectors
mfcc = transforms.MFCC(sample_rate=48000, n_mfcc=40)
print(train_ten.shape)
#mel_spectrogram = transforms.MelSpectrogram(sample_rate=48000, n_fft=400, n_mels=1)
#print("2", mel_spectrogram(train_ten[0].float()).shape)
#print("2.5", mfcc(mel_spectrogram(train_ten[0].float())).shape)
print(mfcc(train_ten[0].float()).shape)
mfcc_features = np.zeros((562, 48040))
for row in range(562):
    mel_coef = np.array(mfcc(train_ten[row].float()))
    mel_coef = mel_coef.reshape((1, 48040))
    mfcc_features[row] = mel_coef

print("3", train_ten.size())
print("4", mfcc_features.shape)
print("5", train_y_ten.shape)



torch.Size([562, 240000])
torch.Size([40, 1201])
3 torch.Size([562, 240000])
4 (562, 48040)
5 torch.Size([562])


In [13]:
#implementing the Mel-frequency cepstrum coefficients as feature vectors
mfcc2 = transforms.MFCC(sample_rate=48000, n_mfcc=40)
#mel_spectrogram = transforms.MelSpectrogram(sample_rate=48000, n_fft=400, n_mels=1)
#print("2", mel_spectrogram(train_ten[0].float()).shape)
#print("2.5", mfcc(mel_spectrogram(train_ten[0].float())).shape)
mfcc_test_features = np.zeros((562, 48040))
for row in range(562):
    mel_coef = np.array(mfcc2(test_ten[row].float()))
    mel_coef = mel_coef.reshape((1, 48040))
    mfcc_test_features[row] = mel_coef

In [14]:
gnb2 = GaussianNB() #gnb and y_pred lines directly from scikit-learn website documentation for Naive-Bayes
print(mfcc_features.shape, train_y_ten.shape)
y_pred2 = gnb2.fit(mfcc_features, train_labels).predict(mfcc_test_features)
y_pred2 = list(y_pred2)

'''
LABEL_CONVERTER = dict([(value, key) for key, value in LABELS.items()])

#counting the mislabeled points

with open("bayes_results.csv", "w+") as result_file:
   writer = csv.writer(result_file)
   writer.writerow(["filename", "label"])

   for idx in range(len(y_pred)):
       writer.writerow([os.path.splitext(test_wav_files[idx])[0], LABEL_CONVERTER[y_pred[idx]]])
'''
#print(classification_report(test_y_ten, test_pred))
correct = 0
for label in range(len(y_pred2)):
    if y_pred2[label] == test_labels[label]:
        correct += 1
print("Number of correct labels: ", correct)
#print("Number of mislabeled points out of a total %d points : %d", (len(test_labels), (test_labels != y_pred2).sum()))

(562, 48040) torch.Size([562])
Number of correct labels:  181


In [7]:
gnb = GaussianNB() #gnb and y_pred lines directly from scikit-learn website documentation for Naive-Bayes
y_pred = gnb.fit(data_array, label_array).predict(test_data_array)

LABEL_CONVERTER = dict([(value, key) for key, value in LABELS.items()])

#counting the mislabeled points
with open("bayes_results.csv", "w+") as result_file:
    writer = csv.writer(result_file)
    writer.writerow(["filename", "label"])

    for idx in range(len(y_pred)):
        writer.writerow([os.path.splitext(test_wav_files[idx])[0], LABEL_CONVERTER[y_pred[idx]]])

#print(classification_report(test_y_ten, test_pred))
#print("Number of mislabeled points out of a total %d points : %d", (test_ten.shape[0], (test_y_ten != y_pred).sum()))

NameError: name 'test_data_array' is not defined