# Imports

In [142]:
from functions import getClasses
from functions import createFolder
import csv
import os
import matplotlib.pyplot as plt
import sys
import numpy as np
sys.path.append('/home/arclab/Documents/FlorianHwk/ECE271B/')
from pyAudioAnalysis import audioFeatureExtraction as aT
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn import preprocessing
from sklearn.neural_network import MLPClassifier

# Generate Feature Folders

In [36]:
# Optional cell: no need to run it if you already have the train_Features and
# test_Features folders. Put them inside the data/ folder.

ourClasses, _, _, _ = getClasses('classes.csv', "data/ontology.json", 'data/class_labels_indices.csv')


def _writeFeatureCSVs(split, floatFormat):
    """Extract audio features for every class of one data split and save them as CSV.

    For each class name in ``ourClasses`` the wav files under
    ``data/<split>_rawAudio/<class>/`` are processed and one file
    ``data/<split>_Features/<class>.csv`` is written. Every CSV row holds the
    name returned by the extractor followed by that file's feature values.

    Parameters
    ----------
    split : str
        Prefix of the data split, e.g. ``"train"`` or ``"test"``.
    floatFormat : str
        printf-style format applied to every feature value, e.g. ``"%.10f"``.
    """
    createFolder("data/" + split + "_Features")

    for c in ourClasses:
        directory = "data/" + split + "_rawAudio/" + c + "/"
        # NOTE(review): the four numbers are presumably the mid-term window/step
        # and short-term window/step in seconds -- confirm against pyAudioAnalysis.
        features, names = aT.dirWavFeatureExtraction(directory, 1.0, 1.0, 0.05, 0.05)

        csvPath = "data/" + split + "_Features/" + c + ".csv"
        # csv.writer wants a binary file on Python 2 but a text file opened
        # with newline='' on Python 3, so pick the mode from the interpreter.
        if sys.version_info[0] >= 3:
            csvfile = open(csvPath, 'w', newline='')
        else:
            csvfile = open(csvPath, 'wb')

        with csvfile:
            csvWriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
            # zip stops at the shorter of the two sequences, so a mismatch
            # between names and feature rows can never raise an IndexError.
            for name, featureRow in zip(names, features):
                csvWriter.writerow([name] + [floatFormat % feat for feat in featureRow])


# The two splits keep the float precision they were originally saved with.
_writeFeatureCSVs("train", "%.10f")
_writeFeatureCSVs("test", "%.20f")

# Loading Features from CSV

In [119]:
#Assumes the feature folders are already generated. Look at generateFeatures.ipynb
def getFeatureMatrices(pathToFeatureCSV):
    """Load the numeric feature columns of one feature CSV file.

    Parameters
    ----------
    pathToFeatureCSV : str
        Path of the CSV file to read. Every row starts with one or more
        non-numeric name fields followed by the feature values.

    Returns
    -------
    numpy.ndarray
        Float array built from the rows of the file, one row per CSV line
        (2-D when all rows have the same number of values).
    """
    # The reader uses the default '"' quote character, so a comma inside the
    # leading name field splits that name over several fields. One extra
    # leading field is skipped per comma found in the CSV path.
    # NOTE(review): this assumes the stored name contains exactly as many
    # commas as pathToFeatureCSV does -- confirm against the files on disk.
    numLeadingFields = 1 + pathToFeatureCSV.count(',')

    with open(pathToFeatureCSV, 'r') as f:
        csvReader = csv.reader(f, delimiter=',')
        # Builtin float instead of np.float: that alias was deprecated in
        # NumPy 1.20 and removed in 1.24; both mean float64 here.
        data = [np.array(row[numLeadingFields:]).astype(float) for row in csvReader]

    return np.array(data)

# Getting Featureset from saved files

In [143]:
def getFeatureSet(pathToDirWithCSV, ourClasses, numSamplesPerClass = 855):
    """Build a class-balanced data matrix and label vector from per-class CSV files.

    Parameters
    ----------
    pathToDirWithCSV : str
        Directory prefix (including the trailing separator) that holds one
        ``<class>.csv`` file per entry of ``ourClasses``.
    ourClasses : sequence of str
        Class names; the position of a class in this sequence is its label.
    numSamplesPerClass : int, optional
        Number of leading rows taken from every class file.

    Returns
    -------
    wholeDataSet : numpy.ndarray
        Array of shape ``(numSamplesPerClass * len(ourClasses), numFeatures)``
        with the rows of each class stacked in the order of ``ourClasses``.
    Labels : numpy.ndarray
        1-D float array with the class index of every row of ``wholeDataSet``.

    Raises
    ------
    ValueError
        If a class file does not yield a 2-D matrix with at least
        ``numSamplesPerClass`` rows, or if the class files disagree on the
        number of feature columns.
    """
    # Width that used to be hard-coded; now only needed for the shape of the
    # empty result returned when there are no classes at all.
    defaultNumFeatures = 68

    perClassRows = []
    for c in ourClasses:
        data = getFeatureMatrices(pathToDirWithCSV + c + ".csv")
        if data.ndim != 2 or data.shape[0] < numSamplesPerClass:
            raise ValueError(
                "Class '{}' provides a feature array of shape {} but {} samples are required".format(
                    c, data.shape, numSamplesPerClass))
        perClassRows.append(data[:numSamplesPerClass, :])

    if not perClassRows:
        return np.zeros([0, defaultNumFeatures]), np.zeros(0)

    # The feature width is inferred from the data; vstack raises ValueError
    # when the class files do not agree on it.
    wholeDataSet = np.vstack(perClassRows)
    # Label i is repeated once for every sample taken from class i.
    Labels = np.repeat(np.arange(len(ourClasses), dtype=float), numSamplesPerClass)

    return wholeDataSet, Labels

# Get Training and Test Error

In [145]:
def _classificationError(clf, ourClasses, pathToDirWithCSV):
    """Return the misclassification rate of ``clf`` over all class CSV files in one directory."""
    numCorrect = 0
    totalNum = 0
    for i, c in enumerate(ourClasses):
        data = getFeatureMatrices(pathToDirWithCSV + c + ".csv")
        predicted = clf.predict(data)
        # Every row of this file belongs to class i, so a prediction is
        # correct exactly when it equals i.
        numCorrect += np.sum(predicted == i)
        totalNum += predicted.size
    return 1 - float(numCorrect)/float(totalNum)


def getError(clf, ourClasses, pathToDirWithTrainCSV, pathToDirWithTestCSV):
    """Compute the training and test error of a fitted classifier.

    Parameters
    ----------
    clf : object
        Fitted classifier exposing ``predict(data)``.
    ourClasses : sequence of str
        Class names; the position of a class in this sequence is its label.
    pathToDirWithTrainCSV : str
        Directory prefix holding one ``<class>.csv`` file per class with the
        training features.
    pathToDirWithTestCSV : str
        Directory prefix holding one ``<class>.csv`` file per class with the
        test features.

    Returns
    -------
    trainE, testE : float
        Fraction of misclassified samples on the training and on the test
        files respectively.

    Raises
    ------
    ZeroDivisionError
        If one of the two splits contains no samples at all.
    """
    # Each split gets its own counters. Previously the test error was
    # accumulated on top of the training counts and therefore described the
    # combined train + test data instead of the test data alone.
    trainE = _classificationError(clf, ourClasses, pathToDirWithTrainCSV)
    testE = _classificationError(clf, ourClasses, pathToDirWithTestCSV)

    return trainE, testE

# Performing LDA

In [149]:
print("Using LDA")

# Resolve the class list in this cell (same call as in the feature-generation
# cell) so that it also works when that optional cell was skipped.
ourClasses, _, _, _ = getClasses('classes.csv', "data/ontology.json", 'data/class_labels_indices.csv')

# The features are used as stored in the CSV files; no normalization is applied.
wholeDataSet, Labels = getFeatureSet("data/train_Features/", ourClasses, numSamplesPerClass = 855)

# LDA is fitted with its default hyperparameters.
clf = LDA()
clf.fit(wholeDataSet, Labels)

trainE, testE = getError(clf, ourClasses, "data/train_Features/", "data/test_Features/")

print("Train Error : {}".format(trainE))
print("Test Error  : {}".format(testE))

# Training a Neural Network Classifier

In [166]:
print("Using Neural Network")

# Multi-layer perceptron with three hidden layers of 50, 25 and 10 units,
# trained with the L-BFGS solver; random_state pins the weight initialization.
# It is fitted on the wholeDataSet / Labels arrays built in the LDA cell.
mlpSettings = dict(
    solver='lbfgs',
    alpha=1e-5,
    hidden_layer_sizes=(50, 25, 10),
    random_state=1,
)
clf = MLPClassifier(**mlpSettings)
clf.fit(wholeDataSet, Labels)

trainE, testE = getError(clf, ourClasses, "data/train_Features/", "data/test_Features/")

for reportLine, errorValue in (("Train Error : {}", trainE), ("Test Error  : {}", testE)):
    print(reportLine.format(errorValue))