# Imports

In [1]:
from functions import getClasses
from functions import createFolder
import csv
import os
import matplotlib.pyplot as plt
import sys
import numpy as np

#Comment out if not generating features
sys.path.append('/home/arclab/Documents/FlorianHwk/ECE271B/')
from pyAudioAnalysis import audioFeatureExtraction as aT

from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn import svm

# getClasses returns four values; only the first is kept. It is used below as an
# iterable of class-name strings that select the per-class audio folders and CSV files.
ourClasses, _, _, _ = getClasses('classes.csv', "data/ontology.json", 'data/class_labels_indices.csv')

# Generate folders of mean features!!

In [4]:
#No need to run this cell if you already have the train_audioFeaturesMean and test_audioFeaturesMean folders.
#Put them inside the data/ folder.
# Write one CSV per class under data/train_audioFeaturesMean: each row is the
# clip's base file name followed by its feature values.
createFolder("data/train_audioFeaturesMean")

for c in ourClasses:
    directory = "data/train_rawAudio/" + c + "/"
    features, names = aT.dirWavFeatureExtraction(directory, 1.0, 1.0, 0.05, 0.05)
    # NOTE(review): binary mode with csv.writer is the Python 2 convention;
    # confirm the target interpreter before running this under Python 3.
    with open('data/train_audioFeaturesMean/' + c + '.csv', 'wb') as csvfile:
        csvWriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        # zip stops at the shorter sequence, so only names that have a matching
        # feature vector are written.
        for wavPath, featVec in zip(names, features):
            # Keep what lies between the last "/" and the last "." of the path.
            clipId = wavPath[wavPath.rfind("/") + 1:wavPath.rfind(".")]
            csvWriter.writerow([clipId] + ["%.10f" % feat for feat in featVec])
            
# Write one CSV per class under data/test_audioFeaturesMean: each row is the
# clip's base file name followed by its feature values.
createFolder("data/test_audioFeaturesMean")

for c in ourClasses:
    directory = "data/test_rawAudio/" + c + "/"
    features, names = aT.dirWavFeatureExtraction(directory, 1.0, 1.0, 0.05, 0.05)
    # NOTE(review): binary mode with csv.writer is the Python 2 convention;
    # confirm the target interpreter before running this under Python 3.
    with open('data/test_audioFeaturesMean/' + c + '.csv', 'wb') as csvfile:
        csvWriter = csv.writer(csvfile, delimiter=',', quotechar='|', quoting=csv.QUOTE_MINIMAL)
        # Every name is expected to have a feature row at the same index.
        for idx, wavPath in enumerate(names):
            formatted = ["%.20f" % feat for feat in features[idx]]
            # Keep what lies between the last "/" and the last "." of the path.
            firstCut = wavPath.rfind("/") + 1
            lastCut = wavPath.rfind(".")
            csvWriter.writerow([wavPath[firstCut:lastCut]] + formatted)

# Get Generated Features

In [5]:
#Assumes featureFolder is generated. Look at generateFeatures.ipynb
def getFeatureMatrices(pathToFeatureCSV):
    """Load a feature CSV into a numeric matrix plus the list of row names.

    Every non-empty row must hold a name in its first column followed by the
    feature values for that row. Blank lines are ignored.

    Args:
        pathToFeatureCSV: path of the comma-delimited CSV file to read.

    Returns:
        A ``(data, names)`` tuple. ``data`` is ``np.array`` built from one float
        vector per row; ``names`` is the list of first-column strings in file
        order.

    Raises:
        ValueError: if a feature cell cannot be converted to a float.
    """
    data = []
    names = []
    with open(pathToFeatureCSV, 'r') as f:
        for row in csv.reader(f, delimiter=','):
            if not row:
                # csv.reader yields [] for blank lines; there is no name to index.
                continue
            names.append(row[0])
            # Builtin float replaces np.float, an alias removed in NumPy 1.24.
            data.append(np.array(row[1:]).astype(float))

    return np.array(data), names

# Get an equal positive set and negative set for a class

In [13]:
def getFeatureSetPosNeg(pathToDirWithCSV, cName, ourClasses):
    """Build the positive and negative feature sets for one class.

    Positives are all rows of ``<pathToDirWithCSV><cName>.csv``. Negatives are
    drawn from the CSV of every other class in ``ourClasses``: rows are taken in
    file order, skipping any row whose name also appears among the positives,
    until that class has contributed ``amountOfNegData/6`` rows. The cap is
    checked after appending, so each other class contributes at least one row
    when it has an eligible one.

    Args:
        pathToDirWithCSV: directory prefix (including the trailing separator)
            that holds one ``<class>.csv`` per class.
        cName: name of the class treated as positive.
        ourClasses: iterable of all class names.

    Returns:
        A ``(data_pos, data_neg)`` tuple of arrays; ``data_neg`` is
        ``np.array`` of the collected negative rows.
    """
    data_pos, names = getFeatureMatrices(pathToDirWithCSV + cName + ".csv")
    amountOfNegData = data_pos.shape[0]

    # Set membership keeps the duplicate check constant-time per row.
    positiveNames = set(names)
    # Loop-invariant per-class quota. The divisor 6 presumably equals the number
    # of other classes so both sets end up about equal in size - TODO confirm.
    perClassQuota = amountOfNegData/6

    data_neg = []

    for c in ourClasses:
        if c == cName:
            continue
        data_new, names_new = getFeatureMatrices(pathToDirWithCSV + c + ".csv")
        negDataCounter = 0
        for i, candidate in enumerate(names_new):
            # A clip that is also a positive must not be used as a negative.
            if candidate in positiveNames:
                continue

            negDataCounter += 1
            data_neg.append(data_new[i, :])

            if negDataCounter >= perClassQuota:
                break

    return data_pos, np.array(data_neg)

# Calculates test error given a list of classifiers

In [7]:
def getTestErrorValue(clf_list, ourClasses, pathToDirWithTestCSV):
    """Compute the pooled test error of a list of per-class binary classifiers.

    The test CSV of every class is loaded, and every classifier predicts on every
    class's data. A prediction by classifier ``i`` counts as correct when its
    truthiness matches whether the sample's name appears in the test CSV of the
    ``i``-th class, so ``clf_list`` must follow the order of ``ourClasses``.

    Args:
        clf_list: fitted classifiers exposing ``predict``; entry ``i`` belongs
            to ``ourClasses[i]``.
        ourClasses: sequence of class names.
        pathToDirWithTestCSV: directory prefix (including the trailing
            separator) that holds one ``<class>.csv`` per class.

    Returns:
        ``1 - correct / total`` over all (classifier, sample) pairs, as a float.

    Raises:
        ZeroDivisionError: if no prediction was evaluated.
        IndexError: if ``clf_list`` has more entries than ``ourClasses`` and
            there are samples to score.
    """
    dataSets = []
    nameSets = []
    for c in ourClasses:
        data, names = getFeatureMatrices(pathToDirWithTestCSV + c + ".csv")
        dataSets.append(data)
        nameSets.append(names)

    # Sets make the per-sample membership test constant-time.
    positiveNames = [set(names) for names in nameSets]

    numCorrect_test = 0
    num_test = 0

    for data, names in zip(dataSets, nameSets):
        for i, clf in enumerate(clf_list):
            result = clf.predict(data)

            num_test += len(names)
            # Correct means the predicted label agrees with actual membership.
            numCorrect_test += sum(
                1 for j, name in enumerate(names)
                if bool(result[j]) == (name in positiveNames[i])
            )

    return 1 - float(numCorrect_test)/float(num_test)

# Performing LDA

In [14]:
print("Using LDA")

# One binary LDA classifier per class, stored in the same order as ourClasses
# because getTestErrorValue pairs classifier i with class i.
clf_list = []

for c in ourClasses:
    data_pos, data_neg = getFeatureSetPosNeg("data/train_audioFeaturesMean/", c, ourClasses)
    dataset = np.vstack([data_pos, data_neg])
    # Label 1 for the positive rows, 0 for the negative rows, in stacking order.
    labels  = np.ravel(np.vstack([np.ones([data_pos.shape[0],1]), np.zeros([data_neg.shape[0],1])]))

    # The classifier is used with its default hyper-parameters.
    clf = LDA()
    clf.fit(dataset, labels)
    print("Training classifier " + c + " training error: {}".format(1 - clf.score(dataset,labels)))
    clf_list.append(clf)

testE = getTestErrorValue(clf_list, ourClasses, "data/test_audioFeaturesMean/")
print("Test Error  : {}".format(testE))


# Performing Neural Network

In [17]:
print("Using Neural Network")

print("Looking for best 2 layer network")

# Best hidden-layer sizes per class, in ourClasses order.
l1_list = []
l2_list = []

for c in ourClasses:
    # NOTE(review): this cell reads from data/train_Features/ while every other
    # cell uses data/train_audioFeaturesMean/ - confirm which folder is intended.
    data_pos, data_neg = getFeatureSetPosNeg("data/train_Features/", c, ourClasses)
    dataset = np.vstack([data_pos, data_neg])
    # Label 1 for the positive rows, 0 for the negative rows, in stacking order.
    labels  = np.ravel(np.vstack([np.ones([data_pos.shape[0],1]), np.zeros([data_neg.shape[0],1])]))

    minTrainE = 1
    bestl1 = 0
    bestl2 = 0

    # Grid over (l1, l2) with l2 strictly smaller than l1, both in steps of 5.
    for l1 in range(10, 51, 5):
        for l2 in range(5, l1, 5):
            clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(l1, l2), random_state=1)
            clf.fit(dataset, labels)
            # Selection is based on the training error, not on held-out data.
            trainE = 1 - clf.score(dataset, labels)

            # Strict comparison keeps the first pair found when errors tie.
            if trainE < minTrainE:
                minTrainE = trainE
                bestl1 = l1
                bestl2 = l2

    # A space separates the class name from the rest of the message.
    print("For class " + c + " Best pair is {}.{}".format(bestl1, bestl2))
    l1_list.append(bestl1)
    l2_list.append(bestl2)
    print("Train Error : {}".format(minTrainE))
    
        

In [10]:
#l1,l2 values taken from the search in the previous cell:


# One binary MLP classifier per class, stored in the same order as ourClasses
# because getTestErrorValue pairs classifier i with class i.
clf_list = []

for i, c in enumerate(ourClasses):
    # Fixed layer sizes are used for every class. To use the per-class result of
    # the size search instead, switch to:
    #   l1 = l1_list[i]
    #   l2 = l2_list[i]
    l1 = 40
    l2 = 10

    data_pos, data_neg = getFeatureSetPosNeg("data/train_audioFeaturesMean/", c, ourClasses)
    dataset = np.vstack([data_pos, data_neg])
    # Label 1 for the positive rows, 0 for the negative rows, in stacking order.
    labels  = np.ravel(np.vstack([np.ones([data_pos.shape[0],1]), np.zeros([data_neg.shape[0],1])]))

    clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(l1, l2), random_state=1)
    clf.fit(dataset, labels)
    print("Training classifier " + c + " training error: {}".format(1 - clf.score(dataset,labels)))

    clf_list.append(clf)

# Single trailing slash, matching the path used by the other evaluation cells.
testE = getTestErrorValue(clf_list, ourClasses, "data/test_audioFeaturesMean/")
print("Test Error  : {}".format(testE))

# Performing Boosting

In [11]:
print("Using Adaboost")

# One binary AdaBoost classifier per class, kept in ourClasses order so that
# getTestErrorValue can pair classifier i with class i.
clf_list = []

for c in ourClasses:
    data_pos, data_neg = getFeatureSetPosNeg("data/train_audioFeaturesMean/", c, ourClasses)
    dataset = np.vstack([data_pos, data_neg])
    # Ones for the positive rows followed by zeros for the negative rows.
    labels = np.concatenate([np.ones(data_pos.shape[0]), np.zeros(data_neg.shape[0])])

    clf = AdaBoostClassifier(n_estimators=75)
    clf.fit(dataset, labels)

    # Error measured on the same data the classifier was fitted on.
    trainErr = 1 - clf.score(dataset, labels)
    print("Training classifier " + c + " training error: {}".format(trainErr))

    clf_list.append(clf)

testE = getTestErrorValue(clf_list, ourClasses, "data/test_audioFeaturesMean/")
print("Test Error  : {}".format(testE))

# Performing SVM

In [12]:
print("Using SVM")

# One binary linear-kernel SVM per class, kept in ourClasses order so that
# getTestErrorValue can pair classifier i with class i.
clf_list = []

for c in ourClasses:
    data_pos, data_neg = getFeatureSetPosNeg("data/train_audioFeaturesMean/", c, ourClasses)
    dataset = np.vstack([data_pos, data_neg])
    # Ones for the positive rows followed by zeros for the negative rows.
    labels = np.concatenate([np.ones(data_pos.shape[0]), np.zeros(data_neg.shape[0])])

    clf = svm.SVC(kernel='linear')
    clf.fit(dataset, labels)

    # Error measured on the same data the classifier was fitted on.
    trainErr = 1 - clf.score(dataset, labels)
    print("Training classifier " + c + " training error: {}".format(trainErr))

    clf_list.append(clf)

testE = getTestErrorValue(clf_list, ourClasses, "data/test_audioFeaturesMean/")
print("Test Error  : {}".format(testE))