# Exercise 2 - More comparative evaluation

## Import dependencies

In [116]:
import datetime
import glob
import os
import time
from collections import deque
from sklearn.utils import shuffle

from IPython.core.display_functions import display
from progressbar import ProgressBar

import librosa
import numpy as np
import pandas as pd
from scipy.stats import skew, kurtosis
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import Perceptron
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

# disable all warnings
import warnings

warnings.filterwarnings('ignore')

matrikelNumber = 11776836
isEvenMatrikelNumber = matrikelNumber % 2 == 0
dataset = "music" if True else "image"
print("The", dataset, "dataset is used!")

imageFolder = "GTZANmp3_22khz"
print(os.getcwd())
if imageFolder in os.getcwd():
    path_parent = os.path.dirname(os.getcwd())
    os.chdir(path_parent)
    path_parent = os.path.dirname(os.getcwd())
    os.chdir(path_parent)
    os.chdir('ex2/')
print(os.getcwd())

The music dataset is used!
/home/user/projects/private/fh/fh-machine-learning/data/GTZANmp3_22khz
/home/user/projects/private/fh/fh-machine-learning/ex2


## Load datasets

### Small dataset: HR-dataset

In the first step, the dataset was cleaned. I filtered data, that is not very expressive and classified the absences into different classes. I also one hot encoded the dataset. The target feature is the `RaceDesc_White`.

In [117]:
hrDataset = pd.read_csv('../data/HRDataset_v14.csv', sep=',')
hrDataset.sample(frac=1, random_state=matrikelNumber).reset_index(drop=True)

# Clean the dataset
print(
    "I drop all continuous columns from the dataset, I can't encode these columns well. Categorization of these columns will not work, because every value is very different.")
filteredHR = hrDataset.drop("EmpID", axis=1)
filteredHR = filteredHR.drop("Employee_Name", axis=1)
filteredHR = filteredHR.drop("DateofTermination", axis=1)
filteredHR = filteredHR.drop("LastPerformanceReview_Date", axis=1)
filteredHR = filteredHR.drop("DateofHire", axis=1)
filteredHR = filteredHR.drop("EngagementSurvey", axis=1)
filteredHR = filteredHR.drop("Salary", axis=1)
filteredHR = filteredHR.drop("DOB", axis=1)

filteredHR['absences_label'] = filteredHR['Absences'].apply(lambda value: '0-14'
if value <= 4 else '0-5'
if value <= 5 else '6-10'
if value <= 10 else '11-15'
if value <= 14 else '15+')

filteredHR = filteredHR.drop("Absences", axis=1)
# filteredHR.head()

# One hot encode the dataset
encodedHR = pd.get_dummies(filteredHR, columns=filteredHR.columns, drop_first=True)

# Split the dataset
hrXAxis = encodedHR.drop('RaceDesc_White', axis=1)
hrYAxis = encodedHR['RaceDesc_White']

encodedHR.head()

I drop all continuous columns from the dataset, I can't encode these columns well. Categorization of these columns will not work, because every value is very different.


Unnamed: 0,MarriedID_1,MaritalStatusID_1,MaritalStatusID_2,MaritalStatusID_3,MaritalStatusID_4,GenderID_1,EmpStatusID_2,EmpStatusID_3,EmpStatusID_4,EmpStatusID_5,...,DaysLateLast30_1,DaysLateLast30_2,DaysLateLast30_3,DaysLateLast30_4,DaysLateLast30_5,DaysLateLast30_6,absences_label_0-5,absences_label_11-15,absences_label_15+,absences_label_6-10
0,0,0,0,0,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,1,1,0,0,0,1,0,0,0,1,...,0,0,0,0,0,0,0,0,1,0
2,1,1,0,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
3,1,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,1,0
4,0,0,1,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0


### Large dataset: Census income

In the first step, the dataset was cleaned. I filtered data, that is not very expressive and classified the age and wage into different classes. I also one hot encoded the dataset. The target feature is the `race_ White`.

In [118]:
censusIncome = pd.read_csv('../data/census-income.data', sep=',')
censusIncome.sample(frac=1, random_state=matrikelNumber).reset_index(drop=True)

censusIncome['age_label'] = censusIncome['age'].apply(lambda value: '0-14'
if value <= 14 else '15-24'
if value <= 24 else '25-54'
if value <= 54 else '55-64'
if value <= 64 else '65 years and over')

censusIncome['wage_label'] = censusIncome['wage per hour'].apply(lambda value: '0-3000'
if value <= 3000 else '3001-6000'
if value <= 6000 else '6001-7000'
if value <= 7000 else '7001-9000+')

filteredCensus = censusIncome[
    (censusIncome['class of worker'] != ' Not in universe') &
    (censusIncome['education'] != ' Children') &
    (censusIncome['wage per hour'] > 0) &
    (censusIncome['weeks worked in year'] > 0)
    ]

filteredCensus = filteredCensus.drop("age", axis=1)
filteredCensus = filteredCensus.drop("wage per hour", axis=1)
filteredCensus = filteredCensus.drop("year", axis=1)
filteredCensus = filteredCensus.drop("ignore", axis=1)
filteredCensus = filteredCensus.drop("instance weight", axis=1)

# filteredCensus.head()

# One hot encode the dataset
encodedIncome = pd.get_dummies(filteredCensus, columns=filteredCensus.columns, drop_first=True)

# Split the dataset
censusIncomeXAxis = encodedIncome.drop('race_ White', axis=1)
censusIncomeYAxis = encodedIncome['race_ White']

encodedIncome.head()

Unnamed: 0,class of worker_ Local government,class of worker_ Private,class of worker_ State government,detailed industry recode_2,detailed industry recode_3,detailed industry recode_4,detailed industry recode_5,detailed industry recode_6,detailed industry recode_7,detailed industry recode_8,...,weeks worked in year_49,weeks worked in year_50,weeks worked in year_51,weeks worked in year_52,age_label_25-54,age_label_55-64,age_label_65 years and over,wage_label_3001-6000,wage_label_6001-7000,wage_label_7001-9000+
5,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,1,0,0,0,0,0
8,1,0,0,0,0,0,0,0,0,0,...,0,0,0,1,1,0,0,0,0,0
22,0,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,0,0
49,0,1,0,0,0,0,0,0,0,0,...,0,0,0,1,0,0,1,0,0,0
147,0,0,1,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


### Music dataset

Following features were extracted from the music dataset:

- Beats per minute
- Beats per minute statistics
- Chroma
- MFCC

The target is the genre of the song.

In [119]:
# We need to construct our data set; unfortunately, we don't simply have a "loadGTZanDataSet()" function in SK-learn...
# So we need to
## Download our data set & extract it (one-time effort)
## Run an audio feature extraction
## Create the create the ground truth (label assignment, target, ...)


# path to our audio folder
# For the first run, download the images from http://kronos.ifs.tuwien.ac.at/GTZANmp3_22khz.zip, and unzip them to your folder
imagePath = "../data/GTZANmp3_22khz/"

# Find all songs in that folder; there are like 1.000 different ways to do this in Python, we chose this one :-)
os.chdir(imagePath)
fileNames = glob.glob("*/*.mp3")
numberOfFiles = len(fileNames)
targetLabels = []

print("Found " + str(numberOfFiles) + " files\n")

# The first step - create the ground truth (label assignment, target, ...)
# For that, iterate over the files, and obtain the class label for each file
# Basically, the class name is in the full path name, so we simply use that
for fileName in fileNames:
    pathSepIndex = fileName.index("/")
    targetLabels.append(fileName[:pathSepIndex])

# sk-learn can only handle labels in numeric format - we have them as strings though...
# Thus we use the LabelEncoder, which does a mapping to Integer numbers
le = preprocessing.LabelEncoder()
le.fit(targetLabels)  # this basically finds all unique class names, and assigns them to the numbers
print("Found the following classes: " + str(list(le.classes_)))

# now we transform our labels to integers
musicTarget = le.transform(targetLabels)
print("Transformed labels (first elements: " + str(musicTarget[0:150]))

# If we want to find again the label for an integer value, we can do something like this:
# print list(le.inverse_transform([0, 18, 1]))

print("... done label encoding")

Found 1000 files

Found the following classes: ['blues', 'classical', 'country', 'disco', 'hiphop', 'jazz', 'metal', 'pop', 'reggae', 'rock']
Transformed labels (first elements: [4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4
 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 4 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1]
... done label encoding


In [120]:
# Now we do the actual feature extraction

# This is a helper function that computes the differences between adjacent array values
def differences(seq):
    iterable = iter(seq)
    prev = next(iterable)
    for element in iterable:
        yield element - prev
        prev = element


# This is a helper function that computes various statistical moments over a series of values, including mean, median, var, min, max, skewness and kurtosis (a total of 7 values)
def statistics(numericList):
    return [np.mean(numericList), np.median(numericList), np.var(numericList), np.float64(skew(numericList)),
            np.float64(kurtosis(numericList)), np.min(numericList), np.max(numericList)]


print("Extracting features using librosa" + " (" + str(datetime.datetime.now()) + ")")

# compute some features based on BPMs, MFCCs, Chroma
data_bpm = []
data_bpm_statistics = []
data_mfcc = []
data_chroma = []

# This takes a bit, so let's show it with a progress bar
with ProgressBar(max_value=len(fileNames)) as bar:
    for indexSample, fileName in enumerate(fileNames):
        # Load the audio as a waveform `y`, store the sampling rate as `sr`
        y, sr = librosa.load(fileName)

        # run the default beat tracker
        tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
        # from this, we simply use the tempo as BPM feature
        data_bpm.append([tempo])

        # Then we compute a few statistics on the beat timings
        beat_times = librosa.frames_to_time(beat_frames, sr=sr)
        # from the timings, compute the time differences between the beats
        beat_intervals = np.array(deque(differences(beat_times)))

        # And from this, take some statistics
        # There might be a few files where the beat timings are not determined properly; we ignore them, resp. give them 0 values
        if len(beat_intervals) < 1:
            print("Errors with beat interval in file " + fileName + ", index " + str(
                indexSample) + ", using 0 values instead")
            data_bpm_statistics.append([tempo, 0, 0, 0, 0, 0, 0, 0])
        else:
            bpm_statisticsVector = []
            bpm_statisticsVector.append(tempo)  # we also include the raw value of tempo
            for stat in statistics(beat_intervals):  # in case the timings are ok, we actually compute the statistics
                bpm_statisticsVector.append(stat)  # and append it to the vector, which finally has 1 + 7 features
            data_bpm_statistics.append(bpm_statisticsVector)

        # Next feature are MFCCs; we take 12 coefficients; for each coefficient, we have around 40 values per second
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=12)
        mfccVector = []
        for mfccCoefficient in mfccs:  # we transform this time series by taking again statistics over the values
            mfccVector.append(statistics(mfccCoefficient))

        # Finally, this vector should have 12 * 7 features
        data_mfcc.append(np.array(mfccVector).flatten())

        # Last feature set - chroma (which is roughly similar to actual notes)
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        chromaVector = []
        for chr in chroma:  # similar to before, we get a number of time-series
            chromaVector.append(statistics(chr))  # and we resolve that by taking statistics over the time series
        # Finally, this vector should be be 12 * 7 features
        data_chroma.append(np.array(chromaVector).flatten())

        bar.update(indexSample)

print(".... done" + " (" + str(datetime.datetime.now()) + ")")

Extracting features using librosa (2022-05-30 21:23:20.893987)


 11% (115 of 1000) |##                   | Elapsed Time: 0:00:55 ETA:   0:06:56

Errors with beat interval in file classical/classical.00050.mp3, index 115, using 0 values instead


100% (1000 of 1000) |####################| Elapsed Time: 0:07:55 Time:  0:07:55


.... done (2022-05-30 21:31:16.579348)


## Define Classifiers

In [121]:
bestResult = {
    'f1Score': 0,
    'yTest': 0,
    'yTestPredicted': 0,
    'algorithm': '',
    'dataset': ''
}

### k-NN (k-nearest-neighbours)

In [122]:
def kNearestNeighbours(datasetName, kNeighbours, XTrain, XTest, yTrain, yTest):
    results = []

    for k in kNeighbours:
        print(datasetName)
        print('kNN with', k, 'neighbours')
        knnClassifier = KNeighborsClassifier(n_neighbors=k, algorithm='kd_tree')

        # Train the classifier
        startTime = time.time()
        knnClassifier.fit(XTrain, yTrain.ravel())
        endTime = time.time()
        trainTime = endTime - startTime

        # Predict
        startTime = time.time()
        predicted = knnClassifier.predict(XTest)
        endTime = time.time()
        predictionTime = endTime - startTime

        # Effectiveness measurement
        accuracyScore = accuracy_score(yTest, predicted)
        f1Score = f1_score(yTest, predicted, average='weighted')

        formattedTrainTime = str("{:.3f}s".format(trainTime))
        formattedPredictionTime = str("{:.3f}s".format(predictionTime))
        formattedAccuracyScore = str("{:.3f}%".format(accuracyScore * 100))
        formattedF1Score = str("{:.3f}%".format(f1Score * 100))

        print('Training time:', formattedTrainTime)
        print('Testing time:', formattedPredictionTime)
        print()

        print('Accuracy:', formattedAccuracyScore)
        print('F1 score:', formattedF1Score)
        print('------------------------------------')

        result = {
            'datasetName': datasetName,
            'algorithmName': 'kNN with "' + str(k) + '" neighbours',
            'neighbours': k,
            'accuracyScore': formattedAccuracyScore,
            'f1Score': formattedF1Score,
            'trainTime': formattedTrainTime,
            'predictionTime': formattedPredictionTime
        }

        results.append(result)

        if datasetName != 'HR' and datasetName != 'Census Income':
            if bestResult is None or bestResult['f1Score'] < f1Score:
                bestResult['f1Score'] = f1Score
                bestResult['yTestPredicted'] = predicted
                bestResult['yTest'] = yTest
                bestResult['algorithm'] = 'kNN'
                bestResult['dataset'] = datasetName

    return results

### Perceptron

In [123]:
def perceptron(datasetName, alphaValues, XTrain, XTest, yTrain, yTest):
    results = []

    for alpha in alphaValues:
        print(datasetName)
        print('Perceptron with alpha', alpha)
        perceptronClassifier = Perceptron(alpha=alpha, random_state=matrikelNumber)

        # Train the classifier
        startTime = time.time()
        perceptronClassifier.fit(XTrain, yTrain.ravel())
        endTime = time.time()
        trainTime = endTime - startTime

        # Predict
        startTime = time.time()
        predicted = perceptronClassifier.predict(XTest)
        endTime = time.time()
        predictionTime = endTime - startTime

        # Effectiveness measurement
        accuracyScore = accuracy_score(yTest, predicted)
        f1Score = f1_score(yTest, predicted, average='weighted')

        formattedTrainTime = str("{:.3f}s".format(trainTime))
        formattedPredictionTime = str("{:.3f}s".format(predictionTime))
        formattedAccuracyScore = str("{:.3f}%".format(accuracyScore * 100))
        formattedF1Score = str("{:.3f}%".format(f1Score * 100))

        print('Training time:', formattedTrainTime)
        print('Testing time:', formattedPredictionTime)
        print()

        print('Accuracy:', formattedAccuracyScore)
        print('F1 score:', formattedF1Score)
        print('------------------------------------')

        result = {
            'datasetName': datasetName,
            'algorithmName': 'Perceptron with "' + str(alpha) + '" alpha',
            'alpha': alpha,
            'accuracyScore': formattedAccuracyScore,
            'f1Score': formattedF1Score,
            'trainTime': formattedTrainTime,
            'predictionTime': formattedPredictionTime
        }
        results.append(result)

        if datasetName != 'HR' and datasetName != 'Census Income':
            if bestResult is None or bestResult['f1Score'] < f1Score:
                bestResult['f1Score'] = f1Score
                bestResult['yTestPredicted'] = predicted
                bestResult['yTest'] = yTest
                bestResult['algorithm'] = 'Perceptron'
                bestResult['dataset'] = datasetName

    return results

### Decision tree

In [124]:
def decisionTree(datasetName, maxFeatureValues, XTrain, XTest, yTrain, yTest):
    results = []

    for maxFeatures in maxFeatureValues:
        print(datasetName)
        print('Decision Tree with max features', maxFeatures)

        decisionTreeClassifier = DecisionTreeClassifier(max_features=maxFeatures, random_state=matrikelNumber)

        # Train the classifier
        startTime = time.time()
        decisionTreeClassifier.fit(XTrain, yTrain.ravel())
        endTime = time.time()
        trainTime = endTime - startTime

        # Predict
        startTime = time.time()
        predicted = decisionTreeClassifier.predict(XTest)
        endTime = time.time()
        predictionTime = endTime - startTime

        # Effectiveness measurement
        accuracyScore = accuracy_score(yTest, predicted)
        f1Score = f1_score(yTest, predicted, average='weighted')

        formattedTrainTime = str("{:.3f}s".format(trainTime))
        formattedPredictionTime = str("{:.3f}s".format(predictionTime))
        formattedAccuracyScore = str("{:.3f}%".format(accuracyScore * 100))
        formattedF1Score = str("{:.3f}%".format(f1Score * 100))

        print('Training time:', formattedTrainTime)
        print('Testing time:', formattedPredictionTime)
        print()

        print('Accuracy:', formattedAccuracyScore)
        print('F1 score:', formattedF1Score)
        print('------------------------------------')

        result = {
            'datasetName': datasetName,
            'algorithmName': 'Decision Tree with "' + str(maxFeatures) + '" max features',
            'maxFeatures': maxFeatures,
            'accuracyScore': formattedAccuracyScore,
            'f1Score': formattedF1Score,
            'trainTime': formattedTrainTime,
            'predictionTime': formattedPredictionTime
        }
        results.append(result)

        if datasetName != 'HR' and datasetName != 'Census Income':
            if bestResult is None or bestResult['f1Score'] < f1Score:
                bestResult['f1Score'] = f1Score
                bestResult['yTestPredicted'] = predicted
                bestResult['yTest'] = yTest
                bestResult['algorithm'] = 'Decision Tree'
                bestResult['dataset'] = datasetName

    return results

### SVM (Support vector machine)

In [125]:
def supportVectorMachine(datasetName, XTrain, XTest, yTrain, yTest):
    print(datasetName)
    print('Support Vector Machine')

    results = []

    svmClassifier = make_pipeline(StandardScaler(), SVC(random_state=matrikelNumber))

    # Train the classifier
    startTime = time.time()
    svmClassifier.fit(XTrain, yTrain.ravel())
    endTime = time.time()
    trainTime = endTime - startTime

    # Predict
    startTime = time.time()
    predicted = svmClassifier.predict(XTest)
    endTime = time.time()
    predictionTime = endTime - startTime

    # Effectiveness measurement
    accuracyScore = accuracy_score(yTest, predicted)
    f1Score = f1_score(yTest, predicted, average='weighted')

    formattedTrainTime = str("{:.3f}s".format(trainTime))
    formattedPredictionTime = str("{:.3f}s".format(predictionTime))
    formattedAccuracyScore = str("{:.3f}%".format(accuracyScore * 100))
    formattedF1Score = str("{:.3f}%".format(f1Score * 100))

    print('Training time:', formattedTrainTime)
    print('Testing time:', formattedPredictionTime)
    print()

    print('Accuracy:', formattedAccuracyScore)
    print('F1 score:', formattedF1Score)
    print('------------------------------------')

    result = {
        'datasetName': datasetName,
        'algorithmName': 'SVM',
        'accuracyScore': formattedAccuracyScore,
        'f1Score': formattedF1Score,
        'trainTime': formattedTrainTime,
        'predictionTime': formattedPredictionTime
    }
    results.append(result)

    if datasetName != 'HR' and datasetName != 'Census Income':
        if bestResult is None or bestResult['f1Score'] < f1Score:
            bestResult['f1Score'] = f1Score
            bestResult['yTestPredicted'] = predicted
            bestResult['yTest'] = yTest
            bestResult['algorithm'] = 'SVM'
            bestResult['dataset'] = datasetName

    return results

### Random forests

In [126]:
def randomForest(datasetName, numberOfTrees, maxFeatureValues, XTrain, XTest, yTrain, yTest):
    results = []

    for numberOfTreeElements in numberOfTrees:
        for maxFeatureValue in maxFeatureValues:
            print(datasetName)
            print('Random forest with', numberOfTreeElements, 'trees and', maxFeatureValue, 'max features')

            randomForestClassifier = RandomForestClassifier(
                n_estimators=numberOfTreeElements,
                max_features=maxFeatureValue,
                random_state=matrikelNumber
            )

            # Train the classifier
            startTime = time.time()
            randomForestClassifier.fit(XTrain, yTrain.ravel())
            endTime = time.time()
            trainTime = endTime - startTime

            # Predict
            startTime = time.time()
            predicted = randomForestClassifier.predict(XTest)
            endTime = time.time()
            predictionTime = endTime - startTime

            # Effectiveness measurement
            accuracyScore = accuracy_score(yTest, predicted)
            f1Score = f1_score(yTest, predicted, average='weighted')

            formattedTrainTime = str("{:.3f}s".format(trainTime))
            formattedPredictionTime = str("{:.3f}s".format(predictionTime))
            formattedAccuracyScore = str("{:.3f}%".format(accuracyScore * 100))
            formattedF1Score = str("{:.3f}%".format(f1Score * 100))

            print('Training time:', formattedTrainTime)
            print('Testing time:', formattedPredictionTime)
            print()

            print('Accuracy:', formattedAccuracyScore)
            print('F1 score:', formattedF1Score)
            print('------------------------------------')

            result = {
                'datasetName': datasetName,
                'algorithmName': 'Random F. ("' + str(numberOfTreeElements) + '" trees, "' + str(
                    maxFeatureValue) + '" max feature)',
                'numberOfTreeElements': numberOfTreeElements,
                'maxFeatureValue': maxFeatureValue,
                'accuracyScore': formattedAccuracyScore,
                'f1Score': formattedF1Score,
                'trainTime': formattedTrainTime,
                'predictionTime': formattedPredictionTime
            }
            results.append(result)

            if datasetName != 'HR' and datasetName != 'Census Income':
                if bestResult is None or bestResult['f1Score'] < f1Score:
                    bestResult['f1Score'] = f1Score
                    bestResult['yTestPredicted'] = predicted
                    bestResult['yTest'] = yTest
                    bestResult['algorithm'] = 'Random Forest'
                    bestResult['dataset'] = datasetName

    return results

## Apply Classifiers

Now we apply our classifiers to the datasets.

In [127]:
testSplitSize = 0.33

kNNNeighbours = [2, 4, 6]
perceptronAlphaValues = [0.0001, 0.001, 0.01]
decisionTreeMaxFeatureValues = [None, 'sqrt', 'log2']
randomForestTrees = [10, 50, 100]
randomForestMaxFeatureValues = ['sqrt', 'log2']

censusIncomeAggregatedResults = []
hrAggregatedResults = []
musicBmpAggregatedResults = []
musicBmpStatisticsAggregatedResults = []
musicChromaAggregatedResults = []
musicMfccAggregatedResults = []

datasets = [
    ('Census Income', censusIncomeXAxis, censusIncomeYAxis, censusIncomeAggregatedResults),
    ('HR', hrXAxis, hrYAxis, hrAggregatedResults),
    ('Music BMP', data_bpm, musicTarget, musicBmpAggregatedResults),
    ('Music BMP Statistics', data_bpm_statistics, musicTarget, musicBmpStatisticsAggregatedResults),
    ('Music Chroma', data_chroma, musicTarget, musicChromaAggregatedResults),
    ('Music MFCC', data_mfcc, musicTarget, musicMfccAggregatedResults),
]

for _, dataset in enumerate(datasets):
    datasetName = dataset[0]
    xAxis = dataset[1]
    yAxis = dataset[2]
    results = dataset[3]

    # Shuffle the data, for better results
    X, y = shuffle(xAxis, yAxis, random_state=matrikelNumber)

    trainData, testData, trainLabels, testLabels = train_test_split(
        X,
        y,
        test_size=testSplitSize,
        random_state=matrikelNumber
    )

    if datasetName == 'Census Income' or datasetName == 'HR':
        trainLabels = trainLabels.values

    kNNResults = kNearestNeighbours(
        datasetName,
        kNNNeighbours,
        trainData,
        testData,
        trainLabels,
        testLabels
    )
    results.extend(kNNResults)

    perceptronResults = perceptron(
        datasetName,
        perceptronAlphaValues,
        trainData,
        testData,
        trainLabels,
        testLabels
    )
    results.extend(perceptronResults)

    decisionTreeResults = decisionTree(
        datasetName,
        decisionTreeMaxFeatureValues,
        trainData,
        testData,
        trainLabels,
        testLabels
    )
    results.extend(decisionTreeResults)

    supportVectorMachineResults = supportVectorMachine(
        datasetName,
        trainData,
        testData,
        trainLabels,
        testLabels
    )
    results.extend(supportVectorMachineResults)

    randomForestResults = randomForest(
        datasetName,
        randomForestTrees,
        randomForestMaxFeatureValues,
        trainData,
        testData,
        trainLabels,
        testLabels
    )
    results.extend(randomForestResults)


Census Income
kNN with 2 neighbours
Training time: 0.707s
Testing time: 34.247s

Accuracy: 83.943%
F1 score: 84.432%
------------------------------------
Census Income
kNN with 4 neighbours
Training time: 0.640s
Testing time: 34.238s

Accuracy: 87.847%
F1 score: 86.309%
------------------------------------
Census Income
kNN with 6 neighbours
Training time: 0.655s
Testing time: 34.467s

Accuracy: 88.205%
F1 score: 85.552%
------------------------------------
Census Income
Perceptron with alpha 0.0001
Training time: 0.185s
Testing time: 0.012s

Accuracy: 98.103%
F1 score: 98.093%
------------------------------------
Census Income
Perceptron with alpha 0.001
Training time: 0.161s
Testing time: 0.012s

Accuracy: 98.103%
F1 score: 98.093%
------------------------------------
Census Income
Perceptron with alpha 0.01
Training time: 0.162s
Testing time: 0.012s

Accuracy: 98.103%
F1 score: 98.093%
------------------------------------
Census Income
Decision Tree with max features None
Training t

### Census Income

In [128]:
print("Census Income Results:")
censusIncomeDf = pd.DataFrame(censusIncomeAggregatedResults,
                              columns=['algorithmName', 'accuracyScore', 'f1Score', 'trainTime', 'predictionTime'])
display(censusIncomeDf)

Census Income Results:


Unnamed: 0,algorithmName,accuracyScore,f1Score,trainTime,predictionTime
0,"kNN with ""2"" neighbours",83.943%,84.432%,0.707s,34.247s
1,"kNN with ""4"" neighbours",87.847%,86.309%,0.640s,34.238s
2,"kNN with ""6"" neighbours",88.205%,85.552%,0.655s,34.467s
3,"Perceptron with ""0.0001"" alpha",98.103%,98.093%,0.185s,0.012s
4,"Perceptron with ""0.001"" alpha",98.103%,98.093%,0.161s,0.012s
5,"Perceptron with ""0.01"" alpha",98.103%,98.093%,0.162s,0.012s
6,"Decision Tree with ""None"" max features",97.305%,97.323%,0.722s,0.010s
7,"Decision Tree with ""sqrt"" max features",87.902%,87.800%,0.055s,0.013s
8,"Decision Tree with ""log2"" max features",86.142%,85.820%,0.039s,0.010s
9,SVM,96.728%,96.573%,14.095s,9.431s


#### Scores

The best accuracy-score was achieved by the `Random forest with 100 trees and sqrt`, closely followed by the `Random forest with 50 trees and sqrt`. The best f1-score was also achieved by the `Random forest with 100 trees and sqrt`.

The second-best results of the accuracy-score and f1-score were achieved by the `Perceptron`. All three alpha values achieved the same results in both accuracy- and f1-score.

The next best accuracy- and f1-score results were achieved by the `Decision tree with "None" max features`.

The worst accuracy-score was achieved by `kNN with 2 neighbours`, `Decision tree with "log2" max-features` was a bit better.

The worst f1-score was achieved by `kNN with 2 neighbours`, `kNN with 6 neighbours` was a bit better.

Overall the scores were very good for this dataset, because all values are above 80%.

#### Training and test-times

Most algorithms had a very short training time. Only 4 executions exceeded 1 second. The `Support Vector Machine` had the longest training time with more than 13 seconds.
The training time of the `Random forests` were very short and they achieved the best scores. The training time increases with more trees being added. The training times of the `Perceptron` were also very short and it receives very good scores.


The prediction-time was very high for the `kNN`. They exceed the other times many times. They next worst test-times were achieved by the `SVM`. The other testing-times are very good, because they are all below 1 second.

### HR

In [129]:
print("HR Results:")
hrDf = pd.DataFrame(hrAggregatedResults,
                    columns=['algorithmName', 'accuracyScore', 'f1Score', 'trainTime', 'predictionTime'])
display(hrDf)

HR Results:


Unnamed: 0,algorithmName,accuracyScore,f1Score,trainTime,predictionTime
0,"kNN with ""2"" neighbours",58.252%,58.276%,0.005s,0.015s
1,"kNN with ""4"" neighbours",67.961%,67.145%,0.005s,0.014s
2,"kNN with ""6"" neighbours",69.903%,67.764%,0.005s,0.015s
3,"Perceptron with ""0.0001"" alpha",94.175%,94.150%,0.004s,0.004s
4,"Perceptron with ""0.001"" alpha",94.175%,94.150%,0.004s,0.002s
5,"Perceptron with ""0.01"" alpha",94.175%,94.150%,0.005s,0.003s
6,"Decision Tree with ""None"" max features",99.029%,99.028%,0.004s,0.002s
7,"Decision Tree with ""sqrt"" max features",85.437%,85.453%,0.003s,0.003s
8,"Decision Tree with ""log2"" max features",64.078%,63.872%,0.004s,0.004s
9,SVM,65.049%,61.398%,0.017s,0.014s


#### Scores

The best accuracy-score was achieved by the `Decision Tree with "None" max-features`, closely followed by the `Random forest with 100 trees and sqrt`. The best f1-score was also achieved by the `Decision Tree with "None" max features`, followed by the `Random forest with 100 trees and sqrt`.

The next best results of the accuracy-score and f1-score were achieved by the `Perceptron`. All three alpha values achieved the same results in both accuracy- and f1-score.

The worst accuracy-score was achieved by `kNN with 2 neighbours`, followed by the `Decision tree with "log2" max-features`.

The worst f1-score was achieved by `kNN with 2 neighbours`, followed by the `SVM`.

#### Training and test-times

All algorithms have a very short training times. The `Random Forest with 100 trees and "sqrt" max-features` had the longest training time.
The training time of the best `Decision Tree with "None" max-features` was very short and achieved the best scores. The training times of the `Perceptron` were also very short, and it receives very good scores.


The prediction-time was very high for the `kNN`. They exceed the other times many times. They next worst test-times were achieved by the `svm`. The other testing-times are very good, because they are all below 1 second.

The prediction-times are the highest for the `Random-forests with 100 trees`, followed the by the `kNN`. The best time in combination with scores was achieved by the perceptron.


### Comparison

For the HR dataset the `Decision Tree with "None" max-features` delivered the best accuracy- and f1-score. For the Census Income dataset the `Random forest with 100 trees and sqrt` delivered the best results.

### Music BMP

In [130]:
print("Music BMP Results:")
musicBmpDf = pd.DataFrame(musicBmpAggregatedResults,
                          columns=['algorithmName', 'accuracyScore', 'f1Score', 'trainTime', 'predictionTime'])
display(musicBmpDf)

Music BMP Results:


Unnamed: 0,algorithmName,accuracyScore,f1Score,trainTime,predictionTime
0,"kNN with ""2"" neighbours",13.333%,10.423%,0.002s,0.006s
1,"kNN with ""4"" neighbours",13.333%,10.292%,0.001s,0.007s
2,"kNN with ""6"" neighbours",12.727%,11.450%,0.001s,0.006s
3,"Perceptron with ""0.0001"" alpha",8.182%,1.238%,0.007s,0.000s
4,"Perceptron with ""0.001"" alpha",8.182%,1.238%,0.007s,0.000s
5,"Perceptron with ""0.01"" alpha",8.182%,1.238%,0.007s,0.000s
6,"Decision Tree with ""None"" max features",17.273%,13.610%,0.002s,0.000s
7,"Decision Tree with ""sqrt"" max features",17.273%,13.610%,0.001s,0.000s
8,"Decision Tree with ""log2"" max features",17.273%,13.610%,0.001s,0.000s
9,SVM,20.000%,10.942%,0.027s,0.012s


#### Scores

The best accuracy-score was achieved by the `Support Vector Machine`, closely followed the `Decision Trees`. The best f1-score was achieved by the `Random Forests with 10 trees and "sqrt", and "log2"`.

The next best results of the accuracy-score and f1-score were achieved by the `kNN`.

The worst accuracy-score was achieved by the `Perceptron`. The worst f1-score was achieved by the `Perceptron`.

Overall the scores were not very good.

#### Training and test-times

All algorithms have a very short training times. The `Random Forest with 100 trees and "sqrt" and "log2" max-features` had the longest training times.
The training time of the best `SVM` was very short and achieved the best scores.

The prediction times are also very short for all datasets.


### Music BMP Statistics

In [131]:
print("Music BMP Statistics Results:")
musicBmpStatisticsDf = pd.DataFrame(musicBmpStatisticsAggregatedResults,
                                    columns=['algorithmName', 'accuracyScore', 'f1Score', 'trainTime',
                                             'predictionTime'])
display(musicBmpStatisticsDf)

Music BMP Statistics Results:


Unnamed: 0,algorithmName,accuracyScore,f1Score,trainTime,predictionTime
0,"kNN with ""2"" neighbours",14.545%,13.974%,0.002s,0.007s
1,"kNN with ""4"" neighbours",15.455%,15.583%,0.001s,0.007s
2,"kNN with ""6"" neighbours",15.152%,14.704%,0.001s,0.007s
3,"Perceptron with ""0.0001"" alpha",8.182%,1.238%,0.007s,0.001s
4,"Perceptron with ""0.001"" alpha",8.182%,1.238%,0.007s,0.001s
5,"Perceptron with ""0.01"" alpha",8.182%,1.238%,0.007s,0.000s
6,"Decision Tree with ""None"" max features",16.667%,16.608%,0.005s,0.001s
7,"Decision Tree with ""sqrt"" max features",22.424%,22.204%,0.003s,0.001s
8,"Decision Tree with ""log2"" max features",21.212%,21.201%,0.003s,0.001s
9,SVM,22.424%,19.081%,0.029s,0.013s


#### Scores

The best accuracy-score was achieved by the `Random Forest with 50 trees and "sqrt" max-feature`, closely followed the `Random Forest with 10 trees and "log2" max-feature`. The best f1-score was achieved by the `Random Forest with 50 trees and "sqrt" max-feature`.

The next best accuracy-score and f1-score of another algorithm were achieved by the `Decision Tree with "sqrt"`.

The worst accuracy-score was achieved by the `Perceptron`. The worst f1-score was also achieved by the `Perceptron`.

Overall the scores were not very good.

#### Training and test-times

All algorithms have a very short training times. The `Random Forest with 100 trees and "sqrt" and "log2" max-features` had the longest training times.
The training time of the best `Random Forest with 50 trees and "sqrt" max-feature` was very short and achieved the best scores.

The prediction times are also very short for all datasets.


### Music Chroma Results

In [132]:
print("Music Chroma Results:")
musicChromaDf = pd.DataFrame(musicChromaAggregatedResults,
                             columns=['algorithmName', 'accuracyScore', 'f1Score', 'trainTime', 'predictionTime'])
display(musicChromaDf)

Music Chroma Results:


Unnamed: 0,algorithmName,accuracyScore,f1Score,trainTime,predictionTime
0,"kNN with ""2"" neighbours",36.364%,34.105%,0.004s,0.024s
1,"kNN with ""4"" neighbours",35.152%,34.457%,0.003s,0.026s
2,"kNN with ""6"" neighbours",35.152%,34.102%,0.004s,0.026s
3,"Perceptron with ""0.0001"" alpha",25.758%,24.170%,0.015s,0.001s
4,"Perceptron with ""0.001"" alpha",25.758%,24.170%,0.015s,0.000s
5,"Perceptron with ""0.01"" alpha",25.758%,24.170%,0.015s,0.000s
6,"Decision Tree with ""None"" max features",35.758%,36.099%,0.030s,0.000s
7,"Decision Tree with ""sqrt"" max features",30.909%,31.706%,0.004s,0.000s
8,"Decision Tree with ""log2"" max features",32.424%,32.873%,0.003s,0.000s
9,SVM,46.667%,46.306%,0.033s,0.020s


#### Scores

The best accuracy-score was achieved by the `Random Forest with 100 trees and "log2" max-feature`, directly followed by the `Random Forest with 50 trees and "sqrt" max-feature`, `Random Forest with 50 trees and "log2" max-feature`. The best f1-score was achieved by the `Random Forest with 100 trees and "log2" max-feature`.

The next best accuracy-score and f1-score of another algorithm were achieved by the `SVM`.

The worst accuracy-score was achieved by the `Perceptron`. The worst f1-score was also achieved by the `Perceptron`.

Overall the scores are not very good, but they are better, than the last two datasets.

#### Training and test-times

All algorithms have a very short training times. The `Random Forest with 100 trees and "sqrt" and "log2" max-features` had the longest training times.
The training time of the `SVM` was very short and achieved very good scores.

The prediction times are also very short for all datasets, were the `SVM` has a slightly longer runtime.

### Music MFCC

In [133]:
print("Music MFCC Results:")
musicMfccDf = pd.DataFrame(musicMfccAggregatedResults,
                           columns=['algorithmName', 'accuracyScore', 'f1Score', 'trainTime', 'predictionTime'])
display(musicMfccDf)

Music MFCC Results:


Unnamed: 0,algorithmName,accuracyScore,f1Score,trainTime,predictionTime
0,"kNN with ""2"" neighbours",32.424%,33.023%,0.003s,0.011s
1,"kNN with ""4"" neighbours",36.364%,37.555%,0.003s,0.012s
2,"kNN with ""6"" neighbours",37.273%,37.295%,0.003s,0.012s
3,"Perceptron with ""0.0001"" alpha",25.758%,18.026%,0.014s,0.002s
4,"Perceptron with ""0.001"" alpha",25.758%,18.026%,0.014s,0.001s
5,"Perceptron with ""0.01"" alpha",25.758%,18.026%,0.015s,0.001s
6,"Decision Tree with ""None"" max features",45.455%,46.537%,0.047s,0.000s
7,"Decision Tree with ""sqrt"" max features",38.182%,38.726%,0.006s,0.000s
8,"Decision Tree with ""log2"" max features",41.515%,41.691%,0.004s,0.000s
9,SVM,70.303%,70.375%,0.032s,0.019s


#### Scores

The best accuracy-score was achieved by the `SVM`, directly followed by the `Random Forest with 100 trees and "log2" max-feature`, `Random Forest with 100 trees and "sqrt" max-feature`. The best f1-score was achieved by the `SVM`, followed by the `Random Forest with 100 trees and "log2" max-feature`, `Random Forest with 100 trees and "sqrt" max-feature`.

The next best accuracy-score and f1-score of another algorithm were achieved by the `Decision Tree with "None" max-features`, with a massive gap to the other two.

The worst accuracy-score was achieved by the `Perceptron`. The worst f1-score was also achieved by the `Perceptron`.

Overall the scores are very average, but better than the last music-dataset.

#### Training and test-times

All algorithms have a very short training times. The `Random Forest with 100 trees and "sqrt"` had the longest training times.
The training time of the `SVM` was very short and achieved the best scores.

The prediction times are also very short for all datasets.

### Comparison

The different extracted features used for training the models, received very different results.

**Beats per minute** received very bad classification results. The accuracy-scores span from 8% to 20%. The f1-scores span from 1.2% to ~15%.

**Beats per minute statistics** got slightly better results. The accuracy-scores span from 8% to ~26%. The f1-scores span from 1.2% to ~25%.

**Music Chroma results** got much better results. The accuracy-scores span from ~25% to ~47%. The f1-scores span from 24% to 47%.

**Music MFCC** got even better results. The accuracy scores span from ~26% to 70%. The f1-scores span from 18% to 70%.

The training times and prediction-times are very fast for all four datasets. A second was never exceeded for all algorithms.

## Confusion Matrix


In [134]:
yTest = le.inverse_transform(bestResult['yTest'])
yTestPredicted = le.inverse_transform(bestResult['yTestPredicted'])

conf_matrix = pd.DataFrame(
    confusion_matrix(
        yTest,
        yTestPredicted,
        labels=list(le.classes_),
        normalize='true'
    ),
    index=list(le.classes_),
    columns=list(le.classes_)
)

conf_matrix = round(conf_matrix, 3)

print('Dataset: ' + bestResult['dataset'])
print('Best performing algorithm: ' + bestResult['algorithm'])
print('f1 measure: ' + str(bestResult['f1Score']) + '\n')
display(conf_matrix)

Dataset: Music MFCC
Best performing algorithm: SVM
f1 measure: 0.7037468748954211



Unnamed: 0,blues,classical,country,disco,hiphop,jazz,metal,pop,reggae,rock
blues,0.758,0.0,0.091,0.0,0.0,0.0,0.03,0.0,0.0,0.121
classical,0.0,0.917,0.0,0.0,0.0,0.083,0.0,0.0,0.0,0.0
country,0.0,0.0,0.727,0.03,0.0,0.03,0.03,0.061,0.0,0.121
disco,0.067,0.0,0.0,0.667,0.0,0.0,0.033,0.033,0.067,0.133
hiphop,0.027,0.0,0.0,0.135,0.459,0.0,0.054,0.108,0.189,0.027
jazz,0.0,0.067,0.0,0.0,0.0,0.833,0.0,0.0,0.033,0.067
metal,0.059,0.0,0.059,0.029,0.029,0.0,0.765,0.0,0.0,0.059
pop,0.0,0.0,0.026,0.105,0.026,0.0,0.0,0.789,0.026,0.026
reggae,0.031,0.0,0.062,0.094,0.094,0.031,0.031,0.062,0.562,0.031
rock,0.0,0.0,0.037,0.222,0.0,0.111,0.074,0.037,0.0,0.519


The best results were achieved with the `SVM` in the `MFCC`-dataset. The best result was achieved in the genre `classic`. Round about 92% of the tested songs were classified correctly. The other 8% were classified as jazz songs. Aside from jazz no other genre was identified for classic songs.

Hiphop was the genre, which was classified the worst. Only 45% were identified correctly. It had the most problems to distinct hiphop songs from reggae, disco and pop songs. About 19% were identified as reggae songs.