In [None]:
# General Imports
import pandas as pd
import random as rd
import numpy as np
import math as m
import sklearn.cluster as skc
import sklearn.neighbors as skn
import sklearn.feature_selection as skf
import matplotlib.pyplot as plt
import scipy as sp

In [None]:
# Kicked Tensorflow to it's own cell so I can work on K-Means at work
import tensorflow as tf
from keras import layers, models

In [17]:
data = pd.read_csv('../Datasets/data.csv', header=0)
genreKey = {"blues": 0,
            "classical": 1,
            "country": 2,
            "disco": 3,
            "hiphop": 4,
            "jazz": 5,
            "metal": 6,
            "pop": 7,
            "reggae": 8,
            "rock": 9}

def dimReduce(data, dims=10):
    # Select Data
    X = np.array(data.iloc[:, 1:29])
    y = np.array(data.iloc[:, 29])
    # Make the feature selection model with ANOVA F Measure
    fs = skf.SelectKBest(score_func=skf.f_classif, k=dims)
    # Run the data through selection to obtain final set
    X_sel = fs.fit_transform(X, y)

    return X_sel, y

def prepData(data, key, split=0.2, dims= 10, doSplit=True):
    # Reduce the dimensionality of the dataset to a set number of features
    x, y = dimReduce(data, dims)
    # Define the number of elements in the test set
    splitRange = int(len(data) * split)
    # Create and randomize an array representing data ordering
    rand = [i for i in range(len(data))]
    rd.shuffle(rand)
    # Create Testing and training arrays
    x_train = np.array([])
    y_train = np.array([])
    x_test = np.array([])
    y_test = np.array([])

    if (doSplit):
        # Populate test arrays with a random split% of the whole set
        for i in range(splitRange):
            # Set the dimensions of the x_test array
            if x_test.ndim == 1:
                x_test = np.array([x[rand[i]]])
            # Append further elements to the existing array
            else:
                x_test = np.append(x_test, [x[rand[i]]], 0)
            # Add label in integer form
            y_test = np.append(y_test, key[y[rand[i]]])
            
        for i in range(splitRange, len(data)):
            # Set the dimensions of the x_train array
            if x_train.ndim == 1:
                x_train = np.array([x[rand[i]]])
            # Append further elements to the existing array
            else:
                x_train = np.append(x_train, [x[rand[i]]], 0)
            # Add label in integer form
            y_train = np.append(y_train, key[y[rand[i]]])

        return x_train, y_train, x_test, y_test
    else:
        for i in range(len(data)):
            # Add label in integer form
            y[i] = key[yrand[i]]
        return x, y

def normalize(data):
    # Array for the max value of each feature. Used in normalization
    normax = []
    # Array for the min value of each feature. Used to eliminate negatives
    normin = []

    # Populate max and min arrays
    for i in range(data.shape[1]):
        normin.append(min(data[:, i]))
        normax.append(max(data[:, i]) - normin[i])

    # Normalize each vector in the dataset
    for i in data:
        for j in range(len(i)):
            i[j] = (i[j] - normin[j]) / normax[j]

    return data

# Split the dataset into test and training sets.
# x_train, y_train, x_test, y_test = prepData(data, genreKey, dims=10)
# x_train = normalize(x_train)
# x_test = normalize(x_test)

# Unsplit Data for K-Means
x, y = prepData(data, genreKey, doSplit=False)

In [None]:
#Split the raw data into labels and features for the testing & training sets
rawData = data.values.tolist()

#Shuffle the data so it's random
rd.shuffle(rawData)

#Get the labels and relevant features
allLabels = [None] * len(rawData)
allFeatures = [None] * len(rawData)

#Label is column #30, index 29
for i in range(len(allLabels)):
    allLabels[i] = rawData[i][29]

#Need to convert them to ints
for i in range(len(allLabels)):
    label = allLabels[i]
    if(label == "blues"):
        allLabels[i] = 0
    elif(label == "classical"):
        allLabels[i] = 1
    elif(label == "country"):
        allLabels[i] = 2
    elif(label == "disco"):
        allLabels[i] = 3
    elif(label == "hiphop"):
        allLabels[i] = 4
    elif(label == "jazz"):
        allLabels[i] = 5
    elif(label == "metal"):
        allLabels[i] = 6
    elif(label == "pop"):
        allLabels[i] = 7
    elif(label == "reggae"):
        allLabels[i] = 8
    elif(label == "rock"):
        allLabels[i] = 9

#Make them the right dimensions
allLabels = tf.keras.utils.to_categorical(allLabels)

#We want the features at 3, 4, 5, 6, 7, 10, 11, 13, 15, & 17
for i in range(len(allFeatures)):
    rawItem = rawData[i][:]
    item = rawItem[3:8]
    item.append(rawItem[10])
    item.append(rawItem[11])
    item.append(rawItem[13])
    item.append(rawItem[15])
    item.append(rawItem[17])
    allFeatures[i] = item

#Make it the right type
#allFeatures = np.expand_dims(allFeatures, axis=-1)

#Determine how many elements the test set should contain
testSize = round(len(rawData) * 0.2)

#Split the sets
testFeatures = np.asarray(allFeatures[:testSize])
testLabels = np.asarray(allLabels[:testSize])
trainFeatures = np.asarray(allFeatures[testSize:])
trainLabels = np.asarray(allLabels[testSize:])

In [None]:
#MLP
mlpModel = models.Sequential()
mlpModel.add(layers.Dense(20, activation='relu', input_shape = (10,)))
mlpModel.add(layers.Dense(60))
mlpModel.add(layers.Dense(120))
mlpModel.add(layers.Dense(30))
mlpModel.add(layers.Dense(10))
mlpModel.compile(
    optimizer='adam',
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=["accuracy"]
)
#mlpModel.build()
#mlpModel.summary()
history = mlpModel.fit(trainFeatures, trainLabels, epochs=10, shuffle=True, validation_data=(testFeatures, testLabels))

In [18]:
# K-Means Classification
def kMeans(train, labels):
    kmeans = skc.KMeans(n_clusters=10, n_init=100, max_iter=1000, verbose=0).fit(train)
    print(labels)
    print(kmeans.labels_)

kMeans(x, y)

['blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues'
 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues'
 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues'
 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues'
 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues'
 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues'
 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues'
 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues'
 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues'
 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues'
 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues' 'blues'
 'blues' 'classical' 'classical' 'classical' 'classical' 'classical'
 'classical' 'classical' 'classical' 'classical' 'classical' 'classical'
 'classical' 'classical' 'classical' 'classical' 'class

NameError: name 'test' is not defined