In [23]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from seaborn import heatmap

from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.metrics import accuracy_score
# NOTE: plot_confusion_matrix was deprecated in scikit-learn 1.0 and removed in 1.2.
from sklearn.metrics import plot_confusion_matrix, confusion_matrix
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler


Data Prep:

In [24]:
# Load the feature table, split off the target column, and hold out a test set.
# A forward-slash path resolves on Windows as well as on POSIX systems.
df = pd.read_csv("Data/features_3_sec.csv")
y = np.asarray(df["label"])
# Drop the target and the "filename" column so only the remaining columns
# are used as model inputs.
df = df.drop(columns=["label", "filename"])
X = np.asarray(df)

SPLIT = 0.2  # fraction of rows held out for testing
SEED = 425   # seed reused wherever a random_state is required

# Seeding the split keeps the train/test partition identical across kernel
# restarts, so every downstream score is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=SPLIT, random_state=SEED
)

Question 1:

In [None]:
# Per-feature summary of the training matrix. This cell is meant to be executed
# twice -- once before and once after the Question 2 cell -- so the same code
# reports the statistics of the data both before and after processing.
X_features = X_train.T  # one row per feature column
for column in X_features:
    column_mean = np.mean(column)
    column_std = np.std(column)
    print("Mean: ", column_mean,"\n    Standard Deviation: ", column_std)

Question 2:

In [26]:
# Standardise the features: the scaler is fitted on the training rows only and
# the same fitted transform is then applied to both the training and test rows.
scaler = StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)


Question 3:

In [None]:
# 3-fold cross-validated grid search over hidden-layer width and initial
# learning rate for a tanh MLP trained with SGD.
splits = KFold(n_splits=3)
num = [50, 100, 200, 500]              # candidate hidden-layer sizes
lrs = [0.0001, 0.001, 0.01, 0.1, 1]    # candidate initial learning rates
map_info = []                          # mean CV accuracy per (rate, size) pair, rate-major order

for rate in lrs:
    for n in num:
        accuracies = []
        for fit_idx, holdout_idx in splits.split(X_train):
            # A fresh, identically seeded network is trained for every fold.
            model = MLPClassifier(n, 'tanh', solver='sgd', learning_rate_init=rate, random_state=SEED)
            model.fit(X_train[fit_idx], y_train[fit_idx])
            holdout_pred = model.predict(X_train[holdout_idx])
            accuracies.append(accuracy_score(y_train[holdout_idx], holdout_pred))
        mean_accuracy = np.mean(accuracies)
        map_info.append(mean_accuracy)
        print(mean_accuracy)



Question 3.1: Heatmap

In [None]:
# Arrange the flat list of mean CV accuracies into a grid with one row per
# learning rate and one column per neuron count. The grid shape is derived from
# the hyperparameter lists themselves, so it stays correct if either list changes.
# A row-major reshape places entry i at [i // len(num)][i % len(num)].
map_square = np.reshape(map_info, (len(lrs), len(num)))

fig, ax = plt.subplots()
# Draw on the axes created above rather than relying on the implicit current axes.
heatmap(map_square, xticklabels=num, yticklabels=lrs, ax=ax)
ax.set_ylabel("Learning Rate")
ax.set_xlabel("Number of Neurons")
ax.set_title("Heatmap of NN Hyperparameters")
plt.show()

Question 4:

In [None]:
# Compare hidden-layer architectures with the same K-fold splitter (`splits`)
# used for the hyperparameter grid, at a fixed initial learning rate of 0.1.
# Single-layer entries carry a trailing comma so that every architecture is a
# real tuple; `(100)` without the comma is just the integer 100.
structs = [
    (100,), (100, 100), (100, 100, 100),
    (200,), (200, 100), (200, 100, 100),
    (500,), (500, 200), (500, 200, 100),
]
struct_info = []  # mean CV accuracy, in the same order as `structs`
for struct in structs:
    fold_scores = []
    for train_index, val_index in splits.split(X_train):
        # A fresh, identically seeded network is trained for every fold.
        nn = MLPClassifier(struct, 'tanh', solver='sgd', learning_rate_init=0.1, random_state=SEED)
        X_subtrain = X_train[train_index]
        X_val = X_train[val_index]
        y_subtrain = y_train[train_index]
        y_val = y_train[val_index]
        nn.fit(X_subtrain, y_subtrain)
        fold_scores.append(accuracy_score(y_val, nn.predict(X_val)))
    struct_info.append(np.mean(fold_scores))

# Pair each architecture with its score instead of indexing by a hard-coded count.
for struct, score in zip(structs, struct_info):
    print(struct, ":\n   ", score)

Question 5:

In [None]:
# Train the (500, 200) tanh network under several random initialisations,
# record train/test accuracy for each, and overlay the training loss curves.
N_RUNS = 10  # number of differently seeded networks to train

np.random.seed(SEED)
# With only `low` given, NumPy treats it as the exclusive upper bound, so both
# bounds are spelled out here: seeds are drawn from [0, 1000).
states = np.random.randint(0, 1000, size=N_RUNS)

train_acc = []
test_acc = []
for state in states:
    nn = MLPClassifier((500,200), 'tanh', solver='sgd', learning_rate_init=0.1, random_state=state, max_iter=1000)
    nn.fit(X_train, y_train)
    train_acc.append(accuracy_score(y_train, nn.predict(X_train)))
    test_acc.append(accuracy_score(y_test, nn.predict(X_test)))
    # One loss curve per seed, all drawn on the same axes.
    plt.plot(nn.loss_curve_)
plt.title("Loss curves")
plt.xlabel("Iterations")
plt.ylabel("Loss")

# Iterate the three result sequences together rather than by a hard-coded index range.
for state, train_score, test_score in zip(states, train_acc, test_acc):
    print(state, ":\n    Train:", train_score, "\n    Test: ", test_score)
plt.show()

Question 6:

In [None]:
# Fit the final (500, 200) tanh network on the full training set and report
# its confusion matrix on the held-out test set.
nn = MLPClassifier((500,200), 'tanh', solver='sgd', learning_rate_init=0.1, random_state=SEED, max_iter=1000)
nn.fit(X_train, y_train)

# Predict once and reuse the result for both the printed and the plotted matrix.
y_pred = nn.predict(X_test)
# Pinning the label order to the classifier's classes keeps the matrix rows and
# columns aligned with the tick labels below.
matrix = confusion_matrix(y_test, y_pred, labels=nn.classes_)
print(matrix)

# ConfusionMatrixDisplay replaces plot_confusion_matrix, which scikit-learn
# deprecated in 1.0 and removed in 1.2.
disp = ConfusionMatrixDisplay(confusion_matrix=matrix, display_labels=nn.classes_)
disp.plot(include_values=False, xticks_rotation=45)
disp.ax_.set_title("Confusion Matrix")
plt.show()