In [1]:
# MLP Classifier from scikit learn
import numpy as np
import pandas as pd
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt 
from sklearn.preprocessing import OneHotEncoder

In [3]:
def get_data(x_path, y_path):
    '''
    Load a feature/label pair from .npy files and rescale the features.

    Args:
        x_path: path to the .npy file holding x
        y_path: path to the .npy file holding y
    Returns:
        x: float np array of [NUM_OF_SAMPLES x n], passed through
           2*(0.5 - x/255), i.e. a raw 0 becomes 1.0 and a raw 255 becomes -1.0
        y: float np array of [NUM_OF_SAMPLES]
    '''
    features = np.load(x_path).astype('float')
    labels = np.load(y_path).astype('float')

    # Rescale the raw values; note the sign flip (larger raw value -> smaller output).
    features = 2*(0.5 - features/255)
    return features, labels

In [4]:
def get_metric(y_true, y_pred):
    '''
    Build the per-class precision / recall / F1 report for a set of predictions.

    Args:
        y_true: ground-truth labels, np array of [NUM_SAMPLES x r] (one hot)
                or np array of [NUM_SAMPLES]
        y_pred: predicted labels, in the same format as y_true
    Returns:
        results: the report produced by sklearn's classification_report
    Raises:
        ValueError: raised by sklearn when y_true and y_pred are not in the
                    same format (e.g. one is one hot and the other is not)
    '''
    # classification_report takes (y_true, y_pred) in that order; passing them
    # the other way round exchanges precision and recall for every class.
    results = classification_report(y_true, y_pred)
    return results

In [20]:
# Locations of the saved train / test splits.
x_train_path = './x_train.npy'
y_train_path = './y_train.npy'
x_test_path = './x_test.npy'
y_test_path = './y_test.npy'

# Load both splits through the same loader so they get identical preprocessing.
X_train, y_train = get_data(x_train_path, y_train_path)
X_test, y_test = get_data(x_test_path, y_test_path)

#you might need one hot encoded y in part a,b,c,d,e
# The encoder is fitted on the training labels only; a trailing axis is added
# to the label arrays before they are handed to it.
label_encoder = OneHotEncoder(sparse_output = False).fit(y_train[..., np.newaxis])

y_train_onehot = label_encoder.transform(y_train[..., np.newaxis])
y_test_onehot = label_encoder.transform(y_test[..., np.newaxis])

In [27]:
# Hidden-layer stacks to compare: progressively deeper prefixes of 512-256-128-64,
# i.e. [[512], [512, 256], [512, 256, 128], [512, 256, 128, 64]].
architecures = [[512, 256, 128, 64][:depth] for depth in range(1, 5)]
# Fitted classifiers are collected here, one per architecture.
classifiers = list()
# Report file for the experiment; it stays open until explicitly closed.
opfile = open('part_f.txt', mode='w')

In [28]:
# Train one MLP per hidden-layer architecture and log its train / test
# classification reports to the already-open report file `opfile`.
# NOTE(review): no random_state is passed, so results vary between runs.
for arch in architecures:
    clf = MLPClassifier(solver='sgd', 
                        alpha=0, 
                        hidden_layer_sizes=arch, 
                        activation='relu', 
                        batch_size=32, 
                        learning_rate='invscaling', 
                        learning_rate_init=0.01,
                        verbose=True)
    print(f"Starting architecture: {arch}")
    clf.fit(X_train, y_train)
    print(f"Trained architecture: {arch}")
    classifiers.append(clf)

    # Compute every metric before touching the file, so a failure here cannot
    # leave an "Architecture:" header in the report with no results under it.
    y_test_pred = clf.predict(X_test)
    y_train_pred = clf.predict(X_train)
    results_train = get_metric(y_train, y_train_pred)
    results_test = get_metric(y_test, y_test_pred)

    opfile.write(f"Architecture: {arch}\n")
    opfile.write(f"Training Data:\n{results_train}\nTesting Data:\n{results_test}\n\n")
    # Push this architecture's results to disk right away: training is slow and
    # an error in a later iteration would otherwise strand them in the buffer.
    opfile.flush()

Starting architecture: [512]
Iteration 1, loss = 0.97515181
Iteration 2, loss = 0.83174944
Iteration 3, loss = 0.76978568
Iteration 4, loss = 0.76795605
Iteration 5, loss = 0.76702198
Iteration 6, loss = 0.76641146
Iteration 7, loss = 0.76580071
Iteration 8, loss = 0.76529058
Iteration 9, loss = 0.76477996
Iteration 10, loss = 0.76417849
Iteration 11, loss = 0.76401693
Iteration 12, loss = 0.76354859
Iteration 13, loss = 0.76318742
Iteration 14, loss = 0.76293184
Iteration 15, loss = 0.76260204
Iteration 16, loss = 0.76229978
Iteration 17, loss = 0.76210018
Iteration 18, loss = 0.76190156
Iteration 19, loss = 0.76158466
Iteration 20, loss = 0.76137316
Iteration 21, loss = 0.76110987
Iteration 22, loss = 0.76088191
Iteration 23, loss = 0.76065378
Iteration 24, loss = 0.76046978
Iteration 25, loss = 0.76015724
Iteration 26, loss = 0.76007899
Iteration 27, loss = 0.76003575
Iteration 28, loss = 0.75977805
Iteration 29, loss = 0.75962438
Iteration 30, loss = 0.75941563
Iteration 31, loss =

ValueError: Classification metrics can't handle a mix of multiclass and multilabel-indicator targets

In [None]:
# Close the report file handle `opfile` once all results have been written.
opfile.close()