In [1]:
import warnings

import numpy as np
import pandas as pd
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import confusion_matrix, accuracy_score, mean_squared_error
from sklearn.model_selection import train_test_split, cross_val_score, GridSearchCV, RandomizedSearchCV
from sklearn.multioutput import MultiOutputRegressor
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Load Datasets

In [2]:
def load_data(file_path, n_features=9):
    """Load a numeric text file and split its columns into features and labels.

    Parameters
    ----------
    file_path : str, path-like or file-like
        Anything accepted by ``np.loadtxt``.
    n_features : int, default 9
        Number of leading columns treated as input features; every remaining
        column is returned as a label column.

    Returns
    -------
    X : ndarray of shape (n_rows, n_features)
    y : ndarray of shape (n_rows, n_columns - n_features)
        Always 2-D, even when there is a single label column.
    """
    # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
    # returns a 1-D array and the column slicing below raises IndexError.
    data = np.loadtxt(file_path, ndmin=2)
    X = data[:, :n_features]    # Input features
    y = data[:, n_features:]    # Output labels
    return X, y

In [3]:
# Load all 3 datasets
# load_data returns the labels as a 2-D array; the two datasets below are fed to
# the classifier, so only their first label column is kept, as a 1-D vector.
X_final, y_final = load_data('data/tictac_final.txt')
y_final = y_final[:, 0]

X_single, y_single = load_data('data/tictac_single.txt')
y_single = y_single[:, 0]

# The multi dataset keeps all of its label columns (2-D) for the multi-output regressor.
X_multi, y_multi = load_data('data/tictac_multi.txt')

# Multilayer Perceptron Classifier

In [4]:
def mlp_clf_train(X, y, is_one_tenth = False):
    """Train and evaluate a standardised MLP classifier.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Class labels, one per row of ``X``.
    is_one_tenth : bool, default False
        If false, 80% of the data is used for training and 20% for testing.
        If true, only 10% is used for training and the rest for testing.

    Returns
    -------
    val_accuracy : float
        Mean accuracy over 10-fold cross validation on the training split.
    test_accuracy : float
        Accuracy on the held-out test split.
    confusion_mtrx : ndarray
        Confusion matrix on the test split, normalised over the true labels
        (each row sums to 1).
    """
    # Split into training and testing data (fixed seed so the split is repeatable)
    if not is_one_tenth:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)
    else:
        X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.1, shuffle=True, random_state=42)

    # The scaler is part of the model so that, during cross validation, it is
    # re-fitted on each fold's training part only. Scaling the whole training
    # set up front would leak validation-fold statistics into every fold.
    model = make_pipeline(
        StandardScaler(),
        # Seeded so repeated runs of the notebook give the same numbers.
        MLPClassifier(max_iter=300, solver='lbfgs', alpha=1e-5, random_state=42),
    )
    model.fit(X_train, y_train)

    # Get cross validation accuracy (cross_val_score works on fresh clones of the model)
    val_accuracy = np.mean(cross_val_score(model, X_train, y_train, cv=10, scoring="accuracy"))

    # Get test accuracy
    y_pred = model.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)

    # Get confusion matrix
    confusion_mtrx = confusion_matrix(y_test, y_pred, normalize="true")

    return val_accuracy, test_accuracy, confusion_mtrx

In [5]:
def print_clf_results(val_accuracy, test_accuracy, confusion_mtrx, dataset_name):
    """Print a classification report for one dataset.

    Writes a header naming ``dataset_name``, the cross-validation and test
    accuracies, and the confusion matrix rounded to 3 decimal places.
    Returns nothing.
    """
    print(f"Performance of Multilayer Perceptron Classification on {dataset_name}:")

    labelled_scores = (
        ("Cross Validation Accuracy = ", val_accuracy),
        ("Test Accuracy = ", test_accuracy),
    )
    for label, score in labelled_scores:
        print(label, score)

    rounded_matrix = confusion_mtrx.round(decimals=3)
    print("Confusion Matrix:")
    print(rounded_matrix)

## Final Dataset

In [6]:
# Baseline run on the final-board dataset (default 80/20 split).
(val_acc_final,
 test_acc_final,
 confusion_mtrx_final) = mlp_clf_train(X=X_final, y=y_final)
print_clf_results(
    val_accuracy=val_acc_final,
    test_accuracy=test_acc_final,
    confusion_mtrx=confusion_mtrx_final,
    dataset_name="Final Dataset",
)

Performance of Multilayer Perceptron Classification on Final Dataset:
Cross Validation Accuracy =  0.9895762132604238
Test Accuracy =  0.9739583333333334
Confusion Matrix:
[[0.925 0.075]
 [0.    1.   ]]


In [7]:
# Same experiment, but trained on only 10% of the final-board dataset.
print("Extra Credit #2 - Train the models on 1/10th of the data")
(val_acc_final,
 test_acc_final,
 confusion_mtrx_final) = mlp_clf_train(X=X_final, y=y_final, is_one_tenth=True)
print_clf_results(
    val_accuracy=val_acc_final,
    test_accuracy=test_acc_final,
    confusion_mtrx=confusion_mtrx_final,
    dataset_name="Final Dataset",
)

Extra Credit #2 - Train the models on 1/10th of the data
Performance of Multilayer Perceptron Classification on Final Dataset:
Cross Validation Accuracy =  0.8166666666666667
Test Accuracy =  0.9003476245654692
Confusion Matrix:
[[0.77  0.23 ]
 [0.029 0.971]]


## Single Dataset

In [8]:
# Silence only the optimiser's "did not converge within max_iter" warnings.
# A blanket filterwarnings('ignore') would also hide unrelated warnings
# (deprecations, undersized CV folds, search-space warnings) in every later
# cell, because the filter is process-wide and persists for the kernel session.
warnings.filterwarnings('ignore', category=ConvergenceWarning)
val_acc_single, test_acc_single, confusion_mtrx_single = mlp_clf_train(X_single, y_single)
print_clf_results(val_acc_single, test_acc_single, confusion_mtrx_single, "Single Dataset")

Performance of Multilayer Perceptron Classification on Single Dataset:
Cross Validation Accuracy =  0.9099236641221374
Test Accuracy =  0.9443173150266971
Confusion Matrix:
[[0.966 0.003 0.    0.006 0.003 0.    0.012 0.003 0.006]
 [0.012 0.929 0.006 0.012 0.012 0.006 0.012 0.    0.012]
 [0.027 0.    0.936 0.016 0.    0.    0.011 0.005 0.005]
 [0.017 0.017 0.    0.932 0.017 0.    0.    0.009 0.009]
 [0.02  0.    0.015 0.    0.955 0.01  0.    0.    0.   ]
 [0.039 0.    0.013 0.    0.    0.934 0.013 0.    0.   ]
 [0.    0.    0.01  0.01  0.    0.    0.98  0.    0.   ]
 [0.02  0.04  0.    0.1   0.    0.    0.    0.84  0.   ]
 [0.    0.011 0.034 0.022 0.    0.    0.    0.    0.933]]


In [9]:
# Same experiment, but trained on only 10% of the single-move dataset.
print("Extra Credit #2 - Train the models on 1/10th of the data")
(val_acc_single,
 test_acc_single,
 confusion_mtrx_single) = mlp_clf_train(X=X_single, y=y_single, is_one_tenth=True)
print_clf_results(
    val_accuracy=val_acc_single,
    test_accuracy=test_acc_single,
    confusion_mtrx=confusion_mtrx_single,
    dataset_name="Single Dataset",
)

Extra Credit #2 - Train the models on 1/10th of the data
Performance of Multilayer Perceptron Classification on Single Dataset:
Cross Validation Accuracy =  0.6992074592074593
Test Accuracy =  0.7042062415196744
Confusion Matrix:
[[0.838 0.026 0.024 0.015 0.038 0.011 0.023 0.008 0.018]
 [0.041 0.656 0.054 0.049 0.07  0.044 0.023 0.025 0.037]
 [0.084 0.056 0.679 0.036 0.096 0.005 0.018 0.016 0.01 ]
 [0.043 0.041 0.024 0.565 0.099 0.075 0.079 0.032 0.043]
 [0.073 0.04  0.029 0.052 0.75  0.021 0.018 0.001 0.017]
 [0.093 0.062 0.062 0.056 0.003 0.579 0.075 0.037 0.031]
 [0.068 0.087 0.068 0.054 0.027 0.006 0.641 0.01  0.039]
 [0.078 0.069 0.037 0.05  0.078 0.041 0.018 0.573 0.055]
 [0.08  0.029 0.027 0.078 0.01  0.068 0.005 0.024 0.679]]


# Multilayer Perceptron Regressor

In [10]:
def finetune_reg_parameters(X_train, y_train):
    """Search MLPRegressor hyperparameters for a multi-output regression task.

    A ``MultiOutputRegressor`` wrapping an ``MLPRegressor`` is tuned with a
    10-fold randomized search scored by negative mean squared error.

    Parameters
    ----------
    X_train : array-like
        Training features.
    y_train : array-like
        Training targets (one column per output).

    Returns
    -------
    dict
        The best hyperparameters, keyed by plain ``MLPRegressor`` argument
        names (the ``estimator__`` routing prefix is stripped) so the dict can
        be passed as ``MLPRegressor(**params)``. The dict is also printed.
    """
    multioutput_regressor = MultiOutputRegressor(MLPRegressor())

    # Every value must be a *list of candidates*. A bare tuple such as (1024,)
    # would itself be read as the candidate list, i.e. the int 1024, and a
    # tuple like (512, 256) would silently become two separate candidates
    # instead of one two-layer architecture.
    param_distributions = {'estimator__alpha': [1e-6],
                           'estimator__hidden_layer_sizes': [(1024,)],
                           'estimator__max_iter': [10000]}

    # Never ask for more samples than there are distinct combinations;
    # otherwise the search warns and truncates n_iter on its own.
    n_candidates = 1
    for candidates in param_distributions.values():
        n_candidates *= len(candidates)

    # RandomizedSearchCV to find the best hyperparameter combination
    randomized_search = RandomizedSearchCV(multioutput_regressor, param_distributions,
                                           n_iter=min(4, n_candidates), cv=10,
                                           scoring='neg_mean_squared_error')
    randomized_search.fit(X_train, y_train)

    # Build a new dict rather than editing best_params_ in place, and strip the
    # routing prefix only where it actually is a prefix.
    prefix = "estimator__"
    best_params = {
        (name[len(prefix):] if name.startswith(prefix) else name): value
        for name, value in randomized_search.best_params_.items()
    }

    print(best_params)

    return best_params

In [13]:
def MLP_reg_train(X, y, is_one_tenth = False):
    """Tune, train and evaluate a multi-output MLP regressor.

    Parameters
    ----------
    X : array-like
        Feature matrix, one row per sample.
    y : array-like
        Target matrix, one column per output.
    is_one_tenth : bool, default False
        If false, 80% of the data is used for training and 20% for testing.
        If true, only 10% is used for training and the rest for testing.

    Returns
    -------
    val_r2 : float
        Mean R^2 over 10-fold cross validation on the training split.
    test_r2 : float
        R^2 on the held-out test split, as reported by ``model.score``.
    rmse : float
        Root of the mean squared error on the test split.
    """
    # Split into training and testing data (fixed seed so the split is repeatable)
    if not is_one_tenth:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=True, random_state=42)
    else:
        X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.1, shuffle=True, random_state=42)

    best_params = finetune_reg_parameters(X_train, y_train)

    # Define and train model. The seed is only a default: a tuned
    # random_state, if one is ever returned, takes precedence.
    regressor_params = {"random_state": 42, **best_params}
    model = MultiOutputRegressor(MLPRegressor(**regressor_params))
    model.fit(X_train, y_train)

    # Cross-validated R^2 on the training split (computed on clones of the model)
    val_r2 = np.mean(cross_val_score(model, X_train, y_train, cv=10, scoring="r2"))

    # R^2 on the test split
    test_r2 = model.score(X_test, y_test)

    # Get RMSE
    y_pred = model.predict(X_test)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))

    return val_r2, test_r2, rmse

In [14]:
def print_reg_results(val_rmse, test_accuracy, rmse, dataset_name):
    """Print a regression report for one dataset.

    Writes a header naming ``dataset_name`` followed by three labelled values:
    ``val_rmse``, ``test_accuracy`` and ``rmse``. Returns nothing.

    Note: despite its name, ``val_rmse`` is printed under the label
    "Cross Validation Accuracy"; the callers in this notebook pass the mean
    cross-validated R^2 score here.
    """
    print(f"Performance of MLP Regression on {dataset_name}:")

    report_rows = (
        ("Cross Validation Accuracy = ", val_rmse),
        ("Test Accuracy = ", test_accuracy),
        ("RMSE = ", rmse),
    )
    for label, value in report_rows:
        print(label, value)

## Multi Dataset

In [11]:
# Baseline run on the multi-label dataset (default 80/20 split).
# NOTE(review): the recorded output of this cell prints parameter names that
# still carry the "estimator__" prefix, which the current
# finetune_reg_parameters strips before printing -- the output looks stale;
# re-run after Restart & Run All.
(val_rmse_multi,
 test_acc_multi,
 rmse_multi) = MLP_reg_train(X=X_multi, y=y_multi)
print_reg_results(
    val_rmse=val_rmse_multi,
    test_accuracy=test_acc_multi,
    rmse=rmse_multi,
    dataset_name="Multi Dataset",
)

{'estimator__max_iter': 10000, 'estimator__hidden_layer_sizes': 1024, 'estimator__alpha': 1e-06}
Performance of MLP Regression on Multi Dataset:
Cross Validation Accuracy =  0.7983083685060413
Test Accuracy =  0.819724836397738
RMSE =  0.17222943080373798


In [15]:
# Same experiment, but trained on only 10% of the multi-label dataset.
print("Extra Credit #2 - Train the models on 1/10th of the data")
(val_rmse_multi,
 test_acc_multi,
 rmse_multi) = MLP_reg_train(X=X_multi, y=y_multi, is_one_tenth=True)
print_reg_results(
    val_rmse=val_rmse_multi,
    test_accuracy=test_acc_multi,
    rmse=rmse_multi,
    dataset_name="Multi Dataset",
)

Extra Credit #2 - Train the models on 1/10th of the data
{'max_iter': 10000, 'hidden_layer_sizes': 1024, 'alpha': 1e-06}
Performance of MLP Regression on Multi Dataset:
Cross Validation Accuracy =  0.49550508790833525
Test Accuracy =  0.5261690641829009
RMSE =  0.27871836772496833
