In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical
from utils.models import keras_nn
from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import accuracy_score


In [2]:
data = pd.read_csv('data/dataset1.csv').to_numpy()
X, y = data[:,:6], to_categorical(data[:,6])

# Normalize X values:

means, stds = np.zeros(6), np.zeros(6)
for i in range (X.shape[1]):
    mean, std = X[:, i].mean(), X[:, i].std()
    means[i] = mean
    stds[i] = std
    X[:, i] = (X[:, i]-mean)/std

np.savetxt('ae1220-1.txt', [means, stds])


### Determining Optimal Lambda for Ridge Regularization

In [17]:
l2_lambdas = np.logspace(-4,2,20)

kfold = KFold(n_splits=5, shuffle=True, random_state=42)

results = {}
for l2_lambda in l2_lambdas:
    fold_accuracies = []
    for train, test in kfold.split(X, y):
        X_train, X_test = X[train], X[test]
        y_train, y_test = y[train], y[test]
        model = keras_nn(input_dim=6, output_dim=2, activation='relu', hidden_nodes=4, l2_lambda=l2_lambda)
        model.compile (loss = 'categorical_crossentropy', optimizer='sgd', metrics = ['accuracy'])

        #Test model at this lambda
        model.fit(X_train, y_train, epochs = 250, batch_size = 32, verbose = 0)

        y_pred = model.predict(X[test])
        #Convert probabilities to classifications
        y_pred = [1 if pred > 0.5 else 0 for pred in y_pred[:,0]]

        #Calculate Accuracy
        fold_accuracies.append(accuracy_score(y_test[:,0], y_pred))
    
    
    mean_accuracy = np.mean(fold_accuracies)
    results[l2_lambda] = mean_accuracy
    print(f"Lambda: {l2_lambda}, Mean Validation Accuracy: {mean_accuracy}") 

best_lambda = max(results, key=results.get)
print(f"Best Lambda: {best_lambda}")

Lambda: 0.0001, Mean Validation Accuracy: 0.489
Lambda: 0.00020691380811147902, Mean Validation Accuracy: 0.49024999999999996
Lambda: 0.00042813323987193956, Mean Validation Accuracy: 0.489
Lambda: 0.0008858667904100823, Mean Validation Accuracy: 0.5029999999999999
Lambda: 0.0018329807108324356, Mean Validation Accuracy: 0.48925
Lambda: 0.00379269019073225, Mean Validation Accuracy: 0.4885
Lambda: 0.007847599703514606, Mean Validation Accuracy: 0.49024999999999996
Lambda: 0.01623776739188721, Mean Validation Accuracy: 0.489
Lambda: 0.03359818286283781, Mean Validation Accuracy: 0.48949999999999994
Lambda: 0.06951927961775606, Mean Validation Accuracy: 0.4977499999999999
Lambda: 0.14384498882876628, Mean Validation Accuracy: 0.4885
Lambda: 0.29763514416313164, Mean Validation Accuracy: 0.49375
Lambda: 0.615848211066026, Mean Validation Accuracy: 0.489
Lambda: 1.2742749857031321, Mean Validation Accuracy: 0.4885
Lambda: 2.6366508987303554, Mean Validation Accuracy: 0.4885
Lambda: 5.45559

### Varying Parameters

In [3]:
# Best lambda:  0.0008858667904100823
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
l2_lambda = 0.0008858667904100823

In [7]:
model = keras_nn(input_dim=6, output_dim=2,activation='relu', final_activation='sigmoid', hidden_layers = 3, hidden_nodes=4, l2_lambda=l2_lambda)
model.compile (loss = 'categorical_crossentropy', optimizer='sgd', metrics = ['accuracy'])

#Test model at this lambda
model.fit(X_train, y_train, epochs = 1500, batch_size = 32, verbose = 0)

y_pred = model.predict(X_test)
#Convert probabilities to classifications
y_pred = [1 if pred > 0.5 else 0 for pred in y_pred[:,0]]

#Calculate Accuracy
accuracy = accuracy_score(y_test[:,0], y_pred)
print(accuracy)

0.925


In [8]:
model.save('ae1220-1.h5')