In [44]:
import exercise2_config as config
import pandas as pd
import numpy as np


# Load the train and test CSV files named in the config module.
# header=None: the first row of each file is read as data, not column names.
train_images = pd.read_csv(config.TRAIN_DATA_FILE, header=None)
test_images = pd.read_csv(config.TEST_DATA_FILE, header=None)

# Column 0 is taken as the label; every remaining column is a feature.
train_data = np.array(train_images.iloc[:, 1:])
train_labels = np.array(train_images.iloc[:, 0])
test_data = np.array(test_images.iloc[:, 1:])
test_labels = np.array(test_images.iloc[:, 0])

# Development cap on the number of rows used from each set, so the grid
# search below finishes quickly. Set MAX_SAMPLES = None to use all rows
# (slicing with [:None] keeps the whole array).
MAX_SAMPLES = 100
train_data = train_data[:MAX_SAMPLES]
train_labels = train_labels[:MAX_SAMPLES]
test_data = test_data[:MAX_SAMPLES]
test_labels = test_labels[:MAX_SAMPLES]

In [45]:
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.exceptions import ConvergenceWarning
# Silence scikit-learn's ConvergenceWarning, which is expected here because
# the grid deliberately includes small max_iter values.
# (Calling ConvergenceWarning('ignore') merely builds an exception object and
# throws it away; a warnings filter is what actually suppresses the warning.)
# NOTE(review): the grid search runs with n_jobs=-1; a filter installed here
# may not reach separate worker processes - confirm if warnings still appear.
import warnings
warnings.filterwarnings('ignore', category=ConvergenceWarning)


def algorithm_pipeline(X_train_data, X_test_data, y_train_data,
                       model, param_grid, cv=10, scoring_fit='accuracy'):
    """Grid-search ``model`` over ``param_grid`` and predict on the test data.

    Parameters
    ----------
    X_train_data, y_train_data
        Training features and labels handed to ``GridSearchCV.fit``.
    X_test_data
        Features handed to ``predict`` once the search has been fitted.
    model
        Estimator passed to ``GridSearchCV`` as ``estimator``.
    param_grid
        Parameter grid passed to ``GridSearchCV``.
    cv
        Cross-validation setting forwarded to ``GridSearchCV`` (default 10).
    scoring_fit
        Forwarded to ``GridSearchCV`` as ``scoring`` (default ``'accuracy'``).

    Returns
    -------
    tuple
        ``(fitted_search, predictions)``: the object returned by
        ``GridSearchCV.fit`` and its predictions for ``X_test_data``.
    """
    # TODO: still open - which scoring_fit, cv and activation values to use.
    search_options = dict(
        estimator=model,
        param_grid=param_grid,
        cv=cv,
        n_jobs=-1,  # n_jobs=-1: let scikit-learn pick the maximum parallelism
        scoring=scoring_fit,
        verbose=0,
    )
    fitted_search = GridSearchCV(**search_options).fit(X_train_data, y_train_data)
    predictions = fitted_search.predict(X_test_data)

    return fitted_search, predictions


# Base estimator; the seed is fixed so repeated runs start from the same state.
mlp = MLPClassifier(random_state=1)

# Hyper-parameter grid: five evenly spaced values each for the iteration cap,
# the hidden layer size and the initial learning rate; activation, solver and
# learning-rate schedule are held fixed.
parameter_space = dict(
    max_iter=np.linspace(10, 100, 5, dtype=int),
    hidden_layer_sizes=np.linspace(10, 100, 5, dtype=int),
    activation=['logistic'],
    solver=['sgd'],
    # learning_rate_init only applies to the 'sgd' and 'adam' solvers.
    learning_rate_init=np.linspace(0.001, 0.1, 5),
    # learning_rate only applies to the 'sgd' solver.
    learning_rate=['adaptive'],
)

# Run the search with 5-fold cross-validation and predict on the test data.
clf, pred = algorithm_pipeline(
    train_data, test_data, train_labels, mlp, parameter_space, cv=5
)



In [60]:
# Show the winning estimator, its parameters and its mean cross-validated
# score, one per line.
# NOTE(review): the saved output for this cell looks stale - it reports a
# negative best score (impossible for 'accuracy') and a max_iter value that is
# not in the current grid. Re-run the notebook top to bottom to refresh it.
print(clf.best_estimator_, clf.best_params_, clf.best_score_, sep='\n')

MLPClassifier(activation='logistic', hidden_layer_sizes=100,
              learning_rate='adaptive', learning_rate_init=0.045000000000000005,
              max_iter=70, random_state=1, solver='sgd')
{'activation': 'logistic', 'hidden_layer_sizes': 100, 'learning_rate': 'adaptive', 'learning_rate_init': 0.045000000000000005, 'max_iter': 70, 'solver': 'sgd'}
-2.41


In [62]:
class Result:
    """One grid-search candidate: its hyper-parameters and its score."""

    def __init__(self, hidden_layer_size, learning_rate, max_iter, accuracy):
        # Hyper-parameters of the candidate.
        self.hidden_layer_size = hidden_layer_size
        self.learning_rate = learning_rate
        self.max_iter = max_iter

        # Score recorded for the candidate.
        self.accuracy = accuracy

# Build one Result per grid-search candidate by walking the relevant
# cv_results_ columns in parallel (all columns have one entry per candidate).
cv_table = clf.cv_results_
results = [
    Result(layer_size, init_rate, iterations, mean_score)
    for layer_size, init_rate, iterations, mean_score in zip(
        cv_table['param_hidden_layer_sizes'],
        cv_table['param_learning_rate_init'],
        cv_table['param_max_iter'],
        cv_table['mean_test_score'],
    )
]


# Report

In [93]:
from pylatex import Document, Command, LongTable
from pylatex.utils import NoEscape

# Report document, created with page numbering switched off.
doc = Document(page_numbers=False)

# Title block: register the title and an empty date in the preamble, then
# emit \maketitle as the first item of the document body.
for macro_name, macro_argument in (('title', 'Exercise 2b - MLP'), ('date', '')):
  doc.preamble.append(Command(macro_name, macro_argument))
doc.append(NoEscape(r'\maketitle'))


# Add table with results
with doc.create(LongTable('l l l l l')) as data_table:
  # Header: column titles framed by rules, repeated on every page.
  data_table.add_hline()
  data_table.add_row(['Hidden Layer Size', 'Learning Rate', 'Max Iterations', 'Accuracy', 'Best?'])
  data_table.add_hline()
  data_table.end_table_header()

  # Body rows follow the header directly. No end_table_last_footer() call is
  # made after them: that call closes a footer definition, so placing it after
  # the rows makes longtable treat every data row as the last-page footer and
  # leaves the table body empty. The header already ends with a rule, so no
  # extra rule is added before the first row.
  best_accuracy = round(clf.best_score_, 5)  # loop-invariant, computed once

  for result in results:
    accuracy = round(result.accuracy, 5)
    row = [result.hidden_layer_size,
           round(result.learning_rate, 5),
           result.max_iter,
           accuracy,
           # Every candidate whose rounded score ties the best is flagged.
           'YES' if accuracy == best_accuracy else '']

    data_table.add_row(row)
    data_table.add_hline()
  

# Write the report as best-effort output: a failure in one step is reported
# but stops neither the other step nor the notebook.
# The PDF step stays first, as in the original order.
# NOTE(review): generate_pdf appears to clean up the intermediate .tex file by
# default, so running generate_tex last keeps E2b.tex on disk - confirm
# against the installed pylatex version.
for generate in (doc.generate_pdf, doc.generate_tex):
  try:
    generate('E2b')
  except Exception as err:
    # Catch Exception rather than using a bare except, so KeyboardInterrupt
    # and SystemExit still propagate, and surface the error instead of hiding it.
    print('Warning: {} failed for E2b: {!r}'.format(generate.__name__, err))
