In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# visualise 5 random rows from the iris dataset
data = pd.read_csv("iris.csv")
data['species'] = data['species'].astype('category').cat.codes
data.iloc[np.random.permutation(data.shape[0])[:5]]

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
138,6.0,3.0,4.8,1.8,2
22,4.6,3.6,1.0,0.2,0
16,5.4,3.9,1.3,0.4,0
64,5.6,2.9,3.6,1.3,1
114,5.8,2.8,5.1,2.4,2


In [3]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier

In [4]:
# possible hidden layer sizes to consider
# single layer
hidden_layer_sizes = [(i, ) for i in range(5, 15)]

# two layers
hidden_layer_sizes.extend([(i, j) for i in range(5, 15, 3) for j in range(5, 15, 3)])

hidden_layer_sizes

[(5,),
 (6,),
 (7,),
 (8,),
 (9,),
 (10,),
 (11,),
 (12,),
 (13,),
 (14,),
 (5, 5),
 (5, 8),
 (5, 11),
 (5, 14),
 (8, 5),
 (8, 8),
 (8, 11),
 (8, 14),
 (11, 5),
 (11, 8),
 (11, 11),
 (11, 14),
 (14, 5),
 (14, 8),
 (14, 11),
 (14, 14)]

In [7]:
MAX_ITER = 5000

In [8]:
# perform 5-fold cross validation to find the best hidden layer size from the options
res = GridSearchCV(MLPClassifier(max_iter = MAX_ITER), param_grid={'hidden_layer_sizes': hidden_layer_sizes}, cv=5).fit(data.iloc[:, 0:4], data.iloc[:, 4])

In [6]:
X_train, X_test, y_train, y_test = train_test_split(data.iloc[:,0:4], data.iloc[:,4], test_size=0.25, random_state=0)
pd.concat([X_test, y_test], axis = 1).to_csv("iris_test.csv", index = False)

In [12]:
res.best_params_, res.best_score_, res.score(X_test, y_test), res.classes_

({'hidden_layer_sizes': (8, 14)},
 0.9866666666666667,
 0.7105263157894737,
 array([0, 1, 2], dtype=int8))

In [14]:
# get best 5 models
best = res.cv_results_['rank_test_score'].argsort()
indices = np.append(best[:5], best[-1])
params = [d['hidden_layer_sizes'] for d in np.array(res.cv_results_['params'])[indices]]
params.extend([(5, 11, 8), tuple([np.random.randint(5, 30) for i in range(12)])])
params

[(8, 14),
 (14, 11),
 (5, 11),
 (14, 5),
 (11, 11),
 (11, 5),
 (5, 11, 8),
 (13, 16, 13, 28, 28, 28, 29, 29, 28, 14, 28, 19)]

In [16]:
import pickle

for p in params:
	print(f"training model with hidden layer sizes {p}")
	model = MLPClassifier(hidden_layer_sizes = p, max_iter = MAX_ITER).fit(X_train, y_train)
	print(f"accuracy: {model.score(X_test, y_test)}")
	for i, coef in enumerate(model.coefs_):
		print(f"shape of layer {i} matrix: {coef.shape}")

	# output model structure, trained weights and biases
	# TODO: binary file
	fname = f'trained_iris_model_{"_".join([str(s) for s in p])}'
	with open(f"models/{fname}.pkl", "wb") as f:
		pickle.dump(model, f)
	
	with open(f"weights/{fname}.txt", 'w') as f:
		for weights, biases in zip(model.coefs_, model.intercepts_):
			f.write(f"{weights.transpose()}\n")
			f.write(f"{biases}\n")
	
	print()

training model with hidden layer sizes (8, 14)
accuracy: 0.9473684210526315
shape of layer 0 matrix: (4, 8)
shape of layer 1 matrix: (8, 14)
shape of layer 2 matrix: (14, 3)

training model with hidden layer sizes (14, 11)
accuracy: 0.9736842105263158
shape of layer 0 matrix: (4, 14)
shape of layer 1 matrix: (14, 11)
shape of layer 2 matrix: (11, 3)

training model with hidden layer sizes (5, 11)
accuracy: 0.9473684210526315
shape of layer 0 matrix: (4, 5)
shape of layer 1 matrix: (5, 11)
shape of layer 2 matrix: (11, 3)

training model with hidden layer sizes (14, 5)
accuracy: 0.9736842105263158
shape of layer 0 matrix: (4, 14)
shape of layer 1 matrix: (14, 5)
shape of layer 2 matrix: (5, 3)

training model with hidden layer sizes (11, 11)
accuracy: 0.9736842105263158
shape of layer 0 matrix: (4, 11)
shape of layer 1 matrix: (11, 11)
shape of layer 2 matrix: (11, 3)

training model with hidden layer sizes (11, 5)
accuracy: 0.9736842105263158
shape of layer 0 matrix: (4, 11)
shape of l