In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# visualise 5 random rows from the iris dataset
data = pd.read_csv("iris.csv")
print(data.shape)
data.iloc[np.random.permutation(data.shape[0])[:5]]

(150, 5)


Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
35,5.0,3.2,1.2,0.2,setosa
55,5.7,2.8,4.5,1.3,versicolor
10,5.4,3.7,1.5,0.2,setosa
117,7.7,3.8,6.7,2.2,virginica
44,5.1,3.8,1.9,0.4,setosa


In [3]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neural_network import MLPClassifier

In [4]:
# possible hidden layer sizes to consider
# single layer
hidden_layer_sizes = [(i, ) for i in range(5, 15)]

# two layers
hidden_layer_sizes.extend([(i, j) for i in range(5, 15, 3) for j in range(5, 15, 3)])

hidden_layer_sizes

[(5,),
 (6,),
 (7,),
 (8,),
 (9,),
 (10,),
 (11,),
 (12,),
 (13,),
 (14,),
 (5, 5),
 (5, 8),
 (5, 11),
 (5, 14),
 (8, 5),
 (8, 8),
 (8, 11),
 (8, 14),
 (11, 5),
 (11, 8),
 (11, 11),
 (11, 14),
 (14, 5),
 (14, 8),
 (14, 11),
 (14, 14)]

In [5]:
MAX_ITER = 5000

In [6]:
# perform 5-fold cross validation to find the best hidden layer size from the options
res = GridSearchCV(MLPClassifier(max_iter = MAX_ITER), param_grid={'hidden_layer_sizes': hidden_layer_sizes}, cv=5).fit(data.iloc[:, 0:4], data.iloc[:, 4])

In [8]:
X_train, X_test, y_train, y_test = train_test_split(data.iloc[:,0:4], data.iloc[:,4], test_size=0.25, random_state=0)

In [9]:
res.best_params_, res.best_score_, res.score(X_test, y_test), res.classes_

({'hidden_layer_sizes': (6,)},
 0.9866666666666667,
 0.9736842105263158,
 array(['setosa', 'versicolor', 'virginica'], dtype='<U10'))

In [28]:
# get best 5 models
best = res.cv_results_['rank_test_score'].argsort()
indices = np.append(best[:5], best[-1])
params = [d['hidden_layer_sizes'] for d in np.array(res.cv_results_['params'])[indices]]
params

[(5, 11), (6,), (14, 8), (10,), (8, 14), (5,)]

In [30]:
for p in params:
	print(f"training model with hidden layer sizes {p}")
	model = MLPClassifier(hidden_layer_sizes = p, max_iter = MAX_ITER).fit(X_train, y_train)
	print(f"accuracy: {model.score(X_test, y_test)}")
	for i, coef in enumerate(model.coefs_):
		print(f"shape of layer {i} matrix: {coef.shape}")

	# output model structure, trained weights and biases
	# TODO: binary file
	fname = f'trained_iris_model_{"_".join([str(s) for s in p])}.txt'
	with open(f"models/{fname}", 'w') as f:
		f.write(f"{p}\n")
		for weights, biases in zip(model.coefs_, model.intercepts_):
			f.write(f"{weights}\n")
			f.write(f"{biases}\n")
	print()

training model with hidden layer sizes (5, 11)
accuracy: 0.9473684210526315
shape of layer 0 matrix: (4, 5)
shape of layer 1 matrix: (5, 11)
shape of layer 2 matrix: (11, 3)

training model with hidden layer sizes (6,)
accuracy: 0.9736842105263158
shape of layer 0 matrix: (4, 6)
shape of layer 1 matrix: (6, 3)

training model with hidden layer sizes (14, 8)
accuracy: 0.9736842105263158
shape of layer 0 matrix: (4, 14)
shape of layer 1 matrix: (14, 8)
shape of layer 2 matrix: (8, 3)

training model with hidden layer sizes (10,)
accuracy: 0.9736842105263158
shape of layer 0 matrix: (4, 10)
shape of layer 1 matrix: (10, 3)

training model with hidden layer sizes (8, 14)
accuracy: 0.9736842105263158
shape of layer 0 matrix: (4, 8)
shape of layer 1 matrix: (8, 14)
shape of layer 2 matrix: (14, 3)

training model with hidden layer sizes (5,)
accuracy: 0.2894736842105263
shape of layer 0 matrix: (4, 5)
shape of layer 1 matrix: (5, 3)

