# Day 7

## Importing libraries

In [6]:

from sklearn.datasets import load_iris
import pandas as pd
from sklearn.model_selection import cross_val_score # cross validation is used to evaluate the performance of a model
from sklearn.neural_network import MLPClassifier    # MLPClassifier is a classifier that uses multi-layer perceptron (a kind of neural network).

## Load the dataset

In [7]:
# Fetch the Iris dataset that ships with scikit-learn.
iris = load_iris()

# Put the measurements into a DataFrame, labelling each column with its
# feature name so the table is self-describing.
data = pd.DataFrame(data=iris.data, columns=iris.feature_names)

# Preview the first rows to confirm the load worked.
data.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


## Ready the dataset

In [8]:
# Append the class labels as a 'Species' column. The target array is assigned
# directly: it is positionally aligned with the rows of `data`, so wrapping it
# in a one-column DataFrame first adds nothing.
data['Species'] = iris.target

# Split features from labels by column name rather than by position, so the
# split does not depend on 'Species' being the last column.
X = data.drop(columns=['Species'])  # every column except the label
y = data['Species']                 # the label column only


## Define the model

In [9]:
# Configure the multi-layer perceptron. Nothing is fitted in this cell: the
# estimator is only instantiated here and is trained later, once per fold, by
# cross_val_score.
#
# hidden_layer_sizes : one entry per hidden layer giving its neuron count,
#                      i.e. (neurons in 1st hidden layer, neurons in 2nd, ...).
#                      (10,) means a single hidden layer with 10 neurons.
# max_iter           : upper bound on the number of training iterations
#                      (at most 5000); training may stop earlier if the
#                      solver converges.
# activation         : activation function of the hidden layer. 'logistic' is
#                      the logistic sigmoid, f(x) = 1 / (1 + exp(-x)).
# solver             : optimisation algorithm. 'sgd' is stochastic gradient
#                      descent.
# learning_rate_init : initial learning rate used by the solver (0.001 here).
# random_state       : seed for weight initialisation and SGD shuffling, fixed
#                      so that Restart & Run All reproduces the same scores.
RANDOM_STATE = 42

model = MLPClassifier(hidden_layer_sizes=(10,),
                      max_iter=5000,
                      activation='logistic',
                      solver='sgd',
                      learning_rate_init=0.001,
                      random_state=RANDOM_STATE,
                      )


## Cross validate the model

In [10]:
# 10-fold cross validation: the data is split into 10 parts and the model is
# fitted and scored 10 times, each time holding out a different part.
scores = cross_val_score(model, X, y, cv=10)

# Tabulate the accuracy obtained on each of the 10 folds (numbered from 0).
print('Iteration\tAccuracy')
row_template = '%d\t\t%0.2f'
for fold_number, fold_accuracy in enumerate(scores):
    print(row_template % (fold_number, fold_accuracy))

# Summarise the per-fold accuracies by their mean and standard deviation.
mean_accuracy = scores.mean()
accuracy_spread = scores.std()
print("Average Accuracy: %0.2f Standard deviation of Accuracy: %0.2f" % (mean_accuracy, accuracy_spread))

Iteration	Accuracy
0		0.93
1		1.00
2		1.00
3		0.87
4		0.80
5		0.93
6		0.87
7		1.00
8		0.87
9		1.00
Average Accuracy: 0.93 Standard deviation of Accuracy: 0.07
