In [1]:
# Install all the packages
!pip install numpy
!pip install pandas
!pip install scikit-learn



In [2]:
# Import all the packages
import numpy as np
import pandas as pd
import sklearn
from sklearn import preprocessing
from sklearn.model_selection import train_test_split  
from sklearn.neural_network import MLPClassifier 
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix 

url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"

In [3]:
# Let's start by naming the features
names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'Class']

In [4]:
# Reading the dataset through pandas function
iris_data = pd.read_csv( url , names = names )
print(iris_data)

     sepal-length  sepal-width  petal-length  petal-width           Class
0             5.1          3.5           1.4          0.2     Iris-setosa
1             4.9          3.0           1.4          0.2     Iris-setosa
2             4.7          3.2           1.3          0.2     Iris-setosa
3             4.6          3.1           1.5          0.2     Iris-setosa
4             5.0          3.6           1.4          0.2     Iris-setosa
..            ...          ...           ...          ...             ...
145           6.7          3.0           5.2          2.3  Iris-virginica
146           6.3          2.5           5.0          1.9  Iris-virginica
147           6.5          3.0           5.2          2.0  Iris-virginica
148           6.2          3.4           5.4          2.3  Iris-virginica
149           5.9          3.0           5.1          1.8  Iris-virginica

[150 rows x 5 columns]


In [6]:
# Analyzing the table 
iris_data_analyze = iris_data.info()
print(iris_data_analyze)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal-length  150 non-null    float64
 1   sepal-width   150 non-null    float64
 2   petal-length  150 non-null    float64
 3   petal-width   150 non-null    float64
 4   Class         150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB
None


In [11]:
# Checking for missing values 
missing_values = iris_data.isnull().sum()
print(missing_values)

sepal-length    0
sepal-width     0
petal-length    0
petal-width     0
Class           0
dtype: int64


In [13]:
# Assigning the inputs with feature variables
inputs = iris_data.drop(columns = ['Class'])
# Assigning the outputs with targret variables 
outputs = iris_data['Class']

In [15]:
# Displaying the top section of inputs 
inputs.head()

Unnamed: 0,sepal-length,sepal-width,petal-length,petal-width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [17]:
# Displaying the top section of outputs 
outputs.head()

0    Iris-setosa
1    Iris-setosa
2    Iris-setosa
3    Iris-setosa
4    Iris-setosa
Name: Class, dtype: object

In [19]:
# Converting into one dimensinol numpy array using numpy 
outputs_1d = outputs.values

In [21]:
# Converting non-numerical values into numerical values using LabelEncoder
# Outputs(target variables) data structure is object type
le = preprocessing.LabelEncoder()
outputs_encoded = le.fit_transform(outputs_1d)
# Displaying the converted values 
print(outputs_encoded)

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2
 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
 2 2]


In [23]:
# Spliting dataset for training(80%) and testing(20%)
inputs_train, inputs_test, outputs_encoded_train, outputs_encoded_test = train_test_split(inputs, outputs_encoded, test_size = 0.20)


In [25]:
# Normalizing the inputs data for both training and testing 
# Target variables are not normalized
# For Normalization we are using standardization(Z-score normalization) 
# Initialing the scaler
scaler = preprocessing.StandardScaler()
# Scalering the inputs data (features variables) of training 
inputs_train_normalized = scaler.fit_transform(inputs_train)
# Scalering the inputs data (features variables) of testing 
inputs_test_normalized = scaler.fit_transform(inputs_test)
# Displaying the size of Normalized data for training
print(inputs_train_normalized.size)
# Displaying the size of Normalized data for testing
print(inputs_test_normalized.size)

480
120


In feature variables there are 150 rows and 4 columns. 
So the total number of inputs data are 600(150*4).
Therefore, training inputs = 480 (600 * 0.8) and testing inputs = 120 (600 * 0.2).
Target variables are not normalized.
After normalization, the data structure is always converted to numpy array. 

In [28]:
# Using MLP: Multilayer Perceptron by sklearn
mlp = MLPClassifier(solver='lbfgs', alpha = 1e-2, activation = 'relu', hidden_layer_sizes = (14,7,3))  


In [30]:
# Training the model
mlp.fit(inputs_train_normalized, outputs_encoded_train )

In [32]:
# Prediction with test data 
predictions = mlp.predict(inputs_test_normalized)
# Display the prediction 
print(predictions)

[2 0 2 0 1 1 1 0 2 1 2 0 0 0 2 2 1 0 1 2 0 1 2 2 1 0 2 0 0 1]


Prediction has been made now its time to evaluate the model perfromance. There are different ways to check the performance of modal among tham we are going to use matric evaluation.
- Accuracy
- Confusion matrix
- Precision, Recall and F1-Score 

In [35]:
# Evaluation by Accuracy 
accuracy = accuracy_score(outputs_encoded_test,predictions)
# Displaying the accuracy of the MLP model by MLPClassifier
print(f'The MPL modal accuracy is: {accuracy}')

The MPL modal accuracy is: 0.9333333333333333


In [37]:
# Evaluation by Confusion matrix
conf_matric = confusion_matrix(outputs_encoded_test,predictions)
# Displaying the Confusion matrix of the MLP model by MLPClassifier
print(f'The MPL modal confusion matrix is: {conf_matric }')

The MPL modal confusion matrix is: [[11  0  0]
 [ 0  7  0]
 [ 0  2 10]]


In [39]:
# Evaluation by Precision, Recall and F1-Score (classification)
clf_report = classification_report(outputs_encoded_test,predictions)
print(f'The MPL modal Precision, Recall and F1-Score is:')
print(clf_report)

The MPL modal Precision, Recall and F1-Score is:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        11
           1       0.78      1.00      0.88         7
           2       1.00      0.83      0.91        12

    accuracy                           0.93        30
   macro avg       0.93      0.94      0.93        30
weighted avg       0.95      0.93      0.93        30

