# PERCEPTRON & MULTILAYER PERCEPTRON USING SCIKIT-LEARN

In [1]:
# Imports required packages

import pandas as pd

from sklearn.datasets import load_iris, fetch_california_housing
from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import root_mean_squared_error

from random import sample
from sklearn.neural_network import MLPClassifier, MLPRegressor

## The Perceptron
A TLU/LTU based neuron with multi-input and single output with inputs and output support number.

### Retreiving & Analyzing the Dataset

In [2]:
# Loads the iris dataset
iris = load_iris(as_frame=True)

# Shows top few instances combining corresponding target
display(iris.data.assign(target = iris.target).head())

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [3]:
# Checks the basic information about the dataset
print(iris.data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   sepal length (cm)  150 non-null    float64
 1   sepal width (cm)   150 non-null    float64
 2   petal length (cm)  150 non-null    float64
 3   petal width (cm)   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB
None


The above information shows the dataset has 
- 150 instances,
- 4 numerical features and
- it does not have any missing values.

### Data Preparation

In [4]:
# Splits dataset into train and test set with stratification
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42, stratify=iris.target)

In [5]:
# Initializes standard scaler to scale features
iris_std_scaler = StandardScaler()

# Fits the standard scaler and then standarizes the train set
X_train_scaled = iris_std_scaler.fit_transform(X_train)

### Modeling

**First, trains the model without feature standardization and records the performance.**

In [6]:
# Initializes Perceptron model
perceptron_clf = Perceptron(random_state=42)

# Fits the model with train dataset
perceptron_clf.fit(X_train, y_train)

0,1,2
,penalty,
,alpha,0.0001
,l1_ratio,0.15
,fit_intercept,True
,max_iter,1000
,tol,0.001
,shuffle,True
,verbose,0
,eta0,1.0
,n_jobs,


In [7]:
# Checks for the unique classes that the model has considered
print("The classes:", perceptron_clf.classes_)

The classes: [0 1 2]


In [8]:
# Predicts the class for few test instances
print("Predictions: ", perceptron_clf.predict(X_test)[:5])

Predictions:  [0 2 1 1 0]


In [9]:
# Also, shows prediction confidence score for the same test instances
print("Prediction Confidence Scores:")
print(perceptron_clf.decision_function(X_test)[:5])

Prediction Confidence Scores:
[[  19.87  -17.75 -171.31]
 [ -23.65  -12.66   75.53]
 [  -9.81    0.4    -7.03]
 [ -10.45    4.5    -7.99]
 [  21.55  -23.69 -176.59]]


In [10]:
# Evaluates the model on test set
print(f"Perceptron model's accuracy on test dataset: {perceptron_clf.score(X_test, y_test):.2f}%")

Perceptron model's accuracy on test dataset: 0.77%


**Now, trains the model with standardization features and checks if the performance improves.**

In [11]:
# Re-initializes Perceptron model
perceptron_clf = Perceptron(random_state=42)

# Fits the model with scaled train dataset
perceptron_clf.fit(X_train_scaled, y_train)

# Standardizes the test data
X_test_scaled = iris_std_scaler.transform(X_test)

# Now, evaluates the model on scaled test set
print(f"Perceptron model's accuracy on scaled test dataset: {perceptron_clf.score(X_test_scaled, y_test):.2f}%)")

Perceptron model's accuracy on scaled test dataset: 0.87%)


### Observations
_Write your observations about the above experiments on Perceptron._

## The Multilayer Perceptron (MLP)

A network made of a stack of Perceptrons with one input layer, one or more hidden layers and one output layer addresses some of weaknesses of Perceptrons.

### Regression MLPs

#### Retrieving & Analyzing the Dataset

In [12]:
# Loads the Calfornia housing dataset for regression
housing = fetch_california_housing(as_frame=True)

# Shows top few instances combining corresponding target
display(housing.data.assign(target = housing.target).head())

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,target
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


NOTE: Values in target ranges between 0.15 and 5 and each value corresponds to the average house value in units of 100,000.


In [13]:
# Checks the basic information about the dataset
print(housing.data.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20640 entries, 0 to 20639
Data columns (total 8 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   MedInc      20640 non-null  float64
 1   HouseAge    20640 non-null  float64
 2   AveRooms    20640 non-null  float64
 3   AveBedrms   20640 non-null  float64
 4   Population  20640 non-null  float64
 5   AveOccup    20640 non-null  float64
 6   Latitude    20640 non-null  float64
 7   Longitude   20640 non-null  float64
dtypes: float64(8)
memory usage: 1.3 MB
None


The above information shows the dataset has 
- more than 20,000 instances,
- 8 numerical features and
- it does not have any missing values.

In [14]:
housing.target.describe()

count    20640.000000
mean         2.068558
std          1.153956
min          0.149990
25%          1.196000
50%          1.797000
75%          2.647250
max          5.000010
Name: MedHouseVal, dtype: float64

#### Data Preparation

In [15]:
# Considering the range of the house prices, each training instance is put into a bin according it its price.
# NOTE: It will be required later during dataset splitting for test set to have same distribution as that of the training set.
target_bin = pd.cut(housing.target, bins=[0, 1.0, 2.0, 3.0, 4.0, 6.0], labels=[1, 2, 3, 4, 5])

In [16]:
#  After associating a bin with each training instance, it randomly selects
# few instances and shows this association (just for reference).
sample(list(zip(housing.target, target_bin)), 10)

[(3.955, 4),
 (0.719, 1),
 (2.1, 3),
 (1.321, 2),
 (2.325, 3),
 (3.256, 4),
 (0.469, 1),
 (2.442, 3),
 (1.432, 2),
 (3.591, 4)]

In [17]:
# Splits dataset into train and test set with stratification
X_train, X_test, y_train, y_test = train_test_split(housing.data, housing.target, random_state=42, stratify=target_bin)

In [18]:
# Initializes standard scaler to scale features
housing_std_scaler = StandardScaler()

# Fits the standard scaler and then standarizes the train set
X_train_scaled = housing_std_scaler.fit_transform(X_train)

#### Modeling

In [19]:
# Initialize multi-layer perceptron regressor with 3 hidden layers with 50 TLUs per layer
mlp_reg = MLPRegressor(hidden_layer_sizes=[50, 50, 50], max_iter=400, random_state=42)

# Fits the model with scaled train set
# NOTE: This step may require approximately one minute to complete
mlp_reg.fit(X_train_scaled, y_train)

0,1,2
,loss,'squared_error'
,hidden_layer_sizes,"[50, 50, ...]"
,activation,'relu'
,solver,'adam'
,alpha,0.0001
,batch_size,'auto'
,learning_rate,'constant'
,learning_rate_init,0.001
,power_t,0.5
,max_iter,400


In [20]:
# Standardizes the test data
X_test_scaled = housing_std_scaler.transform(X_test)

In [21]:
# Performs predictions on test dataset
predictions = mlp_reg.predict(X_test_scaled)

In [22]:
# Measure model performance on test dataset
rmse = root_mean_squared_error(y_test, predictions)
print("RMSE of MLP Regressor:", round(rmse, 2))

RMSE of MLP Regressor: 0.57


#### Observations
_Write your observations about the above experiments on MLP regressor._

### Classification MLPs

Performs a multi-class classification task with iris data.

#### Data Preparation

In [23]:
# Splits dataset into train and test set with stratification
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.2, random_state=42, stratify=iris.target)

In [24]:
# Initializes standard scaler to scale features
iris_std_scaler = StandardScaler()

# Fits the standard scaler and then standarizes the train set
X_train_scaled = iris_std_scaler.fit_transform(X_train)

#### Modeling

In [25]:
# Initializes MLP classifier
mlp_clf = MLPClassifier(hidden_layer_sizes=[10, 10], max_iter=1000, random_state=42)

# Fits the model with scaled train set
mlp_clf.fit(X_train_scaled, y_train)

0,1,2
,hidden_layer_sizes,"[10, 10]"
,activation,'relu'
,solver,'adam'
,alpha,0.0001
,batch_size,'auto'
,learning_rate,'constant'
,learning_rate_init,0.001
,power_t,0.5
,max_iter,1000
,shuffle,True


In [26]:
# Standardizes the test data
X_test_scaled = iris_std_scaler.transform(X_test)

In [27]:
# Predicts the class for few test instances
print("Predictions: ", mlp_clf.predict(X_test_scaled)[:5])

Predictions:  [0 2 1 1 0]


In [28]:
# Also, shows prediction probability estimate for the same test instances
print("Prediction Probability Estimates:")
mlp_clf.predict_proba(X_test_scaled[:5])

Prediction Probability Estimates:


array([[9.98797579e-01, 8.00188571e-04, 4.02232387e-04],
       [3.10605366e-03, 2.17386741e-01, 7.79507205e-01],
       [2.07670445e-02, 9.75431900e-01, 3.80105579e-03],
       [1.09548482e-02, 9.86900382e-01, 2.14476959e-03],
       [9.99052269e-01, 6.48325702e-04, 2.99405045e-04]])

In [31]:
# Evaluates the model on scaled test set
print(f"MLP classifier model's accuracy on test dataset: {mlp_clf.score(X_test_scaled, y_test):.2f}")

MLP classifier model's accuracy on test dataset: 0.97


#### Observations
_Write your observations about the above experiments on MLP classifier._