## Steps for:

### Linear Regression

1. Import packages and classes
2. Feed data
3. Create model and fit training dataset
4. Results / Verify
5. Prediction
6. Visualisation

### Multi-Layer Perception

1. Import packages and classes
2. Feed data
3. Remove missing data
4. Create model and fit training dataset
5. Results and predictions
6. Report / Visualisation

---



In [1]:
# Step 1: Import packages and classes

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.neural_network import MLPClassifier

In [2]:
# Step 2: Feed the data

cancer_df = pd.read_csv('./dataset/bcw.csv')

# head() outputs the first set of rows.
print(">>>>head<<<<")
print(cancer_df.head())
print()

# count() returns the number of rows for each column in the dataset.
print(">>>>count<<<<")
print(cancer_df.count())

# info() provides other additional details, such as the datatype
print(">>>>info<<<<")
print(cancer_df.info())

>>>>head<<<<
    s_code  c_t  c_size  c_shape  m_a  e_cell_size b_n  b_c  n_n  m  class
0  1000025    5       1        1    1            2   1    3    1  1      2
1  1002945    5       4        4    5            7  10    3    2  1      2
2  1015425    3       1        1    1            2   2    3    1  1      2
3  1016277    6       8        8    1            3   4    3    7  1      2
4  1017023    4       1        1    3            2   1    3    1  1      2
>>>>count<<<<
s_code         699
c_t            699
c_size         699
c_shape        699
m_a            699
e_cell_size    699
b_n            699
b_c            699
n_n            699
m              699
class          699
dtype: int64
>>>>info<<<<
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 699 entries, 0 to 698
Data columns (total 11 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   s_code       699 non-null    int64 
 1   c_t          699 non-null    int64 
 2   c_size       6

In [3]:
# Step 3: Remove missing data

cancer_df["b_n"]=pd.to_numeric(cancer_df["b_n"],errors='coerce')
cancer_df.dropna(inplace=True)
print(cancer_df.count())

s_code         683
c_t            683
c_size         683
c_shape        683
m_a            683
e_cell_size    683
b_n            683
b_c            683
n_n            683
m              683
class          683
dtype: int64


In [4]:
# Step 4: create, train and test

#split the data for train and test
X_train, X_test, y_train, y_test = train_test_split(cancer_df.drop('class', axis=1), cancer_df['class'], test_size=0.30, random_state=45)

In [5]:
#normalize(scale) data to allow MLP to converge
scaler = StandardScaler()
scaler.fit(X_train)
#apply transformation to data
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [6]:
#create instance of MLP with 3 hidden layers and 500 iterations
mlp = MLPClassifier(hidden_layer_sizes=(20,20,20), max_iter=500)

#overview of parameter values (default)
print(mlp.get_params)

#fit the scaled data
mlp.fit(X_train, y_train)

<bound method BaseEstimator.get_params of MLPClassifier(hidden_layer_sizes=(20, 20, 20), max_iter=500)>


MLPClassifier(hidden_layer_sizes=(20, 20, 20), max_iter=500)

In [7]:
# Step 5: Results
#make predictions and generated confusion matrix
pred_result = mlp.predict(X_test)
cm = confusion_matrix(y_test, pred_result)
print(cm)
#generating the classification report
c_report = classification_report(y_test, pred_result)
print(c_report)

[[121   2]
 [  6  76]]
              precision    recall  f1-score   support

           2       0.95      0.98      0.97       123
           4       0.97      0.93      0.95        82

    accuracy                           0.96       205
   macro avg       0.96      0.96      0.96       205
weighted avg       0.96      0.96      0.96       205

