# Lab 4 - Multilayer perceptron 

In [1]:
import re
import random
import numpy as np
import pandas as pd

from tensorflow import keras
from sklearn.preprocessing import OneHotEncoder

### Data import - Heart Failure

In [2]:
df = pd.read_csv("heart_failure.csv")
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 299 entries, 0 to 298
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype  
---  ------                    --------------  -----  
 0   age                       299 non-null    float64
 1   anaemia                   299 non-null    int64  
 2   creatinine_phosphokinase  299 non-null    int64  
 3   diabetes                  299 non-null    int64  
 4   ejection_fraction         299 non-null    int64  
 5   high_blood_pressure       299 non-null    int64  
 6   platelets                 299 non-null    float64
 7   serum_creatinine          299 non-null    float64
 8   serum_sodium              299 non-null    int64  
 9   sex                       299 non-null    int64  
 10  smoking                   299 non-null    int64  
 11  time                      299 non-null    int64  
 12  DEATH_EVENT               299 non-null    int64  
dtypes: float64(3), int64(10)
memory usage: 30.5 KB


In [3]:
cols = ['age','anaemia','creatinine_phosphokinase','diabetes','ejection_fraction','high_blood_pressure','platelets','serum_creatinine','serum_sodium','sex','smoking','time','DEATH_EVENT']

In [4]:
df.head()

Unnamed: 0,age,anaemia,creatinine_phosphokinase,diabetes,ejection_fraction,high_blood_pressure,platelets,serum_creatinine,serum_sodium,sex,smoking,time,DEATH_EVENT
0,75.0,0,582,0,20,1,265000.0,1.9,130,1,0,4,1
1,55.0,0,7861,0,38,0,263358.03,1.1,136,1,0,6,1
2,65.0,0,146,0,20,0,162000.0,1.3,129,1,1,7,1
3,50.0,1,111,0,20,0,210000.0,1.9,137,1,0,7,1
4,65.0,1,160,1,20,0,327000.0,2.7,116,0,0,8,1


### Normalisation

In [5]:
for column in cols[:-1]:
    df[column] = (df[column] - df[column].mean())/df[column].std()

### Data preparation

In [6]:
Y = df.iloc[:, -1].values.reshape(-1, 1)
Y = OneHotEncoder().fit_transform(Y).toarray()
X = df.iloc[:, :-1].values

num_samples = X.shape[0]
train_samples = int(0.75*num_samples)

indexes = np.arange(num_samples)
random.shuffle(indexes)

X, Y = X[indexes], Y[indexes]
X_train, Y_train = X[:train_samples, :], Y[:train_samples]
X_test, Y_test = X[train_samples:, :], Y[train_samples:]

### Model initialising

In [7]:
inputs = keras.Input(shape=(12,))
x = keras.layers.Dense(64, activation="relu")(inputs)

outputs = keras.layers.Dense(2, activation="softmax")(x)

fc_model = keras.Model(inputs=inputs, outputs=outputs, name="fc_model")
# fc_model.summary()

In [8]:
fc_model.compile(
    loss=keras.losses.CategoricalCrossentropy(from_logits=True),
    optimizer=keras.optimizers.RMSprop(),
    metrics=["accuracy"],
)

## Training

In [9]:
fc_model.fit(X_train, Y_train, batch_size=4, epochs=10, validation_split=0.2)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x250b4c771f0>

## Eval

In [10]:
test_scores = fc_model.evaluate(X_test, Y_test, verbose=2)

3/3 - 0s - loss: 0.4122 - accuracy: 0.8400


---

# Comparing results

## MLP

In [11]:
print(f"Test accuracy: { test_scores[1]}")

Test accuracy: 0.8399999737739563


# Decision Tree

# SVM - Lin

# SVM - Rbf

# Random Forest

# Adaboost

# Gradient boost