In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import classification_report
import pandas as pd

## Obtendo os Dados

In [2]:
data = pd.read_csv("../data/heart_disease_uci.csv")

In [3]:
data.describe()

Unnamed: 0,age,resting_blood_pressure,cholestoral,Max_heart_rate,oldpeak,target
count,1025.0,1025.0,1025.0,1025.0,1025.0,1025.0
mean,54.434146,131.611707,246.0,149.114146,1.071512,0.513171
std,9.07229,17.516718,51.59251,23.005724,1.175053,0.50007
min,29.0,94.0,126.0,71.0,0.0,0.0
25%,48.0,120.0,211.0,132.0,0.0,0.0
50%,56.0,130.0,240.0,152.0,0.8,1.0
75%,61.0,140.0,275.0,166.0,1.8,1.0
max,77.0,200.0,564.0,202.0,6.2,1.0


In [4]:
data.head()

Unnamed: 0,age,sex,chest_pain_type,resting_blood_pressure,cholestoral,fasting_blood_sugar,rest_ecg,Max_heart_rate,exercise_induced_angina,oldpeak,slope,vessels_colored_by_flourosopy,thalassemia,target
0,52,Male,Typical angina,125,212,Lower than 120 mg/ml,ST-T wave abnormality,168,No,1.0,Downsloping,Two,Reversable Defect,0
1,53,Male,Typical angina,140,203,Greater than 120 mg/ml,Normal,155,Yes,3.1,Upsloping,Zero,Reversable Defect,0
2,70,Male,Typical angina,145,174,Lower than 120 mg/ml,ST-T wave abnormality,125,Yes,2.6,Upsloping,Zero,Reversable Defect,0
3,61,Male,Typical angina,148,203,Lower than 120 mg/ml,ST-T wave abnormality,161,No,0.0,Downsloping,One,Reversable Defect,0
4,62,Female,Typical angina,138,294,Greater than 120 mg/ml,ST-T wave abnormality,106,No,1.9,Flat,Three,Fixed Defect,0


In [5]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1025 entries, 0 to 1024
Data columns (total 14 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   age                            1025 non-null   int64  
 1   sex                            1025 non-null   object 
 2   chest_pain_type                1025 non-null   object 
 3   resting_blood_pressure         1025 non-null   int64  
 4   cholestoral                    1025 non-null   int64  
 5   fasting_blood_sugar            1025 non-null   object 
 6   rest_ecg                       1025 non-null   object 
 7   Max_heart_rate                 1025 non-null   int64  
 8   exercise_induced_angina        1025 non-null   object 
 9   oldpeak                        1025 non-null   float64
 10  slope                          1025 non-null   object 
 11  vessels_colored_by_flourosopy  1025 non-null   object 
 12  thalassemia                    1025 non-null   o

## Normalizando os dados categoricos para númericos

In [6]:
data['sex'] = data['sex'].map({ 'Male': 0, 'Female': 1 })

In [7]:
data['chest_pain_type'] = data['chest_pain_type'].map({ 'Typical angina': 0, 'Atypical angina': 1, 'Non-anginal pain': 2, 'Asymptomatic': 3 })

In [8]:
data['fasting_blood_sugar'] = data['fasting_blood_sugar'].map({ 'Lower than 120 mg/ml': 0, 'Greater than 120 mg/ml': 1 })

In [9]:
data['rest_ecg'] = data['rest_ecg'].map({ 'Normal': 0, 'ST-T wave abnormality': 1, 'Left ventricular hypertrophy': 2 })

In [10]:
data['exercise_induced_angina'] = data['exercise_induced_angina'].map({ 'No': 0, 'Yes': 1 })

In [11]:
data['slope'] = data['slope'].map({ 'Upsloping': 0, 'Flat': 1, 'Downsloping': 2 })

In [12]:
data['vessels_colored_by_flourosopy'] = data['vessels_colored_by_flourosopy'].map({ 'Zero': 0, 'One': 1, 'Two': 2, 'Three': 3, 'Four': 4 })

In [13]:
data['thalassemia'] = data['thalassemia'].map({ 'Normal': 0, 'Fixed Defect': 1, 'Reversable Defect': 2, 'No': 3 })

## Treinando o modelo com o MLP Classifier

In [14]:
X = data.drop("target", axis=1)
y = data["target"]

In [15]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [16]:
clf = MLPClassifier(hidden_layer_sizes=(20,20,20), max_iter=1000)

In [17]:
clf.fit(X_train, y_train)

MLPClassifier(hidden_layer_sizes=(20, 20, 20), max_iter=1000)

## Métricas de desempenho

In [18]:
clf.score(X_test, y_test)

0.8439024390243902

In [19]:
y_pred = clf.predict(X_test)
y_pred

array([0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1,
       1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0,
       1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1,
       1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0,
       1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 1, 1,
       0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 1])

In [20]:
classification_report(y_test, y_pred)

'              precision    recall  f1-score   support\n\n           0       0.84      0.82      0.83        97\n           1       0.85      0.86      0.85       108\n\n    accuracy                           0.84       205\n   macro avg       0.84      0.84      0.84       205\nweighted avg       0.84      0.84      0.84       205\n'