## **Doenças do coração**
### REDES NEURAIS ARTIFICIAIS: CLASSIFICAÇÃO

**Legenda das colunas**

Age = idade (anos)

Sex = sexo (0=M; 1=F)

Chest Pain Type = tipo de dor no peito (0=TA:angina típica; 1=ATA:angina atípica; 2=NAP:dor não anginosa; 3=ASY:assintomático)

Resting BP = pressão sanguínea em repouso (mmHg)

Cholesterol = colesterol sérico (mg/dl)

Fasting BS = açúcar no sangue em jejum (mg/dl)
0: Fasting BS < 120 mg/dl (não diabético)
1: Fasting BS >= 120 mg/dl, (diabético)

Resting ECG = eletrocardiograma em repouso (0=Normal; 1=ST:Anormalidade da onda ST-T; 2=LVH: Hipertrofia ventricular esquerda)

Max HR = frequência cardíaca máxima

Exercise Angina = Angina induzida por exercício (0=Não; 1=Sim)

Old Peak = Depressão de ST induzida por exercício em relação ao repouso

ST_Slope = Inclinação do segmento ST (0=UP; 1=Flat; 2=Down)

Heart Disease = Doença cardíaca (0=Não possui doença cardíaca; 1=Possui doença cardíaca)

### Importação das bibliotecas

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import warnings
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score

warnings.filterwarnings('ignore')
plt.style.use('dark_background')

df = pd.read_csv('/Users/diogomonteiro/heart_tratado.csv', sep=';', encoding = 'iso-8859-1')
df

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289.0,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180.0,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283.0,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214.0,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195.0,0,Normal,122,N,0.0,Up,0
...,...,...,...,...,...,...,...,...,...,...,...,...
912,45,M,TA,110,264.0,0,Normal,132,N,1.2,Flat,1
913,68,M,ASY,144,193.0,1,Normal,141,N,3.4,Flat,1
914,57,M,ASY,130,131.0,0,Normal,115,Y,1.2,Flat,1
915,57,F,ATA,130,236.0,0,LVH,174,N,0.0,Flat,1


### Transformando as classes strings em variáveis categóricas ordinais

In [5]:
df1 = pd.get_dummies(df, columns = ['ChestPainType','RestingECG','ST_Slope'])
df1[['ChestPainType_ASY','ChestPainType_ATA', 'ChestPainType_NAP', 'ChestPainType_TA',
       'RestingECG_LVH', 'RestingECG_Normal', 'RestingECG_ST', 'ST_Slope_Down',
       'ST_Slope_Flat', 'ST_Slope_Up']] = df1[['ChestPainType_ASY','ChestPainType_ATA', 'ChestPainType_NAP', 'ChestPainType_TA',
       'RestingECG_LVH', 'RestingECG_Normal', 'RestingECG_ST', 'ST_Slope_Down',
       'ST_Slope_Flat', 'ST_Slope_Up']].astype('int64')

In [6]:
df1['Sex'].replace({'M': 0, 'F': 1}, inplace=True)
df1['ExerciseAngina'].replace({'N': 0, 'Y': 1}, inplace=True)
df1

Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak,HeartDisease,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up
0,40,0,140,289.0,0,172,0,0.0,0,0,1,0,0,0,1,0,0,0,1
1,49,1,160,180.0,0,156,0,1.0,1,0,0,1,0,0,1,0,0,1,0
2,37,0,130,283.0,0,98,0,0.0,0,0,1,0,0,0,0,1,0,0,1
3,48,1,138,214.0,0,108,1,1.5,1,1,0,0,0,0,1,0,0,1,0
4,54,0,150,195.0,0,122,0,0.0,0,0,0,1,0,0,1,0,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
912,45,0,110,264.0,0,132,0,1.2,1,0,0,0,1,0,1,0,0,1,0
913,68,0,144,193.0,1,141,0,3.4,1,1,0,0,0,0,1,0,0,1,0
914,57,0,130,131.0,0,115,1,1.2,1,1,0,0,0,0,1,0,0,1,0
915,57,1,130,236.0,0,174,0,0.0,1,0,1,0,0,1,0,0,0,1,0


### Atributos previsores e alvo

In [8]:
x = df1.drop('HeartDisease',axis=1)
previsores = x.iloc[:,0:19].values
previsores

array([[ 40.,   0., 140., ...,   0.,   0.,   1.],
       [ 49.,   1., 160., ...,   0.,   1.,   0.],
       [ 37.,   0., 130., ...,   0.,   0.,   1.],
       ...,
       [ 57.,   0., 130., ...,   0.,   1.,   0.],
       [ 57.,   1., 130., ...,   0.,   1.,   0.],
       [ 38.,   0., 138., ...,   0.,   0.,   1.]])

In [9]:
previsores.shape

(917, 18)

In [10]:
alvo = df1.iloc[:,8].values
alvo

array([0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0,
       0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0,
       1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0,
       0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0,
       1, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 0, 0, 0,
       0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1,
       1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,
       1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1,

In [11]:
alvo.shape

(917,)

### **Análise das escalas dos atributos (Escalonamento) e pré-processamento**

Padronização (Ultiliza a média e o desvio padrão como referência).

Normalização (Ultiliza os valores máximo e mínimo como referência).

In [14]:
x.describe()

Unnamed: 0,Age,Sex,RestingBP,Cholesterol,FastingBS,MaxHR,ExerciseAngina,Oldpeak,ChestPainType_ASY,ChestPainType_ATA,ChestPainType_NAP,ChestPainType_TA,RestingECG_LVH,RestingECG_Normal,RestingECG_ST,ST_Slope_Down,ST_Slope_Flat,ST_Slope_Up
count,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0
mean,53.509269,0.210469,132.540894,244.635389,0.23337,136.789531,0.40458,0.886696,0.540894,0.188659,0.220284,0.050164,0.205016,0.600872,0.194111,0.068702,0.500545,0.430752
std,9.437636,0.407864,17.999749,53.347125,0.423206,25.467129,0.491078,1.06696,0.498597,0.391451,0.414664,0.218401,0.403934,0.489986,0.395731,0.253085,0.500273,0.495452
min,28.0,0.0,80.0,85.0,0.0,60.0,0.0,-2.6,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,47.0,0.0,120.0,214.0,0.0,120.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,54.0,0.0,130.0,244.635389,0.0,138.0,0.0,0.6,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0
75%,60.0,0.0,140.0,267.0,0.0,156.0,1.0,1.5,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,1.0
max,77.0,1.0,200.0,603.0,1.0,202.0,1.0,6.2,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [15]:
previsores_esc = StandardScaler().fit_transform(previsores)

In [16]:
previsores_esc

array([[-1.43220634, -0.51630861,  0.41462669, ..., -0.27160724,
        -1.00109111,  1.14957339],
       [-0.47805725,  1.9368261 ,  1.52635965, ..., -0.27160724,
         0.99891008, -0.86988791],
       [-1.75025603, -0.51630861, -0.14123979, ..., -0.27160724,
        -1.00109111,  1.14957339],
       ...,
       [ 0.37007527, -0.51630861, -0.14123979, ..., -0.27160724,
         0.99891008, -0.86988791],
       [ 0.37007527,  1.9368261 , -0.14123979, ..., -0.27160724,
         0.99891008, -0.86988791],
       [-1.64423947, -0.51630861,  0.30345339, ..., -0.27160724,
        -1.00109111,  1.14957339]])

In [17]:
previsoresdf = pd.DataFrame(previsores_esc)
previsoresdf

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
0,-1.432206,-0.516309,0.414627,0.832075,-0.551733,1.383339,-0.824310,-0.831502,-1.085425,2.073784,-0.531524,-0.229810,-0.507826,0.815013,-0.490781,-0.271607,-1.001091,1.149573
1,-0.478057,1.936826,1.526360,-1.212261,-0.551733,0.754736,-0.824310,0.106251,-1.085425,-0.482210,1.881384,-0.229810,-0.507826,0.815013,-0.490781,-0.271607,0.998910,-0.869888
2,-1.750256,-0.516309,-0.141240,0.719543,-0.551733,-1.523953,-0.824310,-0.831502,-1.085425,2.073784,-0.531524,-0.229810,-0.507826,-1.226974,2.037569,-0.271607,-1.001091,1.149573
3,-0.584074,1.936826,0.303453,-0.574578,-0.551733,-1.131075,1.213136,0.575128,0.921298,-0.482210,-0.531524,-0.229810,-0.507826,0.815013,-0.490781,-0.271607,0.998910,-0.869888
4,0.052026,-0.516309,0.970493,-0.930931,-0.551733,-0.581047,-0.824310,-0.831502,-1.085425,-0.482210,1.881384,-0.229810,-0.507826,0.815013,-0.490781,-0.271607,-1.001091,1.149573
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
912,-0.902124,-0.516309,-1.252973,0.363191,-0.551733,-0.188170,-0.824310,0.293802,-1.085425,-0.482210,-0.531524,4.351412,-0.507826,0.815013,-0.490781,-0.271607,0.998910,-0.869888
913,1.536257,-0.516309,0.636973,-0.968441,1.812470,0.165420,-0.824310,2.356860,0.921298,-0.482210,-0.531524,-0.229810,-0.507826,0.815013,-0.490781,-0.271607,0.998910,-0.869888
914,0.370075,-0.516309,-0.141240,-2.131275,-0.551733,-0.856061,1.213136,0.293802,0.921298,-0.482210,-0.531524,-0.229810,-0.507826,0.815013,-0.490781,-0.271607,0.998910,-0.869888
915,0.370075,1.936826,-0.141240,-0.161960,-0.551733,1.461915,-0.824310,-0.831502,-1.085425,2.073784,-0.531524,-0.229810,1.969177,-1.226974,-0.490781,-0.271607,0.998910,-0.869888


In [18]:
previsoresdf.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17
count,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0,917.0
mean,1.859654e-16,7.748558e-18,7.767929e-16,-1.86934e-16,4.649135e-17,-5.114048e-16,-1.046055e-16,7.748558000000001e-17,-8.135986e-17,-2.3245670000000002e-17,1.549712e-17,7.748558e-18,3.099423e-17,-1.084798e-16,1.472226e-16,4.649135e-17,5.617704000000001e-17,-3.099423e-17
std,1.000546,1.000546,1.000546,1.000546,1.000546,1.000546,1.000546,1.000546,1.000546,1.000546,1.000546,1.000546,1.000546,1.000546,1.000546,1.000546,1.000546,1.000546
min,-2.704405,-0.5163086,-2.920572,-2.994023,-0.5517333,-3.016886,-0.8243101,-3.269662,-1.085425,-0.4822104,-0.5315237,-0.2298105,-0.5078263,-1.226974,-0.490781,-0.2716072,-1.001091,-0.8698879
25%,-0.6900904,-0.5163086,-0.6971063,-0.5745784,-0.5517333,-0.6596226,-0.8243101,-0.8315022,-1.085425,-0.4822104,-0.5315237,-0.2298105,-0.5078263,-1.226974,-0.490781,-0.2716072,-1.001091,-0.8698879
50%,0.05202558,-0.5163086,-0.1412398,0.0,-0.5517333,0.04755658,-0.8243101,-0.26885,0.9212982,-0.4822104,-0.5315237,-0.2298105,-0.5078263,0.8150134,-0.490781,-0.2716072,0.9989101,-0.8698879
75%,0.688125,-0.5163086,0.4146267,0.4194568,-0.5517333,0.7547357,1.213136,0.5751284,0.9212982,-0.4822104,-0.5315237,-0.2298105,-0.5078263,0.8150134,-0.490781,-0.2716072,0.9989101,1.149573
max,2.490407,1.936826,3.749826,6.721265,1.81247,2.561971,1.213136,4.982571,0.9212982,2.073784,1.881384,4.351412,1.969177,0.8150134,2.037569,3.681787,0.9989101,1.149573


### **Resumo:**
alvo = variável que se pretende atingir (tem ou não doença cardíaca).

previsores = conjunto de variáveis previsoras com as variáveis categóricas transformadas em numéricas manualmente, sem escalonar.

previsores_esc = conjunto de variáveis previsoras com as variáveis categóricas transformadas em numéricas, escalonada.

----

### **Base de treino e teste**

In [21]:
x_treino, x_teste,y_treino,y_teste = train_test_split(previsores_esc, alvo, test_size = 0.3, random_state = 10)

In [22]:
x_treino.shape,y_treino.shape

((641, 18), (641,))

In [23]:
x_teste.shape, y_teste.shape

((276, 18), (276,))

### **Criação do algoritmo**

In [25]:
#MLPClassifier?

### **Parâmetros MLPClassifier**
   
- hidden_layer_sizes (camadas escondidas): default (100,)   

  Quant.= (Ne+Ns)/2 = (11+1)/2 = 6 neurônios   
  Quant.=2/3.(Ne) + Ns = 2/3.11+1 = 8 neurônios
- activation: Função de ativação default='relu'
- solver: algoritmo matemático. Default='adam' (datasets grandes = acima de 1000 amostras). lbfgs é para datasets pequenos. sgd é com a descida do gradiente estocástico (recomendado testar).
- alpha:  parâmetro para o termo de regularização de ajuste de pesos. Aumento de alpha estimula pesos menores e diminuição de alpha estimula pesos maiores. Default=0.0001.
- batch_size: tamanho dos mini lotes. default=min(200, n_samples). Não usar com o solver lbfgs.
- learning_rate: taxa de aprendizagem. default='constant'. Três tipos:   
  1- 'constant':uma taxa de aprendizado constante dada pela taxa de aprendizagem inicial.   
  2- 'invscaling': diminui gradualmente por: taxa efetiva = taxa inicial / t^power_t  
  3- 'adaptive': a taxa é dividida por 5 cada vez que em duas épocas consecutivas não diminuir o erro.
- learning_rate_init: taxa de aprendizagem inicial. Default=0.001
- max_iter int: Número máximo de iterações. default = 200.  ('sgd', 'adam').
- max_fun: Número máximo de chamadas de função de perda. Para 'lbfgs'. Default: 15000
- shuffle: default = True
Usado apenas quando solver = 'sgd' ou 'adam'.
- random_state: default = None
- tol:Tolerância para a otimização.Default=0.0001
- momentum: otimização do algoritmo 'sgd'. Default: 0.9.
- n_iter_no_change: Número máximo de épocas que não atinge a tolerância de melhoria. default = 10. Apenas para solver = 'sgd' ou 'adam'
- verbose : Mostra o progresso. default=False.



In [113]:
redes = {
    'MLP': MLPClassifier(hidden_layer_sizes =(10,10),activation= 'identity', solver='lbfgs',
                         max_iter=1200,tol= 0.0001, random_state = 3),


}

class rede():

  def __init__(self,redes,x_treino,x_teste,y_treino,y_teste):
    self.x_treino = x_treino
    self.x_teste = x_teste
    self.y_treino = y_treino
    self.y_teste = y_teste
    self.redes = redes
    self.rede_score = []

  def treino(self):

    for name, redess in self.redes.items():

        rede = redess.fit(self.x_treino,self.y_treino)
        
        resultado = rede.score(self.x_treino,self.y_treino)

        resultado1 = rede.score(self.x_teste,self.y_teste)
        
        
        self.rede_score.append({'Modelos': name, 'Acurácia Treino':'{:.2f}%'.format(resultado*100),
                                     'Acurácia Teste': '{:.2f}%'.format(resultado1*100)})

    md = pd.DataFrame(self.rede_score)  
    return md

  def previsao(self,rede):

    previsoes1 = rede.predict(self.x_teste)

    kfold = KFold(n_splits = 5, shuffle = True, random_state = 5)

    resul = cross_val_score(rede,previsores,alvo,cv=kfold)  

    print(f'\nRelatório Teste')
      
    print(f"-----------------------------------------------------------------------")
      
    print(f'Acurácia: {round(accuracy_score(self.y_teste,previsoes1)*100,2)}%')
      
    print(f"-----------------------------------------------------------------------")

    print(f'Validação cruzada Teste: {round(resul.mean()*100,2)}%')

    print(f"-----------------------------------------------------------------------")
        
    print(f'Matriz de cofusão:\n {confusion_matrix(self.y_teste,previsoes1)}')

    print(f"-----------------------------------------------------------------------")  

    print(f'Classifição:\n {classification_report(self.y_teste,previsoes1)}')
      
    print(f"-----------------------------------------------------------------------")

Redes = rede(redes,x_treino,x_teste,y_treino,y_teste)
Redes.treino()

Unnamed: 0,Modelos,Acurácia Treino,Acurácia Teste
0,MLP,87.21%,85.14%


### Previsão da rede

In [115]:
Redes.previsao(redes['MLP'])


Relatório Teste
-----------------------------------------------------------------------
Acurácia: 85.14%
-----------------------------------------------------------------------
Validação cruzada Teste: 85.93%
-----------------------------------------------------------------------
Matriz de cofusão:
 [[106  18]
 [ 23 129]]
-----------------------------------------------------------------------
Classifição:
               precision    recall  f1-score   support

           0       0.82      0.85      0.84       124
           1       0.88      0.85      0.86       152

    accuracy                           0.85       276
   macro avg       0.85      0.85      0.85       276
weighted avg       0.85      0.85      0.85       276

-----------------------------------------------------------------------
