# Importações

In [114]:
# Framework Pytorch

import torch
import torch.nn as nn

# Otimizador (Adam)
import torch.optim as optim

# Carregar os dados como batches
from torch.utils.data  import DataLoader, TensorDataset

# manipulação de dados
import pandas as pd
import numpy as np
from ydata_profiling import ProfileReport

# separa entre test e treino
from sklearn.model_selection import train_test_split

# Normaliza os dados
from  sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder

# Gráfico
import matplotlib.pyplot as plt

## Device

In [115]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Usando device:", device)

Usando device: cuda


# Carregar o CSV e entender os dados

In [116]:
# Carrega o CSV
df = pd.read_csv("heart.csv")

# Verifica as dimensões e colunas
print(df.shape)
print(df.columns.tolist())

(918, 12)
['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS', 'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope', 'HeartDisease']


In [117]:
df

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0
...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,M,TA,110,264,0,Normal,132,N,1.2,Flat,1
914,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat,1
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat,1
916,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat,1


In [118]:
# Verifica se há valores nulos

df.isnull().sum()

Age               0
Sex               0
ChestPainType     0
RestingBP         0
Cholesterol       0
FastingBS         0
RestingECG        0
MaxHR             0
ExerciseAngina    0
Oldpeak           0
ST_Slope          0
HeartDisease      0
dtype: int64

In [119]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age             918 non-null    int64  
 1   Sex             918 non-null    object 
 2   ChestPainType   918 non-null    object 
 3   RestingBP       918 non-null    int64  
 4   Cholesterol     918 non-null    int64  
 5   FastingBS       918 non-null    int64  
 6   RestingECG      918 non-null    object 
 7   MaxHR           918 non-null    int64  
 8   ExerciseAngina  918 non-null    object 
 9   Oldpeak         918 non-null    float64
 10  ST_Slope        918 non-null    object 
 11  HeartDisease    918 non-null    int64  
dtypes: float64(1), int64(6), object(5)
memory usage: 86.2+ KB


In [120]:
df.describe()

Unnamed: 0,Age,RestingBP,Cholesterol,FastingBS,MaxHR,Oldpeak,HeartDisease
count,918.0,918.0,918.0,918.0,918.0,918.0,918.0
mean,53.510893,132.396514,198.799564,0.233115,136.809368,0.887364,0.553377
std,9.432617,18.514154,109.384145,0.423046,25.460334,1.06657,0.497414
min,28.0,0.0,0.0,0.0,60.0,-2.6,0.0
25%,47.0,120.0,173.25,0.0,120.0,0.0,0.0
50%,54.0,130.0,223.0,0.0,138.0,0.6,1.0
75%,60.0,140.0,267.0,0.0,156.0,1.5,1.0
max,77.0,200.0,603.0,1.0,202.0,6.2,1.0


# Pré-processamento dos dados

In [121]:
df

Unnamed: 0,Age,Sex,ChestPainType,RestingBP,Cholesterol,FastingBS,RestingECG,MaxHR,ExerciseAngina,Oldpeak,ST_Slope,HeartDisease
0,40,M,ATA,140,289,0,Normal,172,N,0.0,Up,0
1,49,F,NAP,160,180,0,Normal,156,N,1.0,Flat,1
2,37,M,ATA,130,283,0,ST,98,N,0.0,Up,0
3,48,F,ASY,138,214,0,Normal,108,Y,1.5,Flat,1
4,54,M,NAP,150,195,0,Normal,122,N,0.0,Up,0
...,...,...,...,...,...,...,...,...,...,...,...,...
913,45,M,TA,110,264,0,Normal,132,N,1.2,Flat,1
914,68,M,ASY,144,193,1,Normal,141,N,3.4,Flat,1
915,57,M,ASY,130,131,0,Normal,115,Y,1.2,Flat,1
916,57,F,ATA,130,236,0,LVH,174,N,0.0,Flat,1


In [122]:
# Dados com categoricos para númericos
df["Sex"] = df["Sex"].map({"M":1, "F":0})
df["ExerciseAngina"] = df["ExerciseAngina"].map({"Y": 1, "N": 0})

df = pd.get_dummies(df, columns=["ChestPainType", "RestingECG", "ST_Slope"], drop_first=True, dtype=int)

# Separar variáveis e rótulos
X = df.drop('HeartDisease', axis=1).values

y = df["HeartDisease"]

# Padronização
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Dividir em treino e teste
X_treino, X_teste, y_treino, y_teste = train_test_split(X, y, test_size=0.2, random_state=0, stratify=y)

# Converter para tensores
X_treino = torch.tensor(X_treino, dtype=torch.float32)
X_teste = torch.tensor(X_teste, dtype=torch.float32)
y_treino = torch.tensor(y_treino, dtype=torch.float32).unsqueeze(1)
y_teste = torch.tensor(y_teste.values, dtype=torch.float32).unsqueeze(1)


# Criar DataLoaders
batch_size = 32
loader_treino = DataLoader(TensorDataset(X_treino, y_treino), batch_size=batch_size, shuffle=True)
loader_teste = DataLoader(TensorDataset(X_teste,  y_teste), batch_size=batch_size)

# Criando a Rede Neural

In [123]:
class RedeCardio(nn.Module):
    def __init__(self, input_dim):
        super().__init__()
        self.fc1 = nn.Linear(input_dim, 64) 
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(64, 32)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(32, 1) 
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        x = self.sigmoid(x)
        return(x)

In [124]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = RedeCardio(input_dim=X.shape[1]).to(device)

# Treinamento da rede

In [125]:
import torch.optim as optim

# Função de perda (como é binário, usamos Binary Cross Entropy)
criterion = nn.BCELoss()

# Otimizador (usando Adam, que é ótimo para redes neurais)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Número de épocas
num_epochs = 50

for epoca in range(num_epochs):
  model.train()
  perda_total = 0
  corretos = 0
  total = 0

  for X_batch, y_batch in loader_treino:
        X_batch, y_batch = X_batch.to(device), y_batch.to(device)

        optimizer.zero_grad()            # zera os gradientes antigos
        saida = model(X_batch)           # passa o batch pela rede
        loss = criterion(saida, y_batch) # calcula a perda
        loss.backward()                  # calcula os gradientes
        optimizer.step()                 # atualiza os pesos

        # Acumulando métricas
        perda_total += loss.item()
        pred = (saida > 0.5).float()     # converte probabilidade em 0 ou 1
        corretos += (pred == y_batch).sum().item()
        total += y_batch.size(0)

  acc = 100 * corretos / total
  print(f"Época {epoca+1}/{num_epochs} - Loss: {perda_total:.4f} - Acurácia: {acc:.2f}%")

Época 1/50 - Loss: 14.7824 - Acurácia: 76.43%
Época 2/50 - Loss: 11.8939 - Acurácia: 84.06%
Época 3/50 - Loss: 9.1243 - Acurácia: 86.24%
Época 4/50 - Loss: 7.9288 - Acurácia: 86.92%
Época 5/50 - Loss: 7.5843 - Acurácia: 86.92%
Época 6/50 - Loss: 7.3888 - Acurácia: 87.74%
Época 7/50 - Loss: 7.2552 - Acurácia: 87.74%
Época 8/50 - Loss: 7.1201 - Acurácia: 87.74%
Época 9/50 - Loss: 7.0287 - Acurácia: 87.87%
Época 10/50 - Loss: 6.9409 - Acurácia: 88.15%
Época 11/50 - Loss: 6.7766 - Acurácia: 88.15%
Época 12/50 - Loss: 6.6991 - Acurácia: 88.83%
Época 13/50 - Loss: 6.5913 - Acurácia: 88.96%
Época 14/50 - Loss: 6.5039 - Acurácia: 89.24%
Época 15/50 - Loss: 6.3948 - Acurácia: 89.37%
Época 16/50 - Loss: 6.3113 - Acurácia: 89.10%
Época 17/50 - Loss: 6.2268 - Acurácia: 89.92%
Época 18/50 - Loss: 6.1033 - Acurácia: 89.78%
Época 19/50 - Loss: 6.0148 - Acurácia: 90.05%
Época 20/50 - Loss: 5.9442 - Acurácia: 90.60%
Época 21/50 - Loss: 5.8462 - Acurácia: 90.33%
Época 22/50 - Loss: 5.7682 - Acurácia: 90