In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import torch
import torch.nn.functional as F
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Load the heart-disease dataset from a CSV in the working directory and preview it.
df = pd.read_csv('heart_disease.csv')
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221,0,1,164,1,0.0,2,0,2,1
1021,60,1,0,125,258,0,0,141,1,2.8,1,1,3,0
1022,47,1,0,110,275,0,0,118,1,1.0,1,1,2,0
1023,50,0,0,110,254,0,0,159,0,0.0,2,0,2,1


In [3]:
# Names of every numeric column in the frame, as a plain Python list.
numeric_frame = df.select_dtypes(include='number')
number_columns = list(numeric_frame.columns)
number_columns

['age',
 'sex',
 'cp',
 'trestbps',
 'chol',
 'fbs',
 'restecg',
 'thalach',
 'exang',
 'oldpeak',
 'slope',
 'ca',
 'thal',
 'target']

In [4]:
# Sanity check: count of numeric columns found above (the list includes 'target').
len(number_columns)

14

In [5]:
# Tame outliers with the IQR rule: values farther than 1.5 * IQR outside the
# quartiles are clipped to the fence (rows are kept, not dropped).
#
# Columns deliberately left untouched:
#   - "fbs":    skipped by the original analysis (presumably because its IQR
#               is 0, which would flatten the column — TODO confirm).
#   - "target": the label must never be altered by feature preprocessing. If
#               the classes were imbalanced enough for the IQR to be 0,
#               clipping would overwrite every label with a single value.
#
# NOTE(review): integer-coded columns such as "ca" and "thal" are still
# clipped, which produces fractional codes (e.g. ca == 2.5) — confirm that
# this is intended.
CLIP_EXCLUDED_COLUMNS = {"fbs", "target"}

for col in number_columns:
    if col in CLIP_EXCLUDED_COLUMNS:
        continue

    Q1 = df[col].quantile(0.25)
    Q3 = df[col].quantile(0.75)
    IQR = Q3 - Q1

    # Fences beyond which a value is treated as an outlier.
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    df[col] = df[col].clip(lower=lower_bound, upper=upper_bound)

In [6]:
# Feature matrix: every column from "age" through "thal" (label-based slice, both ends inclusive).
X = df.loc[:, "age":"thal"]
X

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,52,1,0,125,212,0,1,168,0,1.0,2,2.0,3.0
1,53,1,0,140,203,1,0,155,1,3.1,0,0.0,3.0
2,70,1,0,145,174,0,1,125,1,2.6,0,0.0,3.0
3,61,1,0,148,203,0,1,161,0,0.0,2,1.0,3.0
4,62,0,0,138,294,1,1,106,0,1.9,1,2.5,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221,0,1,164,1,0.0,2,0.0,2.0
1021,60,1,0,125,258,0,0,141,1,2.8,1,1.0,3.0
1022,47,1,0,110,275,0,0,118,1,1.0,1,1.0,2.0
1023,50,0,0,110,254,0,0,159,0,0.0,2,0.0,2.0


In [7]:
# Label column. The double brackets keep it a one-column DataFrame (2-D) rather than a Series.
y = df[['target']]
y

Unnamed: 0,target
0,0
1,0
2,0
3,0
4,0
...,...
1020,1
1021,0
1022,0
1023,1


In [8]:
# Per-feature mean used for standardization; reused later to scale the new patient's features.
# NOTE(review): computed on the full dataset, before the train/test split,
# so test rows influence the scaling statistics — confirm this is acceptable.
X_mean = X.mean()
X_mean

age          54.434146
sex           0.695610
cp            0.942439
trestbps    131.260488
chol        244.981463
fbs           0.149268
restecg       0.529756
thalach     149.153171
exang         0.336585
oldpeak       1.062244
slope         1.385366
ca            0.694146
thal          2.327317
dtype: float64

In [9]:
# Per-feature standard deviation used for standardization; reused later to scale the new patient.
# NOTE(review): computed on the full dataset, before the train/test split — confirm this is acceptable.
X_std = X.std()
X_std

age          9.072290
sex          0.460373
cp           1.029641
trestbps    16.532208
chol        47.746162
fbs          0.356527
restecg      0.527878
thalach     22.881210
exang        0.472772
oldpeak      1.141865
slope        0.617755
ca           0.890414
thal         0.609123
dtype: float64

In [10]:
# Standardize every feature (z-score): subtract the column mean, divide by the column std.
X_norm = (X - X_mean) / X_std
X_norm

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,-0.268306,0.661181,-0.915309,-0.378684,-0.690767,-0.418674,0.890820,0.823682,-0.711940,-0.054511,0.994948,1.466569,1.104347
1,-0.158080,0.661181,-0.915309,0.528635,-0.879264,2.386166,-1.003559,0.255530,1.403243,1.784585,-2.242580,-0.779577,1.104347
2,1.715758,0.661181,-0.915309,0.831075,-1.486642,-0.418674,0.890820,-1.055590,1.403243,1.346705,-2.242580,-0.779577,1.104347
3,0.723726,0.661181,-0.915309,1.012539,-0.879264,-0.418674,0.890820,0.517754,-0.711940,-0.930271,0.994948,0.343496,1.104347
4,0.833952,-1.510969,-0.915309,0.407660,1.026649,2.386166,0.890820,-1.885965,-0.711940,0.733673,-0.623816,2.028106,-0.537358
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,0.503275,0.661181,0.055904,0.528635,-0.502270,-0.418674,0.890820,0.648866,1.403243,-0.930271,0.994948,-0.779577,-0.537358
1021,0.613500,0.661181,-0.915309,-0.378684,0.272661,-0.418674,-1.003559,-0.356326,1.403243,1.521857,-0.623816,0.343496,1.104347
1022,-0.819434,0.661181,-0.915309,-1.286004,0.628711,-0.418674,-1.003559,-1.361518,1.403243,-0.054511,-0.623816,0.343496,-0.537358
1023,-0.488757,-1.510969,-0.915309,-1.286004,0.188885,-0.418674,-1.003559,0.430346,-0.711940,-0.930271,0.994948,-0.779577,-0.537358


In [11]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_norm, y, test_size=0.2, random_state=42
)

In [12]:
# Preview the training features (the original row indices show the shuffle).
X_train

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
835,-0.598983,0.661181,1.027116,-0.802100,-2.010245,-0.418674,-1.003559,-1.011886,-0.711940,-0.229663,0.994948,2.028106,-0.537358
137,1.054403,-1.510969,-0.915309,2.343275,1.675916,-0.418674,0.890820,0.211826,1.403243,-0.930271,0.994948,-0.779577,-0.537358
534,-0.047854,-1.510969,1.027116,-1.406980,0.461158,-0.418674,-1.003559,0.779978,-0.711940,-0.930271,0.994948,-0.779577,-0.537358
495,0.503275,0.661181,-0.915309,0.226196,-0.229997,-0.418674,0.890820,0.517754,-0.711940,-0.492391,-0.623816,-0.779577,1.104347
244,-0.378531,0.661181,1.027116,-0.378684,0.000388,2.386166,-1.003559,0.736274,-0.711940,1.171553,-0.623816,-0.779577,-0.537358
...,...,...,...,...,...,...,...,...,...,...,...,...,...
700,-1.480789,0.661181,1.027116,-0.076244,-0.648879,-0.418674,-1.003559,0.823682,-0.711940,0.821249,-0.623816,-0.779577,-0.537358
71,0.723726,0.661181,-0.915309,0.528635,-0.795487,-0.418674,-1.003559,-0.487438,1.403243,0.733673,0.994948,0.343496,1.104347
106,-0.378531,0.661181,-0.915309,0.528635,1.131369,-0.418674,0.890820,1.042201,1.403243,0.470945,0.994948,-0.779577,1.104347
270,-1.260337,0.661181,-0.915309,-1.286004,-0.711711,-0.418674,0.890820,0.517754,-0.711940,-0.930271,0.994948,-0.779577,1.104347


In [13]:
# Preview the training labels.
y_train

Unnamed: 0,target
835,0
137,1
534,1
495,1
244,1
...,...
700,1
71,0
106,0
270,1


In [14]:
# Preview the held-out test features.
X_test

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
527,0.833952,-1.510969,-0.915309,-0.439172,-0.753599,-0.418674,0.890820,0.605162,-0.711940,-0.930271,0.994948,-0.779577,-0.537358
359,-0.158080,-1.510969,1.027116,-0.197220,-0.606990,-0.418674,-1.003559,-1.492630,-0.711940,-0.930271,0.994948,-0.779577,-2.999917
447,0.062372,0.661181,-0.915309,1.738395,0.921928,-0.418674,-1.003559,-0.181510,1.403243,-0.229663,-0.623816,0.343496,1.104347
31,-0.488757,-1.510969,0.055904,-0.681124,-0.020556,-0.418674,0.890820,0.561458,-0.711940,0.033065,0.994948,-0.779577,-0.537358
621,-0.709209,0.661181,-0.915309,-0.076244,0.230773,2.386166,-1.003559,0.037010,1.403243,-0.930271,0.994948,1.466569,1.104347
...,...,...,...,...,...,...,...,...,...,...,...,...,...
832,1.495306,0.661181,1.027116,-0.802100,0.670599,-0.418674,0.890820,0.080714,-0.711940,-0.054511,0.994948,0.343496,1.104347
796,-1.480789,0.661181,0.055904,0.226196,-0.879264,-0.418674,0.890820,-0.749662,-0.711940,-0.930271,-0.623816,-0.779577,-2.179064
644,-1.150112,0.661181,1.027116,-0.681124,-0.397550,-0.418674,0.890820,0.867385,-0.711940,-0.930271,0.994948,-0.779577,-0.537358
404,0.723726,0.661181,-0.915309,0.528635,-0.795487,-0.418674,-1.003559,-0.487438,1.403243,0.733673,0.994948,0.343496,1.104347


In [15]:
# Preview the held-out test labels.
y_test

Unnamed: 0,target
527,1
359,1
447,0
31,1
621,0
...,...
832,1
796,1
644,1
404,0


In [16]:
# Training features converted from the DataFrame's values to a float32 tensor.
X_train_tensor = torch.tensor(X_train.values, dtype=torch.float32)
X_train_tensor

tensor([[-0.5990,  0.6612,  1.0271,  ...,  0.9949,  2.0281, -0.5374],
        [ 1.0544, -1.5110, -0.9153,  ...,  0.9949, -0.7796, -0.5374],
        [-0.0479, -1.5110,  1.0271,  ...,  0.9949, -0.7796, -0.5374],
        ...,
        [-0.3785,  0.6612, -0.9153,  ...,  0.9949, -0.7796,  1.1043],
        [-1.2603,  0.6612, -0.9153,  ...,  0.9949, -0.7796,  1.1043],
        [-0.2683,  0.6612, -0.9153,  ...,  0.9949,  0.3435, -0.5374]])

In [17]:
# Training labels as a float32 tensor; display only the first ten rows.
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32)
y_train_tensor[0:10]

tensor([[0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.]])

In [18]:
# Test features converted from the DataFrame's values to a float32 tensor.
X_test_tensor = torch.tensor(X_test.values, dtype=torch.float32)
X_test_tensor

tensor([[ 0.8340, -1.5110, -0.9153,  ...,  0.9949, -0.7796, -0.5374],
        [-0.1581, -1.5110,  1.0271,  ...,  0.9949, -0.7796, -2.9999],
        [ 0.0624,  0.6612, -0.9153,  ..., -0.6238,  0.3435,  1.1043],
        ...,
        [-1.1501,  0.6612,  1.0271,  ...,  0.9949, -0.7796, -0.5374],
        [ 0.7237,  0.6612, -0.9153,  ...,  0.9949,  0.3435,  1.1043],
        [ 0.3930,  0.6612,  1.0271,  ..., -0.6238,  0.3435,  1.1043]])

In [19]:
# Test labels as a float32 tensor (the whole tensor is displayed).
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32)
y_test_tensor

tensor([[1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [0.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [0.],
        [0.],
        [0.],
        [1.],
        [1.],
        [1.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
        [0.],
        [1.],
        [0.],
      

In [20]:
# Width of the network's input layer: one unit per feature column.
input_dim = len(X_train.columns)
input_dim

13

In [21]:
# Seed PyTorch's global RNG before any weights are created so that weight
# initialization, dropout masks and DataLoader shuffling are repeatable when
# the notebook is run top to bottom from a fresh kernel.
torch.manual_seed(42)

# Binary classifier: three hidden blocks of Linear(30) -> BatchNorm -> ReLU ->
# Dropout(0.5), followed by a single-unit output squashed by a Sigmoid.
#
# Layer positions matter: the Linear layers sit at indices 0, 4, 8 and 12,
# and the initialization cells index them directly.
model = nn.Sequential(
    # Hidden block 1
    nn.Linear(input_dim, 30),
    nn.BatchNorm1d(30),
    nn.ReLU(),
    nn.Dropout(0.5),

    # Hidden block 2
    nn.Linear(30, 30),
    nn.BatchNorm1d(30),
    nn.ReLU(),
    nn.Dropout(0.5),

    # Hidden block 3
    nn.Linear(30, 30),
    nn.BatchNorm1d(30),
    nn.ReLU(),
    nn.Dropout(0.5),

    # Output: one probability per sample
    nn.Linear(30, 1),
    nn.Sigmoid()
)

In [22]:
# Kaiming (He) normal initialization for the first Linear layer's weights.
# The in-place initializer returns the tensor, so the cell displays it.
nn.init.kaiming_normal_(model[0].weight, nonlinearity='relu')

Parameter containing:
tensor([[-0.0185, -0.4776,  0.0941, -0.1298,  0.4579, -0.2252, -0.1638,  0.3047,
         -0.3451,  0.3553,  0.2266,  0.7778,  0.3619],
        [ 0.5266, -0.5730, -0.2676,  0.5044, -0.3190, -0.3944, -0.2683,  0.4877,
          0.5563, -0.3177,  0.4416, -0.2723, -0.1136],
        [ 0.1023,  0.1889,  0.1338,  0.3909,  0.0282, -0.7949, -0.3782,  0.0732,
          0.2046,  0.3356,  1.2144, -0.0175, -0.2694],
        [ 0.3233,  0.0755, -0.5232, -0.6390, -0.5722, -0.2809, -0.4386, -0.2894,
          0.3968, -0.0295, -0.4713, -0.4108,  0.1949],
        [ 0.1624, -0.0895,  0.7312,  0.1477,  0.6164,  0.1569,  0.1193,  0.4234,
         -0.6447,  0.5344, -0.6782, -0.1910, -0.8045],
        [-0.3599, -0.7237, -0.4309,  0.5706,  0.2109, -0.5771, -0.0177, -0.3856,
         -0.1196, -0.4623, -0.0129, -0.8691, -0.3751],
        [-0.0665,  0.0599,  0.4901,  0.6992,  0.3374,  0.0430,  0.3241,  0.3643,
         -0.2954, -0.4498, -0.4469, -0.7584,  0.1432],
        [-0.3123,  0.6675,

In [23]:
# Zero the first Linear layer's bias in place.
# This must be a call to `zeros_`; an assignment such as `nn.init.zeros = ...`
# leaves the bias untouched and only binds a stray attribute on `nn.init`.
nn.init.zeros_(model[0].bias)

In [24]:
# Kaiming (He) normal initialization for the second Linear layer's weights (index 4).
nn.init.kaiming_normal_(model[4].weight, nonlinearity='relu')

Parameter containing:
tensor([[ 1.2271e-01,  1.5361e-01,  7.1728e-01,  2.4654e-01,  1.5821e-01,
          1.7495e-01, -3.5083e-01, -1.1256e-01, -5.0050e-01,  1.5831e-01,
          3.2704e-01, -8.2839e-04,  7.4599e-03, -8.7174e-02,  4.8401e-01,
          3.3883e-01, -4.6170e-01, -2.4134e-01,  1.3773e-01,  3.3134e-01,
         -2.4670e-01, -5.9736e-02,  9.9417e-02,  2.0570e-01, -4.5248e-01,
         -7.1312e-02,  8.1014e-02, -1.7555e-01, -1.8962e-01,  2.6325e-01],
        [ 5.0884e-01,  5.3011e-02, -2.0857e-01,  4.4940e-01, -3.5871e-02,
         -3.0331e-01,  1.2250e-01, -8.2194e-02,  4.9385e-01, -2.4975e-01,
         -3.8192e-01, -1.1696e-01, -1.7959e-01, -1.3150e-01,  2.0878e-02,
         -8.1220e-02,  1.9213e-01,  5.4109e-02,  6.6847e-01, -4.2628e-01,
          1.3356e-01, -1.8348e-01,  3.8434e-01, -1.1151e-01,  4.1106e-01,
         -4.0289e-01, -2.0941e-01,  1.2020e-01,  6.8852e-02,  2.6252e-01],
        [ 1.2987e-01, -3.1421e-02,  3.0359e-01,  2.4791e-01,  3.3643e-01,
          2.02

In [25]:
# Zero the second Linear layer's bias in place.
nn.init.zeros_(model[4].bias)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.], requires_grad=True)

In [26]:
# Kaiming (He) normal initialization for the third Linear layer's weights (index 8).
nn.init.kaiming_normal_(model[8].weight, nonlinearity='relu')

Parameter containing:
tensor([[-3.1620e-01, -2.9850e-01, -1.8550e-01, -4.8472e-01, -1.8880e-01,
         -1.3336e-01,  1.6449e-01,  1.5265e-01,  3.7138e-01,  1.6677e-01,
         -3.8458e-02, -1.0226e-01, -3.0172e-03,  2.1950e-01, -9.5210e-02,
          2.7215e-01,  3.0576e-01, -4.0617e-01,  5.8847e-01,  3.2591e-01,
         -2.4866e-01, -1.7978e-01, -1.6275e-02,  3.1306e-01,  6.8883e-01,
         -1.8103e-01,  1.3756e-01,  4.3656e-02,  1.7724e-01, -2.4748e-01],
        [-2.3037e-01, -5.6069e-02,  7.1278e-03,  4.3664e-01,  2.8411e-02,
         -3.4714e-01, -9.5276e-02,  1.0355e-01,  7.1247e-03, -2.4335e-01,
         -3.1861e-02,  1.0826e-01,  4.8880e-01, -1.5100e-01, -2.3466e-01,
          6.8257e-01, -7.6678e-02,  2.6314e-02,  1.8901e-01,  1.6176e-01,
         -6.9334e-02,  7.3661e-02,  8.7153e-03,  7.3125e-02, -2.4139e-01,
         -3.8425e-01,  3.1678e-02,  1.6758e-01, -4.7865e-02, -1.5231e-01],
        [ 1.6393e-01,  9.4833e-02, -1.3955e-01,  3.9118e-01, -2.0475e-01,
          2.25

In [27]:
# Zero the third Linear layer's bias in place.
nn.init.zeros_(model[8].bias)

Parameter containing:
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
        0., 0., 0., 0., 0., 0.], requires_grad=True)

In [28]:
# Xavier (Glorot) normal initialization for the output Linear layer (index 12),
# which is followed by a Sigmoid rather than a ReLU.
nn.init.xavier_normal_(model[12].weight)

Parameter containing:
tensor([[-0.3756,  0.3960,  0.5266,  0.1795, -0.0534,  0.2441,  0.4888, -0.1222,
          0.1260, -0.2569,  0.2241,  0.0637,  0.2251, -0.2995,  0.7065,  0.3869,
         -0.1814, -0.0340, -0.2247,  0.0124,  0.1624, -0.5891,  0.2028, -0.0649,
          0.5487, -0.2455, -0.4916, -0.0107, -0.4120,  0.7142]],
       requires_grad=True)

In [29]:
# Zero the output layer's bias in place.
nn.init.zeros_(model[12].bias)

Parameter containing:
tensor([0.], requires_grad=True)

In [30]:
# Adam optimizer over all of the model's parameters, learning rate 0.005.
optimizer = optim.Adam(model.parameters(), lr=0.005)
optimizer

Adam (
Parameter Group 0
    amsgrad: False
    betas: (0.9, 0.999)
    capturable: False
    differentiable: False
    eps: 1e-08
    foreach: None
    fused: None
    lr: 0.005
    maximize: False
    weight_decay: 0
)

In [31]:
# Binary cross-entropy computed on probabilities (the model ends in a Sigmoid).
loss_fn = nn.BCELoss()
loss_fn

BCELoss()

In [32]:
# Number of samples per mini-batch.
BATCH_SIZE = 64

In [33]:
# Pair each training feature row with its label so they are batched together.
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)

In [34]:
# Mini-batch iterator over the training set; shuffle=True reorders the samples every epoch.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [35]:
# Number of full passes over the training set.
epochs = 1000

In [36]:
# Mini-batch training loop; prints the mean batch loss every 100 epochs.
#
# Training mode is set explicitly so that Dropout and BatchNorm behave as
# training layers even when this cell is re-run after an evaluation cell has
# switched the model to eval mode.
model.train()

for epoch in range(epochs):
    epoch_loss = 0.0

    for X_batch, y_batch in train_loader:
        # Clear gradients left over from the previous step before the new pass.
        optimizer.zero_grad()

        y_pred = model(X_batch)
        loss = loss_fn(y_pred, y_batch)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Average over the number of batches, not the number of samples.
    epoch_loss /= len(train_loader)

    if (epoch+1) % 100 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}")

Epoch 100/1000, Loss: 0.2368
Epoch 200/1000, Loss: 0.1792
Epoch 300/1000, Loss: 0.1945
Epoch 400/1000, Loss: 0.1718
Epoch 500/1000, Loss: 0.1764
Epoch 600/1000, Loss: 0.1636
Epoch 700/1000, Loss: 0.1692
Epoch 800/1000, Loss: 0.1760
Epoch 900/1000, Loss: 0.1732
Epoch 1000/1000, Loss: 0.1369


In [37]:
# Evaluate on the held-out test set. eval() puts Dropout and BatchNorm into
# inference mode; no_grad() skips gradient tracking for the forward pass.
model.eval()
with torch.no_grad():
    y_test_pred = model(X_test_tensor)

    # Threshold the predicted probabilities at 0.5 to get hard 0/1 labels.
    y_test_pred_label = (y_test_pred >= 0.5).float()

    # Fraction of test samples whose predicted label matches the true label.
    is_correct = (y_test_pred_label == y_test_tensor)
    accuracy = is_correct.float().mean()

print(f"\n테스트 정확도: {accuracy.item() * 100}%")


테스트 정확도: 98.04878234863281%


In [38]:
# A single hypothetical patient described with the same 13 feature columns
# (in the same order) as the training features.
patient_1_features = {
    'age': 59,
    'sex': 0,
    'cp': 0,
    'trestbps': 98,
    'chol': 253,
    'fbs': 0,
    'restecg': 0,
    'thalach': 199,
    'exang': 0,
    'oldpeak': 1.0,
    'slope': 1,
    'ca': 0,
    'thal': 2,
}

# One record -> one-row DataFrame.
new_patient_1 = pd.DataFrame([patient_1_features])

new_patient_1

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,59,0,0,98,253,0,0,199,0,1.0,1,0,2


In [39]:
# Score the new patient: scale with the same mean/std used for the training
# features, then run one forward pass in eval mode without gradient tracking.
model.eval()

new_patient_1_norm = (new_patient_1 - X_mean) / X_std
patient1_tensor = torch.tensor(new_patient_1_norm.values, dtype=torch.float32)

with torch.no_grad():
    pred1 = model(patient1_tensor)

print(f"\n 환자 1의 심장병 예측 확률: {pred1.item():.4f}")


 환자 1의 심장병 예측 확률: 0.8942
