In [1]:
!pip install scikit-learn



In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import models, layers

In [3]:
df = pd.read_csv('heart_disease.csv')
df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221,0,1,164,1,0.0,2,0,2,1
1021,60,1,0,125,258,0,0,141,1,2.8,1,1,3,0
1022,47,1,0,110,275,0,0,118,1,1.0,1,1,2,0
1023,50,0,0,110,254,0,0,159,0,0.0,2,0,2,1


In [4]:
number_columns = df.select_dtypes(include='number').columns.tolist()
number_columns

['age',
 'sex',
 'cp',
 'trestbps',
 'chol',
 'fbs',
 'restecg',
 'thalach',
 'exang',
 'oldpeak',
 'slope',
 'ca',
 'thal',
 'target']

In [5]:
len(number_columns)

14

In [6]:
for col in number_columns:
    if col == "fbs":
        continue

    Q1 = df[col].quantile(0.25)
    Q3 = df[col].quantile(0.75)

    IQR = Q3 - Q1

    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR

    df[col] = df[col].clip(lower=lower_bound, upper=upper_bound)

In [7]:
X = df.loc[:, "age":"thal"]
X

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,52,1,0,125,212,0,1,168,0,1.0,2,2.0,3.0
1,53,1,0,140,203,1,0,155,1,3.1,0,0.0,3.0
2,70,1,0,145,174,0,1,125,1,2.6,0,0.0,3.0
3,61,1,0,148,203,0,1,161,0,0.0,2,1.0,3.0
4,62,0,0,138,294,1,1,106,0,1.9,1,2.5,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,59,1,1,140,221,0,1,164,1,0.0,2,0.0,2.0
1021,60,1,0,125,258,0,0,141,1,2.8,1,1.0,3.0
1022,47,1,0,110,275,0,0,118,1,1.0,1,1.0,2.0
1023,50,0,0,110,254,0,0,159,0,0.0,2,0.0,2.0


In [8]:
y = df[['target']]
y

Unnamed: 0,target
0,0
1,0
2,0
3,0
4,0
...,...
1020,1
1021,0
1022,0
1023,1


In [9]:
X_mean = X.mean()
X_mean

age          54.434146
sex           0.695610
cp            0.942439
trestbps    131.260488
chol        244.981463
fbs           0.149268
restecg       0.529756
thalach     149.153171
exang         0.336585
oldpeak       1.062244
slope         1.385366
ca            0.694146
thal          2.327317
dtype: float64

In [10]:
X_std = X.std()
X_std

age          9.072290
sex          0.460373
cp           1.029641
trestbps    16.532208
chol        47.746162
fbs          0.356527
restecg      0.527878
thalach     22.881210
exang        0.472772
oldpeak      1.141865
slope        0.617755
ca           0.890414
thal         0.609123
dtype: float64

In [11]:
X_norm = (X - X_mean) / X_std
X_norm

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,-0.268306,0.661181,-0.915309,-0.378684,-0.690767,-0.418674,0.890820,0.823682,-0.711940,-0.054511,0.994948,1.466569,1.104347
1,-0.158080,0.661181,-0.915309,0.528635,-0.879264,2.386166,-1.003559,0.255530,1.403243,1.784585,-2.242580,-0.779577,1.104347
2,1.715758,0.661181,-0.915309,0.831075,-1.486642,-0.418674,0.890820,-1.055590,1.403243,1.346705,-2.242580,-0.779577,1.104347
3,0.723726,0.661181,-0.915309,1.012539,-0.879264,-0.418674,0.890820,0.517754,-0.711940,-0.930271,0.994948,0.343496,1.104347
4,0.833952,-1.510969,-0.915309,0.407660,1.026649,2.386166,0.890820,-1.885965,-0.711940,0.733673,-0.623816,2.028106,-0.537358
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1020,0.503275,0.661181,0.055904,0.528635,-0.502270,-0.418674,0.890820,0.648866,1.403243,-0.930271,0.994948,-0.779577,-0.537358
1021,0.613500,0.661181,-0.915309,-0.378684,0.272661,-0.418674,-1.003559,-0.356326,1.403243,1.521857,-0.623816,0.343496,1.104347
1022,-0.819434,0.661181,-0.915309,-1.286004,0.628711,-0.418674,-1.003559,-1.361518,1.403243,-0.054511,-0.623816,0.343496,-0.537358
1023,-0.488757,-1.510969,-0.915309,-1.286004,0.188885,-0.418674,-1.003559,0.430346,-0.711940,-0.930271,0.994948,-0.779577,-0.537358


In [12]:
X_train, X_test, y_train, y_test = train_test_split(
    X_norm, y, test_size=0.2, random_state=42
)

In [13]:
X_train

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
835,-0.598983,0.661181,1.027116,-0.802100,-2.010245,-0.418674,-1.003559,-1.011886,-0.711940,-0.229663,0.994948,2.028106,-0.537358
137,1.054403,-1.510969,-0.915309,2.343275,1.675916,-0.418674,0.890820,0.211826,1.403243,-0.930271,0.994948,-0.779577,-0.537358
534,-0.047854,-1.510969,1.027116,-1.406980,0.461158,-0.418674,-1.003559,0.779978,-0.711940,-0.930271,0.994948,-0.779577,-0.537358
495,0.503275,0.661181,-0.915309,0.226196,-0.229997,-0.418674,0.890820,0.517754,-0.711940,-0.492391,-0.623816,-0.779577,1.104347
244,-0.378531,0.661181,1.027116,-0.378684,0.000388,2.386166,-1.003559,0.736274,-0.711940,1.171553,-0.623816,-0.779577,-0.537358
...,...,...,...,...,...,...,...,...,...,...,...,...,...
700,-1.480789,0.661181,1.027116,-0.076244,-0.648879,-0.418674,-1.003559,0.823682,-0.711940,0.821249,-0.623816,-0.779577,-0.537358
71,0.723726,0.661181,-0.915309,0.528635,-0.795487,-0.418674,-1.003559,-0.487438,1.403243,0.733673,0.994948,0.343496,1.104347
106,-0.378531,0.661181,-0.915309,0.528635,1.131369,-0.418674,0.890820,1.042201,1.403243,0.470945,0.994948,-0.779577,1.104347
270,-1.260337,0.661181,-0.915309,-1.286004,-0.711711,-0.418674,0.890820,0.517754,-0.711940,-0.930271,0.994948,-0.779577,1.104347


In [14]:
y_train

Unnamed: 0,target
835,0
137,1
534,1
495,1
244,1
...,...
700,1
71,0
106,0
270,1


In [15]:
X_test

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
527,0.833952,-1.510969,-0.915309,-0.439172,-0.753599,-0.418674,0.890820,0.605162,-0.711940,-0.930271,0.994948,-0.779577,-0.537358
359,-0.158080,-1.510969,1.027116,-0.197220,-0.606990,-0.418674,-1.003559,-1.492630,-0.711940,-0.930271,0.994948,-0.779577,-2.999917
447,0.062372,0.661181,-0.915309,1.738395,0.921928,-0.418674,-1.003559,-0.181510,1.403243,-0.229663,-0.623816,0.343496,1.104347
31,-0.488757,-1.510969,0.055904,-0.681124,-0.020556,-0.418674,0.890820,0.561458,-0.711940,0.033065,0.994948,-0.779577,-0.537358
621,-0.709209,0.661181,-0.915309,-0.076244,0.230773,2.386166,-1.003559,0.037010,1.403243,-0.930271,0.994948,1.466569,1.104347
...,...,...,...,...,...,...,...,...,...,...,...,...,...
832,1.495306,0.661181,1.027116,-0.802100,0.670599,-0.418674,0.890820,0.080714,-0.711940,-0.054511,0.994948,0.343496,1.104347
796,-1.480789,0.661181,0.055904,0.226196,-0.879264,-0.418674,0.890820,-0.749662,-0.711940,-0.930271,-0.623816,-0.779577,-2.179064
644,-1.150112,0.661181,1.027116,-0.681124,-0.397550,-0.418674,0.890820,0.867385,-0.711940,-0.930271,0.994948,-0.779577,-0.537358
404,0.723726,0.661181,-0.915309,0.528635,-0.795487,-0.418674,-1.003559,-0.487438,1.403243,0.733673,0.994948,0.343496,1.104347


In [16]:
y_test

Unnamed: 0,target
527,1
359,1
447,0
31,1
621,0
...,...
832,1
796,1
644,1
404,0


In [17]:
model = models.Sequential([
    layers.Dense(30, input_dim=X.shape[1],
                 kernel_initializer=tf.keras.initializers.HeNormal()),
    layers.BatchNormalization(),
    layers.ReLU(),
    layers.Dropout(0.5),

    layers.Dense(30, kernel_initializer=tf.keras.initializers.HeNormal()),
    layers.BatchNormalization(),
    layers.ReLU(),
    layers.Dropout(0.5),

    layers.Dense(30, kernel_initializer=tf.keras.initializers.HeNormal()),
    layers.BatchNormalization(),
    layers.ReLU(),
    layers.Dropout(0.5),

    layers.Dense(1, activation='sigmoid', kernel_initializer=tf.keras.initializers.GlorotNormal())
])

In [18]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss='binary_crossentropy',
    metrics=['accuracy']
)

In [19]:
model.fit(
    X_train,
    y_train,
    epochs=1000,
    batch_size=64,
    verbose=1,
    validation_data=(X_test, y_test)
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x29c0a8ab350>

In [20]:
loss, acc = model.evaluate(X_test, y_test)

print(f"\n테스트 정확도: {acc * 100:.4f}%")

print(f"\ncost: {loss:.4f}")


테스트 정확도: 93.1707%

cost: 0.1159


In [21]:
new_patient_1 = pd.DataFrame({
    'age': [59],
    'sex': [0],
    'cp': [0],
    'trestbps': [98],
    'chol': [253],
    'fbs': [0],
    'restecg': [0],
    'thalach': [119],
    'exang': [0],
    'oldpeak': [1.0],
    'slope': [1],
    'ca': [0],
    'thal': [2]
})

new_patient_1

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,59,0,0,98,253,0,0,119,0,1.0,1,0,2


In [22]:
new_patient_1_scaled = (new_patient_1 - X_mean) / X_std

new_patient_1_scaled

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,0.503275,-1.510969,-0.915309,-2.01186,0.167941,-0.418674,-1.003559,-1.317814,-0.71194,-0.054511,-0.623816,-0.779577,-0.537358


In [23]:
pred_prob = model.predict(new_patient_1_scaled)

print(f"\n환자 1의 심장병 예측 확률: {pred_prob}")


환자 1의 심장병 예측 확률: [[0.9870695]]
