In [None]:
%pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [None]:
import pandas as pd
from ucimlrepo import fetch_ucirepo

# Download the heart disease dataset (UCI repository id 45).
heart_disease = fetch_ucirepo(id=45)

# Pull the feature table and the target table out of the fetched bundle.
dataset = heart_disease.data
X, y = dataset.features, dataset.targets

# Write both tables to CSV without the index column.
for frame, csv_path in (
    (X, "heart_disease_features.csv"),
    (y, "heart_disease_targets.csv"),
):
    frame.to_csv(csv_path, index=False)


In [None]:
features_path = "heart_disease_features.csv"
targets_path = "heart_disease_targets.csv"

# Reload the tables from the CSV files written by the previous cell.
features_df = pd.read_csv(features_path)
targets_df = pd.read_csv(targets_path)

# Display basic info of the datasets.
# DataFrame.info() prints its report and returns None, so each call is its own
# statement; combining them in one expression made the cell echo "(None, None)".
features_df.info()
targets_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 13 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        299 non-null    float64
 12  thal      301 non-null    float64
dtypes: float64(3), int64(10)
memory usage: 30.9 KB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   num     303 non-null    int64
dtypes: int64(1)
memory usage: 2.5 KB


(None, None)

In [None]:
# Per-column count of missing values: features first, then the target.
for frame in (features_df, targets_df):
    print(frame.isnull().sum())


age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          4
thal        2
dtype: int64
num    0
dtype: int64


In [None]:
# Preview the feature table as the cell's rich output.
features_df

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal
0,63,1,1,145,233,1,2,150,0,2.3,3,0.0,6.0
1,67,1,4,160,286,0,2,108,1,1.5,2,3.0,3.0
2,67,1,4,120,229,0,2,129,1,2.6,2,2.0,7.0
3,37,1,3,130,250,0,0,187,0,3.5,3,0.0,3.0
4,41,0,2,130,204,0,2,172,0,1.4,1,0.0,3.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
298,45,1,1,110,264,0,0,132,0,1.2,2,0.0,7.0
299,68,1,4,144,193,1,0,141,0,3.4,2,2.0,7.0
300,57,1,4,130,131,0,0,115,1,1.2,2,1.0,7.0
301,57,0,2,130,236,0,2,174,0,0.0,2,1.0,3.0


In [None]:
# Preview the target table (single column "num") as the cell's rich output.
targets_df

Unnamed: 0,num
0,0
1,2
2,1
3,0
4,0
...,...
298,1
299,2
300,3
301,1


In [None]:
# Class distribution of the raw target column.
targets_df["num"].value_counts()

Unnamed: 0_level_0,count
num,Unnamed: 1_level_1
0,164
1,55
2,36
3,35
4,13


# Data Preprocessing

In [None]:
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import numpy as np

# Convert target to binary classification: 0 = no disease, any value > 0 = disease.
targets = np.where(targets_df.values.flatten() > 0, 1, 0)

# Split BEFORE fitting any preprocessing step (80% train, 20% test). Fitting the
# imputer/scaler on all rows would leak test-set statistics into training.
# The row assignment is unchanged: it depends only on the row count and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    features_df, targets, test_size=0.2, random_state=42
)

# Mean imputation, with the column means learned from the training rows only.
# features_df itself is left untouched so re-running earlier cells shows the same data.
imputer = SimpleImputer(strategy="mean")
X_train_imputed = imputer.fit_transform(X_train_raw)
X_test_imputed = imputer.transform(X_test_raw)

# Standardize features, with mean/variance learned from the training rows only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_imputed)
X_test = scaler.transform(X_test_imputed)

# Kept for backward compatibility with code that expects the full scaled matrix;
# it is produced with the train-fitted transformers, not refit on all rows.
features_scaled = scaler.transform(imputer.transform(features_df))

X_train.shape, X_test.shape, y_train.shape, y_test.shape


((242, 13), (61, 13), (242,), (61,))

# ANN

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

def create_ann(hidden_layers=(16, 8), activation='relu', optimizer='adam', learning_rate=0.001, input_dim=None):
    """Build and compile a fully connected network for binary classification.

    Args:
        hidden_layers: Sequence of unit counts, one entry per hidden Dense layer.
            Must contain at least one entry.
        activation: Activation function used by every hidden layer.
        optimizer: Optimizer name; one of 'adam', 'sgd', 'rmsprop'.
        learning_rate: Learning rate handed to the chosen optimizer.
        input_dim: Number of input features. When None, falls back to
            ``X_train.shape[1]`` from the notebook's global namespace.

    Returns:
        A compiled ``keras.Sequential`` model ending in a single sigmoid unit,
        using binary cross-entropy loss and reporting accuracy.

    Raises:
        ValueError: If ``hidden_layers`` is empty or ``optimizer`` is not supported.
    """
    optimizer_classes = {
        'adam': keras.optimizers.Adam,
        'sgd': keras.optimizers.SGD,
        'rmsprop': keras.optimizers.RMSprop,
    }
    # Validate arguments up front so a bad call fails before any layers are built.
    if optimizer not in optimizer_classes:
        raise ValueError("Unsupported optimizer. Choose from: 'adam', 'sgd', 'rmsprop'.")
    if len(hidden_layers) == 0:
        raise ValueError("hidden_layers must contain at least one layer size.")

    if input_dim is None:
        input_dim = X_train.shape[1]

    # An explicit Input object replaces the `input_shape=` argument on the first
    # Dense layer, which newer Keras versions warn about for Sequential models.
    model = keras.Sequential()
    model.add(keras.Input(shape=(input_dim,)))

    for nodes in hidden_layers:
        model.add(layers.Dense(nodes, activation=activation))

    # Single sigmoid unit: the output is the predicted probability of the positive class.
    model.add(layers.Dense(1, activation='sigmoid'))

    opt = optimizer_classes[optimizer](learning_rate=learning_rate)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Baseline network: hidden layers of 32 and 16 units, ReLU, Adam with lr=0.001.
ann_model = create_ann(
    hidden_layers=[32, 16],
    activation='relu',
    optimizer='adam',
    learning_rate=0.001,
)

# Train for 50 epochs in batches of 16; the held-out split is passed as
# validation data so per-epoch val_loss/val_accuracy are logged.
history = ann_model.fit(
    X_train,
    y_train,
    epochs=50,
    batch_size=16,
    validation_data=(X_test, y_test),
    verbose=1,
)

# Final loss/accuracy on the held-out split.
test_loss, test_accuracy = ann_model.evaluate(X_test, y_test, verbose=0)
print(f"Test Accuracy: {test_accuracy:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 94ms/step - accuracy: 0.5076 - loss: 0.8521 - val_accuracy: 0.4754 - val_loss: 0.7912
Epoch 2/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.5256 - loss: 0.7135 - val_accuracy: 0.5738 - val_loss: 0.6852
Epoch 3/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6784 - loss: 0.6168 - val_accuracy: 0.6230 - val_loss: 0.6061
Epoch 4/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7558 - loss: 0.5532 - val_accuracy: 0.7377 - val_loss: 0.5473
Epoch 5/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.8246 - loss: 0.4966 - val_accuracy: 0.8197 - val_loss: 0.4978
Epoch 6/50
[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - accuracy: 0.7752 - loss: 0.4986 - val_accuracy: 0.8361 - val_loss: 0.4562
Epoch 7/50
[1m16/16[0m [32m━━━━━━━━━

# Define a Tuning Function

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam, SGD, RMSprop
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import numpy as np

def create_ann(hidden_layers=(32, 16), activation='relu', optimizer='adam', learning_rate=0.001, input_dim=None):
    """Build and compile a fully connected network for binary classification.

    This is the model factory used by the hyperparameter search.

    Args:
        hidden_layers: Sequence of unit counts, one entry per hidden Dense layer.
            Must contain at least one entry.
        activation: Activation function used by every hidden layer.
        optimizer: Optimizer name; one of 'adam', 'sgd', 'rmsprop'.
        learning_rate: Learning rate handed to the chosen optimizer.
        input_dim: Number of input features. When None, falls back to
            ``X_train.shape[1]`` from the notebook's global namespace.

    Returns:
        A compiled ``keras.Sequential`` model ending in a single sigmoid unit,
        using binary cross-entropy loss and reporting accuracy.

    Raises:
        ValueError: If ``hidden_layers`` is empty or ``optimizer`` is not supported.
    """
    optimizer_classes = {'adam': Adam, 'sgd': SGD, 'rmsprop': RMSprop}
    # Validate arguments up front so a bad call fails before any layers are built.
    if optimizer not in optimizer_classes:
        raise ValueError("Unsupported optimizer. Choose from: 'adam', 'sgd', 'rmsprop'.")
    if len(hidden_layers) == 0:
        raise ValueError("hidden_layers must contain at least one layer size.")

    if input_dim is None:
        input_dim = X_train.shape[1]

    # An explicit Input object replaces the `input_shape=` argument on the first
    # Dense layer, which newer Keras versions warn about for Sequential models.
    model = keras.Sequential()
    model.add(keras.Input(shape=(input_dim,)))

    for nodes in hidden_layers:
        model.add(layers.Dense(nodes, activation=activation))

    # Single sigmoid unit: the output is the predicted probability of the positive class.
    model.add(layers.Dense(1, activation='sigmoid'))

    opt = optimizer_classes[optimizer](learning_rate=learning_rate)
    model.compile(optimizer=opt, loss='binary_crossentropy', metrics=['accuracy'])
    return model


# Define Different Hyperparameter Values for Tuning

In [None]:
# Search space for the grid search; every combination of the four lists is tried.

# Hidden-layer layouts: one unit count per Dense layer.
hidden_layer_options = [
    [32, 16],
    [64, 32, 16],
    [128, 64, 32],
]

# Activation applied to every hidden layer.
activation_options = [
    'relu',
    'tanh',
    'sigmoid',
]

# Optimizer names understood by create_ann.
optimizer_options = [
    'adam',
    'sgd',
    'rmsprop',
]

# Learning rates passed to the optimizer.
learning_rate_options = [
    0.01,
    0.001,
    0.0005,
]


# Run Hyperparameter Tuning (Grid Search)

In [None]:
from itertools import product

# Grid search over every hyperparameter combination.
#
# Model selection uses a validation split carved out of the TRAINING data.
# Picking the winner by test accuracy would tune on the test set and make the
# reported test score optimistically biased.
best_accuracy = 0          # test accuracy of the selected configuration
best_val_accuracy = -1.0   # validation accuracy used to select the configuration
best_config = {}

search_space = product(hidden_layer_options, activation_options, optimizer_options, learning_rate_options)

for hidden_layers, activation, optimizer, learning_rate in search_space:

    print(f"Testing Configuration: Layers {hidden_layers}, Activation {activation}, Optimizer {optimizer}, Learning Rate {learning_rate}")

    # Release state left behind by the previous model; without this, building
    # many models in one loop keeps growing memory use.
    keras.backend.clear_session()

    ann_model = create_ann(hidden_layers=hidden_layers, activation=activation, optimizer=optimizer, learning_rate=learning_rate)

    # validation_split holds out the last 20% of the training rows for selection.
    history = ann_model.fit(X_train, y_train, epochs=50, batch_size=16, validation_split=0.2, verbose=0)
    val_accuracy = history.history["val_accuracy"][-1]

    # The test set is only used for reporting, never for choosing a configuration.
    test_loss, test_accuracy = ann_model.evaluate(X_test, y_test, verbose=0)

    print(f"Validation Accuracy: {val_accuracy:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}")

    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        best_accuracy = test_accuracy
        best_config = {
            "hidden_layers": hidden_layers,
            "activation": activation,
            "optimizer": optimizer,
            "learning_rate": learning_rate
        }

print("\n Best Configuration Found:")
print(best_config)
print(f"Best Validation Accuracy: {best_val_accuracy:.4f}")
print(f"Test Accuracy of Best Configuration: {best_accuracy:.4f}")


Testing Configuration: Layers [32, 16], Activation relu, Optimizer adam, Learning Rate 0.01
Test Accuracy: 0.8525
Testing Configuration: Layers [32, 16], Activation relu, Optimizer adam, Learning Rate 0.001
Test Accuracy: 0.8852
Testing Configuration: Layers [32, 16], Activation relu, Optimizer adam, Learning Rate 0.0005
Test Accuracy: 0.8525
Testing Configuration: Layers [32, 16], Activation relu, Optimizer sgd, Learning Rate 0.01
Test Accuracy: 0.8852
Testing Configuration: Layers [32, 16], Activation relu, Optimizer sgd, Learning Rate 0.001
Test Accuracy: 0.7541
Testing Configuration: Layers [32, 16], Activation relu, Optimizer sgd, Learning Rate 0.0005
Test Accuracy: 0.8361
Testing Configuration: Layers [32, 16], Activation relu, Optimizer rmsprop, Learning Rate 0.01
Test Accuracy: 0.8197
Testing Configuration: Layers [32, 16], Activation relu, Optimizer rmsprop, Learning Rate 0.001
Test Accuracy: 0.8525
Testing Configuration: Layers [32, 16], Activation relu, Optimizer rmsprop, Le

In [None]:
import pandas as pd
from itertools import product

# Grid search over every hyperparameter combination, recording each run.
#
# Model selection uses a validation split carved out of the TRAINING data.
# Picking the winner by test accuracy would tune on the test set and make the
# reported test score optimistically biased.
results = []

best_accuracy = 0          # test accuracy of the selected configuration
best_val_accuracy = -1.0   # validation accuracy used to select the configuration
best_config = {}

search_space = product(hidden_layer_options, activation_options, optimizer_options, learning_rate_options)

for hidden_layers, activation, optimizer, learning_rate in search_space:

    print(f"Testing Configuration: Layers {hidden_layers}, Activation {activation}, Optimizer {optimizer}, Learning Rate {learning_rate}")

    # Release state left behind by the previous model; without this, building
    # many models in one loop keeps growing memory use.
    keras.backend.clear_session()

    ann_model = create_ann(hidden_layers=hidden_layers, activation=activation, optimizer=optimizer, learning_rate=learning_rate)

    # validation_split holds out the last 20% of the training rows for selection.
    history = ann_model.fit(X_train, y_train, epochs=50, batch_size=16, validation_split=0.2, verbose=0)
    val_accuracy = history.history["val_accuracy"][-1]

    # The test set is only used for reporting, never for choosing a configuration.
    test_loss, test_accuracy = ann_model.evaluate(X_test, y_test, verbose=0)

    # hidden_layers is stored as a string so the list survives the CSV round trip as one cell.
    results.append({
        "hidden_layers": str(hidden_layers),
        "activation": activation,
        "optimizer": optimizer,
        "learning_rate": learning_rate,
        "val_accuracy": val_accuracy,
        "test_accuracy": test_accuracy
    })

    print(f"Validation Accuracy: {val_accuracy:.4f}")
    print(f"Test Accuracy: {test_accuracy:.4f}\n")

    if val_accuracy > best_val_accuracy:
        best_val_accuracy = val_accuracy
        best_accuracy = test_accuracy
        best_config = {
            "hidden_layers": hidden_layers,
            "activation": activation,
            "optimizer": optimizer,
            "learning_rate": learning_rate
        }

# One row per configuration, persisted for later inspection.
results_df = pd.DataFrame(results)

results_csv_path = "ann_tuning_results.csv"
results_df.to_csv(results_csv_path, index=False)

print("\n Best Configuration Found:")
print(best_config)
print(f" Best Validation Accuracy: {best_val_accuracy:.4f}")
print(f" Test Accuracy of Best Configuration: {best_accuracy:.4f}")



Testing Configuration: Layers [32, 16], Activation relu, Optimizer adam, Learning Rate 0.01


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Test Accuracy: 0.8033

Testing Configuration: Layers [32, 16], Activation relu, Optimizer adam, Learning Rate 0.001
Test Accuracy: 0.8525

Testing Configuration: Layers [32, 16], Activation relu, Optimizer adam, Learning Rate 0.0005
Test Accuracy: 0.8689

Testing Configuration: Layers [32, 16], Activation relu, Optimizer sgd, Learning Rate 0.01
Test Accuracy: 0.8852

Testing Configuration: Layers [32, 16], Activation relu, Optimizer sgd, Learning Rate 0.001
Test Accuracy: 0.8525

Testing Configuration: Layers [32, 16], Activation relu, Optimizer sgd, Learning Rate 0.0005
Test Accuracy: 0.5410

Testing Configuration: Layers [32, 16], Activation relu, Optimizer rmsprop, Learning Rate 0.01
Test Accuracy: 0.7541

Testing Configuration: Layers [32, 16], Activation relu, Optimizer rmsprop, Learning Rate 0.001
Test Accuracy: 0.8525

Testing Configuration: Layers [32, 16], Activation relu, Optimizer rmsprop, Learning Rate 0.0005
Test Accuracy: 0.8689

Testing Configuration: Layers [32, 16], Ac

Hasil test accuracy yang didapatkan bervariasi antara 47%–93% dengan rata-rata 81%.
Konfigurasi terbaik didapatkan dengan kombinasi:
{'hidden_layers': [128, 64, 32], 'activation': 'tanh', 'optimizer': 'rmsprop', 'learning_rate': 0.01}
 Best Test Accuracy: 0.9344

Dengan melihat beberapa tes lainnya, parameter yang konsisten terhadap performa model adalah activation tanh, sementara parameter lainnya seperti optimizer masih dapat berubah antara rmsprop dan adam.


# Evaluation of Best Model

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
# Rebuild the configuration the tuning runs above reported as best.
best_ann_model = create_ann(hidden_layers=[128, 64, 32], activation='tanh', optimizer='rmsprop', learning_rate=0.01)

# No validation_data here: with verbose=0 the per-epoch test-set metrics were
# never shown or stored, so computing them only added work and blurred the
# test set's role as a final hold-out.
best_ann_model.fit(X_train, y_train, epochs=50, batch_size=16, verbose=0)

# The sigmoid output is a probability; threshold at 0.5 to get class labels.
y_pred_probs = best_ann_model.predict(X_test)
y_pred = (y_pred_probs > 0.5).astype(int)

accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# Plain string: the original f-prefix had no placeholders to format.
print("Best Model Evaluation Metrics:")
print(f"Accuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 217ms/step
Best Model Evaluation Metrics:
Accuracy: 0.9344
Precision: 0.9118
Recall: 0.9688
F1 Score: 0.9394


# Save the best model

In [None]:
# Persist the trained best model to disk.
# NOTE(review): the ".h5" extension presumably selects Keras' legacy HDF5 format;
# newer Keras versions recommend the native ".keras" format. Confirm which format
# downstream consumers expect before changing the path.
best_ann_model.save("best_heart_disease_model.h5")


