# Import Libraries


In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from xgboost import XGBClassifier
from sklearn.datasets import fetch_openml

# Load the heart disease dataset using fetch_openml


In [5]:
# Download version 1 of the OpenML dataset named "heart".
# as_frame=False asks for array-style containers rather than a pandas
# DataFrame; the features arrive as a SciPy sparse matrix.
heart_data = fetch_openml(
    name='heart',
    version=1,
    as_frame=False,
)



In [6]:
# Report the dimensions of the feature matrix and of the label vector,
# one per line, in a single print call.
print(
    f"Shape of data: {heart_data.data.shape}",
    f"Shape of target: {heart_data.target.shape}",
    sep="\n",
)


Shape of data: (270, 13)
Shape of target: (270,)


In [8]:
# Sanity check: the number of feature columns should match the number of
# feature names supplied with the dataset.
print(
    f"Number of columns in data: {heart_data.data.shape[1]}",
    f"Number of feature names: {len(heart_data.feature_names)}",
    sep="\n",
)


Number of columns in data: 13
Number of feature names: 13


In [9]:
# Inspect the raw payload: overall shape, feature names, the first sample
# and the container type. The feature matrix is sparse, so the first row
# is rendered as "(row, col) value" entries rather than a dense vector.
print(
    f"Data shape: {heart_data.data.shape}",
    f"Feature names: {heart_data.feature_names}",
    f"First row of data: {heart_data.data[0]}",
    f"Data type: {type(heart_data.data)}",
    sep="\n",
)


Data shape: (270, 13)
Feature names: ['att_1', 'att_2', 'att_3', 'att_4', 'att_5', 'att_6', 'att_7', 'att_8', 'att_9', 'att_10', 'att_11', 'att_12', 'att_13']
First row of data:   (0, 0)	0.708333
  (0, 1)	1.0
  (0, 2)	1.0
  (0, 3)	-0.320755
  (0, 4)	-0.105023
  (0, 5)	-1.0
  (0, 6)	1.0
  (0, 7)	-0.419847
  (0, 8)	-1.0
  (0, 9)	-0.225806
  (0, 11)	1.0
  (0, 12)	-1.0
Data type: <class 'scipy.sparse._csr.csr_matrix'>


# Splitting Data into Features (X) and Target (y)


In [10]:
# Features are used exactly as returned by fetch_openml.
X = heart_data.data

# Relabel the target with the affine map (t + 1) / 2, i.e. -1 -> 0, +1 -> 1.
# NOTE(review): assumes the raw target is coded as -1/+1 — confirm.
y = (heart_data.target + 1) / 2

# Splitting Data into Training and Testing Sets


In [11]:
# Hold out 20% of the samples for testing; the fixed random_state makes the
# partition identical on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Defining Hyperparameters


In [12]:
# Candidate values for the grid evaluated below (2 x 2 x 2 = 8 combinations).

# Passed to XGBClassifier as `learning_rate`.
learning_rates = [
    0.01,
    0.1,
]

# Passed to XGBClassifier as `max_depth`.
max_depths = [
    3,
    6,
]

# Passed to XGBClassifier as `n_estimators`.
n_estimators_list = [
    50,
    100,
]

# Model Evaluation Loop

In [13]:
# Fit one XGBoost classifier per hyperparameter combination and record its
# accuracy on both the training split and the held-out test split.
results = []

# Flatten the grid up front. The comprehension iterates in the same order as
# three nested loops: learning rate outermost, number of estimators innermost.
param_grid = [
    (lr, depth, n_trees)
    for lr in learning_rates
    for depth in max_depths
    for n_trees in n_estimators_list
]

for lr, depth, n_trees in param_grid:
    # Every configuration uses the same seed, so differences between rows
    # come from the hyperparameters rather than from random initialisation.
    model = XGBClassifier(
        learning_rate=lr,
        max_depth=depth,
        n_estimators=n_trees,
        random_state=42,
    )
    model.fit(X_train, y_train)

    # Training accuracy is kept next to test accuracy so that the gap
    # between the two (overfitting) is visible for each configuration.
    results.append({
        'learning_rate': lr,
        'max_depth': depth,
        'n_estimators': n_trees,
        'train_accuracy': accuracy_score(y_train, model.predict(X_train)),
        'test_accuracy': accuracy_score(y_test, model.predict(X_test)),
    })


# Displaying Results


In [14]:
# Collect the per-configuration records into one table (one row per
# hyperparameter combination, columns taken from the record keys) and show it.
result_df = pd.DataFrame(data=results)
print(result_df)


   learning_rate  max_depth  n_estimators  train_accuracy  test_accuracy
0           0.01          3            50        0.861111       0.759259
1           0.01          3           100        0.898148       0.777778
2           0.01          6            50        0.907407       0.777778
3           0.01          6           100        0.958333       0.740741
4           0.10          3            50        0.953704       0.870370
5           0.10          3           100        0.976852       0.888889
6           0.10          6            50        0.995370       0.851852
7           0.10          6           100        1.000000       0.851852
