In [None]:
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import BaggingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix

In [None]:
# Read the heart dataset from its location relative to this notebook.
HEART_CSV_PATH = "../dataset/heart.csv"
df = pd.read_csv(HEART_CSV_PATH)

In [None]:

# Encode every text-valued (object dtype) column of `df` as integer codes.
#
# Each column gets its OWN LabelEncoder, stored in `label_encoders`. A single
# shared encoder is re-fitted on every iteration and therefore only remembers
# the classes of the last column, which makes it impossible to decode the
# other columns or to encode new rows with the same mapping.
#
# NOTE: once this cell has run, the encoded columns are no longer object
# dtype, so re-running it without reloading `df` finds nothing to encode and
# resets `label_encoders` to an empty dict.
categorical_columns = df.select_dtypes(include=['object']).columns

# Column name -> fitted encoder; use `.classes_` or `.inverse_transform`
# on an entry to map the integer codes back to the original labels.
label_encoders = {}

# Defined up front so `label_encoder` exists even when there are no
# categorical columns; after the loop it is the encoder of the last encoded
# column, exactly as before.
label_encoder = LabelEncoder()

for column in categorical_columns:
    label_encoder = LabelEncoder()
    df[column] = label_encoder.fit_transform(df[column])
    label_encoders[column] = label_encoder

In [None]:

# Separate the target column from the predictor columns.
y = df['HeartDisease']
X = df.drop(columns='HeartDisease')

# Rescale every feature with min-max scaling (default [0, 1] range).
# The scaler is fitted on all rows of X here.
scaler = MinMaxScaler().fit(X)
X_scaled = scaler.transform(X)

In [None]:
# Split the UNSCALED features first, then fit the scaler on the training rows
# only. Splitting an array that was scaled with a MinMaxScaler fitted on the
# full dataset lets the test rows' min/max leak into the training features,
# which can make the evaluation look better than it really is.
#
# The same test_size and random_state are used and X has the same number of
# rows as the scaled array, so the rows assigned to each side (and therefore
# y_train / y_test) are unchanged.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the per-feature min/max from the training rows alone. Use this same
# scaler for any new data that is passed to a model trained on X_train.
train_scaler = MinMaxScaler()
X_train = train_scaler.fit_transform(X_train_raw)

# Apply the training min/max to the held-out rows; values may fall slightly
# outside [0, 1] when a test row exceeds the range seen during training.
X_test = train_scaler.transform(X_test_raw)

In [None]:
# Base learner: k-nearest neighbours voting over the 5 closest points.
knn = KNeighborsClassifier(n_neighbors=5)

# Bagging configuration: ten copies of the base learner, each trained on a
# bootstrap sample (drawn with replacement) sized at 80% of the training set.
# The fixed seed keeps the resampling reproducible.
bagging_options = {
    "estimator": knn,
    "n_estimators": 10,
    "max_samples": 0.8,
    "bootstrap": True,
    "random_state": 42,
}

# fit() returns the fitted estimator itself, so it can be chained here.
ensemble_knn = BaggingClassifier(**bagging_options).fit(X_train, y_train)

# Predict on the held-out split, then show the confusion matrix followed by
# the per-class precision / recall / F1 report.
y_pred = ensemble_knn.predict(X_test)

test_confusion = confusion_matrix(y_test, y_pred)
test_report = classification_report(y_test, y_pred)
print(test_confusion, test_report, sep="\n")