In [11]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt


from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from xgboost import XGBClassifier

from sklearn.metrics import accuracy_score
from itertools import product

df = pd.read_csv('heart.csv')

df.head()


Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [2]:
# extract features and split
X = df[['restecg', 'oldpeak']].copy()
y = df['target'].copy()

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# first split
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=0)

# second split
X_train_main, X_val, y_train_main, y_val = train_test_split(X_train, y_train, test_size=0.3, random_state=0)



In [5]:
def make_estimators(max_depth=3):
    return [
        ('dt', DecisionTreeClassifier(max_depth=max_depth, random_state=0)),
        ('knn', KNeighborsClassifier(n_neighbors =5)),
        ('rf', RandomForestClassifier(max_depth=max_depth, random_state=0)),
        ('xgb', XGBClassifier(max_depth=max_depth, eval_metric='logloss', random_state=0))
    ]

estimators = make_estimators()

In [8]:
# compare soft voting and hard voting
voting_soft = VotingClassifier(estimators=estimators, voting='soft')
voting_hard = VotingClassifier(estimators=estimators, voting='hard')

voting_soft.fit(X_train_main, y_train_main)
voting_hard.fit(X_train_main, y_train_main)

accsoft = accuracy_score(y_val, voting_soft.predict(X_val))
acchard = accuracy_score(y_val, voting_hard.predict(X_val))

print(f"Validation Accuracy → Soft: {accsoft:.3f}, Hard: {acchard:.3f}")

Validation Accuracy → Soft: 0.724, Hard: 0.687


In [12]:
# find best weights using validatoin score
best_acc = 0
best_weights = None
best_vote = None

for vote in ['soft','hard']:
    for w in product([1,2,3], repeat=4):
        vc = VotingClassifier(estimators=estimators, voting=vote, weights=w)
        vc.fit(X_train_main, y_train_main)
        acc = accuracy_score(y_val, vc.predict(X_val))
        if acc > best_acc:
            best_acc = acc
            best_weights = w
            best_vote = vote

print(f"Best Voting: {best_vote}, Best Weights: {best_weights}, Validation Acc: {best_acc:.3f}")

Best Voting: soft, Best Weights: (1, 2, 1, 3), Validation Acc: 0.732
