In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
import numpy as np

In [2]:
df = pd.read_csv('/content/heart_2020_cleaned.csv')
df.head()

Unnamed: 0,HeartDisease,BMI,Smoking,AlcoholDrinking,Stroke,PhysicalHealth,MentalHealth,DiffWalking,Sex,AgeCategory,Race,Diabetic,PhysicalActivity,GenHealth,SleepTime,Asthma,KidneyDisease,SkinCancer
0,No,16.6,Yes,No,No,3.0,30.0,No,Female,55-59,White,Yes,Yes,Very good,5.0,Yes,No,Yes
1,No,20.34,No,No,Yes,0.0,0.0,No,Female,80 or older,White,No,Yes,Very good,7.0,No,No,No
2,No,26.58,Yes,No,No,20.0,30.0,No,Male,65-69,White,Yes,Yes,Fair,8.0,Yes,No,No
3,No,24.21,No,No,No,0.0,0.0,No,Female,75-79,White,No,No,Good,6.0,No,No,Yes
4,No,23.71,No,No,No,28.0,0.0,Yes,Female,40-44,White,No,Yes,Very good,8.0,No,No,No


In [3]:
df.shape

(319795, 18)

In [4]:
label_encoders = {}
for column in df.select_dtypes(include=['object']).columns:
    label_encoders[column] = LabelEncoder()
    df[column] = label_encoders[column].fit_transform(df[column])

In [5]:
X = df.drop('HeartDisease', axis=1)
y = df['HeartDisease']

In [7]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [8]:
class AdaBoost:
    def __init__(self, n_estimators=5):
        self.n_estimators = n_estimators
        self.models = []
        self.alphas = []

    def fit(self, X, y):
        n = len(X)
        weights = np.ones(n) / n

        for _ in range(self.n_estimators):
            model = LogisticRegression()
            model.fit(X, y, sample_weight=weights)

            predictions = model.predict(X)
            error = np.sum(weights * (predictions != y))

            alpha = 0.5 * np.log((1 - error) / max(error, 1e-10))

            weights *= np.exp(-alpha * y * predictions)
            weights /= np.sum(weights)

            self.models.append(model)
            self.alphas.append(alpha)

    def predict(self, X):
        preds = np.zeros(len(X))
        for alpha, model in zip(self.alphas, self.models):
            preds += alpha * model.predict(X)
        return np.sign(preds)

In [9]:
adaboost = AdaBoost(n_estimators=5)

In [10]:
adaboost.fit(X_train, y_train)

In [11]:
predictions = adaboost.predict(X_test)

In [12]:
accuracy = accuracy_score(y_test, predictions)

In [13]:
print(f"Accuracy: {accuracy}")

Accuracy: 0.9125689895089042
