In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [2]:
data=pd.read_csv('train_set.csv')
data.head()

Unnamed: 0,age,relaxation,Gtp,triglyceride,eyesight(left),ALT,height(cm),weight(kg),waist(cm),hearing(left),Urine protein,systolic,HDL,hearing(right),hemoglobin,smoking
0,40,64,10,40,1.2,10,160,50,68.0,1,1,105,70,1,11.7,0
1,60,86,111,160,0.6,55,160,65,88.0,1,3,135,55,1,14.8,0
2,40,84,28,163,1.0,16,175,75,88.0,1,1,128,48,1,15.2,1
3,40,89,12,53,1.2,17,175,60,73.0,1,1,127,64,1,15.3,1
4,55,80,34,53,0.6,24,170,70,87.1,1,1,130,85,1,14.6,1


In [3]:
X=data.drop(['smoking'],axis=1)
y=data['smoking']


In [4]:
X.head()


Unnamed: 0,age,relaxation,Gtp,triglyceride,eyesight(left),ALT,height(cm),weight(kg),waist(cm),hearing(left),Urine protein,systolic,HDL,hearing(right),hemoglobin
0,40,64,10,40,1.2,10,160,50,68.0,1,1,105,70,1,11.7
1,60,86,111,160,0.6,55,160,65,88.0,1,3,135,55,1,14.8
2,40,84,28,163,1.0,16,175,75,88.0,1,1,128,48,1,15.2
3,40,89,12,53,1.2,17,175,60,73.0,1,1,127,64,1,15.3
4,55,80,34,53,0.6,24,170,70,87.1,1,1,130,85,1,14.6


In [5]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier


class BaggingClassifierMe:
    def __init__(self, n_estimators=100, max_depth=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.models = [DecisionTreeClassifier(max_depth=self.max_depth) for _ in range(n_estimators)]

    def fit(self, X, y):
        for model in self.models:
            indices = np.random.choice(len(X), len(X), replace=True)
            X_subset, y_subset = X.iloc[indices], y.iloc[indices]
            model.fit(X_subset, y_subset)

    def predict(self, X):

        predictions = np.zeros((len(X), self.n_estimators))
        for i, model in enumerate(self.models):
            predictions[:, i] = model.predict(X)

        # Calculate the average prediction across all models
        avg_predictions = np.mean(predictions, axis=1)

        # Apply threshold to convert to binary predictions
        binary_predictions = (avg_predictions >= 0.5).astype(int)

        return binary_predictions


def score(self, X, y):
    predictions = self.predict(X)
    accuracy = np.mean(predictions == y)
    return accuracy


def get_params(self, deep=True):
    return {'n_estimators': self.n_estimators, 'max_depth': self.max_depth}


def set_params(self, **parameters):
    for parameter, value in parameters.items():
        setattr(self, parameter, value)
    return self


In [6]:
class AdaBoostClassifierMe:
    def __init__(self, n_estimators=100, max_depth=1):
        self.n_estimators = n_estimators
        self.models = []
        self.alphas = []
        self.max_depth = max_depth

    def fit(self, X, y):
        # Initialize weights
        weights = np.ones(len(X)) / len(X)

        for _ in range(self.n_estimators):
            # Create a new model
            model = DecisionTreeClassifier(max_depth=self.max_depth)

            # Fit the model with weighted samples
            model.fit(X, y, sample_weight=weights)

            # Predictions of the current model
            predictions = model.predict(X)

            # Calculate error and alpha
            error = np.sum(weights * (predictions != y))
            alpha = 0.5 * np.log((1 - error) / max(error, 1e-10))

            # Update weights
            weights *= np.exp(-alpha * y * predictions)
            weights /= np.sum(weights)

            # Save the model and its corresponding alpha
            self.models.append(model)
            self.alphas.append(alpha)

    def predict(self, X):
        # Initialize predictions
        predictions = np.zeros(len(X))

        for model, alpha in zip(self.models, self.alphas):
            # Accumulate weighted predictions
            predictions += alpha * model.predict(X)

        # Apply sign function to get final predictions
        return np.sign(predictions)

In [7]:
class RandomForestClassifierMe:
    def __init__(self, n_estimators=100, max_features=None, max_depth=None):
        self.n_estimators = n_estimators
        self.max_features = max_features
        self.max_depth = max_depth  
        self.models = [DecisionTreeClassifier(max_features=max_features, max_depth=max_depth) for _ in range(n_estimators)]
        
    def fit(self, X, y):
        for model in self.models:
            indices = np.random.choice(len(X), len(X), replace=True)
            X_subset, y_subset = X.iloc[indices], y.iloc[indices]
            model.fit(X_subset, y_subset)
    
    def predict(self, X, threshold=0.5):
        predictions = np.zeros((len(X), self.n_estimators))
        for i, model in enumerate(self.models):
             predictions[:, i] = model.predict(X)
    
        # Calculate the average prediction across all models
        avg_predictions = np.mean(predictions, axis=1)
    
        # Apply threshold to convert to binary predictions
        binary_predictions = (avg_predictions >= threshold).astype(int)
    
        return binary_predictions

In [8]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Instantiate and fit each model
bagging_model = BaggingClassifierMe(n_estimators=1000, max_depth=5)
bagging_model.fit(X_train, y_train)

boosting_model = AdaBoostClassifierMe(n_estimators=1000, max_depth=5)
boosting_model.fit(X_train, y_train)

random_forest_model = RandomForestClassifierMe(n_estimators=1000, max_depth=5)
random_forest_model.fit(X_train, y_train)

# Make predictions
bagging_predictions = bagging_model.predict(X_test)
boosting_predictions = boosting_model.predict(X_test)
random_forest_predictions = random_forest_model.predict(X_test)

# Evaluate the models
bagging_accuracy = accuracy_score(y_test, bagging_predictions)
boosting_accuracy = accuracy_score(y_test, boosting_predictions)
random_forest_accuracy = accuracy_score(y_test, random_forest_predictions)

print(f"Bagging Accuracy: {bagging_accuracy}")
print(f"Boosting Accuracy: {boosting_accuracy}")
print(f"Random Forest Accuracy: {random_forest_accuracy}")

Bagging Accuracy: 0.7545253863134658
Boosting Accuracy: 0.7499141525631592
Random Forest Accuracy: 0.7545744419916606
