### AdaBoost Implementation

AdaBoost is an Boosting algorithm which means that we will take a weak learner model which in this case will be a Decision Stumps and then using those do upsampling and proceed to the consecutive steps

### Components

#### 1- Weak Learners
    This will be a model whose accuracy will be slightly greater than 50%

#### 2- Decision Stumps
    This will be a decision tree whose maxdepth will be only 1 which means a single split
    

### Steps Followed
    
##### 1- Prepare a Decision stump for the given data and calulate the error and the error rate(Alpha)
    
            error  = summation of misclassified points
            error_rate(alpha) = 0.5*log((1-error)/error)
    
##### 2- Update the weight and then normalize it
            
            Misclassified
                new_wt = wt*exp(alpha)
            Correctly Classified
                new_wt = wt*exp(-alpha)
                
##### 3- Creating the range for the dataset

                upper_limit = cumsum(normalized_wt)
                lower_limit = upper_limit - normalized_wt
    
##### 4- Create the new Dataset

                Generating the new number using random and take the indices in which this lies and there is high chance these will lies in the wider range and the wider ranges will be for misclassified points
                
                Using those we will create the dataset
                
##### 5- Inference 
                The inference will done on those all decision stumps which have been trained and their sign values will be taken
            
           

In [2]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

In [20]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier

class AdaBoost:
    
    def __init__(self, n_estimators=20):
        self.n_estimators = n_estimators
    
    def fit(self, X, y):
        n_samples, n_features = X.shape
        
        # Initializing weights
        weights = np.ones(n_samples) / n_samples
        
        # To store classifiers
        self.clfs = []
        
        # to store the alpha values for the final predicion
        self.alphas = []
        
        for _ in range(self.n_estimators):
            
            # Train decision stump
            clf = DecisionTreeClassifier(max_depth=1)
            clf.fit(X, y, sample_weight=weights)
            y_pred = clf.predict(X)
            
            
            # Calculate error and alpha (classifier weight)
            error = np.sum(weights * (y_pred != y)) / np.sum(weights)
            alpha = 0.5 * np.log((1 - error) / (error + 1e-10)) 
            
            # Update weights and normalize them
            for i in range(n_samples):
                # Misclassified sample
                if y[i] != y_pred[i]:  
                    weights[i] *= np.exp(alpha)
                else: 
                    weights[i] *= np.exp(-alpha)
            
            # Normalize weights
            weights /= np.sum(weights)  
            
            # Store classifier and its alpha
            self.clfs.append(clf)
            self.alphas.append(alpha)
    
    def predict(self, X):
        # Initialize array to store final predictions
        final_pred = np.zeros(X.shape[0])
        
        # Aggregate predictions from all classifiers
        for alpha, clf in zip(self.alphas, self.clfs):
            final_pred += alpha * clf.predict(X)
        
        # Return the sign of the aggregated predictions
        return np.sign(final_pred)


### Adaboost implementation for the multiclass Classification

Induces the Label Encoding for the Multiclass classification

In [29]:
import numpy as np
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

class AdaBoost:
    def __init__(self, n_estimators=20):
        self.n_estimators = n_estimators
    
    def fit(self, X, y):
        n_samples, n_features = X.shape
        
        # Encode labels to handle multiclass classification using OvR (One-vs-Rest)
        self.le = LabelEncoder()
        y = self.le.fit_transform(y)
        
        # Initialize weights uniformly
        weights = np.ones(n_samples) / n_samples
        
        # To store classifiers and their corresponding alpha values
        self.clfs = []
        self.alphas = []
        
        for _ in range(self.n_estimators):
            # Train weak classifier
            clf = DecisionTreeClassifier(max_depth=1)
            clf.fit(X, y, sample_weight=weights)
            
            # Get predictions
            y_pred = clf.predict(X)
            
            # Calculate error and alpha (classifier weight)
            error = np.sum(weights * (y_pred != y)) / np.sum(weights)
            alpha = 0.5 * np.log((1 - error) / (error + 1e-10))  # Add small value to avoid division by zero
            
            # Update weights
            for i in range(n_samples):
                if y[i] != y_pred[i]:  # Misclassified sample
                    weights[i] *= np.exp(alpha)
                else:  # Correctly classified sample
                    weights[i] *= np.exp(-alpha)
            
            # Normalize weights
            weights /= np.sum(weights)
            
            # Store classifier and its alpha
            self.clfs.append(clf)
            self.alphas.append(alpha)
    
    def predict(self, X):
        # Initialize array to store final predictions for each class
        final_pred = np.zeros((X.shape[0], len(self.le.classes_)))
        
        # Aggregate predictions from all classifiers
        for alpha, clf in zip(self.alphas, self.clfs):
            class_pred = clf.predict(X)
            for i in range(len(self.le.classes_)):
                final_pred[:, i] += alpha * (class_pred == i)
        
        # Return the class with the highest aggregated score
        return self.le.inverse_transform(np.argmax(final_pred, axis=1))

In [30]:
from sklearn.datasets import load_iris
import pandas as pd

iris = load_iris()
df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
df['target'] = iris.target 

In [31]:
# Train the updated AdaBoost model
model = AdaBoost(n_estimators=50)  # Increased n_estimators for better performance
model.fit(df.iloc[:, :-1], df.iloc[:, -1])

y_pred = model.predict(df.iloc[:, :-1])

In [32]:
accuracy = accuracy_score(df['target'], y_pred)
accuracy

0.9733333333333334