# Importing Libraries

In [None]:
from sklearn.base import BaseEstimator, ClassifierMixin, clone
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.preprocessing import LabelEncoder
import numpy as np

# SimpleMultiClassBoosting

In [None]:
class SimpleMultiClassBoosting(BaseEstimator, ClassifierMixin):
    """Multi-class boosting ensemble with SAMME-style learner weights.

    Each round fits a fresh clone of ``base_estimator`` on the training set
    using the current per-sample weights, scores it by its weighted error,
    and multiplies the weights of the samples it misclassified so the next
    learner focuses on them.  Prediction is a weighted vote of all kept
    learners.

    Parameters
    ----------
    base_estimator : estimator, default=None
        Estimator cloned for every boosting round.  Its ``fit`` must accept
        a ``sample_weight`` keyword.  When ``None``, a depth-1
        ``DecisionTreeClassifier`` (a decision stump) is used.
    n_estimators : int, default=50
        Maximum number of boosting rounds.

    Attributes
    ----------
    learners : list
        Fitted learners, in the order they were trained.
    learner_weights : list
        Voting weight of each entry in ``learners``.
    label_encoder : LabelEncoder
        Maps the original labels to ``0 .. n_classes - 1`` and back.
    classes_ : ndarray
        Class labels seen during ``fit`` (set by ``fit``).
    """

    def __init__(self, base_estimator=None, n_estimators=50):
        self.base_estimator = (
            base_estimator
            if base_estimator is not None
            else DecisionTreeClassifier(max_depth=1)
        )
        self.n_estimators = n_estimators
        self.learners = []
        self.learner_weights = []
        self.label_encoder = LabelEncoder()

    def fit(self, X, y):
        """Fit the boosted ensemble on ``X`` and ``y``.

        Parameters
        ----------
        X : array-like
            Training samples, passed unchanged to every learner.
        y : array-like
            Target labels; encoded internally with ``label_encoder``.

        Returns
        -------
        self : SimpleMultiClassBoosting
            The fitted estimator.
        """
        # Start from an empty ensemble so that calling fit() again replaces
        # the previous model instead of stacking new learners on top of it.
        self.learners = []
        self.learner_weights = []

        y_encoded = self.label_encoder.fit_transform(y)
        self.classes_ = self.label_encoder.classes_
        n_classes = len(self.classes_)

        n_samples = len(y_encoded)
        sample_weights = np.full(n_samples, fill_value=1 / n_samples)

        for _ in range(self.n_estimators):
            learner = clone(self.base_estimator)
            learner.fit(X, y_encoded, sample_weight=sample_weights)
            incorrect = learner.predict(X) != y_encoded
            learner_error = np.dot(sample_weights, incorrect) / np.sum(sample_weights)

            # SAMME weight: log-odds of being right plus log(K - 1).  The
            # error is floored at 1e-10 so a perfect learner gets a large
            # finite weight instead of a division by zero.
            learner_weight = (
                np.log((1 - learner_error) / max(learner_error, 1e-10))
                + np.log(n_classes - 1)
            )

            # The weight is positive exactly when the weighted error is below
            # 1 - 1/K, i.e. the learner beats random guessing.  Otherwise the
            # update below would shrink the weights of the misclassified
            # samples, so boosting stops.  The very first learner is still
            # kept so that the ensemble is never empty.
            beats_chance = learner_weight > 0
            if not beats_chance and self.learners:
                break

            self.learners.append(learner)
            self.learner_weights.append(learner_weight)
            if not beats_chance:
                break

            # Up-weight the misclassified samples, then renormalise to sum to 1.
            sample_weights[incorrect] *= np.exp(learner_weight)
            sample_weights /= np.sum(sample_weights)

        return self

    def predict(self, X):
        """Predict labels for ``X`` by a weighted vote of the learners.

        Parameters
        ----------
        X : array-like
            Samples to classify, passed unchanged to every learner.

        Returns
        -------
        ndarray
            Predicted labels, decoded back to the original label values.
        """
        n_samples = np.shape(X)[0]
        rows = np.arange(n_samples)
        weighted_votes = np.zeros((n_samples, len(self.label_encoder.classes_)))

        for learner, learner_weight in zip(self.learners, self.learner_weights):
            # Each row appears once in ``rows``, so the in-place add touches
            # every (sample, predicted class) cell at most once per learner.
            weighted_votes[rows, learner.predict(X)] += learner_weight

        final_preds = np.argmax(weighted_votes, axis=1)
        return self.label_encoder.inverse_transform(final_preds)


# Evaluation

In [None]:
import pandas as pd
from sklearn.datasets import load_iris

# Load the Iris dataset and gather the features and the label in one frame;
# the label is stored last, in a column named 'target'.
iris = load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names).assign(target=iris.target)

# Number of non-null entries in each column.
print(df.count())

sepal length (cm)    150
sepal width (cm)     150
petal length (cm)    150
petal width (cm)     150
target               150
dtype: int64


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report


# Every column except the last is a feature; the last column is the label.
X, y = df.iloc[:, :-1], df.iloc[:, -1]

# Hold out 30% of the rows for testing; the seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
# Shapes after the split: X_train (105, 4), y_train (105,), X_test (45, 4).

# Train a 10-round boosted ensemble and predict the held-out labels.
model = SimpleMultiClassBoosting(n_estimators=10)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Score the predictions against the true test labels.
accuracy = accuracy_score(y_test, y_pred)
conf_mat = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

for label, value in (
    ("Accuracy:", accuracy),
    ("Confusion Matrix:\n", conf_mat),
    ("Classification Report:\n", class_report),
):
    print(label, value)

X_train shape : (105, 4)
y_train shape : (105,)
X_test shape : (45, 4)
Accuracy: 1.0
Confusion Matrix:
 [[19  0  0]
 [ 0 13  0]
 [ 0  0 13]]
Classification Report:
               precision    recall  f1-score   support

           0       1.00      1.00      1.00        19
           1       1.00      1.00      1.00        13
           2       1.00      1.00      1.00        13

    accuracy                           1.00        45
   macro avg       1.00      1.00      1.00        45
weighted avg       1.00      1.00      1.00        45

