# In [2]:
import os
import time
import numpy as np
import pandas as pd

from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification

# Number of rows in the synthetic dataset; its reciprocal is the uniform
# starting weight of every sample.
n_samples = 50

# Weight histories, one entry per recorded boosting step. Both begin with the
# uniform distribution; ``weights`` holds the raw values and
# ``weights_bounded`` the capped ones.
weights = [np.full(n_samples, 1 / n_samples)]
weights_bounded = [np.full(n_samples, 1 / n_samples)]

class MadaBoostClassifier(AdaBoostClassifier):
    """AdaBoost classifier whose sample weights are capped at ``1 / n_samples``.

    Each boosting step is delegated to ``AdaBoostClassifier._boost``; the
    weights it returns are then limited so that no sample weighs more than
    the uniform starting weight. Every step also appends the raw and the
    capped weights to the module-level ``weights`` and ``weights_bounded``
    lists so they can be inspected after fitting.
    """

    def _boost(self, iboost, X, y, sample_weight, random_state):
        """Perform one boosting step and cap the resulting sample weights.

        Parameters
        ----------
        iboost : index of the current boosting iteration, forwarded as-is.
        X : training samples; only ``X.shape[0]`` is read here.
        y : training targets, forwarded as-is.
        sample_weight : current sample weights, forwarded as-is.
        random_state : random state, forwarded as-is.

        Returns
        -------
        tuple
            ``(sample_weight, estimator_weight, estimator_error)`` where
            ``sample_weight`` is the capped weight array, or ``None`` when
            the parent step returned ``None``. The other two values are
            passed through from the parent unchanged.
        """
        sample_weight, estimator_weight, estimator_error = super()._boost(
            iboost, X, y, sample_weight, random_state
        )

        # The parent step produced no weights: hand its result back unchanged
        # and record nothing for this iteration.
        if sample_weight is None:
            return sample_weight, estimator_weight, estimator_error

        raw = np.asarray(sample_weight)
        d0 = 1 / X.shape[0]  # uniform starting weight, used as the upper bound

        # Weights strictly below d0 are kept; every other entry becomes d0.
        bounded = np.where(raw < d0, raw, d0)

        # list(...) stores independent copies, so the history is unaffected if
        # the arrays are modified in place after this method returns.
        weights.append(list(raw))
        weights_bounded.append(list(bounded))

        return bounded, estimator_weight, estimator_error


# Synthetic classification data with ``n_samples`` rows. No random_state is
# given, so the data differs from run to run.
X, y = make_classification(n_samples=n_samples)

dt = DecisionTreeClassifier(max_depth=2, random_state=1)
# The weak learner is passed positionally on purpose: the keyword was renamed
# from ``base_estimator`` to ``estimator`` in scikit-learn 1.2 and the old
# spelling was removed in 1.4, whereas the first positional parameter is the
# weak learner under both names.
clf = MadaBoostClassifier(dt, n_estimators=100, random_state=0)
clf.fit(X, y)

# Build two columns per recorded step -- raw weights, then capped weights --
# with every value rendered as a string with four decimals.
results_dict = {}
for iteration, (raw, bounded) in enumerate(zip(weights, weights_bounded), start=1):
    results_dict['Iteration {}'.format(iteration)] = [
        '{:.4f}'.format(w) for w in raw
    ]
    results_dict['Iteration {} Bounded'.format(iteration)] = [
        '{:.4f}'.format(w) for w in bounded
    ]

# NOTE: "datset" (sic) is kept in both the variable and the file name so that
# anything already relying on either keeps working.
weights_datset = pd.DataFrame(results_dict)
weights_datset.to_csv('weights_datset.csv', index=False)