In [None]:
import pandas as pd
import numpy as np

In [None]:
df = pd.DataFrame()

In [None]:
df['X1'] = [1,2,3,4,5,6,6,7,9,9]
df['X2'] = [5,3,6,8,1,9,5,8,9,2]
df['label'] = [1,1,0,1,0,1,0,1,0,0]

In [None]:
df

Unnamed: 0,X1,X2,label
0,1,5,1
1,2,3,1
2,3,6,0
3,4,8,1
4,5,1,0
5,6,9,1
6,6,5,0
7,7,8,1
8,9,9,0
9,9,2,0


In [None]:
df['weights'] = 1/df.shape[0]

In [None]:
df

Unnamed: 0,X1,X2,label,weights
0,1,5,1,0.1
1,2,3,1,0.1
2,3,6,0,0.1
3,4,8,1,0.1
4,5,1,0,0.1
5,6,9,1,0.1
6,6,5,0,0.1
7,7,8,1,0.1
8,9,9,0,0.1
9,9,2,0,0.1


In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
dt1 = DecisionTreeClassifier(max_depth=1)

In [None]:
X = df.iloc[:,0:2].values
y = df.iloc[:,2].values

In [None]:
# Step 2 - Train 1st model
dt1.fit(X,y)

In [None]:
df['y_pred'] = dt1.predict(X)

In [None]:
df

Unnamed: 0,X1,X2,label,weights,y_pred
0,1,5,1,0.1,1
1,2,3,1,0.1,1
2,3,6,0,0.1,0
3,4,8,1,0.1,0
4,5,1,0,0.1,0
5,6,9,1,0.1,0
6,6,5,0,0.1,0
7,7,8,1,0.1,0
8,9,9,0,0.1,0
9,9,2,0,0.1,0


In [None]:
def calculate_model_weight(error):

  return 0.5*np.log((1-error)/(error))

In [None]:
# Step 3 - calculate model weight
alpha1 = calculate_model_weight(0.3)
alpha1

0.42364893019360184

In [None]:
# Step 4 - Update weights
def update_row_weights(row,alpha=0.423):
  if row['label'] == row['y_pred']:
    return row['weights'] * np.exp(-alpha)
  else:
    return row['weights'] * np.exp(alpha)

In [None]:
df['updated_weights'] = df.apply(update_row_weights,axis=1)

In [None]:
df

Unnamed: 0,X1,X2,label,weights,y_pred,updated_weights
0,1,5,1,0.1,1,0.065508
1,2,3,1,0.1,1,0.065508
2,3,6,0,0.1,0,0.065508
3,4,8,1,0.1,0,0.152653
4,5,1,0,0.1,0,0.065508
5,6,9,1,0.1,0,0.152653
6,6,5,0,0.1,0,0.065508
7,7,8,1,0.1,0,0.152653
8,9,9,0,0.1,0,0.065508
9,9,2,0,0.1,0,0.065508


In [None]:
df['updated_weights'].sum()

0.9165153319682015

In [None]:
df['nomalized_weights'] = df['updated_weights']/df['updated_weights'].sum()

In [None]:
df

Unnamed: 0,X1,X2,label,weights,y_pred,updated_weights,nomalized_weights
0,1,5,1,0.1,1,0.065508,0.071475
1,2,3,1,0.1,1,0.065508,0.071475
2,3,6,0,0.1,0,0.065508,0.071475
3,4,8,1,0.1,0,0.152653,0.166559
4,5,1,0,0.1,0,0.065508,0.071475
5,6,9,1,0.1,0,0.152653,0.166559
6,6,5,0,0.1,0,0.065508,0.071475
7,7,8,1,0.1,0,0.152653,0.166559
8,9,9,0,0.1,0,0.065508,0.071475
9,9,2,0,0.1,0,0.065508,0.071475


In [None]:
df['nomalized_weights'].sum()

1.0

In [None]:
df['upper_limit'] = np.cumsum(df['nomalized_weights'])

In [None]:
df['lower_limit'] = df['upper_limit'] - df['nomalized_weights']

In [None]:
df[['X1','X2','label','weights','y_pred','updated_weights','lower_limit','upper_limit']]

Unnamed: 0,X1,X2,label,weights,y_pred,updated_weights,lower_limit,upper_limit
0,1,5,1,0.1,1,0.065508,0.0,0.071475
1,2,3,1,0.1,1,0.065508,0.071475,0.14295
2,3,6,0,0.1,0,0.065508,0.14295,0.214425
3,4,8,1,0.1,0,0.152653,0.214425,0.380983
4,5,1,0,0.1,0,0.065508,0.380983,0.452458
5,6,9,1,0.1,0,0.152653,0.452458,0.619017
6,6,5,0,0.1,0,0.065508,0.619017,0.690492
7,7,8,1,0.1,0,0.152653,0.690492,0.85705
8,9,9,0,0.1,0,0.065508,0.85705,0.928525
9,9,2,0,0.1,0,0.065508,0.928525,1.0


In [None]:
def create_new_dataset(df):

  indices = []

  for i in range(df.shape[0]):
    a = np.random.random()
    for index,row in df.iterrows():
      if row['upper_limit'] > a and a > row['lower_limit']:
        indices.append(index)
  return indices

In [None]:
index_values = create_new_dataset(df)

index_values

[7, 0, 3, 7, 6, 1, 4, 8, 2, 7]

In [None]:
second_df = df.iloc[index_values,[0,1,2,3]]

In [None]:
second_df

Unnamed: 0,X1,X2,label,weights
7,7,8,1,0.1
0,1,5,1,0.1
3,4,8,1,0.1
7,7,8,1,0.1
6,6,5,0,0.1
1,2,3,1,0.1
4,5,1,0,0.1
8,9,9,0,0.1
2,3,6,0,0.1
7,7,8,1,0.1


In [None]:
dt2 = DecisionTreeClassifier(max_depth=1)

In [None]:
X = second_df.iloc[:,0:2].values
y = second_df.iloc[:,2].values

In [None]:
dt2.fit(X,y)

In [None]:
second_df['y_pred'] = dt2.predict(X)

In [None]:
second_df

Unnamed: 0,X1,X2,label,weights,y_pred
7,7,8,1,0.1,1
0,1,5,1,0.1,0
3,4,8,1,0.1,1
7,7,8,1,0.1,1
6,6,5,0,0.1,0
1,2,3,1,0.1,0
4,5,1,0,0.1,0
8,9,9,0,0.1,1
2,3,6,0,0.1,0
7,7,8,1,0.1,1


In [None]:
alpha2 = calculate_model_weight(0.1)

In [None]:
alpha2

1.0986122886681098

In [None]:
# Step 4 - Update weights
def update_row_weights(row,alpha=1.09):
  if row['label'] == row['y_pred']:
    return row['weights'] * np.exp(-alpha)
  else:
    return row['weights'] * np.exp(alpha)

In [None]:
second_df['updated_weights'] = second_df.apply(update_row_weights,axis=1)

In [None]:
second_df

Unnamed: 0,X1,X2,label,weights,y_pred,updated_weights
7,7,8,1,0.1,1,0.033622
0,1,5,1,0.1,0,0.297427
3,4,8,1,0.1,1,0.033622
7,7,8,1,0.1,1,0.033622
6,6,5,0,0.1,0,0.033622
1,2,3,1,0.1,0,0.297427
4,5,1,0,0.1,0,0.033622
8,9,9,0,0.1,1,0.297427
2,3,6,0,0.1,0,0.033622
7,7,8,1,0.1,1,0.033622


In [None]:
second_df['nomalized_weights'] = second_df['updated_weights']/second_df['updated_weights'].sum()

In [None]:
second_df

Unnamed: 0,X1,X2,label,weights,y_pred,updated_weights,nomalized_weights
7,7,8,1,0.1,1,0.033622,0.029816
0,1,5,1,0.1,0,0.297427,0.263762
3,4,8,1,0.1,1,0.033622,0.029816
7,7,8,1,0.1,1,0.033622,0.029816
6,6,5,0,0.1,0,0.033622,0.029816
1,2,3,1,0.1,0,0.297427,0.263762
4,5,1,0,0.1,0,0.033622,0.029816
8,9,9,0,0.1,1,0.297427,0.263762
2,3,6,0,0.1,0,0.033622,0.029816
7,7,8,1,0.1,1,0.033622,0.029816


In [None]:
second_df['nomalized_weights'].sum()

1.0

In [None]:
second_df['upper_limit'] = np.cumsum(second_df['nomalized_weights'])

In [None]:
second_df['lower_limit'] = second_df['upper_limit'] - second_df['nomalized_weights']

In [None]:
second_df[['X1','X2','label','weights','y_pred','nomalized_weights','lower_limit','upper_limit']]

Unnamed: 0,X1,X2,label,weights,y_pred,nomalized_weights,lower_limit,upper_limit
7,7,8,1,0.1,1,0.029816,0.0,0.029816
0,1,5,1,0.1,0,0.263762,0.029816,0.293579
3,4,8,1,0.1,1,0.029816,0.293579,0.323395
7,7,8,1,0.1,1,0.029816,0.323395,0.353211
6,6,5,0,0.1,0,0.029816,0.353211,0.383027
1,2,3,1,0.1,0,0.263762,0.383027,0.646789
4,5,1,0,0.1,0,0.029816,0.646789,0.676605
8,9,9,0,0.1,1,0.263762,0.676605,0.940368
2,3,6,0,0.1,0,0.029816,0.940368,0.970184
7,7,8,1,0.1,1,0.029816,0.970184,1.0


In [None]:
index_values = create_new_dataset(second_df)

In [None]:
third_df = second_df.iloc[index_values,[0,1,2,3]]

In [None]:
third_df

Unnamed: 0,X1,X2,label,weights
2,3,6,0,0.1
7,7,8,1,0.1
7,7,8,1,0.1
0,1,5,1,0.1
2,3,6,0,0.1
4,5,1,0,0.1
2,3,6,0,0.1
7,7,8,1,0.1
7,7,8,1,0.1
7,7,8,1,0.1


In [None]:
dt3 = DecisionTreeClassifier(max_depth=1)

X = second_df.iloc[:,0:2].values
y = second_df.iloc[:,2].values

dt3.fit(X,y)

In [None]:
third_df

Unnamed: 0,X1,X2,label,weights
2,3,6,0,0.1
7,7,8,1,0.1
7,7,8,1,0.1
0,1,5,1,0.1
2,3,6,0,0.1
4,5,1,0,0.1
2,3,6,0,0.1
7,7,8,1,0.1
7,7,8,1,0.1
7,7,8,1,0.1


In [None]:
alpha3 = calculate_model_weight(0.7)
alpha3

-0.4236489301936017

In [None]:
print(alpha1,alpha2,alpha3)

0.42364893019360184 1.0986122886681098 -0.4236489301936017


### Prediction

In [None]:
query = np.array([1,5]).reshape(1,2)
dt1.predict(query)

array([1])

In [None]:
dt2.predict(query)

array([0])

In [None]:
dt3.predict(query)

array([0])

In [None]:
alpha1*1 + alpha2*(1) + alpha3*(1)

1.09861228866811

In [None]:
np.sign(1.09)

1.0

In [None]:
query = np.array([9,9]).reshape(1,2)
dt1.predict(query)

array([0])

In [None]:
dt2.predict(query)

array([1])

In [None]:
dt3.predict(query)

array([1])

In [None]:
alpha1*(1) + alpha2*(-1) + alpha3*(-1)

-0.2513144282809062

In [None]:
np.sign(-0.25)

-1.0