In [None]:
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score

# Load data: whitespace-separated text files without a header row; the columns
# are named X1, X2 (features) and Y (label).
train_file = '../Datasets/adaboost-train-24.txt'
test_file = '../Datasets/adaboost-test-24.txt'
column_names = ['X1', 'X2', 'Y']
read_options = dict(sep=r'\s+', header=None, names=column_names)
df_train = pd.read_csv(train_file, **read_options)
df_test = pd.read_csv(test_file, **read_options)

# Split each frame into a feature matrix and a label vector (NumPy arrays).
X_train, Y_train = df_train[['X1', 'X2']].to_numpy(), df_train['Y'].to_numpy()
X_test, Y_test = df_test[['X1', 'X2']].to_numpy(), df_test['Y'].to_numpy()

# Number of classifiers we want in the boosted ensemble
num_classifiers = 50

# Baseline: a single depth-1 decision tree (a stump) fitted with uniform
# sample weights, i.e. every training row carries weight 1 / num_samples.
num_samples = len(X_train)
sample_weights = np.full(num_samples, 1.0) / num_samples
clf = DecisionTreeClassifier(max_depth=1).fit(
    X_train, Y_train, sample_weight=sample_weights
)

# Stump predictions on the test set (debug print intentionally left disabled)
y_pred = clf.predict(X_test)
# print("Predictions:", y_pred)

######################################################
# Everything above this separator line can be reused as-is.


# A: Weighted weak linear classifier (wwlc). A depth-1 scikit-learn decision
#    tree stands in for it here; we still need to write our own implementation.
wwlc = DecisionTreeClassifier(max_depth=1)

# B: AdaBoost algorithm. scikit-learn's AdaBoostClassifier is the reference;
#    we still need to write our own implementation, including fit and predict.
boost_settings = {
    'estimator': wwlc,
    'n_estimators': num_classifiers,
    'algorithm': 'SAMME',
    'learning_rate': 1.0,
    'random_state': 42,
}
ada_boost = AdaBoostClassifier(**boost_settings)

# Train the ensemble on the training split
ada_boost.fit(X_train, Y_train)

# Predict labels for the test split
y_pred = ada_boost.predict(X_test)

# Fraction of test labels predicted correctly
accuracy = accuracy_score(y_true=Y_test, y_pred=y_pred)
print(f"Sktlearn AdaBoost Classifier Accuracy: {accuracy:.4f}")

# For A:
# Step 1 - run the code above and observe how the scikit-learn DecisionTreeClassifier and AdaBoostClassifier behave.
# Step 2 - replace DecisionTreeClassifier with our wwlc implementation, and test it using the scikit-learn AdaBoostClassifier.
# Step 3 - replace AdaBoostClassifier with our own implementation of the AdaBoost algorithm and test it.
#          If needed, split this step: 3a. test our algorithm with the scikit-learn DecisionTreeClassifier; 3b. test our algorithm with our wwlc.
# Note: the accuracy score printed by the code above is 0.9683. That should be our target.




Sktlearn AdaBoost Classifier Accuracy: 0.9683
