## Run the code cell below to load the dataset in Colab

In [1]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, r2_score, classification_report
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingRegressor
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier

# Load the breast cancer dataset from scikit-learn and expose the feature
# matrix as a DataFrame with named columns; the target stays a plain array.
data = load_breast_cancer()
y = data.target
X = pd.DataFrame(data=data.data, columns=data.feature_names)

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


## Task 1.1
### Train an AdaBoostClassifier with n_estimators=1 (just one tree).

## Task 1.2
### Train an AdaBoostClassifier with n_estimators=50 (multiple trees).

## Task 1.3
### Compare the two models: which one is better?

In [5]:
# Weak learner used by the AdaBoost ensembles: a depth-1 decision tree.
base_learner = DecisionTreeClassifier(random_state=42, max_depth=1)

# Task 1.1: AdaBoost limited to a single estimator.
model = AdaBoostClassifier(
    estimator=base_learner,
    n_estimators=1,
    learning_rate=0.3,
    random_state=42,
)

# Kept as the bare last expression so the notebook displays the fitted estimator.
model.fit(X_train, y_train)

In [6]:
# Evaluate the single-estimator AdaBoost model on the held-out test split.
y_pred_ada = model.predict(X_test)
print('AdaBoost Accuracy: ', accuracy_score(y_true=y_test, y_pred=y_pred_ada))

AdaBoost Accuracy:  0.8947368421052632


In [7]:
# Task 1.2: same weak learner and learning rate, but 50 estimators.
model_2 = AdaBoostClassifier(
    estimator=base_learner,
    n_estimators=50,
    learning_rate=0.3,
    random_state=42,
)
model_2.fit(X_train, y_train)

# Evaluate the 50-estimator ensemble on the held-out test split.
y_pred_ada_2 = model_2.predict(X_test)
print('AdaBoost Accuracy: ', accuracy_score(y_true=y_test, y_pred=y_pred_ada_2))

AdaBoost Accuracy:  0.9649122807017544


In [8]:

# Task 1.3: per-class precision/recall/F1 for both AdaBoost models, printed
# in the same order as they were trained.
ada_reports = (
    ("\nClassification Report (n_estimators=1)", y_pred_ada),
    ("\nClassification Report (n_estimators=50)", y_pred_ada_2),
)
for heading, predictions in ada_reports:
    print(heading)
    print(classification_report(y_test, predictions))



Classification Report (n_estimators=1)
              precision    recall  f1-score   support

           0       0.83      0.91      0.87        43
           1       0.94      0.89      0.91        71

    accuracy                           0.89       114
   macro avg       0.89      0.90      0.89       114
weighted avg       0.90      0.89      0.90       114


Classification Report (n_estimators=50)
              precision    recall  f1-score   support

           0       0.98      0.93      0.95        43
           1       0.96      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.97      0.96      0.96       114
weighted avg       0.97      0.96      0.96       114



## Task 2.1
### Train a Gradient Boosting model with both low and high learning rates.

## Task 2.2
### Compare a "Fast Learner" (high learning rate) vs. a "Slow Learner" (low learning rate).

In [9]:
# Task 2: sweep the learning rate of a gradient-boosted model while keeping
# the number of trees and the tree depth fixed.
learning_rates = [0.01, 0.05, 0.1, 0.2, 0.8]
results = []

# NOTE(review): y holds the 0/1 class labels, so this fits a regressor to
# class labels and scores it with R2. A classifier scored with accuracy may
# be the intended comparison -- confirm against the task.
for lr in learning_rates:
    # Use a dedicated name rather than `model`, so the fitted AdaBoost model
    # from the earlier cell is not overwritten and its evaluation cell can
    # still be re-run after this one.
    gb_model = GradientBoostingRegressor(
        n_estimators=100,
        learning_rate=lr,
        max_depth=3,
        random_state=42,
    )
    gb_model.fit(X_train, y_train)
    preds = gb_model.predict(X_test)
    results.append((lr, r2_score(y_test, preds)))

# One row per learning rate; the bare expression is the cell's displayed output.
pd.DataFrame(results, columns=["Learning Rate", "R2 Score"])

Unnamed: 0,Learning Rate,R2 Score
0,0.01,0.740175
1,0.05,0.862813
2,0.1,0.866877
3,0.2,0.849819
4,0.8,0.787427


## Task 3.1
### Train an XGBoost model with both a shallow and a deep tree.

## Task 3.2
### Compare a "Shallow" tree (depth=2) vs. a "Deep" tree (depth=15).

In [10]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

def _fit_and_score_xgb(max_depth):
    """Fit an XGBClassifier with the given tree depth and score it on the test split.

    Reads ``X_train``, ``y_train``, ``X_test`` and ``y_test`` from the notebook
    namespace. All hyperparameters other than ``max_depth`` are fixed, so the
    two models being compared differ only in tree depth.

    Args:
        max_depth: Value passed to ``XGBClassifier(max_depth=...)``.

    Returns:
        A tuple ``(model, predictions, accuracy)``: the fitted classifier, its
        predictions for ``X_test``, and the accuracy of those predictions
        against ``y_test``.
    """
    clf = XGBClassifier(
        max_depth=max_depth,
        n_estimators=100,
        learning_rate=0.1,
        eval_metric='logloss',
        random_state=42
    )
    clf.fit(X_train, y_train)
    predictions = clf.predict(X_test)
    return clf, predictions, accuracy_score(y_test, predictions)


# SHALLOW tree (depth = 2)
xgb_shallow, y_pred_shallow, acc_shallow = _fit_and_score_xgb(2)

# DEEP tree (depth = 15)
xgb_deep, y_pred_deep, acc_deep = _fit_and_score_xgb(15)

print("Accuracy (Shallow Tree):", acc_shallow)
print("Accuracy (Deep Tree):", acc_deep)


Accuracy (Shallow Tree): 0.956140350877193
Accuracy (Deep Tree): 0.956140350877193


In [11]:
# Per-class precision/recall/F1 for the shallow and the deep XGBoost models.
xgb_reports = (
    ("\nClassification Report {SHALLOW tree (depth = 2)}", y_pred_shallow),
    ("\nClassification Report {DEEP tree (depth = 15)}", y_pred_deep),
)
for report_heading, report_predictions in xgb_reports:
    print(report_heading)
    print(classification_report(y_test, report_predictions))


Classification Report {SHALLOW tree (depth = 2)}
              precision    recall  f1-score   support

           0       0.95      0.93      0.94        43
           1       0.96      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114


Classification Report {DEEP tree (depth = 15)}
              precision    recall  f1-score   support

           0       0.95      0.93      0.94        43
           1       0.96      0.97      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114

