# **GRADIENT BOOSTING FRAMEWORK**

In [None]:
import warnings
warnings.filterwarnings('ignore')

# Load dataset

In [None]:
import pandas as pd
from sklearn.datasets import load_breast_cancer,fetch_california_housing

# Regression data: California housing, loaded as a Bunch whose `frame`
# holds the feature columns together with the `MedHouseVal` target.
california_housing = fetch_california_housing(as_frame=True)
housing_df = california_housing["frame"]

# Classification data: breast-cancer dataset, loaded the same way; its
# frame carries the label in the `target` column.
breast_cancer = load_breast_cancer(as_frame=True)
Bcancer_df = breast_cancer["frame"]

# Preview the first rows of each frame under a short label.
print("REGRESSION")
display(housing_df.head())
print("\nCLASSIFICATION")
display(Bcancer_df.head())

In [None]:
# Show the dataset's bundled description instead of echoing the whole Bunch,
# which would dump every array and DataFrame it contains into the output.
print(california_housing.DESCR)

# shape

In [None]:
# Report (rows, columns) for both frames before any columns are removed.
for label, frame in (("reg", housing_df), ("cls", Bcancer_df)):
    print(label, frame.shape)

# info

In [None]:
# Print the column summary (dtypes, non-null counts) of the regression frame.
housing_df.info()

In [None]:
# Print the column summary (dtypes, non-null counts) of the classification frame.
Bcancer_df.info()

In [None]:
# Median of the regression target; displayed as the cell's output.
housing_df["MedHouseVal"].median()

# remove noise columns

In [None]:
# Drop the columns this notebook treats as noise.
# Reassigning (rather than inplace=True) and passing errors="ignore" keeps the
# cell idempotent: re-running it no longer raises KeyError once the columns
# have already been removed.
housing_df = housing_df.drop(
    columns=["Population", "Latitude", "Longitude"],
    errors="ignore",
)

In [None]:
# Remove, in place, every column whose name contains the substring "error".
Bcancer_df.drop(columns=Bcancer_df.filter(like="error").columns, inplace=True)

In [None]:
# List the columns that remain in the classification frame.
Bcancer_df.columns

In [None]:
# Report (rows, columns) again to confirm the effect of the column removal.
for tag, table in [("reg", housing_df), ("cls", Bcancer_df)]:
    print(tag, table.shape)

# train test split

In [None]:
from sklearn.model_selection import train_test_split

# Both datasets use the same 80/20 split with a fixed seed.
split_options = {"test_size": 0.2, "random_state": 42}

# Regression: predict `MedHouseVal` from the remaining housing columns.
y_housing = housing_df['MedHouseVal']
X_housing = housing_df.drop(columns=['MedHouseVal'])
(
    X_train_housing,
    X_test_housing,
    y_train_housing,
    y_test_housing,
) = train_test_split(X_housing, y_housing, **split_options)

# Classification: predict `target` from the remaining breast-cancer columns.
y_bcancer = Bcancer_df['target']
X_bcancer = Bcancer_df.drop(columns=['target'])
(
    X_train_bcancer,
    X_test_bcancer,
    y_train_bcancer,
    y_test_bcancer,
) = train_test_split(X_bcancer, y_bcancer, **split_options)

# XGBoost

REGRESSION

In [None]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error, r2_score

# Fit a gradient-boosted regressor on the housing training split.
xgb_reg = xgb.XGBRegressor(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)
xgb_reg.fit(X_train_housing, y_train_housing)

# Score the held-out split: compute the metrics first, then report them.
y_pred = xgb_reg.predict(X_test_housing)
xgb_reg_mse = mean_squared_error(y_test_housing, y_pred)
xgb_reg_r2 = r2_score(y_test_housing, y_pred)
print("XGBoost Regression MSE:", xgb_reg_mse)
print("R2:", xgb_reg_r2)

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from xgboost import plot_tree

# Draw the first boosted tree on an explicitly sized axes.
# xgboost's plot_tree opens its own default-size figure when no `ax` is given,
# so a preceding plt.figure(figsize=...) is left empty and the requested size
# is ignored; passing `ax` makes the figsize and the title apply to the tree.
# Calling it as xgb.plot_tree also avoids picking up lightgbm's plot_tree,
# which rebinds the bare name later in the notebook.
fig, ax = plt.subplots(figsize=(20, 10))
xgb.plot_tree(xgb_reg, num_trees=0, ax=ax)
ax.set_title("XGBoost Regression Tree (Housing)")
plt.show()

CLASSIFICATION

In [None]:
from sklearn.metrics import accuracy_score

# Fit a gradient-boosted classifier on the breast-cancer training split.
xgb_clf = xgb.XGBClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
)
xgb_clf.fit(X_train_bcancer, y_train_bcancer)

# Predict labels for the held-out split and report the accuracy.
y_pred_c = xgb_clf.predict(X_test_bcancer)
xgb_clf_accuracy = accuracy_score(y_test_bcancer, y_pred_c)
print("XGBoost Classification Accuracy:", xgb_clf_accuracy)


In [None]:
# Draw the classifier's first boosted tree on an explicitly sized axes.
# xgboost's plot_tree opens its own default-size figure when no `ax` is given,
# so a preceding plt.figure(figsize=...) is left empty and the requested size
# is ignored. Using xgb.plot_tree (not the bare name) keeps this cell working
# even after `from lightgbm import plot_tree` has rebound `plot_tree`.
fig, ax = plt.subplots(figsize=(15, 8))
xgb.plot_tree(xgb_clf, num_trees=0, ax=ax)
ax.set_title("XGBoost Classification Tree (Breast Cancer)")
plt.show()

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the XGBoost classifier on the held-out split,
# drawn as an annotated heatmap with integer cell labels.
cm = confusion_matrix(y_test_bcancer, y_pred_c)
heatmap_ax = sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
heatmap_ax.set_title("XGBoost Classification Confusion Matrix")
heatmap_ax.set_xlabel("Predicted")
heatmap_ax.set_ylabel("Actual")
plt.show()

# LightGBM

REGRESSION

In [None]:
import lightgbm as lgb
from sklearn.metrics import mean_squared_error, r2_score

# Hyperparameters for the LightGBM regressor; verbose=-1 silences its log output.
lgb_reg_params = {
    "n_estimators": 200,
    "learning_rate": 0.05,
    "max_depth": -1,
    "num_leaves": 31,
    "min_child_samples": 5,
    "random_state": 42,
    "verbose": -1,
}
lgb_reg = lgb.LGBMRegressor(**lgb_reg_params)
lgb_reg.fit(X_train_housing, y_train_housing)

# Score the held-out housing split.
y_pred_L = lgb_reg.predict(X_test_housing)
lgb_reg_mse = mean_squared_error(y_test_housing, y_pred_L)
lgb_reg_r2 = r2_score(y_test_housing, y_pred_L)
print("LightGBM Regression MSE:", lgb_reg_mse)
print("R2:", lgb_reg_r2)


In [None]:
import matplotlib.pyplot as plt
import lightgbm as lgb
from lightgbm import plot_tree

# Plot the regressor's first tree; the title is set on the returned axes.
ax = lgb.plot_tree(lgb_reg, tree_index=0, figsize=(15, 8))
ax.set_title("LightGBM Regression Tree (Housing)")
plt.show()

CLASSIFICATION

In [None]:

# Fit a LightGBM classifier on the breast-cancer training split.
# verbose=-1 matches the regressor above and keeps LightGBM's own log lines
# out of the cell output (they are not Python warnings, so the notebook's
# warnings filter does not hide them).
lgb_clf = lgb.LGBMClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=42,
    verbose=-1,
)
lgb_clf.fit(X_train_bcancer, y_train_bcancer)

# Predict labels for the held-out split and report the accuracy.
y_pred_LC = lgb_clf.predict(X_test_bcancer)
print("LightGBM Classification Accuracy:", accuracy_score(y_test_bcancer, y_pred_LC))


In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the LightGBM classifier on the held-out split.
# The title previously read "XGBoost ..." (copy-paste slip); it now names the
# model whose predictions (y_pred_LC) are actually plotted.
cm = confusion_matrix(y_test_bcancer, y_pred_LC)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.title("LightGBM Classification Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

In [None]:
# Plot the classifier's first tree; the title is set on the returned axes.
ax = lgb.plot_tree(lgb_clf, tree_index=0, figsize=(15, 8))
ax.set_title("LightGBM Classification Tree (Breast Cancer)")
plt.show()

# CatBoost

In [None]:
pip install catboost

REGRESSION

In [None]:
from catboost import CatBoostRegressor

# Fit a CatBoost regressor on the housing training split (verbose=0: no training log).
cat_reg = CatBoostRegressor(
    iterations=100,
    learning_rate=0.1,
    depth=3,
    verbose=0,
    random_state=42,
)
cat_reg.fit(X_train_housing, y_train_housing)

# Score the held-out housing split.
y_pred_cat = cat_reg.predict(X_test_housing)
cat_reg_mse = mean_squared_error(y_test_housing, y_pred_cat)
cat_reg_r2 = r2_score(y_test_housing, y_pred_cat)
print("CatBoost Regression MSE:", cat_reg_mse)
print("R2:", cat_reg_r2)


In [None]:
# Show the first tree (index 0) of the fitted CatBoost regressor as the cell output.
cat_reg.plot_tree(tree_idx=0)

CLASSIFICATION

In [None]:
from catboost import CatBoostClassifier


# Fit a CatBoost classifier on the breast-cancer training split (verbose=0: no training log).
cat_clf = CatBoostClassifier(
    iterations=100,
    learning_rate=0.1,
    depth=3,
    verbose=0,
    random_state=42,
)
cat_clf.fit(X_train_bcancer, y_train_bcancer)

# Predict labels for the held-out split and report the accuracy.
y_pred_catc = cat_clf.predict(X_test_bcancer)
cat_clf_accuracy = accuracy_score(y_test_bcancer, y_pred_catc)
print("CatBoost Classification Accuracy:", cat_clf_accuracy)


In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the CatBoost classifier on the held-out split.
# The title previously read "XGBoost ..." (copy-paste slip); it now names the
# model whose predictions (y_pred_catc) are actually plotted.
cm = confusion_matrix(y_test_bcancer, y_pred_catc)
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues")
plt.title("CatBoost Classification Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

In [None]:
# Show the first tree (index 0) of the fitted CatBoost classifier as the cell output.
cat_clf.plot_tree(tree_idx=0)