## GBM

In [None]:
from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# --- Data -------------------------------------------------------------------
# Synthetic classification problem: 1000 rows x 20 features, of which 15 are
# informative and the remaining 5 are redundant. Seeded for repeatability.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42,
)

# --- Hold-out split ---------------------------------------------------------
# 20% of the rows are reserved for testing; the seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# --- Model ------------------------------------------------------------------
# 100 boosting iterations of depth-3 trees, each tree's contribution shrunk
# by learning_rate=0.1. fit() hands back the fitted estimator, so building
# and training are chained into a single expression.
model = GradientBoostingClassifier(
    random_state=42,
    max_depth=3,
    learning_rate=0.1,
    n_estimators=100,
).fit(X_train, y_train)

# --- Evaluation on the held-out rows ----------------------------------------
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))

Accuracy: 0.91
              precision    recall  f1-score   support

           0       0.91      0.92      0.92       106
           1       0.91      0.89      0.90        94

    accuracy                           0.91       200
   macro avg       0.91      0.91      0.91       200
weighted avg       0.91      0.91      0.91       200



In [18]:
from sklearn.datasets import fetch_california_housing
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# --- Data -------------------------------------------------------------------
# California housing: pull the feature matrix and target out of the returned
# Bunch by key.
data = fetch_california_housing()
X, y = data["data"], data["target"]

# --- Hold-out split (80% train / 20% test, seeded) ---------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# --- Model ------------------------------------------------------------------
# Same boosting configuration as the classifier above: 100 depth-3 trees with
# a shrinkage factor of 0.1.
model = GradientBoostingRegressor(
    random_state=42,
    max_depth=3,
    learning_rate=0.1,
    n_estimators=100,
)
model.fit(X_train, y_train)

# --- Evaluation -------------------------------------------------------------
# Compute each metric once, then report them with four decimals.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R² Score: {r2:.4f}")


Mean Squared Error (MSE): 0.2940
Mean Absolute Error (MAE): 0.3716
R² Score: 0.7756


## AdaBoost

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier

# --- Data -------------------------------------------------------------------
# Same seeded synthetic problem as the other classification cells:
# 1000 rows, 20 features (15 informative + 5 redundant).
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42,
)

# --- Hold-out split (20% test, seeded) ---------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# --- Model ------------------------------------------------------------------
# The weak learner is a decision stump (a tree limited to depth 1).
base_learner = DecisionTreeClassifier(max_depth=1)

# 50 stumps are boosted; learning_rate scales each learner's contribution.
model = AdaBoostClassifier(
    estimator=base_learner,
    random_state=42,
    learning_rate=1.0,
    n_estimators=50,
).fit(X_train, y_train)

# --- Evaluation on the held-out rows ----------------------------------------
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))


Accuracy: 0.82
              precision    recall  f1-score   support

           0       0.84      0.82      0.83       106
           1       0.80      0.82      0.81        94

    accuracy                           0.82       200
   macro avg       0.82      0.82      0.82       200
weighted avg       0.82      0.82      0.82       200



In [None]:
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor

# --- Data -------------------------------------------------------------------
# California housing features and target, read from the Bunch by key.
data = fetch_california_housing()
X, y = data["data"], data["target"]

# --- Hold-out split (80% train / 20% test, seeded) ---------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# --- Model ------------------------------------------------------------------
# Base learner: a seeded regression tree capped at depth 3.
base_learner = DecisionTreeRegressor(random_state=42, max_depth=3)

# Boost 100 copies of the base learner with a learning rate of 0.1.
model = AdaBoostRegressor(
    estimator=base_learner,
    random_state=42,
    learning_rate=0.1,
    n_estimators=100,
)
model.fit(X_train, y_train)

# --- Evaluation -------------------------------------------------------------
# Compute each metric once, then report them with four decimals.
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"R² Score: {r2:.4f}")


Mean Squared Error (MSE): 0.5682
Mean Absolute Error (MAE): 0.6068
R² Score: 0.5664


## XGBoost

In [None]:
import xgboost as xgb

# --- Data -------------------------------------------------------------------
# Seeded synthetic classification data: 1000 rows, 20 features
# (15 informative + 5 redundant).
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42,
)

# --- Hold-out split (20% test, seeded) ---------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# --- Model ------------------------------------------------------------------
# 100 trees of depth 5, learning rate 0.1, log-loss as the evaluation metric.
model = xgb.XGBClassifier(
    random_state=42,
    eval_metric='logloss',
    learning_rate=0.1,
    max_depth=5,
    n_estimators=100,
)
model.fit(X_train, y_train)

# --- Evaluation on the held-out rows ----------------------------------------
y_pred = model.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {acc:.4f}")
print("Classification Report:")
print(classification_report(y_true=y_test, y_pred=y_pred))


Accuracy: 0.9200
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.92      0.92       106
           1       0.91      0.91      0.91        94

    accuracy                           0.92       200
   macro avg       0.92      0.92      0.92       200
weighted avg       0.92      0.92      0.92       200



In [22]:
# --- Data -------------------------------------------------------------------
# California housing features and target, read from the Bunch by key.
data = fetch_california_housing()
X, y = data["data"], data["target"]

# --- Hold-out split (80% train / 20% test, seeded) ---------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# --- Model ------------------------------------------------------------------
# Squared-error objective; 100 trees of depth 6 with learning rate 0.1.
model = xgb.XGBRegressor(
    objective='reg:squarederror',
    random_state=42,
    learning_rate=0.1,
    max_depth=6,
    n_estimators=100,
)
model.fit(X_train, y_train)

# --- Evaluation on the held-out rows ----------------------------------------
y_pred = model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse:.4f}")
print(f"R^2 Score: {r2:.4f}")


Mean Squared Error: 0.2273
R^2 Score: 0.8266


## LightGBM

In [23]:
import lightgbm as lgb

# 1. Generate synthetic binary classification data
#    (1000 rows, 20 features: 15 informative + 5 redundant; seeded).
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    n_classes=2,
    random_state=42
)

# 2. Split into train/test sets (20% held out, seeded)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# 3. Create the LightGBM training dataset.
#    No Dataset is built for the test split: training below uses neither a
#    validation set nor early stopping, so such an object would never be read.
train_data = lgb.Dataset(X_train, label=y_train)

# 4. Set parameters
params = {
    'objective': 'binary',
    'metric': 'binary_logloss',
    'learning_rate': 0.1,
    'num_leaves': 31,
    'seed': 42
}

# 5. Train the model for a fixed 100 boosting rounds
model = lgb.train(
    params,
    train_data,
    num_boost_round=100,
)

# 6. Predict class labels (threshold = 0.5).
#    No early stopping was configured, so there is no meaningful
#    `best_iteration` to select; predict() is left at its default, which uses
#    all trained rounds.
y_pred_proba = model.predict(X_test)
y_pred = (y_pred_proba >= 0.5).astype(int)

# 7. Evaluate performance
acc = accuracy_score(y_test, y_pred)
print(f"Accuracy: {acc:.4f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.9100
Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.92      0.92       106
           1       0.90      0.90      0.90        94

    accuracy                           0.91       200
   macro avg       0.91      0.91      0.91       200
weighted avg       0.91      0.91      0.91       200



In [None]:
# Load dataset (California housing features and target)
data = fetch_california_housing()
X, y = data.data, data.target

# Split train/test (20% held out, seeded)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Create the LightGBM training dataset.
# No Dataset is built for the test split: training below uses neither a
# validation set nor early stopping, so such an object would never be read.
train_data = lgb.Dataset(X_train, label=y_train)

# Parameters
params = {
    'objective': 'regression',
    'metric': 'rmse',
    'learning_rate': 0.1,
    'num_leaves': 31,
    'seed': 42
}

# Train model for a fixed 100 boosting rounds
model = lgb.train(
    params,
    train_data,
    num_boost_round=100
)

# Predict.
# No early stopping was configured, so there is no meaningful
# `best_iteration` to select; predict() is left at its default, which uses
# all trained rounds.
y_pred = model.predict(X_test)

# Evaluate (RMSE is the square root of the MSE)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f"RMSE: {mse**0.5:.4f}")
print(f"R^2 Score: {r2:.4f}")


RMSE: 0.4635
R^2 Score: 0.8360


## CatBoost

In [None]:
from catboost import CatBoostClassifier

# --- Data -------------------------------------------------------------------
# Seeded synthetic classification data: 1000 rows, 20 features
# (15 informative + 5 redundant), followed by a seeded 80/20 split.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=15,
    n_redundant=5,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# --- Model ------------------------------------------------------------------
# 100 iterations of depth-6 trees, learning rate 0.1; training log silenced.
model = CatBoostClassifier(
    random_seed=42,
    verbose=False,
    depth=6,
    learning_rate=0.1,
    iterations=100,
)
model.fit(X_train, y_train)

# --- Evaluation on the held-out rows ----------------------------------------
# NOTE(review): the output saved with this cell reports class support 93/107,
# while the other cells using this same seeded data and split report 106/94.
# The saved output looks stale; re-run the cell to confirm.
y_pred = model.predict(X_test)
acc = accuracy_score(y_true=y_test, y_pred=y_pred)

print(f"Accuracy: {acc:.4f}")
print("Classification Report:")
print(classification_report(y_true=y_test, y_pred=y_pred))


Accuracy: 0.8900
Classification Report:
              precision    recall  f1-score   support

           0       0.84      0.94      0.89        93
           1       0.94      0.85      0.89       107

    accuracy                           0.89       200
   macro avg       0.89      0.89      0.89       200
weighted avg       0.89      0.89      0.89       200



In [26]:
from catboost import CatBoostRegressor

# --- Data -------------------------------------------------------------------
# California housing, requested as pandas objects (as_frame=True).
data = fetch_california_housing(as_frame=True)
X, y = data.data, data.target

# --- Hold-out split (80% train / 20% test, seeded) ---------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# --- Model ------------------------------------------------------------------
# 200 iterations of depth-6 trees, learning rate 0.1; verbose=100 is kept
# from the original configuration, so training progress is still logged.
model = CatBoostRegressor(
    random_seed=42,
    verbose=100,
    depth=6,
    learning_rate=0.1,
    iterations=200,
)
model.fit(X_train, y_train)

# --- Evaluation -------------------------------------------------------------
# RMSE is reported as the square root of the MSE.
y_pred = model.predict(X_test)
mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f"RMSE: {mse**0.5:.4f}")
print(f"R^2 Score: {r2:.4f}")


0:	learn: 1.0934740	total: 2.23ms	remaining: 444ms
100:	learn: 0.4867395	total: 160ms	remaining: 157ms
199:	learn: 0.4323190	total: 315ms	remaining: 0us
RMSE: 0.4799
R^2 Score: 0.8243
