# AdaBoost Classifier

In [4]:
import pandas as pd
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Load the dataset (replace with your dataset)
# Example: Telco Customer Churn dataset
data = pd.read_csv("https://raw.githubusercontent.com/treselle-systems/customer_churn_analysis/refs/heads/master/WA_Fn-UseC_-Telco-Customer-Churn.csv")

# Preprocess the data (simplified example)
X = data.drop(columns=["Churn"])  # Features
y = data["Churn"]  # Target (Churn: Yes/No)

# Convert categorical variables to numerical (if needed)
X = pd.get_dummies(X, drop_first=True)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Create an AdaBoost model with Decision Stumps
ada = AdaBoostClassifier(random_state=42)

# Train the model
ada.fit(X_train, y_train)

# Make predictions
y_pred = ada.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy: 0.7979176526265973
Classification Report:
               precision    recall  f1-score   support

          No       0.82      0.92      0.87      1539
         Yes       0.68      0.48      0.56       574

    accuracy                           0.80      2113
   macro avg       0.75      0.70      0.72      2113
weighted avg       0.79      0.80      0.79      2113



In [2]:
data

Unnamed: 0,customerID,gender,SeniorCitizen,Partner,Dependents,tenure,PhoneService,MultipleLines,InternetService,OnlineSecurity,...,DeviceProtection,TechSupport,StreamingTV,StreamingMovies,Contract,PaperlessBilling,PaymentMethod,MonthlyCharges,TotalCharges,Churn
0,7590-VHVEG,Female,0,Yes,No,1,No,No phone service,DSL,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.85,29.85,No
1,5575-GNVDE,Male,0,No,No,34,Yes,No,DSL,Yes,...,Yes,No,No,No,One year,No,Mailed check,56.95,1889.5,No
2,3668-QPYBK,Male,0,No,No,2,Yes,No,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Mailed check,53.85,108.15,Yes
3,7795-CFOCW,Male,0,No,No,45,No,No phone service,DSL,Yes,...,Yes,Yes,No,No,One year,No,Bank transfer (automatic),42.30,1840.75,No
4,9237-HQITU,Female,0,No,No,2,Yes,No,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Electronic check,70.70,151.65,Yes
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7038,6840-RESVB,Male,0,Yes,Yes,24,Yes,Yes,DSL,Yes,...,Yes,Yes,Yes,Yes,One year,Yes,Mailed check,84.80,1990.5,No
7039,2234-XADUH,Female,0,Yes,Yes,72,Yes,Yes,Fiber optic,No,...,Yes,No,Yes,Yes,One year,Yes,Credit card (automatic),103.20,7362.9,No
7040,4801-JZAZL,Female,0,Yes,Yes,11,No,No phone service,DSL,Yes,...,No,No,No,No,Month-to-month,Yes,Electronic check,29.60,346.45,No
7041,8361-LTMKD,Male,1,Yes,No,4,Yes,Yes,Fiber optic,No,...,No,No,No,No,Month-to-month,Yes,Mailed check,74.40,306.6,Yes


# AdaBoost Regression

In [25]:
# Step 1: Import necessary libraries
import pandas as pd
from sklearn.ensemble import AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score

# Step 2: Load the dataset
from sklearn.datasets import fetch_california_housing
california = fetch_california_housing()

# Convert the dataset into a pandas DataFrame for better visualization
X = pd.DataFrame(california.data, columns=california.feature_names)  # Features
y = pd.Series(california.target)  # Target (house prices)

# Step 3: Split the data into training and testing sets
# 80% of the data is used for training, and 20% is used for testing
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Create an AdaBoostRegressor model
# base_estimator = The weak model (Decision Tree with max_depth=3)
# n_estimators = Number of weak models to train
ada = AdaBoostRegressor(n_estimators=100, learning_rate=0.1,random_state=42)

# Step 5: Train the model
ada.fit(X_train, y_train)

# Step 6: Make predictions
y_pred = ada.predict(X_test)

# Step 7: Evaluate the model
# Mean Squared Error (MSE): Lower is better
mse = mean_squared_error(y_test, y_pred)
# R² Score: Closer to 1 is better
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("R² Score:", r2)


Mean Squared Error: 0.568195768061393
R² Score: 0.5663981417281327


In [26]:
from sklearn.model_selection import GridSearchCV

param_grid = {
    'n_estimators': [50, 100, 200],
    'learning_rate': [0.01, 0.1, 0.5]
}

grid_search = GridSearchCV(AdaBoostRegressor(), param_grid, cv=5, scoring='r2', n_jobs=-1)
grid_search.fit(X_train, y_train)

print(f"\nBest Parameters: {grid_search.best_params_}")
best_ada_reg = grid_search.best_estimator_
r2_best = best_ada_reg.score(X_test, y_test)
print(f"Test R² with Best Model: {r2_best:.4f}")


Best Parameters: {'learning_rate': 0.1, 'n_estimators': 50}
Test R² with Best Model: 0.5684


In [55]:
# Step 1: Import necessary libraries
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.datasets import fetch_california_housing

# Step 2: Load the dataset
california = fetch_california_housing()

# Convert the dataset into a pandas DataFrame for better visualization
X = pd.DataFrame(california.data, columns=california.feature_names)  # Features
y = pd.Series(california.target)  # Target (house prices)

# Step 3: Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Create an XGBoost Regressor model
ada = xgb.XGBRegressor(n_estimators=500, learning_rate=0.05, max_depth=8)

# Step 5: Train the model
ada.fit(X_train, y_train)

# Step 6: Make predictions
y_pred = ada.predict(X_test)

# Step 7: Evaluate the model
# Mean Squared Error (MSE): Lower is better
mse = mean_squared_error(y_test, y_pred)

# R² Score: Closer to 1 is better
r2 = r2_score(y_test, y_pred)

# Print the results
print("Mean Squared Error:", mse)
print("R² Score:", r2)


Mean Squared Error: 0.2031830983179499
R² Score: 0.8449468053929972
