<a href="https://colab.research.google.com/github/Subhranshu-123/BIKE2/blob/main/Copy_of_ENSEMBLE_METHOD.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# --- Bagging demo on synthetic data ---
# Build a seeded binary-classification toy problem: 1000 rows, 20 columns,
# of which 10 are informative and 5 are redundant.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    random_state=42,
)

# Hold out 30% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

# The learner each ensemble member is built from (a decision tree here).
base_estimator = DecisionTreeClassifier(random_state=42)

# Ensemble of 10 copies of the base estimator, each fit on a random subset
# of the training data. `fit` returns the estimator itself, so construction
# and training are chained.
bagging_model = BaggingClassifier(
    estimator=base_estimator,
    n_estimators=10,
    random_state=42,
).fit(X_train, y_train)

# Score the ensemble on the held-out rows.
y_pred = bagging_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Bagging Classifier Accuracy: {accuracy:.4f}")

Bagging Classifier Accuracy: 0.8700


In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

# --- AdaBoost demo on synthetic data ---
# Seeded toy dataset (1000 rows, 10 features) with a 20% hold-out split.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# AdaBoost over depth-1 trees (the weak learner).
# Tunables: n_estimators = number of boosting stages, learning_rate.
adaboost_model = AdaBoostClassifier(
    estimator=DecisionTreeClassifier(max_depth=1),
    n_estimators=100,
    learning_rate=1.0,
    random_state=42,
).fit(X_train, y_train)  # fit returns the estimator, so training is chained

# Predict on the hold-out set and report accuracy via the estimator's own
# `score` method.
y_pred = adaboost_model.predict(X_test)
score = adaboost_model.score(X_test, y_test)
print(f"AdaBoost Accuracy: {score:.3f}")

AdaBoost Accuracy: 0.850


In [None]:
import numpy as np
from sklearn.ensemble import GradientBoostingClassifier, GradientBoostingRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.datasets import make_classification, make_regression

# Single seed shared by the gradient-boosting cells below: it is passed as
# `random_state` to the dataset generators, the train/test splits, and the
# models so that re-running the notebook reproduces the same results.
RANDOM_SEED = 42


In [None]:
# --- Gradient boosting: classification ---
# Synthetic dataset: 1000 rows, 20 features (15 informative + 5 redundant).
X_class, y_class = make_classification(
    n_samples=1000, n_features=20, n_informative=15, n_redundant=5,
    random_state=RANDOM_SEED,
)

# 70/30 train/test split, seeded for reproducibility.
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(
    X_class, y_class, test_size=0.3, random_state=RANDOM_SEED
)

# Build and train the classifier in one expression (`fit` returns the model).
#   n_estimators  -> number of boosting stages
#   learning_rate -> step size shrinkage
#   max_depth     -> maximum depth of the individual regression estimators
gb_classifier = GradientBoostingClassifier(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=RANDOM_SEED,
).fit(X_train_class, y_train_class)

# Predict the held-out labels and report accuracy.
y_pred_class = gb_classifier.predict(X_test_class)
accuracy = accuracy_score(y_test_class, y_pred_class)
print(f"Gradient Boosting Classifier Accuracy: {accuracy:.4f}")


Gradient Boosting Classifier Accuracy: 0.8867


In [None]:
# --- Gradient boosting: regression ---
# Synthetic regression problem: 1000 rows, 20 features (15 informative),
# generated with noise=0.1.
X_reg, y_reg = make_regression(
    n_samples=1000, n_features=20, n_informative=15, noise=0.1,
    random_state=RANDOM_SEED,
)

# 70/30 train/test split, seeded for reproducibility.
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(
    X_reg, y_reg, test_size=0.3, random_state=RANDOM_SEED
)

# Build and train the regressor in one expression (`fit` returns the model).
# `loss='squared_error'` is the loss function being optimized.
gb_regressor = GradientBoostingRegressor(
    loss='squared_error',
    n_estimators=100,
    learning_rate=0.1,
    max_depth=3,
    random_state=RANDOM_SEED,
).fit(X_train_reg, y_train_reg)

# Predict the held-out targets and report the mean squared error
# (expressed in squared units of the target).
y_pred_reg = gb_regressor.predict(X_test_reg)
mse = mean_squared_error(y_test_reg, y_pred_reg)
print(f"Gradient Boosting Regressor Mean Squared Error: {mse:.4f}")


Gradient Boosting Regressor Mean Squared Error: 8988.5796


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Read the student dataset from the Colab working directory.
# The file is expected to contain a 'height' column, which is used as the
# dependent variable; every other column is treated as a feature.
data = pd.read_csv('/content/students.csv')

y = data['height']                   # target variable
X = data.drop(columns='height')      # features: everything except the target

# Seeded 80/20 train/test split.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest classifier: 100 trees, seeded so repeated runs match.
rf_classifier = RandomForestClassifier(
    n_estimators=100,   # number of trees in the forest
    random_state=42,    # reproducibility
)

# Fit on the training split prepared in the data-loading cell.
# Left as the cell's final expression so the fitted estimator is displayed.
rf_classifier.fit(X_train, y_train)

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest regressor: 100 trees, seeded so repeated runs match.
rf_regressor = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)

# Fit on the same training split as the classifier.
# Left as the cell's final expression so the fitted estimator is displayed.
rf_regressor.fit(X_train, y_train)

In [None]:
from sklearn.metrics import accuracy_score, mean_squared_error

# Predict the held-out rows with both random-forest models.
y_pred_classifier = rf_classifier.predict(X_test)
y_pred_regressor = rf_regressor.predict(X_test)

# Score each model with the metric that matches its task:
# accuracy for the classifier, mean squared error for the regressor.
accuracy = accuracy_score(y_test, y_pred_classifier)
mse = mean_squared_error(y_test, y_pred_regressor)

# Report (classification first, then regression).
print(f"Classifier Accuracy: {accuracy}")
print(f"Regressor Mean Squared Error: {mse}")

Classifier Accuracy: 0.88
Regressor Mean Squared Error: 0.0846195


In [None]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier
from sklearn.metrics import accuracy_score

# Compare four ensemble configurations on the Iris dataset:
# bagging vs. AdaBoost, each with two different base estimators.

# Load dataset (feature matrix and class labels)
iris = load_iris()
X, y = iris.data, iris.target

# Train-test split: 30% held out, seeded, stratified on the labels so both
# splits keep the class proportions of the full dataset.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42, stratify=y
)

# 1. Bagging with SVM
# probability=True makes the SVC expose predict_proba.
svm_clf = SVC(probability=True, kernel='rbf', random_state=42)
bagging_svm = BaggingClassifier(estimator=svm_clf, n_estimators=30, random_state=42)
bagging_svm.fit(X_train, y_train)
y_pred_bagging = bagging_svm.predict(X_test)
acc_bagging_svm = accuracy_score(y_test, y_pred_bagging)

# 2. Boosting with Logistic Regression
log_reg = LogisticRegression(max_iter=200, random_state=42)
boosting_lr = AdaBoostClassifier(estimator=log_reg, n_estimators=50, random_state=42)
boosting_lr.fit(X_train, y_train)
y_pred_boosting = boosting_lr.predict(X_test)
acc_boosting_lr = accuracy_score(y_test, y_pred_boosting)

# 3. Bagging with Decision Tree
# Fully grown trees are a sensible base estimator for bagging.
dt_clf = DecisionTreeClassifier(random_state=42)
bagging_dt = BaggingClassifier(estimator=dt_clf, n_estimators=30, random_state=42)
bagging_dt.fit(X_train, y_train)
y_pred_bagging_dt = bagging_dt.predict(X_test)
acc_bagging_dt = accuracy_score(y_test, y_pred_bagging_dt)

# 4. Boosting with Decision Tree
# AdaBoost needs a *weak* learner. The unrestricted tree above (dt_clf) can
# fit the training split perfectly; when the first estimator has zero training
# error AdaBoost stops immediately, so the "ensemble" would be a single tree
# and no boosting would take place. Use a depth-1 stump instead, matching the
# AdaBoost example earlier in this notebook.
# NOTE: any previously saved output for this model was produced with the
# unrestricted tree; re-run the cell to refresh the printed accuracy.
stump_clf = DecisionTreeClassifier(max_depth=1, random_state=42)
boosting_dt = AdaBoostClassifier(estimator=stump_clf, n_estimators=50, random_state=42)
boosting_dt.fit(X_train, y_train)
y_pred_boosting_dt = boosting_dt.predict(X_test)
acc_boosting_dt = accuracy_score(y_test, y_pred_boosting_dt)

# Print results
print(f"Bagging with SVM Accuracy: {acc_bagging_svm:.4f}")
print(f"Boosting with Logistic Regression Accuracy: {acc_boosting_lr:.4f}")
print(f"Bagging with Decision Tree Accuracy: {acc_bagging_dt:.4f}")
print(f"Boosting with Decision Tree Accuracy: {acc_boosting_dt:.4f}")

Bagging with SVM Accuracy: 0.9556
Boosting with Logistic Regression Accuracy: 0.9333
Bagging with Decision Tree Accuracy: 0.9333
Boosting with Decision Tree Accuracy: 0.9111
