<a href="https://colab.research.google.com/github/Freesoul-tech/Louis-Mahobe/blob/main/Ensemble31.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor

# Load dataset
df = pd.read_csv("/content/sample_data/california_housing_train.csv")
# Select features and target variable using available columns
X = df[["total_rooms", "median_income", "housing_median_age"]] # Example features, you can change these
y = df["median_house_value"]

# Train-test split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

In [13]:
# Train models
model1 = LinearRegression().fit(X_train_scaled, y_train)
model2 = SVR().fit(X_train_scaled, y_train)
model3 = DecisionTreeRegressor().fit(X_train_scaled, y_train)

# Predict
pred1 = model1.predict(X_val_scaled)
pred2 = model2.predict(X_val_scaled)
pred3 = model3.predict(X_val_scaled)

# Blend predictions
blended_pred = (0.4 * pred1 + 0.3 * pred2 + 0.3 * pred3)


In [14]:
from sklearn.ensemble import BaggingRegressor

bagging_model = BaggingRegressor(estimator=DecisionTreeRegressor(), n_estimators=10, random_state=42)
bagging_model.fit(X_train_scaled, y_train)
bagging_pred = bagging_model.predict(X_val_scaled)

In [15]:
from sklearn.ensemble import StackingRegressor

base_models = [
    ('lr', LinearRegression()),
    ('svr', SVR()),
    ('dt', DecisionTreeRegressor())
]

meta_model = LinearRegression()

stacking_model = StackingRegressor(estimators=base_models, final_estimator=meta_model)
stacking_model.fit(X_train_scaled, y_train)
stacking_pred = stacking_model.predict(X_val_scaled)


In [16]:
from sklearn.metrics import mean_squared_error

print("Blending MSE:", mean_squared_error(y_val, blended_pred))
print("Bagging MSE:", mean_squared_error(y_val, bagging_pred))
print("Stacking MSE:", mean_squared_error(y_val, stacking_pred))


Blending MSE: 7407915627.448985
Bagging MSE: 7025376402.86053
Stacking MSE: 6199720484.784053


In [7]:
import pandas as pd
df = pd.read_csv("/content/sample_data/california_housing_train.csv")


In [17]:
feature_sets = [
    ["total_rooms", "median_income", "housing_median_age"],
    ["total_rooms", "total_bedrooms", "median_income"],
    ["median_income", "population", "households"]
]

for i, features in enumerate(feature_sets):
    X = df[features]
    y = df["median_house_value"]

    # Train-test split and scaling
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42+i)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)

    # Train models
    model1 = LinearRegression().fit(X_train_scaled, y_train)
    model2 = SVR().fit(X_train_scaled, y_train)
    model3 = DecisionTreeRegressor().fit(X_train_scaled, y_train)

    # Predict
    pred1 = model1.predict(X_val_scaled)
    pred2 = model2.predict(X_val_scaled)
    pred3 = model3.predict(X_val_scaled)
    blended_pred = 0.4 * pred1 + 0.3 * pred2 + 0.3 * pred3

    # MSEs
    mse_blend = mean_squared_error(y_val, blended_pred)
    mse_lr = mean_squared_error(y_val, pred1)
    mse_svr = mean_squared_error(y_val, pred2)
    mse_dt = mean_squared_error(y_val, pred3)

    print(f"\nExample {i+1}")
    print("Blending MSE:", mse_blend)
    print("Linear Regression MSE:", mse_lr)
    print("SVR MSE:", mse_svr)
    print("Decision Tree MSE:", mse_dt)



Example 1
Blending MSE: 7450680909.801436
Linear Regression MSE: 6307253554.289143
SVR MSE: 14578254146.057575
Decision Tree MSE: 12192486068.120295

Example 2
Blending MSE: 7326791530.565487
Linear Regression MSE: 6559992579.842346
SVR MSE: 14153031130.428663
Decision Tree MSE: 11724107234.82294

Example 3
Blending MSE: 6770603682.375311
Linear Regression MSE: 6769091537.586275
SVR MSE: 13382877997.782719
Decision Tree MSE: 10332982748.874706


In [18]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

def manual_bagging(X_train, y_train, X_val, n_estimators=10):
    preds = []
    for _ in range(n_estimators):
        idx = np.random.choice(len(X_train), len(X_train), replace=True)
        X_sample, y_sample = X_train[idx], y_train[idx]
        model = DecisionTreeRegressor()
        model.fit(X_sample, y_sample)
        preds.append(model.predict(X_val))
    return np.mean(preds, axis=0)


bagging_pred_scratch = manual_bagging(X_train_scaled, y_train.values, X_val_scaled)
mse_bagging_scratch = mean_squared_error(y_val, bagging_pred_scratch)
mse_dt = mean_squared_error(y_val, pred3)

print("Manual Bagging MSE:", mse_bagging_scratch)
print("Single Decision Tree MSE:", mse_dt)


Manual Bagging MSE: 6217990121.941231
Single Decision Tree MSE: 10332982748.874706


In [19]:
# Train base models
base_preds_train = []
base_preds_val = []

for model in [LinearRegression(), SVR(), DecisionTreeRegressor()]:
    model.fit(X_train_scaled, y_train)
    base_preds_train.append(model.predict(X_train_scaled))
    base_preds_val.append(model.predict(X_val_scaled))

# Stack predictions
X_meta_train = np.column_stack(base_preds_train)
X_meta_val = np.column_stack(base_preds_val)

# Train meta-model
meta_model = LinearRegression()
meta_model.fit(X_meta_train, y_train)
stacking_pred_scratch = meta_model.predict(X_meta_val)

# Compare
mse_stacking_scratch = mean_squared_error(y_val, stacking_pred_scratch)
mse_lr = mean_squared_error(y_val, pred1)

print("Manual Stacking MSE:", mse_stacking_scratch)
print("Single Linear Regression MSE:", mse_lr)


Manual Stacking MSE: 10441436913.876177
Single Linear Regression MSE: 6769091537.586275
