In [6]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import mean_squared_error, r2_score

# Load the Boston Housing dataset
file_path = r"C:\Users\popov\BostonHousingAnalysis\data\BostonHousing.csv"
df_original = pd.read_csv(file_path)

# Display the first few rows to ensure the data is loaded correctly
df_original.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [7]:
# Splitting the data into features and target variable
X = df_original.drop(columns='medv')
y = df_original['medv']

# Splitting the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Display the shapes to ensure everything is split correctly
print("\nShapes of the training and testing data:")
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)



Shapes of the training and testing data:
(404, 13) (102, 13) (404,) (102,)


In [8]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor

# Initialize base models
lr_model = LinearRegression()
gb_model = GradientBoostingRegressor(random_state=42)

# Train the models
lr_model.fit(X_train, y_train)
gb_model.fit(X_train, y_train)


GradientBoostingRegressor(random_state=42)

In [9]:
# Predictions
lr_preds = lr_model.predict(X_test)
gb_preds = gb_model.predict(X_test)

# Averaging the predictions
hybrid_preds = (lr_preds + gb_preds) / 2

# Evaluating the Hybrid Model
hybrid_mse = mean_squared_error(y_test, hybrid_preds)
hybrid_rmse = np.sqrt(hybrid_mse)
hybrid_r2 = r2_score(y_test, hybrid_preds)

print(f"Hybrid Model Performance:\nMSE: {hybrid_mse}\nRMSE: {hybrid_rmse}\nR2 Score: {hybrid_r2}")


Hybrid Model Performance:
MSE: 11.409133707250962
RMSE: 3.3777409177216304
R2 Score: 0.8444218583090386


In [10]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score

# Initialize models
linear_model = LinearRegression()
gradient_boosting_model = GradientBoostingRegressor(random_state=42)

# Train models
linear_model.fit(X_train, y_train)
gradient_boosting_model.fit(X_train, y_train)

# Predictions
linear_preds = linear_model.predict(X_test)
gb_preds = gradient_boosting_model.predict(X_test)

# Blend predictions
# Here, we're giving equal weight to both models. This can be adjusted based on model performance or domain knowledge.
blended_preds = 0.5 * linear_preds + 0.5 * gb_preds

# Evaluate hybrid model
blended_mse = mean_squared_error(y_test, blended_preds)
blended_rmse = np.sqrt(blended_mse)
blended_r2 = r2_score(y_test, blended_preds)

print(f"Blended Model Performance:\nMSE: {blended_mse}\nRMSE: {blended_rmse}\nR2 Score: {blended_r2}")


Blended Model Performance:
MSE: 11.409133707250962
RMSE: 3.3777409177216304
R2 Score: 0.8444218583090386


In [11]:
# Import necessary libraries
from sklearn.ensemble import StackingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR

# Define base models
base_models = [
    ('LinearRegression', LinearRegression()),
    ('GradientBoostingRegressor', GradientBoostingRegressor()),
    ('SVR', SVR())
]

# Define meta-model
meta_model = GradientBoostingRegressor()

# Define stacking regressor
stacking_regressor = StackingRegressor(
    estimators=base_models,
    final_estimator=meta_model
)

# Train the stacking regressor
stacking_regressor.fit(X_train, y_train)

# Evaluate the model
stacked_preds = stacking_regressor.predict(X_test)
stacked_mse = mean_squared_error(y_test, stacked_preds)
stacked_rmse = np.sqrt(stacked_mse)
stacked_r2 = r2_score(y_test, stacked_preds)

print(f"Stacked Model Performance:\nMSE: {stacked_mse}\nRMSE: {stacked_rmse}\nR2 Score: {stacked_r2}")


Stacked Model Performance:
MSE: 9.3394856689384
RMSE: 3.0560572096965726
R2 Score: 0.8726441584430421
