In [35]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import VotingRegressor
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.ensemble import HistGradientBoostingRegressor

In [37]:
data = pd.read_csv('./HousingData.csv')
data

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,MEDV
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1,296,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2,242,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3,222,18.7,396.90,,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1,273,21.0,391.99,,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1,273,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1,273,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1,273,21.0,393.45,6.48,22.0


In [38]:
# Check for missing values
print(data.isnull().sum())

CRIM       20
ZN         20
INDUS      20
CHAS       20
NOX         0
RM          0
AGE        20
DIS         0
RAD         0
TAX         0
PTRATIO     0
B           0
LSTAT      20
MEDV        0
dtype: int64


In [39]:
# Handle missing values with SimpleImputer
imputer = SimpleImputer(strategy='mean')
data_imputed = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

In [40]:
# Separate features and target variable
X = data_imputed.drop(columns=['MEDV'])  # Replace 'MEDV' with your target column name
y = data_imputed['MEDV']

In [41]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=2)

In [42]:
# Define estimators
estimators = [
    ('Linear Regression', LinearRegression()),
    ('Decision Tree', DecisionTreeRegressor()),
    ('Support Vector Regressor', SVR())
]

In [43]:
# Evaluate each estimator using cross-validation
for name, estimator in estimators:
    scores = cross_val_score(estimator, X_train, y_train, scoring='r2', cv=10)
    print(f"{name}: {np.round(np.mean(scores), 2)}")

Linear Regression: 0.68
Decision Tree: 0.75
Support Vector Regressor: 0.16


In [44]:
# Voting Regressor
vr = VotingRegressor(estimators)
scores = cross_val_score(vr, X_train, y_train, scoring='r2', cv=10)
print("Voting Regressor:", np.round(np.mean(scores), 2))

Voting Regressor: 0.73


In [45]:
# Trying different weights for the Voting Regressor
for i in range(1, 4):
    for j in range(1, 4):
        for k in range(1, 4):
            vr = VotingRegressor(estimators, weights=[i, j, k])
            scores = cross_val_score(vr, X_train, y_train, scoring='r2', cv=10)
            print(f"For i={i}, j={j}, k={k}: {np.round(np.mean(scores), 2)}")

# Using Decision Trees with varying depths
depth_estimators = [
    ('dt1', DecisionTreeRegressor(max_depth=1)),
    ('dt2', DecisionTreeRegressor(max_depth=3)),
    ('dt3', DecisionTreeRegressor(max_depth=5)),
    ('dt4', DecisionTreeRegressor(max_depth=7)),
    ('dt5', DecisionTreeRegressor(max_depth=None))
]

For i=1, j=1, k=1: 0.72
For i=1, j=1, k=2: 0.63
For i=1, j=1, k=3: 0.56
For i=1, j=2, k=1: 0.76
For i=1, j=2, k=2: 0.7
For i=1, j=2, k=3: 0.64
For i=1, j=3, k=1: 0.79
For i=1, j=3, k=2: 0.74
For i=1, j=3, k=3: 0.68
For i=2, j=1, k=1: 0.74
For i=2, j=1, k=2: 0.67
For i=2, j=1, k=3: 0.62
For i=2, j=2, k=1: 0.77
For i=2, j=2, k=2: 0.72
For i=2, j=2, k=3: 0.68
For i=2, j=3, k=1: 0.77
For i=2, j=3, k=2: 0.75
For i=2, j=3, k=3: 0.71
For i=3, j=1, k=1: 0.73
For i=3, j=1, k=2: 0.69
For i=3, j=1, k=3: 0.65
For i=3, j=2, k=1: 0.76
For i=3, j=2, k=2: 0.73
For i=3, j=2, k=3: 0.69
For i=3, j=3, k=1: 0.79
For i=3, j=3, k=2: 0.75
For i=3, j=3, k=3: 0.72


In [46]:
for name, estimator in depth_estimators:
    scores = cross_val_score(estimator, X_train, y_train, scoring='r2', cv=10)
    print(f"{name}: {np.round(np.mean(scores), 2)}")

# Voting Regressor with decision trees
vr = VotingRegressor(depth_estimators)
scores = cross_val_score(vr, X_train, y_train, scoring='r2', cv=10)
print("Voting Regressor with Depths:", np.round(np.mean(scores), 2))

dt1: 0.33
dt2: 0.72
dt3: 0.74
dt4: 0.76
dt5: 0.74
Voting Regressor with Depths: 0.77
