In [9]:
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split

# Load the California housing features and targets, keeping only the
# first 2000 samples.
X, y = fetch_california_housing(return_X_y=True)
X, y = X[:2000], y[:2000]

# Hold out 40% of the subsample as the test split; the fixed seed keeps
# the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.4,
    random_state=42,
)

In [10]:
# Sanity check: after slicing, X should hold 2000 rows and 8 feature columns.
X.shape

(2000, 8)

In [12]:
# Display the full dataset object returned by the loader when called without
# return_X_y; its repr lists data, target, frame, target_names, feature_names
# and DESCR.
fetch_california_housing()

{'data': array([[   8.3252    ,   41.        ,    6.98412698, ...,    2.55555556,
           37.88      , -122.23      ],
        [   8.3014    ,   21.        ,    6.23813708, ...,    2.10984183,
           37.86      , -122.22      ],
        [   7.2574    ,   52.        ,    8.28813559, ...,    2.80225989,
           37.85      , -122.24      ],
        ...,
        [   1.7       ,   17.        ,    5.20554273, ...,    2.3256351 ,
           39.43      , -121.22      ],
        [   1.8672    ,   18.        ,    5.32951289, ...,    2.12320917,
           39.43      , -121.32      ],
        [   2.3886    ,   16.        ,    5.25471698, ...,    2.61698113,
           39.37      , -121.24      ]]),
 'target': array([4.526, 3.585, 3.521, ..., 0.923, 0.847, 0.894]),
 'frame': None,
 'target_names': ['MedHouseVal'],
 'feature_names': ['MedInc',
  'HouseAge',
  'AveRooms',
  'AveBedrms',
  'Population',
  'AveOccup',
  'Latitude',
  'Longitude'],
 'DESCR': '.. _california_housing_dataset:\n

# BASELINE REGRESSORS

In [13]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score

# Standardise the features: the scaling statistics are learned from the
# training split only and then applied unchanged to the test split.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Base regressors, keyed by the label shown in the printed reports.
# The later ensemble cells iterate over this same dictionary.
models = {
    "Linear Regression": LinearRegression(),
    "Decision Stump": DecisionTreeRegressor(criterion="squared_error", max_depth=1),
    "SVR (Linear Kernel)": SVR(kernel="linear"),
}

# 5-fold cross-validated MSE for every model, reported once per split.
# NOTE(review): the "Testing" figures are cross-validation scores computed
# inside the test split itself, not predictions of a model fitted on the
# training split - confirm this is the intended protocol.
# NOTE(review): the scaler is fitted before cross_val_score creates its
# folds, so each validation fold has contributed to the scaling statistics.
report_line = "MSE: %0.2f (+/- %0.2f) [%s]"
evaluation_sets = (
    ("For Training Dataset", X_train_scaled, y_train),
    ("For Testing Dataset", X_test_scaled, y_test),
)

for position, (heading, features, targets) in enumerate(evaluation_sets):
    if position > 0:
        # Divider between the two reports.
        print("\n**********************************************************************")
    print(heading)
    for label, model in models.items():
        scores = cross_val_score(model, features, targets, cv=5, scoring="neg_mean_squared_error")
        # The scorer yields negated MSE, so the mean is sign-flipped for display.
        print(report_line % (-scores.mean(), scores.std(), label))


For Training Dataset
MSE: 0.30 (+/- 0.05) [Linear Regression]
MSE: 0.55 (+/- 0.08) [Decision Stump]
MSE: 0.34 (+/- 0.09) [SVR (Linear Kernel)]

**********************************************************************
For Testing Dataset
MSE: 0.25 (+/- 0.04) [Linear Regression]
MSE: 0.50 (+/- 0.06) [Decision Stump]
MSE: 0.27 (+/- 0.07) [SVR (Linear Kernel)]


# BAGGING ENSEMBLE

In [14]:
from sklearn.ensemble import BaggingRegressor

# Wrap every base regressor in a 50-member bagging ensemble and report its
# 5-fold cross-validated MSE, once per split.
# NOTE(review): the "Testing" figures are cross-validation scores computed
# inside the test split, not held-out predictions - confirm this is intended.
bagging_params = {"n_estimators": 50, "random_state": 42}
report_line = "MSE: %0.2f (+/- %0.2f) [%s]"
evaluation_sets = (
    ("For Training Dataset", X_train_scaled, y_train),
    ("For Testing Dataset", X_test_scaled, y_test),
)

for position, (heading, features, targets) in enumerate(evaluation_sets):
    if position > 0:
        # Divider between the two reports.
        print("\n**********************************************************************")
    print(heading)
    for label, model in models.items():
        bagging_model = BaggingRegressor(estimator=model, **bagging_params)
        scores = cross_val_score(bagging_model, features, targets, cv=5, scoring="neg_mean_squared_error")
        # The scorer yields negated MSE, so the mean is sign-flipped for display.
        print(report_line % (-scores.mean(), scores.std(), label))


For Training Dataset
MSE: 0.30 (+/- 0.05) [Linear Regression]
MSE: 0.51 (+/- 0.08) [Decision Stump]
MSE: 0.33 (+/- 0.08) [SVR (Linear Kernel)]

**********************************************************************
For Testing Dataset
MSE: 0.25 (+/- 0.04) [Linear Regression]
MSE: 0.43 (+/- 0.07) [Decision Stump]
MSE: 0.27 (+/- 0.07) [SVR (Linear Kernel)]


# ADABOOST ENSEMBLE

1. Default Learning Rate (learning_rate=1.0)

In [15]:
from sklearn.ensemble import AdaBoostRegressor
# Boost every base regressor with a 200-round AdaBoost ensemble (learning
# rate left at the library default) and report its 5-fold cross-validated
# MSE, once per split.
# NOTE(review): the "Testing" figures are cross-validation scores computed
# inside the test split, not held-out predictions - confirm this is intended.
adaboost_params = {"n_estimators": 200, "random_state": 42}
report_line = "MSE: %0.2f (+/- %0.2f) [%s]"
evaluation_sets = (
    ("For Training Dataset", X_train_scaled, y_train),
    ("For Testing Dataset", X_test_scaled, y_test),
)

for position, (heading, features, targets) in enumerate(evaluation_sets):
    if position > 0:
        # Divider between the two reports.
        print("\n**********************************************************************")
    print(heading)

    for label, model in models.items():
        adaboost_model = AdaBoostRegressor(estimator=model, **adaboost_params)
        scores = cross_val_score(adaboost_model, features, targets, cv=5, scoring="neg_mean_squared_error")
        # The scorer yields negated MSE, so the mean is sign-flipped for display.
        print(report_line % (-scores.mean(), scores.std(), label))


For Training Dataset
MSE: 0.34 (+/- 0.03) [Linear Regression]
MSE: 0.52 (+/- 0.05) [Decision Stump]
MSE: 0.57 (+/- 0.21) [SVR (Linear Kernel)]

**********************************************************************
For Testing Dataset
MSE: 0.29 (+/- 0.06) [Linear Regression]
MSE: 0.39 (+/- 0.05) [Decision Stump]
MSE: 0.53 (+/- 0.17) [SVR (Linear Kernel)]


2. Learning Rate=0.01

In [16]:
from sklearn.ensemble import AdaBoostRegressor
# Same AdaBoost experiment with learning_rate=0.01: 200 boosting rounds per
# base regressor, scored by 5-fold cross-validated MSE on each split.
# NOTE(review): the "Testing" figures are cross-validation scores computed
# inside the test split, not held-out predictions - confirm this is intended.
adaboost_params = {"n_estimators": 200, "random_state": 42, "learning_rate": 0.01}
report_line = "MSE: %0.2f (+/- %0.2f) [%s]"
evaluation_sets = (
    ("For Training Dataset", X_train_scaled, y_train),
    ("For Testing Dataset", X_test_scaled, y_test),
)

for position, (heading, features, targets) in enumerate(evaluation_sets):
    if position > 0:
        # Divider between the two reports.
        print("\n**********************************************************************")
    print(heading)

    for label, model in models.items():
        adaboost_model = AdaBoostRegressor(estimator=model, **adaboost_params)
        scores = cross_val_score(adaboost_model, features, targets, cv=5, scoring="neg_mean_squared_error")
        # The scorer yields negated MSE, so the mean is sign-flipped for display.
        print(report_line % (-scores.mean(), scores.std(), label))


For Training Dataset
MSE: 0.31 (+/- 0.04) [Linear Regression]
MSE: 0.52 (+/- 0.08) [Decision Stump]
MSE: 0.30 (+/- 0.06) [SVR (Linear Kernel)]

**********************************************************************
For Testing Dataset
MSE: 0.25 (+/- 0.04) [Linear Regression]
MSE: 0.44 (+/- 0.06) [Decision Stump]
MSE: 0.25 (+/- 0.04) [SVR (Linear Kernel)]


2. Learning Rate=0.001

In [17]:
from sklearn.ensemble import AdaBoostRegressor
# Same AdaBoost experiment with learning_rate=0.001: 200 boosting rounds per
# base regressor, scored by 5-fold cross-validated MSE on each split.
# NOTE(review): the "Testing" figures are cross-validation scores computed
# inside the test split, not held-out predictions - confirm this is intended.
adaboost_params = {"n_estimators": 200, "random_state": 42, "learning_rate": 0.001}
report_line = "MSE: %0.2f (+/- %0.2f) [%s]"
evaluation_sets = (
    ("For Training Dataset", X_train_scaled, y_train),
    ("For Testing Dataset", X_test_scaled, y_test),
)

for position, (heading, features, targets) in enumerate(evaluation_sets):
    if position > 0:
        # Divider between the two reports.
        print("\n**********************************************************************")
    print(heading)

    for label, model in models.items():
        adaboost_model = AdaBoostRegressor(estimator=model, **adaboost_params)
        scores = cross_val_score(adaboost_model, features, targets, cv=5, scoring="neg_mean_squared_error")
        # The scorer yields negated MSE, so the mean is sign-flipped for display.
        print(report_line % (-scores.mean(), scores.std(), label))

For Training Dataset
MSE: 0.29 (+/- 0.05) [Linear Regression]
MSE: 0.54 (+/- 0.08) [Decision Stump]
MSE: 0.33 (+/- 0.08) [SVR (Linear Kernel)]

**********************************************************************
For Testing Dataset
MSE: 0.25 (+/- 0.04) [Linear Regression]
MSE: 0.47 (+/- 0.06) [Decision Stump]
MSE: 0.27 (+/- 0.07) [SVR (Linear Kernel)]


3. Learning Rate=0.05

In [None]:
from sklearn.ensemble import AdaBoostRegressor
# Same AdaBoost experiment with learning_rate=0.05: 200 boosting rounds per
# base regressor, scored by 5-fold cross-validated MSE on each split.
# NOTE(review): the "Testing" figures are cross-validation scores computed
# inside the test split, not held-out predictions - confirm this is intended.
adaboost_params = {"n_estimators": 200, "random_state": 42, "learning_rate": 0.05}
report_line = "MSE: %0.2f (+/- %0.2f) [%s]"
evaluation_sets = (
    ("For Training Dataset", X_train_scaled, y_train),
    ("For Testing Dataset", X_test_scaled, y_test),
)

for position, (heading, features, targets) in enumerate(evaluation_sets):
    if position > 0:
        # Divider between the two reports.
        print("\n**********************************************************************")
    print(heading)

    for label, model in models.items():
        adaboost_model = AdaBoostRegressor(estimator=model, **adaboost_params)
        scores = cross_val_score(adaboost_model, features, targets, cv=5, scoring="neg_mean_squared_error")
        # The scorer yields negated MSE, so the mean is sign-flipped for display.
        print(report_line % (-scores.mean(), scores.std(), label))

For Training Dataset
MSE: 0.37 (+/- 0.04) [Linear Regression]
MSE: 0.51 (+/- 0.07) [Decision Stump]


4. Learning Rate=0.005

In [None]:
from sklearn.ensemble import AdaBoostRegressor
# Same AdaBoost experiment with learning_rate=0.005: 200 boosting rounds per
# base regressor, scored by 5-fold cross-validated MSE on each split.
# NOTE(review): the "Testing" figures are cross-validation scores computed
# inside the test split, not held-out predictions - confirm this is intended.
adaboost_params = {"n_estimators": 200, "random_state": 42, "learning_rate": 0.005}
report_line = "MSE: %0.2f (+/- %0.2f) [%s]"
evaluation_sets = (
    ("For Training Dataset", X_train_scaled, y_train),
    ("For Testing Dataset", X_test_scaled, y_test),
)

for position, (heading, features, targets) in enumerate(evaluation_sets):
    if position > 0:
        # Divider between the two reports.
        print("\n**********************************************************************")
    print(heading)

    for label, model in models.items():
        adaboost_model = AdaBoostRegressor(estimator=model, **adaboost_params)
        scores = cross_val_score(adaboost_model, features, targets, cv=5, scoring="neg_mean_squared_error")
        # The scorer yields negated MSE, so the mean is sign-flipped for display.
        print(report_line % (-scores.mean(), scores.std(), label))


In [None]:
import pandas as pd

# Define the template with empty values for each field
# Summary table of the "MSE (+/- std)" figures printed by the cells above.
# The values are transcribed by hand, not computed here. The AdaBoost
# columns carry the figures from the learning_rate=0.01 run.
summary_columns = [
    "Classifier name",
    "Baseline classifier - Training data (cv=5)",
    "Baseline classifier - Test data",
    "Bagging Ensemble - Training data (cv=5)",
    "Bagging Ensemble - Test data",
    "Adaboost Ensemble - Training data (cv=5)",
    "Adaboost Ensemble - Test data",
]

# One row per model, in the same order as summary_columns.
summary_rows = [
    (
        "Linear Regression",
        "0.30 (+/- 0.05)", "0.25 (+/- 0.04)",
        "0.30 (+/- 0.05)", "0.25 (+/- 0.04)",
        "0.31 (+/- 0.04)", "0.25 (+/- 0.04)",
    ),
    (
        "Decision Tree regression",
        "0.55 (+/- 0.08)", "0.50 (+/- 0.06)",
        "0.51 (+/- 0.08)", "0.43 (+/- 0.07)",
        "0.52 (+/- 0.08)", "0.44 (+/- 0.06)",
    ),
    (
        "Support Vector Regression",
        "0.34 (+/- 0.09)", "0.27 (+/- 0.07)",
        "0.33 (+/- 0.08)", "0.27 (+/- 0.07)",
        "0.30 (+/- 0.06)", "0.25 (+/- 0.04)",
    ),
]

# Pivot the rows into the column-oriented mapping the DataFrame is built from.
data = {
    column: list(values)
    for column, values in zip(summary_columns, zip(*summary_rows))
}

# Build the comparison table and show it as the cell output.
results_df = pd.DataFrame(data)
results_df