<a href="https://colab.research.google.com/github/Saifullah785/machine-learning-engineer-roadmap/blob/main/Lecture_63_Voting_Ensemble_Classification%26Regression/Lecture_63_Voting_regressor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [74]:
# importing necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [59]:
# Load the Boston Housing dataset from a CSV file into a pandas DataFrame.
boston = pd.read_csv('/content/BostonHousing.csv')
# Display the first 5 rows of the DataFrame to get a preview of the data.
boston.head()

Unnamed: 0,crim,zn,indus,chas,nox,rm,age,dis,rad,tax,ptratio,b,lstat,medv
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.09,1,296,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222,18.7,396.9,5.33,36.2


In [60]:
# Separate the features (independent variables) from the target variable (dependent variable).
# 'medv' is the target variable (median value of owner-occupied homes).
# .drop('medv', axis=1) creates a new DataFrame by dropping the 'medv' column.
# .values converts the DataFrame into a NumPy array.
x = boston.drop('medv', axis=1).values
# Select the 'medv' column as the target variable.
# .values converts the pandas Series into a NumPy array.
# .reshape(-1, 1) reshapes the array to have one column, which is required by some scikit-learn models.
y = boston['medv'].values.reshape(-1,1)

In [61]:
# Print the shape of the features array (number of rows, number of columns).
x.shape

(506, 13)

In [62]:
# Print the shape of the target variable array (number of rows, number of columns).
y.shape

(506, 1)

In [63]:
# Import necessary regression models from scikit-learn.
from sklearn.linear_model import LinearRegression # Linear Regression model
from sklearn.tree import DecisionTreeRegressor # Decision Tree Regression model
from sklearn.svm import SVR # Support Vector Regressor model
# Import cross_val_score for cross-validation.
from sklearn.model_selection import cross_val_score

In [64]:
# Instantiate the different regression models.
lr = LinearRegression()
dt = DecisionTreeRegressor()
svr = SVR()

In [65]:
# Create a list of tuples, where each tuple contains the name and the instance of a regression model.
estimators = [('lr',lr),('dt',dt),('svr',svr)]

In [66]:
# Iterate through each estimator in the 'estimators' list.
for estimator in estimators:
  # Perform 10-fold cross-validation for each estimator.
  # The scoring metric is R-squared ('r2').
  scores = cross_val_score(estimator[1],x,y,scoring='r2',cv=10)
  # Print the name of the estimator and the mean R-squared score rounded to 2 decimal places.
  print(estimator[0],np.round(np.mean(scores),2))

lr 0.2
dt -0.14
svr -0.41


In [67]:
# Import the VotingRegressor class from scikit-learn.
from sklearn.ensemble import VotingRegressor

In [68]:
# Create a VotingRegressor instance with the previously defined estimators.
# By default, the weights are uniform.
vr = VotingRegressor(estimators)
# Perform 10-fold cross-validation on the VotingRegressor using R-squared as the scoring metric.
scores = cross_val_score(vr,x,y,scoring='r2',cv=10)
# Print the name "Voting Regressor" and the mean R-squared score rounded to 2 decimal places.
print("Voting Regressor",np.round(np.mean(scores),2))

Voting Regressor 0.45


In [69]:
# Iterate through different combinations of weights for the VotingRegressor.
# The weights are for the Linear Regression, Decision Tree, and Support Vector Regressor models, respectively.
for i in range(1, 4):
  for j in range(1, 4):
    for k in range(1, 4):
      # Create a VotingRegressor instance with the specified weights.
      vr = VotingRegressor(estimators, weights=[i, j, k])
      # Perform 10-fold cross-validation with the current weights and calculate the mean R-squared score.
      scores = cross_val_score(vr, x, y, scoring='r2', cv=10)
      # Print the weights and the corresponding mean R-squared score rounded to 2 decimal places.
      print("For i={}, j={}, k={}".format(i, j, k), np.round(np.mean(scores), 2))

For i=1, j=1, k=1 0.45
For i=1, j=1, k=2 0.31
For i=1, j=1, k=3 0.25
For i=1, j=2, k=1 0.38
For i=1, j=2, k=2 0.41
For i=1, j=2, k=3 0.3
For i=1, j=3, k=1 0.34
For i=1, j=3, k=2 0.37
For i=1, j=3, k=3 0.38
For i=2, j=1, k=1 0.47
For i=2, j=1, k=2 0.42
For i=2, j=1, k=3 0.35
For i=2, j=2, k=1 0.38
For i=2, j=2, k=2 0.38
For i=2, j=2, k=3 0.41
For i=2, j=3, k=1 0.41
For i=2, j=3, k=2 0.43
For i=2, j=3, k=3 0.42
For i=3, j=1, k=1 0.44
For i=3, j=1, k=2 0.44
For i=3, j=1, k=3 0.4
For i=3, j=2, k=1 0.4
For i=3, j=2, k=2 0.46
For i=3, j=2, k=3 0.43
For i=3, j=3, k=1 0.43
For i=3, j=3, k=2 0.44
For i=3, j=3, k=3 0.4


In [70]:
# Create multiple Decision Tree Regressor instances with different maximum depths.
# max_depth controls the maximum depth of the tree.
dt1 = DecisionTreeRegressor(max_depth=1)
dt2 = DecisionTreeRegressor(max_depth=3)
dt3 = DecisionTreeRegressor(max_depth=5)
dt4 = DecisionTreeRegressor(max_depth=7)
# max_depth=None means the nodes are expanded until all leaves are pure or until all leaves contain less than min_samples_split samples.
dt5 = DecisionTreeRegressor(max_depth=None)

In [71]:
# Create a list of tuples, where each tuple contains the name and the instance of a Decision Tree Regressor with a specific max_depth.
estimators = [('dt1',dt1),('dt2',dt2),('dt3',dt3),('dt4',dt4),('dt5',dt5)]
# This list will be used to evaluate the performance of each Decision Tree with different depths.

In [72]:
# Iterate through each estimator (Decision Tree with different max_depth) in the 'estimators' list.
for estimator in estimators:
  # Perform 10-fold cross-validation for each Decision Tree.
  # The scoring metric is R-squared ('r2').
  scores = cross_val_score(estimator[1], x, y, scoring='r2', cv=10)
  # Print the name of the estimator (indicating the max_depth) and the mean R-squared score rounded to 2 decimal places.
  print(estimator[0], np.round(np.mean(scores), 2))

dt1 -0.85
dt2 -0.11
dt3 0.08
dt4 0.03
dt5 -0.12


In [73]:
# Create a VotingRegressor instance using the Decision Tree Regressors with different max_depths as estimators.
# By default, uniform weights are used.
vr = VotingRegressor(estimators)
# Perform 10-fold cross-validation on this VotingRegressor using R-squared as the scoring metric.
scores = cross_val_score(vr, x, y, scoring='r2', cv=10)
# Print the name "Voting Regressor" and the mean R-squared score rounded to 2 decimal places.
print("Voting Regressor", np.round(np.mean(scores), 2))

Voting Regressor 0.2
