In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import GridSearchCV , train_test_split , cross_val_score

In [2]:
data , target = fetch_california_housing(return_X_y=True , as_frame=True)
df = pd.concat([data , target] , axis=1)
df.head()

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.80226,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422


In [3]:
X = df.iloc[:,0:8]
y = df.iloc[:,-1]

In [4]:
X.head(2)

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude
0,8.3252,41.0,6.984127,1.02381,322.0,2.555556,37.88,-122.23
1,8.3014,21.0,6.238137,0.97188,2401.0,2.109842,37.86,-122.22


In [5]:
y.head(2)

0    4.526
1    3.585
Name: MedHouseVal, dtype: float64

In [6]:
reg1 = LinearRegression()
reg2 = DecisionTreeRegressor()
reg3 = SVR()
reg4 = RandomForestRegressor()

In [7]:
estimators = [('LR' , reg1),('DT' , reg2),('SVR' , reg3),('RF' , reg4)]

In [8]:
X_train , X_test , y_train , y_test = train_test_split(X,y,test_size=0.2)

In [9]:
for estimator in estimators:
    scores = cross_val_score(estimator[1],X_train,y_train,cv=10,scoring='r2')
    print(estimator[0] , np.round(np.mean(scores),2))

LR 0.6
DT 0.6
SVR -0.03
RF 0.8


In [10]:
from sklearn.ensemble import VotingRegressor

In [11]:
vc = VotingRegressor(estimators=estimators)
x = cross_val_score(vc,X_train,y_train,cv=10,scoring='r2')
print(np.round(np.mean(x),2))

0.7


In [12]:
print("X Train: ",X_train.shape)
print("Y Train: ",y_train.shape)

X Train:  (16512, 8)
Y Train:  (16512,)


In [13]:
#for i in range(1,4):
#    for j in range(1,4):
#        for k in range(1,4):
#            for m in range(1,4):
#                vc1 = VotingRegressor(estimators=estimators , weights=[i,j,k,m])
#                x = cross_val_score(vc1,X_train,y_train,cv=10,scoring='r2')
#                print("For i={},j={},k={},m={}".format(i,j,k,m),np.round(np.mean(x),2))

In [15]:
#For i=1,j=1,k=1,m=1 0.7
#For i=1,j=1,k=1,m=2 0.74
#For i=1,j=1,k=1,m=3 0.76
#For i=1,j=1,k=2,m=1 0.62
#For i=1,j=1,k=2,m=2 0.68
#For i=1,j=1,k=2,m=3 0.71
#For i=1,j=1,k=3,m=1 0.55
#For i=1,j=1,k=3,m=2 0.62
#For i=1,j=1,k=3,m=3 0.66
#For i=1,j=2,k=1,m=1 0.72
#For i=1,j=2,k=1,m=2 0.75
#For i=1,j=2,k=1,m=3 0.76
#For i=1,j=2,k=2,m=1 0.67
#For i=1,j=2,k=2,m=2 0.7
#For i=1,j=2,k=2,m=3 0.73
#For i=1,j=2,k=3,m=1 0.61

In [17]:
vc2 = VotingRegressor(estimators=estimators , weights=[1,1,1,3])
x = cross_val_score(vc2,X_train,y_train,cv=10,scoring='r2')
print(np.round(np.mean(x),2))

0.75
