# Implementing voting_regressor using Sales dataset

In [27]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.model_selection import cross_val_score
# import warnings
# from sklearn.exceptions import DataConversionWarning

# warnings.filterwarnings("ignore", category=DataConversionWarning)




# Load Sales dataset

In [28]:
df=pd.read_csv('sales.csv')

# Data Manupulation

In [29]:
df.drop(["Unnamed: 9",'Unnamed: 10'],axis=1,inplace=True)

In [30]:
# df=pd.get_dummies(df,columns=["Order_Priority"],drop_first=True,sparse=True,dtype=int)

In [31]:
df.drop(["Region"],axis=1,inplace=True)

In [32]:
from sklearn.preprocessing import LabelEncoder
Label=LabelEncoder()
df["Sales_Channel"]=Label.fit_transform(df["Sales_Channel"]) 

# Splitting dataset into feature and target (X,y)

In [33]:
X=df.drop(['Country','Item_Type','Total_Profit','Ship_Date','Order_Priority'],axis=1)

In [41]:
y=df.iloc[:,6:7].values.ravel()

In [42]:
X

Unnamed: 0,Sales_Channel,Unit_Cost,Total_Revenue
0,0,159.42,2533654.00
1,1,117.11,576782.80
2,0,524.96,1158502.59
3,1,6.92,75591.66
4,0,524.96,3296425.02
...,...,...,...
95,1,35.84,97040.64
96,0,6.92,58471.11
97,0,90.93,228779.10
98,0,56.67,471336.91


# Model building

In [44]:
lr = LinearRegression()
dt = DecisionTreeRegressor()
svr = SVR()

In [45]:
estimators = [('lr',lr),('dt',dt),('svr',svr)]

In [None]:
Accuracy

In [46]:
for estimator in estimators:
  scores = cross_val_score(estimator[1],X,y,scoring='r2',cv=10)
  print(estimator[0],np.round(np.mean(scores),2))

lr 1.0
dt 0.99
svr -0.29


# Applying VotingRegressor

In [47]:
from sklearn.ensemble import VotingRegressor

In [48]:
vr = VotingRegressor(estimators)
scores = cross_val_score(vr,X,y,scoring='r2',cv=10)
print("Voting Regressor",np.round(np.mean(scores),2))

Voting Regressor 0.86


# Checking each and every parameter

In [49]:
for i in range(1,4):
  for j in range(1,4):
    for k in range(1,4):
      vr = VotingRegressor(estimators,weights=[i,j,k])
      scores = cross_val_score(vr,X,y,scoring='r2',cv=10)
      print("For i={},j={},k={}".format(i,j,k),np.round(np.mean(scores),2))


For i=1,j=1,k=1 0.86
For i=1,j=1,k=2 0.67
For i=1,j=1,k=3 0.54
For i=1,j=2,k=1 0.92
For i=1,j=2,k=2 0.79
For i=1,j=2,k=3 0.68
For i=1,j=3,k=1 0.95
For i=1,j=3,k=2 0.85
For i=1,j=3,k=3 0.76
For i=2,j=1,k=1 0.92
For i=2,j=1,k=2 0.8
For i=2,j=1,k=3 0.68
For i=2,j=2,k=1 0.95
For i=2,j=2,k=2 0.86
For i=2,j=2,k=3 0.77
For i=2,j=3,k=1 0.96
For i=2,j=3,k=2 0.9
For i=2,j=3,k=3 0.82
For i=3,j=1,k=1 0.95
For i=3,j=1,k=2 0.86
For i=3,j=1,k=3 0.77
For i=3,j=2,k=1 0.96
For i=3,j=2,k=2 0.9
For i=3,j=2,k=3 0.82
For i=3,j=3,k=1 0.97
For i=3,j=3,k=2 0.92
For i=3,j=3,k=3 0.86


# using the same algorithm

In [50]:


dt1 = DecisionTreeRegressor(max_depth=1)
dt2 = DecisionTreeRegressor(max_depth=3)
dt3 = DecisionTreeRegressor(max_depth=5)
dt4 = DecisionTreeRegressor(max_depth=7)
dt5 = DecisionTreeRegressor(max_depth=None)

In [51]:
estimators = [('dt1',dt1),('dt2',dt2),('dt3',dt3),('dt4',dt4),('dt5',dt5)]

In [52]:
for estimator in estimators:
  scores = cross_val_score(estimator[1],X,y,scoring='r2',cv=10)
  print(estimator[0],np.round(np.mean(scores),2))

dt1 0.68
dt2 0.96
dt3 0.99
dt4 0.99
dt5 0.99


In [53]:
vr = VotingRegressor(estimators)
scores = cross_val_score(vr,X,y,scoring='r2',cv=10)
print("Voting Regressor",np.round(np.mean(scores),2))

Voting Regressor 0.98


In [54]:
print("Voting Regressor",np.round(np.mean(scores),2))

Voting Regressor 0.98
