In [1]:
import pandas as pd
import numpy as np

In [2]:
df = pd.read_csv('House power consumption cleaned data')

In [3]:
df.drop(['Unnamed: 0'],axis=1,inplace=True)

In [4]:
df.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Sub_metering,power_consumption
0,50.852,2.812,6474.97,146.0,694.125
1,48.726,3.576,8422.2,400.0,412.1
2,19.566,2.928,10840.13,129.0,197.1
3,66.596,4.302,7181.28,714.0,383.933333
4,29.112,5.026,8703.04,192.0,293.2


In [5]:
x = df.drop(['power_consumption'],axis=1)
y = df['power_consumption']

In [6]:
from sklearn.model_selection import train_test_split

In [7]:
X_train,X_test,y_train,y_test = train_test_split(x,y,train_size=0.33,random_state=7)

# Desicion tree

In [8]:
from sklearn import tree

In [9]:
model = tree.DecisionTreeRegressor()

In [10]:
model.fit(X_train,y_train)

DecisionTreeRegressor()

In [11]:
y_predict_DT = model.predict(X_test)

R square score

In [12]:
from sklearn.metrics import r2_score
DT_r2 = r2_score(y_test,y_predict_DT)
print(DT_r2)

0.9306390715862848


adjusted R square score

In [13]:
adjusted_DT_r2 = 1 - (1-DT_r2)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)
adjusted_DT_r2

0.9303473326655121

# Random forest regressor

In [14]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.tree import DecisionTreeRegressor

In [15]:
model = RandomForestRegressor().fit(X_train,y_train)

In [16]:
y_predict_Rf = model.predict(X_test)

R square score

In [17]:
from sklearn.metrics import r2_score
Rf_r2 = r2_score(y_test,y_predict_Rf)
print(DT_r2)

0.9306390715862848


adjusted R square score

In [18]:
adjusted_Rf_r2 = 1 - (1-Rf_r2)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)
adjusted_Rf_r2

0.9675104995520856

Hyperparameter tunning

In [19]:
grid_param = {
    "n_estimators":[90,100,115,130],
    'criterion': ['squared_error', 'absolute_error', 'poisson'],
    'max_depth' : range(2,20,1),
    'min_samples_leaf' : range(1,10,1),
    'min_samples_split': range(2,10,1),
    'max_features':['sqrt','log2']
}

In [20]:
from sklearn.model_selection import GridSearchCV
grid_search=GridSearchCV(estimator=model,param_grid=grid_param,cv=3,verbose=2,n_jobs=-1)

In [21]:
grid_search.fit(X_train,y_train)

Fitting 3 folds for each of 31104 candidates, totalling 93312 fits


GridSearchCV(cv=3, estimator=RandomForestRegressor(), n_jobs=-1,
             param_grid={'criterion': ['squared_error', 'absolute_error',
                                       'poisson'],
                         'max_depth': range(2, 20),
                         'max_features': ['sqrt', 'log2'],
                         'min_samples_leaf': range(1, 10),
                         'min_samples_split': range(2, 10),
                         'n_estimators': [90, 100, 115, 130]},
             verbose=2)

In [22]:
grid_search.best_params_

{'criterion': 'absolute_error',
 'max_depth': 15,
 'max_features': 'sqrt',
 'min_samples_leaf': 1,
 'min_samples_split': 3,
 'n_estimators': 115}

In [23]:
model_best_param = RandomForestRegressor(criterion='squared_error',
 max_depth= 17,
 max_features='sqrt',
 min_samples_leaf= 1,
 min_samples_split= 2,
 n_estimators= 115).fit(X_train,y_train)

In [24]:
y_predict_grid = model_best_param.predict(X_test)

R square score

In [25]:
from sklearn.metrics import r2_score
grid_r2 = r2_score(y_test,y_predict_grid)
print(grid_r2)

0.9225874375782896


adjusted R square score

In [26]:
adjusted_grid_r2 = 1 - (1-grid_r2)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)
adjusted_grid_r2

0.9222618326890291

# Bagging Regressor

In [27]:
from sklearn.ensemble import BaggingRegressor

In [28]:
model = BaggingRegressor(base_estimator=DecisionTreeRegressor(),n_estimators=50, random_state=0).fit(X_train, y_train)

In [29]:
from sklearn.model_selection import cross_val_score
results = cross_val_score(model, X_train, y_train, cv = 20)
print("accuracy :")
print(results.mean())

accuracy :
0.9531401785064915


R square score

In [30]:
from sklearn.metrics import r2_score
Bagging_r2 = r2_score(y_test,y_predict_grid)
print(Bagging_r2)

0.9225874375782896


adjusted R square score

In [31]:
adjusted_Bagging_r2 = 1 - (1-Bagging_r2)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)
adjusted_Bagging_r2

0.9222618326890291

# Extra tree regressor

In [32]:
from sklearn.ensemble import ExtraTreesRegressor

In [33]:
model = ExtraTreesRegressor(n_estimators=100,random_state=7).fit(X_train, y_train)

R square score

In [34]:
from sklearn.metrics import r2_score
extra_r2 = r2_score(y_test,y_predict_grid)
print(extra_r2)

0.9225874375782896


adjusted R square score

In [35]:
adjusted_extra_r2 = 1 - (1-extra_r2)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)
adjusted_extra_r2

0.9222618326890291

Coss validation

In [36]:
from sklearn.model_selection import cross_val_score
results = cross_val_score(model, X_train, y_train, cv = 20)
print("accuracy :")
print(results.mean())

accuracy :
0.9742557707013066


# Voting regressor

In [37]:
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import VotingRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR

In [38]:
r1 = LinearRegression()
r2 = RandomForestRegressor(n_estimators=10)
r3 = SVR()

In [39]:
model = VotingRegressor([('lr', r1), ('rf', r2), ('r3', r3)])

In [40]:
model.fit(X_train,y_train)

VotingRegressor(estimators=[('lr', LinearRegression()),
                            ('rf', RandomForestRegressor(n_estimators=10)),
                            ('r3', SVR())])

In [41]:
y_predict_voting = model.predict(X_test)

R square score

In [42]:
from sklearn.metrics import r2_score
grid_r2 = r2_score(y_test,y_predict_voting)
print(grid_r2)

0.8642397025764191


adjusted R square score

In [43]:
adjusted_grid_r2 = 1 - (1-grid_r2)*(len(y_test)-1)/(len(y_test)-X_test.shape[1]-1)
adjusted_grid_r2

0.8636686813464567

In [44]:
from sklearn.model_selection import cross_val_score
results = cross_val_score(model, X_train, y_train, cv = 20)
print("accuracy :")
print(results.mean())

accuracy :
0.8593070849716739
