## <span style='color:red '>1.0 Importing required libraries</span>

In [1]:
### Standard library
import os
import warnings
from urllib.parse import quote_plus

### Pandas and Numpy
import pandas as pd
import numpy as np

### MongoDB Library
import pymongo

### Machine Learning libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.svm import SVR
from sklearn.metrics import r2_score

### To ignore warnings
warnings.filterwarnings('ignore')

## <span style='color:red '>2.0 Retrieving data from MongoDB</span>

In [3]:
### Retrieving data from MongoDB
### Creating the connection with MongoDB.
### The credentials are read from the MONGODB_USERNAME / MONGODB_PASSWORD environment variables
### (a missing variable raises KeyError immediately) instead of being written into the notebook.
### They are percent-escaped because characters such as '@', ':' or '/' would otherwise break the URI.
mongodb_username = quote_plus(os.environ["MONGODB_USERNAME"])
mongodb_password = quote_plus(os.environ["MONGODB_PASSWORD"])

### The {username}/{password} placeholders are now actually substituted; previously the plain
### string was passed as-is, so the literal text "{username}:{password}" was sent as credentials.
client = pymongo.MongoClient(
    "mongodb+srv://{username}:{password}@clustershub.jujlbeo.mongodb.net/?retryWrites=true&w=majority".format(
        username=mongodb_username,
        password=mongodb_password,
    )
)

In [4]:
### Select the 'Power_consumption' database and its 'Household_power_data' collection by name.
### These lines only look the objects up through item access on the client / database handles.
db=client['Power_consumption']
collection=db['Household_power_data']

In [5]:
### Query the collection with find(); no filter or projection is passed,
### so nothing is excluded by this call. The result is converted to a DataFrame in the next cell.
data_from_mongodb=collection.find()

In [6]:
### Convert the result of find() into a pandas DataFrame
data_mongodb=pd.DataFrame(data_from_mongodb)

In [7]:
### First 5 records of the dataset, still including the '_id' and 'index' columns
data_mongodb.head()

Unnamed: 0,_id,index,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month,Total_power_use
0,635f74daf2daa299ed76d0b6,0,0.246,0.076,246.66,1.0,0,0,0,7,12,0
1,635f74daf2daa299ed76d0b7,2,0.61,0.152,233.27,2.8,0,0,0,27,5,0
2,635f74daf2daa299ed76d0b8,3,0.848,0.0,236.38,3.6,0,0,0,3,6,0
3,635f74daf2daa299ed76d0b9,4,0.216,0.0,241.19,0.8,0,0,0,13,3,0
4,635f74daf2daa299ed76d0ba,5,0.312,0.264,242.21,1.6,0,1,0,20,4,1


In [8]:
### Dropping the _id and index features from the dataset imported from MongoDB.
### The result is reassigned (no inplace=True) and missing columns are ignored, so re-running
### this cell after the columns are already gone no longer raises a KeyError.
data_mongodb = data_mongodb.drop(columns=['_id', 'index'], errors='ignore')
data_mongodb.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month,Total_power_use
0,0.246,0.076,246.66,1.0,0,0,0,7,12,0
1,0.61,0.152,233.27,2.8,0,0,0,27,5,0
2,0.848,0.0,236.38,3.6,0,0,0,3,6,0
3,0.216,0.0,241.19,0.8,0,0,0,13,3,0
4,0.312,0.264,242.21,1.6,0,1,0,20,4,1


## <span style='color:red '>3.0 Model and Evaluation</span>

### <span style='color:red '>3.1 Separating Independent and Dependent features</span>

In [9]:
### Split positionally: every column except the last becomes the feature DataFrame X,
### and the last column (Total_power_use in the preview above) becomes the target Series y.
### NOTE(review): in the rows displayed in this notebook, Total_power_use equals
### Sub_metering_1 + Sub_metering_2 + Sub_metering_3 (e.g. 0+1+1 = 2, 0+0+18 = 18).
### If that holds for the whole dataset, the target is a linear function of features kept in X,
### which would explain the ~100 % R2 of the linear models below - confirm before trusting the scores.
X=data_mongodb.iloc[:,:-1]
y=data_mongodb.iloc[:,-1]
X.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month
0,0.246,0.076,246.66,1.0,0,0,0,7,12
1,0.61,0.152,233.27,2.8,0,0,0,27,5
2,0.848,0.0,236.38,3.6,0,0,0,3,6
3,0.216,0.0,241.19,0.8,0,0,0,13,3
4,0.312,0.264,242.21,1.6,0,1,0,20,4


In [10]:
### First 5 values of the target series
y.head()

0    0
1    0
2    0
3    0
4    1
Name: Total_power_use, dtype: int64

### <span style='color:red '>3.2 Train Test Split</span>

In [11]:
### Hold out 25 % of the rows as the test set.
### The fixed random_state=19 makes the split reproducible, so everyone running this gets the same rows.

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=19)
X_train.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month
22600,0.412,0.236,243.53,1.8,0,1,1,15,11
18073,1.314,0.0,240.87,5.4,0,0,18,1,6
13326,0.604,0.196,242.58,2.8,0,1,0,2,11
629,0.352,0.228,239.24,1.8,0,1,0,14,9
32533,0.326,0.22,241.55,1.6,0,1,1,15,7


In [12]:
### First 5 training targets
y_train.head()

22600     2
18073    18
13326     1
629       1
32533     2
Name: Total_power_use, dtype: int64

In [13]:
### First 5 rows of the held-out test features
X_test.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month
7443,0.296,0.114,241.18,1.2,0,0,0,9,6
36009,0.366,0.2,238.02,1.6,0,0,0,3,11
24214,1.37,0.068,237.46,5.6,0,0,18,24,7
21985,0.4,0.17,241.97,1.8,0,0,0,19,9
1822,0.298,0.144,241.75,1.4,0,0,0,3,6


In [14]:
### First 5 held-out test targets
y_test.head()

7443      0
36009     0
24214    18
21985     0
1822      0
Name: Total_power_use, dtype: int64

In [15]:
### Features and target must have the same number of rows (X_train is 2-D, y_train is 1-D)
X_train.shape, y_train.shape

((36273, 9), (36273,))

In [16]:
### Features and target must have the same number of rows (X_test is 2-D, y_test is 1-D)
X_test.shape, y_test.shape

((12091, 9), (12091,))

### <span style='color:red '>3.3 Feature Scaling</span>

In [18]:
### StandardScaler created without arguments, so the library defaults apply
scaler=StandardScaler()
scaler

In [19]:
### Fit the scaler on the training features only and scale them in the same call.
### NOTE: X_train is rebound to the scaled array, so running this cell a second time
### re-fits the scaler on already-scaled data; use Restart & Run All rather than re-running it.
X_train=scaler.fit_transform(X_train)
X_train

array([[-0.63965   ,  0.97938968,  0.81974574, ..., -0.64329152,
        -0.08447783,  1.36115865],
       [ 0.21452558, -1.09709835, -0.00470465, ...,  1.37538448,
        -1.67223824, -0.10001375],
       [-0.45782992,  0.62744256,  0.52529917, ..., -0.76203717,
        -1.55882678,  1.36115865],
       ...,
       [ 4.48540351,  1.12016853, -1.61021332, ...,  1.25663884,
        -1.21859241, -0.68448271],
       [-0.73434796, -0.30521732, -1.93255483, ..., -0.76203717,
        -1.33200387, -0.39224823],
       [ 0.5061953 ,  0.39867693, -0.2557591 , ...,  1.37538448,
         1.7301055 ,  1.65339313]])

In [20]:
### Scale the test features with the already-fitted scaler (transform only, the scaler is not re-fitted here).
### NOTE: X_test is overwritten with its scaled version, so running this cell twice scales the data twice.
X_test=scaler.transform(X_test)
X_test

array([[-0.74949963, -0.09404905,  0.09137791, ..., -0.76203717,
        -0.76494657, -0.10001375],
       [-0.68321106,  0.66263727, -0.88804437, ..., -0.76203717,
        -1.44541532,  1.36115865],
       [ 0.26755644, -0.49878824, -1.06161287, ...,  1.37538448,
         0.9362253 ,  0.19222073],
       ...,
       [-0.84609155, -0.37560674, -1.303369  , ..., -0.76203717,
        -0.4247122 ,  0.77668969],
       [ 0.10656991, -0.51638559, -0.6431888 , ..., -0.76203717,
         1.04963675,  1.36115865],
       [-0.45972388, -1.09709835,  0.34243236, ..., -0.76203717,
         0.14234509, -0.39224823]])

### <span style='color:red '>3.4 Linear Regression</span>

In [21]:
### Linear regression model created without arguments (library defaults)
linear_reg=LinearRegression()
linear_reg

In [22]:
### Train the linear regression model on the scaled training data
linear_reg.fit(X_train, y_train)

In [23]:
### Predict the target for the scaled test features and display the predictions
linear_reg_pred=linear_reg.predict(X_test)
linear_reg_pred

array([-3.73034936e-14, -3.37507799e-14,  1.80000000e+01, ...,
       -4.26325641e-14,  1.59872116e-14, -1.95399252e-14])

In [24]:
### R2 score of the linear regression predictions on the test set, printed as a percentage.
### NOTE: the message calls this "accuracy", but the number is the r2_score value, not a classification accuracy.
r2score_linear_reg=r2_score(y_test, linear_reg_pred)
print("Our Linear Regression model has {} % accuracy".format(round(r2score_linear_reg*100,3)))

Our Linear Regression model has 100.0 % accuracy


In [25]:
### Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1),
### where n is the number of test rows and p the number of feature columns.
n_test_rows = len(y_test)
n_predictors = X_test.shape[1]
adjusted_r2_score_linear_reg = 1 - (1 - r2score_linear_reg) * (n_test_rows - 1) / (n_test_rows - n_predictors - 1)

adjusted_r2_percent = round(adjusted_r2_score_linear_reg*100, 3)
print("Adjusted R square accuracy is {} % ".format(adjusted_r2_percent))

Adjusted R square accuracy is 100.0 % 


### <span style='color:red '>3.5 Ridge Regression</span>

In [26]:
### Ridge regression model created without arguments (library defaults)
ridge=Ridge()
ridge

In [27]:
### Train the ridge model on the scaled training data
ridge.fit(X_train, y_train)

In [28]:
### Predict the target for the scaled test features and display the predictions
ridge_pred=ridge.predict(X_test)
ridge_pred

array([1.04704779e-04, 1.50368268e-04, 1.79995062e+01, ...,
       5.64697313e-05, 8.32211242e-04, 3.94637776e-04])

In [29]:
### R2 score of the ridge predictions on the test set, printed as a percentage.
### NOTE: the message calls this "accuracy", but the number is the r2_score value.
r2score_ridge=r2_score(y_test, ridge_pred)
print("Our Ridge Regression model has {} % accuracy".format(round(r2score_ridge*100,3)))

Our Ridge Regression model has 100.0 % accuracy


In [30]:
### Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1),
### where n is the number of test rows and p the number of feature columns.
n_test_rows = len(y_test)
n_predictors = X_test.shape[1]
adjusted_r2_score_ridge = 1 - (1 - r2score_ridge) * (n_test_rows - 1) / (n_test_rows - n_predictors - 1)

adjusted_r2_percent = round(adjusted_r2_score_ridge*100, 3)
print("Adjusted R square accuracy is {} % ".format(adjusted_r2_percent))

Adjusted R square accuracy is 100.0 % 


### <span style='color:red '>3.5 Lasso Regression</span>

In [31]:
### Lasso regression model created without arguments (library defaults)
lasso=Lasso()
lasso

In [32]:
### Train the lasso model on the scaled training data
lasso.fit(X_train, y_train)

In [33]:
### Predict the target for the scaled test features and display the predictions
lasso_pred=lasso.predict(X_test)
lasso_pred

array([ 0.80002264,  0.87506701, 16.78103875, ...,  0.69067228,
        1.76916703,  1.12807373])

In [36]:
### R2 score of the lasso predictions on the test set, printed as a percentage (5 decimals).
### NOTE: the message calls this "accuracy", but the number is the r2_score value.
r2score_lasso=r2_score(y_test, lasso_pred)
print("Our Lasso Regression model has {} % accuracy".format(round(r2score_lasso*100,5)))

Our Lasso Regression model has 98.19007 % accuracy


In [37]:
### Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1),
### where n is the number of test rows and p the number of feature columns.
n_test_rows = len(y_test)
n_predictors = X_test.shape[1]
adjusted_r2_score_lasso = 1 - (1 - r2score_lasso) * (n_test_rows - 1) / (n_test_rows - n_predictors - 1)

adjusted_r2_percent = round(adjusted_r2_score_lasso*100, 5)
print("Adjusted R square accuracy is {} % ".format(adjusted_r2_percent))

Adjusted R square accuracy is 98.18872 % 


### <span style='color:red '>3.6 Elastic-Net Regression</span>

In [38]:
### Elastic-Net regression model created without arguments (library defaults)
elastic=ElasticNet()
elastic

In [39]:
### Train the elastic-net model on the scaled training data
elastic.fit(X_train, y_train)

In [40]:
### Predict the target for the scaled test features and display the predictions
elastic_pred=elastic.predict(X_test)
elastic_pred

array([ 1.52822886,  1.95848109, 14.16168528, ...,  1.28132049,
        5.29570672,  2.6792371 ])

In [43]:
### R2 score of the elastic-net predictions on the test set, printed as a percentage (5 decimals).
### NOTE: the message calls this "accuracy", but the number is the r2_score value.
r2score_elastic=r2_score(y_test, elastic_pred)
print("Our Elastic-Net Regression model has {} % accuracy".format(round(r2score_elastic*100,5)))

Our Elastic-Net Regression model has 90.2359 % accuracy


In [44]:
### Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1),
### where n is the number of test rows and p the number of feature columns.
n_test_rows = len(y_test)
n_predictors = X_test.shape[1]
adjusted_r2_score_elastic = 1 - (1 - r2score_elastic) * (n_test_rows - 1) / (n_test_rows - n_predictors - 1)

adjusted_r2_percent = round(adjusted_r2_score_elastic*100, 5)
print("Adjusted R square accuracy is {} % ".format(adjusted_r2_percent))

Adjusted R square accuracy is 90.22863 % 


### <span style='color:red '>3.7 Support Vector Regressor</span>

In [45]:
### Support vector regressor created without arguments (library defaults)
svr=SVR()
svr

In [46]:
### Train the support vector regressor on the scaled training data
svr.fit(X_train, y_train)

In [47]:
### Predict the target for the scaled test features and display the predictions
svr_pred=svr.predict(X_test)
svr_pred

array([ 1.79493760e-02,  3.64429265e-01,  1.80721069e+01, ...,
        7.39772657e-02, -2.65744180e-02,  1.31389232e-01])

In [48]:
### R2 score of the SVR predictions on the test set, printed as a percentage.
### NOTE: the message calls this "accuracy", but the number is the r2_score value.
r2score_svr=r2_score(y_test, svr_pred)
print("Our Support Vector Regressor model has {} % accuracy".format(round(r2score_svr*100,3)))

Our Support Vector Regressor model has 95.002 % accuracy


In [49]:
### Adjusted R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1),
### where n is the number of test rows and p the number of feature columns.
n_test_rows = len(y_test)
n_predictors = X_test.shape[1]
adjusted_r2_score_svr = 1 - (1 - r2score_svr) * (n_test_rows - 1) / (n_test_rows - n_predictors - 1)

adjusted_r2_percent = round(adjusted_r2_score_svr*100, 3)
print("Adjusted R square accuracy is {} % ".format(adjusted_r2_percent))

Adjusted R square accuracy is 94.998 % 


## <span style='color:red '>4.0 Model Comparison</span>

In [56]:
### Gather every model's R2 score as a percentage rounded to 3 decimals - the same values
### that are printed - so the "best model" line is derived from them instead of hard-coded.
model_scores = {
    "Linear Regression": round(r2score_linear_reg*100, 3),
    "Ridge Regression": round(r2score_ridge*100, 3),
    "Lasso Regression": round(r2score_lasso*100, 3),
    "Elastic-Net Regression": round(r2score_elastic*100, 3),
    "Support Vector Regressor": round(r2score_svr*100, 3),
}

print("Accuracy of all the models is as below:\n")
print("Linear Regression: {} %\nRidge Regression: {} %\nLasso Regression: {} %\nElastic-Net Regression: {}%".format(
    model_scores["Linear Regression"],
    model_scores["Ridge Regression"],
    model_scores["Lasso Regression"],
    model_scores["Elastic-Net Regression"],
))
print("Support Vector Regressor: {} %\n".format(model_scores["Support Vector Regressor"]))

### Every model that ties for the highest rounded score is reported, in the order listed above.
best_score = max(model_scores.values())
best_models = ["'{}'".format(name) for name, score in model_scores.items() if score == best_score]
print("Best Model is {}".format(" and ".join(best_models)))


Accuracy of all the models is as below:

Linear Regression: 100.0 %
Ridge Regression: 100.0 %
Lasso Regression: 98.19 %
Elastic-Net Regression: 90.236%
Support Vector Regressor: 95.002 %

Best Model is 'Linear Regression' and 'Ridge Regression'
