## <span style='color:red '>1.0 Importing required libraries</span>

In [1]:
### Standard library
import os
import warnings

### Pandas and Numpy
import pandas as pd
import numpy as np

### MongoDB Library
import pymongo

### Machine Learning libraries
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
from sklearn.svm import SVR
from sklearn.metrics import r2_score

### To ignore warnings
warnings.filterwarnings('ignore')



## <span style='color:red '>2.0 Retrieving data from MongoDB</span>

In [3]:
### Retrieving data from MongoDB
### Creating the connection with MongoDB.
### The connection string carries a username and password, so it must not be
### committed inside the notebook: it is read from the MONGODB_URI environment
### variable instead. Any credential that was previously embedded here should be
### treated as exposed and rotated.
mongodb_uri = os.environ.get("MONGODB_URI")
if not mongodb_uri:
    raise RuntimeError(
        "Set the MONGODB_URI environment variable to the MongoDB connection string, "
        "e.g. mongodb+srv://<user>:<password>@<cluster-host>/?retryWrites=true&w=majority"
    )
client = pymongo.MongoClient(mongodb_uri)

In [4]:
### Getting handles to the 'Power_consumption' database and to its
### 'Household_power_data' collection through the open client
db = client.get_database('Power_consumption')
collection = db.get_collection('Household_power_data')

In [5]:
### Querying the collection with find(); the empty filter places no restriction on the documents returned
data_from_mongodb = collection.find({})

In [6]:
### Pulling every document out of the query result and building a pandas DataFrame from them
data_mongodb = pd.DataFrame(list(data_from_mongodb))

In [7]:
### Previewing the first 5 records of the dataset
data_mongodb.head(n=5)

Unnamed: 0,_id,index,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month,Total_power_use
0,6365ed0b17fb3db1340562eb,0,0.16,0.14,241.16,0.8,0,2,1,14,8,3
1,6365ed0b17fb3db1340562ec,1,0.358,0.168,240.59,1.6,1,0,0,8,7,1
2,6365ed0b17fb3db1340562ed,3,1.522,0.306,240.23,6.4,0,2,19,17,5,21
3,6365ed0b17fb3db1340562ee,4,0.216,0.126,242.49,1.0,0,1,1,4,8,2
4,6365ed0b17fb3db1340562ef,5,0.148,0.0,237.37,0.6,0,0,0,13,5,0


In [8]:
### Dropping the _id and index features from the dataset imported from MongoDB.
### A new frame is assigned instead of mutating in place, and errors='ignore'
### lets this cell be re-run without a KeyError once the columns are already gone.
data_mongodb = data_mongodb.drop(columns=['_id', 'index'], errors='ignore')
data_mongodb.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month,Total_power_use
0,0.16,0.14,241.16,0.8,0,2,1,14,8,3
1,0.358,0.168,240.59,1.6,1,0,0,8,7,1
2,1.522,0.306,240.23,6.4,0,2,19,17,5,21
3,0.216,0.126,242.49,1.0,0,1,1,4,8,2
4,0.148,0.0,237.37,0.6,0,0,0,13,5,0


## <span style='color:red '>3.0 Model and Evaluation</span>

### <span style='color:red '>3.1 Separating Independent and Dependent features</span>

In [9]:
### Splitting data into the independent-feature dataframe and the dependent-feature series.
### The target is selected by name rather than by position, so the split stays correct
### even if the documents come back from MongoDB with their keys in a different order.
TARGET_COLUMN = 'Total_power_use'
X = data_mongodb.drop(columns=[TARGET_COLUMN])
y = data_mongodb[TARGET_COLUMN]
### NOTE(review): in the five rows previewed above, Total_power_use equals
### Sub_metering_1 + Sub_metering_2 + Sub_metering_3, and all three are kept in X.
### If that holds for the whole dataset the target is a deterministic function of the
### features - confirm whether these columns should stay in X.
X.head()

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month
0,0.16,0.14,241.16,0.8,0,2,1,14,8
1,0.358,0.168,240.59,1.6,1,0,0,8,7
2,1.522,0.306,240.23,6.4,0,2,19,17,5
3,0.216,0.126,242.49,1.0,0,1,1,4,8
4,0.148,0.0,237.37,0.6,0,0,0,13,5


In [10]:
### Previewing the first 5 values of the dependent feature
y.head(n=5)

0     3
1     1
2    21
3     2
4     0
Name: Total_power_use, dtype: int64

### <span style='color:red '>3.2 Train Test Split</span>

In [11]:
### Holding out 25% of the rows for testing; fixing random_state=19 gives
### everyone who runs the notebook the same train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=19,
)
X_train.head(n=5)

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month
11718,0.444,0.0,246.49,1.8,0,0,1,7,3
2261,1.266,0.1,238.68,5.2,0,0,18,28,9
34429,0.408,0.23,242.16,1.8,0,2,0,19,1
33737,0.32,0.102,244.75,1.4,0,0,1,18,11
39090,0.144,0.046,242.23,0.6,0,0,1,15,9


In [12]:
### Previewing the first 5 training targets
y_train.head(n=5)

11718     1
2261     18
34429     2
33737     1
39090     1
Name: Total_power_use, dtype: int64

In [13]:
### Previewing the first 5 rows of the test features
X_test.head(n=5)

Unnamed: 0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3,day,month
47006,1.582,0.33,244.59,6.6,0,1,19,15,1
15337,2.622,0.0,239.8,10.8,0,1,19,25,1
20533,2.538,0.0,236.51,10.6,1,0,18,4,9
25050,1.716,0.0,235.82,7.2,0,0,17,27,7
38699,1.458,0.088,239.26,6.0,0,0,18,7,5


In [14]:
### Previewing the first 5 test targets
y_test.head(n=5)

47006    20
15337    20
20533    19
25050    17
38699    18
Name: Total_power_use, dtype: int64

In [15]:
### The training features and the training target should have the same number of rows
(X_train.shape, y_train.shape)

((36232, 9), (36232,))

In [16]:
### The test features and the test target should have the same number of rows
(X_test.shape, y_test.shape)

((12078, 9), (12078,))

### <span style='color:red '>3.3 Feature Scaling</span>

In [17]:
### Creating the StandardScaler used for feature scaling (no arguments passed, so library defaults apply)
scaler=StandardScaler()
### Showing the scaler object as the cell output
scaler

In [18]:
### Fitting the scaler on the training features only, then scaling them.
### X_train is rebound to the scaled result, so the unscaled training frame
### is no longer reachable under this name afterwards.
X_train = scaler.fit(X_train).transform(X_train)
X_train

array([[-0.60944023, -1.10359086,  1.77614545, ..., -0.64642454,
        -0.98571082, -0.99113289],
       [ 0.1683389 , -0.22099022, -0.66200951, ...,  1.37193262,
         1.39773018,  0.78374096],
       [-0.64350355,  0.92639061,  0.42438988, ..., -0.76515143,
         0.37625547, -1.58275751],
       ...,
       [ 0.37461122, -0.27394626,  0.23395781, ..., -0.76515143,
        -1.09920801,  1.67117789],
       [ 0.26674404, -0.55637847,  1.63254094, ...,  1.49065951,
        -0.0777333 , -1.2869452 ],
       [-0.75326314, -1.10359086, -0.03139837, ..., -0.76515143,
        -1.2127052 ,  1.07955327]])

In [19]:
### Scaling the test features with the already-fitted scaler (it is not re-fitted here).
### NOTE: X_test is overwritten, so running this cell a second time would scale the data twice.
X_test = scaler.transform(X=X_test)
X_test

array([[ 0.46733915,  1.80899126,  1.18299636, ...,  1.49065951,
        -0.0777333 , -1.58275751],
       [ 1.4513906 , -1.10359086, -0.31236373, ...,  1.49065951,
         1.05723861, -1.58275751],
       [ 1.37190953, -1.10359086, -1.33944821, ...,  1.37193262,
        -1.32620239,  0.78374096],
       ...,
       [ 0.36514919,  0.67926243, -0.07510409, ..., -0.76515143,
        -1.09920801, -0.99113289],
       [-0.63404152,  0.94404263,  0.93324937, ..., -0.76515143,
        -0.98571082, -1.2869452 ],
       [ 0.24403517, -0.66229054, -0.59645093, ...,  1.25320573,
         0.26275828,  1.67117789]])

### <span style='color:red '>3.4 Linear Regression</span>

In [20]:
### Creating the Linear Regression model (no arguments passed, so library defaults apply)
linear_reg=LinearRegression()
### Showing the unfitted model as the cell output
linear_reg

In [21]:
### Training the Linear Regression model on the scaled training data
linear_reg.fit(X=X_train, y=y_train)

In [22]:
### Predicting the target for the scaled test features with the fitted Linear Regression model
linear_reg_pred = linear_reg.predict(X=X_test)
linear_reg_pred

array([20., 20., 19., ...,  1.,  1., 17.])

In [23]:
### R^2 score of the Linear Regression predictions on the test set, reported as a percentage.
### The message calls it "accuracy", but the value is the r2_score regression metric.
r2score_linear_reg = r2_score(y_true=y_test, y_pred=linear_reg_pred)
linear_reg_r2_pct = round(r2score_linear_reg*100, 3)
print("Our Linear Regression model has {} % accuracy".format(linear_reg_r2_pct))

Our Linear Regression model has 100.0 % accuracy


In [24]:
### Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1),
### with n = number of test samples and p = number of feature columns
n_samples = len(y_test)
n_features = X_test.shape[1]
adjusted_r2_score_linear_reg = 1 - (1 - r2score_linear_reg) * (n_samples - 1) / (n_samples - n_features - 1)
print("Adjusted R square accuracy is {} % ".format(round(adjusted_r2_score_linear_reg*100, 3)))

Adjusted R square accuracy is 100.0 % 


### <span style='color:red '>3.5 Ridge Regression</span>

In [25]:
### Creating the Ridge Regression model (no arguments passed, so library defaults apply)
ridge=Ridge()
### Showing the unfitted model as the cell output
ridge

In [26]:
### Training the Ridge Regression model on the scaled training data
ridge.fit(X=X_train, y=y_train)

In [27]:
### Predicting the target for the scaled test features with the fitted Ridge model
ridge_pred = ridge.predict(X=X_test)
ridge_pred

array([19.99937858, 20.00041987, 19.00043677, ...,  1.00103406,
        1.00006353, 16.99953668])

In [28]:
### R^2 score of the Ridge predictions on the test set, reported as a percentage.
### The message calls it "accuracy", but the value is the r2_score regression metric.
r2score_ridge = r2_score(y_true=y_test, y_pred=ridge_pred)
ridge_r2_pct = round(r2score_ridge*100, 3)
print("Our Ridge Regression model has {} % accuracy".format(ridge_r2_pct))

Our Ridge Regression model has 100.0 % accuracy


In [29]:
### Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1),
### with n = number of test samples and p = number of feature columns
n_samples = len(y_test)
n_features = X_test.shape[1]
adjusted_r2_score_ridge = 1 - (1 - r2score_ridge) * (n_samples - 1) / (n_samples - n_features - 1)
print("Adjusted R square accuracy is {} % ".format(round(adjusted_r2_score_ridge*100, 3)))

Adjusted R square accuracy is 100.0 % 


### <span style='color:red '>3.5 Lasso Regression</span>

In [30]:
### Creating the Lasso Regression model (no arguments passed, so library defaults apply)
lasso=Lasso()
### Showing the unfitted model as the cell output
lasso

In [31]:
### Training the Lasso Regression model on the scaled training data
lasso.fit(X=X_train, y=y_train)

In [32]:
### Predicting the target for the scaled test features with the fitted Lasso model
lasso_pred = lasso.predict(X=X_test)
lasso_pred

array([18.58621677, 19.77637146, 18.87475904, ...,  2.92822841,
        1.71976365, 15.91063702])

In [33]:
### R^2 score of the Lasso predictions on the test set, reported as a percentage (5 decimals).
### The message calls it "accuracy", but the value is the r2_score regression metric.
r2score_lasso = r2_score(y_true=y_test, y_pred=lasso_pred)
lasso_r2_pct = round(r2score_lasso*100, 5)
print("Our Lasso Regression model has {} % accuracy".format(lasso_r2_pct))

Our Lasso Regression model has 98.18437 % accuracy


In [34]:
### Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1),
### with n = number of test samples and p = number of feature columns
n_samples = len(y_test)
n_features = X_test.shape[1]
adjusted_r2_score_lasso = 1 - (1 - r2score_lasso) * (n_samples - 1) / (n_samples - n_features - 1)
print("Adjusted R square accuracy is {} % ".format(round(adjusted_r2_score_lasso*100, 5)))

Adjusted R square accuracy is 98.18301 % 


### <span style='color:red '>3.6 Elastic-Net Regression</span>

In [35]:
### Creating the Elastic-Net Regression model (no arguments passed, so library defaults apply)
elastic=ElasticNet()
### Showing the unfitted model as the cell output
elastic

In [36]:
### Training the Elastic-Net Regression model on the scaled training data
elastic.fit(X=X_train, y=y_train)

In [37]:
### Predicting the target for the scaled test features with the fitted Elastic-Net model
elastic_pred = elastic.predict(X=X_test)
elastic_pred

array([15.75438626, 20.08409824, 19.48163009, ...,  6.84512865,
        2.44533446, 13.63785689])

In [38]:
### R^2 score of the Elastic-Net predictions on the test set, reported as a percentage (5 decimals).
### The message calls it "accuracy", but the value is the r2_score regression metric.
r2score_elastic = r2_score(y_true=y_test, y_pred=elastic_pred)
elastic_r2_pct = round(r2score_elastic*100, 5)
print("Our Elastic-Net Regression model has {} % accuracy".format(elastic_r2_pct))

Our Elastic-Net Regression model has 90.08943 % accuracy


In [39]:
### Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1),
### with n = number of test samples and p = number of feature columns
n_samples = len(y_test)
n_features = X_test.shape[1]
adjusted_r2_score_elastic = 1 - (1 - r2score_elastic) * (n_samples - 1) / (n_samples - n_features - 1)
print("Adjusted R square accuracy is {} % ".format(round(adjusted_r2_score_elastic*100, 5)))

Adjusted R square accuracy is 90.08204 % 


### <span style='color:red '>3.7 Support Vector Regressor</span>

In [40]:
### Creating the Support Vector Regressor (no arguments passed, so library defaults apply)
svr=SVR()
### Showing the unfitted model as the cell output
svr

In [41]:
### Training the Support Vector Regressor on the scaled training data
svr.fit(X=X_train, y=y_train)

In [42]:
### Predicting the target for the scaled test features with the fitted Support Vector Regressor
svr_pred = svr.predict(X=X_test)
svr_pred

array([19.89179579, 19.99742358, 18.89577809, ...,  0.94188813,
        0.91517085, 17.07282177])

In [43]:
### R^2 score of the Support Vector Regressor predictions on the test set, reported as a percentage.
### The message calls it "accuracy", but the value is the r2_score regression metric.
r2score_svr = r2_score(y_true=y_test, y_pred=svr_pred)
svr_r2_pct = round(r2score_svr*100, 3)
print("Our Support Vector Regressor model has {} % accuracy".format(svr_r2_pct))

Our Support Vector Regressor model has 95.227 % accuracy


In [44]:
### Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1),
### with n = number of test samples and p = number of feature columns
n_samples = len(y_test)
n_features = X_test.shape[1]
adjusted_r2_score_svr = 1 - (1 - r2score_svr) * (n_samples - 1) / (n_samples - n_features - 1)
print("Adjusted R square accuracy is {} % ".format(round(adjusted_r2_score_svr*100, 3)))

Adjusted R square accuracy is 95.223 % 


## <span style='color:red '>4.0 Model Comparison</span>

In [45]:
### R^2 score of every fitted model as a percentage rounded to 3 decimals,
### keyed by the model's display name and kept in display order
model_scores = {
    "Linear Regression": round(r2score_linear_reg*100, 3),
    "Ridge Regression": round(r2score_ridge*100, 3),
    "Lasso Regression": round(r2score_lasso*100, 3),
    "Elastic-Net Regression": round(r2score_elastic*100, 3),
    "Support Vector Regressor": round(r2score_svr*100, 3),
}

print("Accuracy of all the models is as below:\n")
print("Linear Regression: {} %\nRidge Regression: {} %\nLasso Regression: {} %\nElastic-Net Regression: {}%".format(
    model_scores["Linear Regression"],
    model_scores["Ridge Regression"],
    model_scores["Lasso Regression"],
    model_scores["Elastic-Net Regression"],
))
print("Support Vector Regressor: {} %\n".format(model_scores["Support Vector Regressor"]))

### The winner is derived from the scores instead of being hard-coded, so the
### conclusion cannot go stale when the data or the models change. Models whose
### rounded score ties for the maximum are all reported.
best_score = max(model_scores.values())
best_models = [name for name, score in model_scores.items() if score == best_score]
print("Best Model is " + " and ".join("'{}'".format(name) for name in best_models))


Accuracy of all the models is as below:

Linear Regression: 100.0 %
Ridge Regression: 100.0 %
Lasso Regression: 98.184 %
Elastic-Net Regression: 90.089%
Support Vector Regressor: 95.227 %

Best Model is 'Linear Regression' and 'Ridge Regression'
