In [1]:
import numpy as np 
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split,GridSearchCV,RandomizedSearchCV
from sklearn.linear_model import LinearRegression,Ridge,Lasso
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE,ADASYN,SMOTENC
import joblib

In [2]:
import pandas as pd
# Load the pre-processed readings, parsing 'Date_time' into datetimes.
# A forward slash is used as the separator: the previous "Data\Processed_data.csv"
# literal contained the invalid escape sequence "\P" (a warning on current
# Python versions) and only resolved to a file inside Data/ on Windows.
df = pd.read_csv("Data/Processed_data.csv", parse_dates=['Date_time'])

# Reproducible random subset of 105,000 rows (fixed seed).
df_sample = df.sample(n=105000, random_state=202)

In [3]:
# Replace the shuffled row labels left over from sampling with a fresh 0..n-1 index.
df_sample = df_sample.reset_index(drop=True)

In [4]:
# Preview the re-indexed sample; the notebook renders a truncated head/tail view.
df_sample

Unnamed: 0,Global_reactive_power,Voltage,Global_intensity,Date_converted,Date_time,Total_meter_reading
0,0.000,239.35,6.0,2009-06-23,2009-06-23 20:03:00,18.0
1,0.190,237.03,6.4,2007-01-19,2007-01-19 20:56:00,0.0
2,0.000,243.77,5.6,2008-12-31,2008-12-31 04:40:00,18.0
3,0.052,243.43,0.8,2008-10-28,2008-10-28 06:07:00,0.0
4,0.094,240.62,1.2,2009-05-14,2009-05-14 14:26:00,2.0
...,...,...,...,...,...,...
104995,0.000,248.68,1.0,2009-12-14,2009-12-14 02:12:00,1.0
104996,0.000,239.26,0.8,2007-10-29,2007-10-29 18:59:00,0.0
104997,0.146,240.11,6.4,2010-04-30,2010-04-30 12:39:00,20.0
104998,0.000,242.44,0.8,2008-09-14,2008-09-14 06:11:00,0.0


In [5]:
# Features: every column except the two date columns and the target.
# NOTE(review): X and y are built from the full `df`, not from the 105,000-row
# `df_sample` drawn earlier, which is never used afterwards - confirm which
# frame was intended before relying on the metrics below.
target_col = 'Total_meter_reading'
X = df.drop(columns=['Date_converted', 'Date_time', target_col])
y = df[target_col]

In [6]:
# Hold out a test split; the fixed seed keeps the partition reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=2022,
)

In [7]:
# Learn the scaling statistics from the training split only.
std_scaler = StandardScaler().fit(X_train)
std_scaler

In [8]:
from joblib import dump, load

# Persist the fitted scaler to disk under this file name.
joblib_file = 'Std_scaler.joblib'
with open(joblib_file, mode='wb') as scaler_out:
    dump(std_scaler, scaler_out)

In [9]:

# Read the persisted scaler back from disk. The capitalised `Std_scaler` is a
# separate variable from the `std_scaler` that was fitted and dumped above.
with open('Std_scaler.joblib', mode='rb') as scaler_in:
    Std_scaler = joblib.load(scaler_in)

In [10]:
# Feature names, kept so the scaled arrays can be wrapped back into DataFrames.
column = X.columns.tolist()

In [11]:
# Scale both splits with the reloaded scaler and wrap the resulting arrays back
# into DataFrames. The original row labels are passed through (index=...) so the
# features stay label-aligned with y_train / y_test; without it the frames get a
# fresh 0..n-1 index that no longer matches the targets.
# NOTE: this cell overwrites X_train / X_test, so running it a second time would
# scale already-scaled data - re-run the train/test split cell first.
X_train = pd.DataFrame(
    Std_scaler.transform(X_train),
    columns=column,
    index=X_train.index,
)
X_test = pd.DataFrame(
    Std_scaler.transform(X_test),
    columns=column,
    index=X_test.index,
)

### Without hyperparameter tuning 

#### Linear model 

In [12]:
# Baseline: plain linear regression fitted on the training split.
Liner_model = LinearRegression().fit(X_train, y_train)
Liner_model


In [13]:
# Liner_model was already fitted on (X_train, y_train) in the previous cell, so
# the duplicate fit is dropped; predict on the held-out split and preview.
y_Pred = Liner_model.predict(X_test)
y_Pred

array([ 0.53438294, 11.11886555,  1.51407885, ..., 31.43516712,
       36.94261242, 11.53460651])

In [14]:
from sklearn.metrics import mean_squared_error,mean_absolute_error,r2_score
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn.model_selection import cross_val_predict

In [15]:
def Eval_model(model, X=None, y=None, cv=5):
    """Print cross-validated regression metrics for one estimator.

    Predictions are produced with ``cross_val_predict`` and compared against
    the true targets.

    Parameters
    ----------
    model : sequence
        ``(display_name, estimator)``. Index 0 prefixes every printed line and
        index 1 is the estimator handed to ``cross_val_predict``.
    X, y : optional
        Features and targets to evaluate on. When omitted, the notebook-level
        ``X_train`` / ``y_train`` are used (the original behaviour).
    cv : int, default 5
        Forwarded unchanged to ``cross_val_predict``.

    Returns
    -------
    None
        MAE, MSE, RMSE, log(RMSE) and R2 are printed, not returned.
    """
    name, estimator = model[0], model[1]

    # Fall back to the notebook globals only when no data is supplied.
    features = X_train if X is None else X
    targets = y_train if y is None else y

    predictions = cross_val_predict(estimator, features, targets, cv=cv)

    mae = mean_absolute_error(targets, predictions)
    mse = mean_squared_error(targets, predictions)
    rmse = np.sqrt(mse)
    # Natural log of the RMSE. This is not the root mean squared logarithmic
    # error, so it is reported as log(RMSE) rather than under the RMSLE label.
    log_rmse = np.log(rmse)
    r2 = r2_score(targets, predictions)

    print(f'{name} MAE: {mae}')
    print(f'{name} MSE: {mse}')
    print(f'{name} RMSE: {rmse}')
    print(f'{name} log(RMSE): {log_rmse}')
    print(f'{name} R2 score: {r2}')

In [16]:
from sklearn.linear_model import LinearRegression
# Cross-validated metrics for plain linear regression.
Eval_model(model=('Linear Regression', LinearRegression()))

Linear Regression MAE: 4.367303726023876
Linear Regression MSE: 47.14583105417409
Linear Regression RMSE: 6.866282185737351
Linear Regression RMSLE: 1.9266227931914475
Linear Regression R2 score: 0.7153345282277974


In [17]:
from sklearn.linear_model import Ridge
# Cross-validated metrics for Ridge with its default settings.
Eval_model(model=('Ridge Regression', Ridge()))

Ridge Regression MAE: 4.367303982002985
Ridge Regression MSE: 47.145831053615225
Ridge Regression RMSE: 6.8662821856966545
Ridge Regression RMSLE: 1.9266227931855207
Ridge Regression R2 score: 0.7153345282311718


In [18]:
from sklearn.linear_model import Lasso
# Cross-validated metrics for Lasso with its default settings.
Eval_model(model=('Lasso Regression', Lasso()))

Lasso Regression MAE: 4.57584834103361
Lasso Regression MSE: 48.496021712479084
Lasso Regression RMSE: 6.963908508336327
Lasso Regression RMSLE: 1.9407408840161267
Lasso Regression R2 score: 0.7071821072791209


In [19]:
from sklearn.linear_model import ElasticNet
# Cross-validated metrics for ElasticNet with its default settings. The label
# now names the estimator actually evaluated; it previously read
# 'Lasso Regression', which made these results look like a second Lasso run.
Eval_model(('ElasticNet Regression', ElasticNet()))

Lasso Regression MAE: 5.778085889203854
Lasso Regression MSE: 62.86671726587903
Lasso Regression RMSE: 7.928853464775284
Lasso Regression RMSLE: 2.070508443198222
Lasso Regression R2 score: 0.620412169451477


### The SVM model is slow to train, so the sample size is reduced

In [20]:
# Smaller reproducible subset: 35,000 rows drawn from the full frame.
df_mini_sample = df.sample(
    n=35000,
    random_state=202,
)