#### **Package Imports**

In [None]:
import pandas as pd
import numpy as np
import seaborn as sb
import matplotlib.pyplot as plt
import xgboost as xgb
from sklearn.preprocessing import OrdinalEncoder
# Apply matplotlib's 'fivethirtyeight' style sheet to the figures drawn in this notebook.
plt.style.use('fivethirtyeight')
# Seaborn's current colour palette; the plotting cells below pick colours from it by index (color_pal[3]).
color_pal = sb.color_palette()


#### **Loading of Train and Test Data**

In [None]:
# Read the training and test sets from CSV files in the working directory.
complete_data_train = pd.read_csv('train.csv')
complete_data_test = pd.read_csv('test.csv')

# Only the last expression of a cell is rendered, so preview a few rows of the
# test frame here instead of dumping whole frames (the mid-cell bare
# expression on the training frame was never displayed).
complete_data_test.head()

#### **Changing Categorical Data to Numerical Data**

In [None]:
# Replace the categorical `var2` column with ordinal codes. The encoder is
# fitted on the training column only and then reused, unchanged, for the test
# column so both frames share the same category -> code mapping.
ordinal_encod = OrdinalEncoder()

# The encoder expects 2-D input, hence the (-1, 1) reshape of each column.
train_var2 = complete_data_train["var2"].to_numpy().reshape(-1, 1)
test_var2 = complete_data_test["var2"].to_numpy().reshape(-1, 1)

complete_data_train["var2"] = ordinal_encod.fit_transform(train_var2)
complete_data_test["var2"] = ordinal_encod.transform(test_var2)

# Distinct codes present in the test set after encoding.
complete_data_test["var2"].unique()

#### **Install xgboost**

In [None]:
pip install xgboost

#### **Electric Consumption Graph**

In [None]:
# Histogram of the target column using 500 bins and the fourth palette colour.
plot1 = complete_data_train['electricity_consumption'].plot.hist(bins=500, color=color_pal[3])
plot1

In [None]:
# Set aside the rows whose consumption is below 180 in `complete_data_train1`
# and keep only rows with consumption >= 180 in the working training frame.
consumption = complete_data_train['electricity_consumption']
complete_data_train1 = complete_data_train.loc[consumption < 180]
complete_data_train = complete_data_train.loc[consumption >= 180].copy()

# Sanity check: no row below the threshold should remain.
complete_data_train.loc[complete_data_train['electricity_consumption'] < 180]

In [None]:
# Set aside the rows whose consumption is above 750 in `complete_data_train2`
# and keep only rows with consumption <= 750 in the working training frame.
consumption = complete_data_train['electricity_consumption']
complete_data_train2 = complete_data_train.loc[consumption > 750]
complete_data_train = complete_data_train.loc[consumption <= 750].copy()

# Evaluated but not rendered (only the last expression of a cell is shown).
complete_data_train.loc[complete_data_train['electricity_consumption'] > 750]

complete_data_train.head()

In [None]:
# Consumption against the `datetime` column, drawn with '|' markers.
plot2 = complete_data_train[["datetime", "electricity_consumption"]]
axis = plot2.plot(
    x="datetime",
    y="electricity_consumption",
    style='|',
    figsize=(16, 4),
    color=color_pal[3],
    title='Electricity_consumption vs Date and Time ',
)
axis.set_xlabel('Date and Time')
axis.set_ylabel('Electricity Consumption Rate')
plt.show()

In [None]:
# Consumption against `windspeed`, drawn with '|' markers.
plot3 = complete_data_train[["windspeed", "electricity_consumption"]]
axis = plot3.plot(
    x="windspeed",
    y="electricity_consumption",
    style='|',
    figsize=(12, 3),
    color=color_pal[3],
    title='Electricity_consumption vs Windspeed ',
)
axis.set_xlabel('Windspeed')
axis.set_ylabel('Electricity Consumption Rate')
plt.show()

In [None]:
# Consumption against `pressure`, drawn with '|' markers.
plot4 = complete_data_train[["pressure", "electricity_consumption"]]
axis = plot4.plot(
    x="pressure",
    y="electricity_consumption",
    style='|',
    figsize=(12, 3),
    color=color_pal[3],
    title='Electricity Consumption vs Pressure',
)
axis.set_xlabel('Pressure')
axis.set_ylabel('Electricity Consumption Rate')
plt.show()

In [None]:
# Consumption against `temperature`, drawn with '|' markers.
plot5 = complete_data_train[["temperature", "electricity_consumption"]]
axis = plot5.plot(
    x="temperature",
    y="electricity_consumption",
    style='|',
    figsize=(12, 3),
    color=color_pal[3],
    title='Electricity Consumption VS Temperature ',
)
axis.set_xlabel('Temperature')
axis.set_ylabel('Electricity Consumption Rate')
plt.show()

In [None]:
# Consumption against `var1`, drawn with '|' markers.
plot6 = complete_data_train[["var1", "electricity_consumption"]]
axis = plot6.plot(
    x="var1",
    y="electricity_consumption",
    style='|',
    figsize=(12, 3),
    color=color_pal[3],
    title='electricity_consumption VS Var1',
)
axis.set_xlabel('Var1')
axis.set_ylabel('Electricity Consumption Rate')
plt.show()

In [None]:
# Consumption against the ordinal-encoded `var2`, drawn with '|' markers.
plot8 = complete_data_train[["var2", "electricity_consumption"]]
axis = plot8.plot(
    x="var2",
    y="electricity_consumption",
    style='|',
    figsize=(12, 3),
    color=color_pal[3],
    title='electricity_consumption VS Var2',
)
axis.set_xlabel('Var2')
axis.set_ylabel('Electricity Consumption Rate')
plt.show()

In [None]:
# Move the `datetime` column into the index. The membership check keeps the
# cell safe to re-run: calling set_index('datetime') a second time would raise
# KeyError because the column no longer exists once it has become the index.
if 'datetime' in complete_data_train.columns:
    complete_data_train = complete_data_train.set_index('datetime')

# Only the last expression of a cell is rendered, so the intermediate
# .tail()/.index expressions were dropped; show the resulting index.
complete_data_train.index

In [None]:
# Parse the index values into datetimes so the .year/.month/.hour accessors
# used in the next cell are available on the index.
complete_data_train.index = pd.to_datetime(complete_data_train.index)

In [None]:
# Calendar features derived from the DatetimeIndex.
complete_data_train['Year'] = complete_data_train.index.year
complete_data_train['Quarter'] = complete_data_train.index.quarter
complete_data_train['Month'] = complete_data_train.index.month
# DatetimeIndex.week was deprecated in pandas 1.1 and removed in 2.0;
# isocalendar().week is its replacement. It returns a nullable UInt32 Series,
# so cast to int64 and take the raw array: the column then has the same
# integer dtype as the other calendar features and no index alignment occurs.
complete_data_train['Week'] = (
    complete_data_train.index.isocalendar().week.astype('int64').to_numpy()
)
complete_data_train['Days'] = complete_data_train.index.day
complete_data_train['Hours'] = complete_data_train.index.hour

# Chronological hold-out split of the *training* file: rows before 2016 are
# used for fitting, rows from 2016-01-01 onwards for validation. Both are
# explicit copies because a later cell adds a 'Prediction' column to `test`;
# doing that on a .loc slice triggers SettingWithCopyWarning.
train = complete_data_train.loc[complete_data_train.index < '2016-01-01'].copy()
test = complete_data_train.loc[complete_data_train.index >= '2016-01-01'].copy()

complete_data_train.head()

#### **Boxplot for Electric Consumption per Week**

In [None]:
import seaborn as sns

# Distribution of consumption for each week number, on the pre-2016 rows.
fig, ax = plt.subplots(figsize=(17, 5))
sns.boxplot(x='Week', y='electricity_consumption', data=train, ax=ax)
ax.set_title("Electricity Consumption per Week")
plt.show()

#### **Boxplot for Electric Consumption per Hour**

In [None]:
# Distribution of consumption for each hour of the day, on the pre-2016 rows.
fig, ax = plt.subplots(figsize=(9, 7))
sb.boxplot(x='Hours', y='electricity_consumption', data=train, ax=ax)
ax.set_title("Electricity Consumption per Hour")
plt.show()

#### **Boxplot for Electric Consumption per Month**

In [None]:
# Distribution of consumption for each calendar month, on the pre-2016 rows.
fig, ax = plt.subplots(figsize=(10, 5))
sb.boxplot(x='Month', y='electricity_consumption', data=train, ax=ax)
ax.set_title("Electricity Consumption per Month")
plt.show()

#### **Split of Features**

In [None]:
# Predictor columns: weather readings, calendar features and the two
# anonymous variables. The test list is copied from the train list so both
# frames always expose the same columns in the same order.
x_train_features = [
    "temperature", "pressure", "windspeed",
    "Year", "Quarter", "Month", "Week", "Days", "Hours",
    "var1", "var2",
]
x_test_features = list(x_train_features)

# Target column, kept as a one-element list so Y_* are single-column DataFrames.
y_train_features = ["electricity_consumption"]
y_test_features = list(y_train_features)

X_Train = train[x_train_features]
Y_Train = train[y_train_features]

X_Test = test[x_test_features]
Y_Test = test[y_test_features]

### **XGBOOST MODEL**

In [None]:
from sklearn.metrics import mean_squared_error

# Gradient-boosted regressor: 10000 trees with a 0.01 learning rate.
reg_xgb = xgb.XGBRegressor(learning_rate=0.01, n_estimators=10000)

# The evaluation metric is reported on both splits every 1000 boosting rounds.
evaluation_sets = [(X_Train, Y_Train), (X_Test, Y_Test)]
reg_xgb.fit(X_Train, Y_Train, eval_set=evaluation_sets, verbose=1000)

#### **RMSE**

In [None]:
# `test` was sliced out of complete_data_train; take an explicit copy before
# adding a column so the assignment cannot raise SettingWithCopyWarning.
test = test.copy()
test['Prediction'] = reg_xgb.predict(X_Test)

# Per-row residuals (actual - predicted). The original bare expression was
# evaluated mid-cell and discarded; keep the result so it can be inspected.
residuals = test['electricity_consumption'] - test['Prediction']

# Root-mean-squared error of the XGBoost model on the hold-out rows.
rmse_score = np.sqrt(mean_squared_error(test['electricity_consumption'], test['Prediction']))

print(rmse_score)

#### **R2**

In [None]:
from sklearn.metrics import r2_score

# Coefficient of determination of the XGBoost predictions on the hold-out rows.
r2 = r2_score(y_true=test['electricity_consumption'], y_pred=test['Prediction'])
r2

#### **Test Data Prediction**

In [None]:
# Move the `datetime` column of the test frame into the index. The membership
# check keeps the cell safe to re-run: calling set_index('datetime') a second
# time would raise KeyError because the column no longer exists.
if 'datetime' in complete_data_test.columns:
    complete_data_test = complete_data_test.set_index('datetime')

# Only the last expression of a cell is rendered, so the intermediate
# .tail()/.index expressions were dropped; show the resulting index.
complete_data_test.index

In [None]:
# Parse the index values into datetimes so the .year/.month/.hour accessors
# used in the next cell are available on the index.
complete_data_test.index = pd.to_datetime(complete_data_test.index)

In [None]:
# Calendar features for the test frame, mirroring the ones built for training.
complete_data_test['Year'] = complete_data_test.index.year
complete_data_test['Quarter'] = complete_data_test.index.quarter
complete_data_test['Month'] = complete_data_test.index.month
# DatetimeIndex.week was deprecated in pandas 1.1 and removed in 2.0;
# isocalendar().week is its replacement. It returns a nullable UInt32 Series,
# so cast to int64 and take the raw array: the column then has the same
# integer dtype as the other calendar features and no index alignment occurs.
complete_data_test['Week'] = (
    complete_data_test.index.isocalendar().week.astype('int64').to_numpy()
)
complete_data_test['Days'] = complete_data_test.index.day
complete_data_test['Hours'] = complete_data_test.index.hour

# The index must already hold datetimes for the accessors above to work, so
# the second pd.to_datetime(...) on the index and the discarded mid-cell
# .head()/.index expressions were removed.
complete_data_test.head()

In [None]:
# Keep only the calendar, weather and var1/var2 columns of the test frame.
# NOTE: the column order here (calendar features first) is not the order used
# in x_train_features; select by name rather than by position downstream.
complete_data_test = complete_data_test.loc[
    :,
    [
        "Year", "Quarter", "Month", "Week", "Days", "Hours",
        "temperature", "pressure", "windspeed",
        "var1", "var2",
    ],
]
complete_data_test


#### **Generate CSV File (XGBoost)**

In [None]:
# Predict on the prepared test.csv frame. The original cell predicted on
# X_Test, which is the 2016+ hold-out slice of train.csv, so the exported file
# never contained predictions for the actual test set.
# Columns are picked with x_train_features so they reach the model in the
# same order it was fitted on.
test_predictions_xgb = reg_xgb.predict(complete_data_test[x_train_features])

# Key each prediction by its timestamp and give the column a real name
# instead of the default integer index / column "0".
Csv1 = pd.DataFrame(
    {'electricity_consumption': test_predictions_xgb},
    index=complete_data_test.index,
)
Csv1.to_csv('XgBoost_Result.csv')

### **Random Forest Model**

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Random forest of 1000 trees; random_state fixes the seed for reproducibility.
rf = RandomForestRegressor(n_estimators=1000, random_state=1)

# Y_Train is a single-column DataFrame. Passing it as-is makes scikit-learn
# emit DataConversionWarning ("A column-vector y was passed when a 1d array
# was expected"), so flatten it to shape (n_samples,) first.
rf.fit(X_Train, Y_Train.to_numpy().ravel())

# R^2 on the rows the forest was fitted on.
print(rf.score(X_Train, Y_Train))

In [None]:
# Random-forest predictions for the hold-out rows (train.csv rows dated 2016-01-01 or later).
y_pred = rf.predict(X_Test)


In [None]:
# R^2 of the random forest on the hold-out rows.
rf.score(X_Test, Y_Test)

#### **RMSE**

In [None]:
from math import sqrt

# Root-mean-squared error of the random forest on the hold-out rows.
mse_rf = mean_squared_error(Y_Test, y_pred)
rmse_score1 = sqrt(mse_rf)
print(rmse_score1)

### **Linear Regression Model**

In [None]:
from sklearn.linear_model import LinearRegression

# Ordinary least-squares baseline; fit() returns the fitted estimator itself.
lr = LinearRegression().fit(X_Train, Y_Train)

# R^2 on the rows the model was fitted on.
train_r2_lr = lr.score(X_Train, Y_Train)
print(train_r2_lr)

#### **Generate CSV File (Random Forest)**

In [None]:
# Predict on the prepared test.csv frame. The original cell predicted on
# X_Test, which is the 2016+ hold-out slice of train.csv, so the exported file
# never contained predictions for the actual test set.
# Columns are picked with x_train_features so they reach the model in the
# same order it was fitted on.
test_predictions_rf = rf.predict(complete_data_test[x_train_features])

# Key each prediction by its timestamp and give the column a real name
# instead of the default integer index / column "0".
Csv2 = pd.DataFrame(
    {'electricity_consumption': test_predictions_rf},
    index=complete_data_test.index,
)
Csv2.to_csv('RandomForest_Result.csv')