In [None]:
# Importing needed packages
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Load the raw readings. The file is ';'-separated; source columns 0 and 1 are
# combined and parsed into a single 'datetime' column that becomes the index,
# '?' entries are read as missing, and the remaining columns are read as floats.
df = pd.read_csv(
    "../input/household-power-consumption/household_power_consumption.txt",
    sep=';',
    na_values='?',
    dtype=float,
    parse_dates={'datetime': [0, 1]},
    infer_datetime_format=True,
    index_col=['datetime'],
    low_memory=False,
)

In [None]:
# View and confirm the number of rows and columns that were loaded.
df.shape

In [None]:
# Column dtypes (the loader requests dtype=float for the data columns).
df.dtypes

In [None]:
# First ten rows of the raw frame.
df.head(10)

In [None]:
# Last ten rows of the raw frame.
df.tail(10)

In [None]:
# Finding null values: True if at least one cell anywhere in the frame is missing.
df.isnull().values.any()

In [None]:
# Number of missing cells per column.
df.isnull().sum()

In [None]:
# Turn any literal '?' cells into NaN, mutating df in place.
# NOTE(review): the read_csv call already passes na_values='?' and dtype=float,
# so this is presumably a no-op safety net — confirm before relying on or removing it.
df.replace('?', np.nan, inplace = True)

In [None]:
# Impute each missing reading with the mean of its own column.
# DataFrame.fillna accepts a Series keyed by column name, so the per-column
# apply/lambda is unnecessary. The raw frame `df` is left untouched and the
# filled copy is kept as `data_df`.
data_df = df.fillna(df.mean())

In [None]:
# Re-count missing cells per column after imputation; every column that had at
# least one observed value should now report zero.
data_df.isnull().sum()

In [None]:
# Downsample to one row per calendar day by summing every column within each day.
# NOTE(review): summing suits consumption totals but presumably not 'Voltage'
# (a level rather than a quantity) — confirm whether a daily mean is wanted there.
df_daily = data_df.resample('D').sum()
# Leave the frame as the cell's last expression so Jupyter renders its rich,
# truncated table view instead of a plain-text print() dump.
df_daily

In [None]:
import statsmodels.api as sm 
from pylab import rcParams 

In [None]:
# Enlarge the default figure size so the stacked decomposition panels are readable.
rcParams['figure.figsize'] = (15, 8) 
# Additive seasonal decomposition of the daily Global_active_power series.
decompose_series = sm.tsa.seasonal_decompose(df_daily['Global_active_power'], model = 'additive') 
# Draw the decomposition result's built-in figure and render it.
decompose_series.plot() 
plt.show()

In [None]:
# Correlation between active and reactive power, scaled to a percentage.
# NOTE(review): this reads the un-imputed `df` rather than `data_df` — confirm
# that correlating the raw readings (with missing rows) is intended.
data1 = df['Global_active_power']
data2 = df['Global_reactive_power']

corr1 = data1.corr(data2)*100
# Display the percentage rounded to the nearest whole number.
round(corr1)

In [None]:
# Correlation between voltage and global intensity, scaled to a percentage.
# NOTE(review): this reads the un-imputed `df` rather than `data_df` — confirm
# that correlating the raw readings (with missing rows) is intended.
data3 = df['Voltage']
data4 = df['Global_intensity']

corr2 = data3.corr(data4) * 100
# Display the percentage rounded to the nearest whole number.
round(corr2)

In [None]:
from fbprophet import Prophet

In [None]:
# Move the 'datetime' index back into an ordinary column so it can later be
# renamed to the 'ds' column used for modelling.
df_daily1 = df_daily.reset_index()
# Inspect the last 365 daily rows.
df_daily1.tail(365)

In [None]:
# Rename the timestamp and target columns to 'ds' and 'y', the names the
# Prophet model is later fitted on.
train_dataset = df_daily1.rename(columns={"datetime": "ds", "Global_active_power": "y"})

In [None]:
# X: the timestamp plus the six remaining measurement columns (added to the
# training frame later as extra regressors).
# y: the target, kept as a one-column DataFrame (double brackets), not a Series.
X = train_dataset[['ds','Global_reactive_power', 'Voltage', 'Global_intensity', 'Sub_metering_1', 'Sub_metering_2', 'Sub_metering_3']]
y = train_dataset[['y']]

In [None]:
# Rebuild train_dataset from scratch with only the two columns the baseline
# model consumes: the parsed timestamp 'ds' and the target 'y'.
train_dataset = pd.DataFrame({
    'ds': pd.to_datetime(X["ds"]),
    'y': y['y'],
})

In [None]:
# First twelve rows of the two-column ('ds', 'y') modelling frame.
train_dataset.head(12)

In [None]:
# Last twelve rows of the modelling frame.
train_dataset.tail(12)

In [None]:
# Total number of daily rows available for the train/test split.
train_dataset.shape

In [None]:
# Chronological hold-out split by row position: the first 1077 daily rows are
# used for fitting and every later row is kept back for evaluation.
train_df = train_dataset.iloc[:1077]
test_df = train_dataset.iloc[1077:]

In [None]:
# Rows/columns in the training split.
train_df.shape

In [None]:
# Rows/columns in the hold-out split.
test_df.shape

In [None]:
# Baseline: a Prophet model with default settings, fitted on 'ds'/'y' only.
model = Prophet() 
model.fit(train_df) 

In [None]:
# Predict for the next year

# Extend the training dates by 365 further periods and predict over the whole frame.
# NOTE(review): 365 presumably has to equal len(test_df) for the later
# pred[1077:] vs. test_df['y'] comparison to line up — confirm.
future = model.make_future_dataframe(periods= 365)
forecast = model.predict(future)

In [None]:
# First rows of the date frame that was passed to predict().
future.head()

In [None]:
# Rows/columns of the forecast output.
forecast.shape

In [None]:
# First forecast rows.
forecast.head() 

In [None]:
# Last forecast rows.
forecast.tail() 

In [None]:
# Keep only the prediction, its interval bounds and the trend columns for display.
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper', 'trend', 'trend_lower', 'trend_upper']] 

In [None]:
# yhat is the prediction, while yhat_lower and yhat_upper are its lower and upper bounds.
model.plot(forecast)
plt.show()

In [None]:
import numpy as np

def mape(actual, pred):
    """Return the mean absolute percentage error between two sequences.

    Parameters
    ----------
    actual : array-like
        Observed values; each one is the denominator of its relative error.
    pred : array-like
        Predicted values, matched with ``actual`` by position.

    Returns
    -------
    float
        Mean of ``|(actual - pred) / actual|`` multiplied by 100.
    """
    observed = np.array(actual)
    predicted = np.array(pred)
    # Element-wise error relative to the observed value.
    relative_errors = (observed - predicted) / observed
    return np.abs(relative_errors).mean() * 100

In [None]:
# Observed hold-out targets.
actual = test_df['y']
# Baseline predictions; forecast covers the training dates as well, so drop the
# first 1077 rows to keep only the rows after the training period.
pred = forecast['yhat']
pred = pred[1077:]

In [None]:
# MAPE of the baseline model on the hold-out period, rounded to two decimals.
mape0 = mape(actual, pred)
round(mape0, 2)

In [None]:
# NOTE(review): this cell repeats the actual/pred preparation from two cells
# above unchanged; it appears redundant — confirm before removing.
actual = test_df['y']
pred = forecast['yhat']
pred = pred[1077:]

In [None]:
from sklearn.metrics import mean_squared_error
# Mean squared error of the baseline predictions on the hold-out period.
mse = mean_squared_error(actual, pred)

# Root of the MSE, i.e. the RMSE.
rmse = np.sqrt(mse)

# Display the RMSE rounded to two decimals.
round(rmse, 2)

In [None]:
# Plot the individual components of the baseline forecast.
fig1 = model.plot_components(forecast)

In [None]:
# Append the six extra measurement columns from X to the modelling frame so
# they can serve as additional regressors next to 'ds' and 'y'.
train_dataset = train_dataset.assign(
    **{
        name: X[name]
        for name in (
            'Global_reactive_power',
            'Voltage',
            'Global_intensity',
            'Sub_metering_1',
            'Sub_metering_2',
            'Sub_metering_3',
        )
    }
)

# Same positional split as the baseline: first 1077 rows train, the rest test.
train_df1 = train_dataset.iloc[:1077]
test_df1 = train_dataset.iloc[1077:]

In [None]:
# Confirm the modelling frame now carries the extra regressor columns.
train_dataset.head()

In [None]:
# Second model: Prophet with the six remaining measurements registered as
# additional regressors (each name must exist as a column when fitting/predicting).
pro_regressor = Prophet()
pro_regressor.add_regressor('Global_reactive_power')
pro_regressor.add_regressor('Voltage')
pro_regressor.add_regressor('Global_intensity')
pro_regressor.add_regressor('Sub_metering_1')
pro_regressor.add_regressor('Sub_metering_2')
pro_regressor.add_regressor('Sub_metering_3')

In [None]:
# Fit the regressor model on the training split that includes the extra columns.
pro_regressor.fit(train_df1)
# NOTE(review): future1 is only displayed in the next cell; the prediction below
# is made on test_df1 instead, so future1 appears to be unused — confirm.
future1 = pro_regressor.make_future_dataframe(periods= 365)

In [None]:
future1.head()

In [None]:
# Predict directly on the hold-out frame, which carries 'ds' and the regressor columns.
# NOTE(review): this feeds the observed hold-out values of the regressors into the
# forecast — confirm that using actual future regressor values is intended.
forecast1 = pro_regressor.predict(test_df1)

In [None]:
# Rows/columns of the regressor-model forecast.
forecast1.shape

In [None]:
# First forecast rows.
forecast1.head()

In [None]:
# Last forecast rows.
forecast1.tail()

In [None]:
# Keep only the prediction, its interval bounds and the trend columns for display.
forecast1[['ds', 'yhat', 'yhat_lower', 'yhat_upper', 'trend', 'trend_lower', 'trend_upper']] 

In [None]:
# yhat is the prediction, while yhat_lower and yhat_upper are its lower and upper bounds.
# Plot with pro_regressor, the model that produced forecast1, rather than the
# baseline `model`, so the figure is drawn from the matching fitted model.
pro_regressor.plot(forecast1)
plt.show()

In [None]:
# Plot the individual components of the regressor-model forecast.
fig2 = pro_regressor.plot_components(forecast1)

In [None]:
import numpy as np

def mape(actual1, pred1):
    """Compute the mean absolute percentage error (MAPE), in percent.

    Parameters
    ----------
    actual1 : array-like
        Observed values; each relative error is divided by its observed value.
    pred1 : array-like
        Predicted values, paired with ``actual1`` by position.

    Returns
    -------
    float
        ``mean(|(actual1 - pred1) / actual1|) * 100``.
    """
    truth = np.array(actual1)
    estimate = np.array(pred1)
    # Absolute error of every prediction relative to its observed value.
    abs_pct_errors = np.abs((truth - estimate) / truth)
    return abs_pct_errors.mean() * 100

In [None]:
# Ground truth for the regressor model: take 'y' from test_df1, the same hold-out
# frame that was passed to pro_regressor.predict, so the actuals and the
# predictions are drawn from one source.
actual1 = test_df1['y']
# Predictions of the regressor model; forecast1 covers only the hold-out rows,
# so no slicing is needed here.
pred1 = forecast1['yhat']

In [None]:
# MAPE of the regressor model on the hold-out period, rounded to two decimals.
mape1 = mape(actual1, pred1)
round(mape1, 2)

In [None]:
from sklearn.metrics import mean_squared_error
# Mean squared error of the regressor-model predictions on the hold-out period.
mse1 = mean_squared_error(actual1, pred1)

# Root of the MSE, i.e. the RMSE.
rmse1 = np.sqrt(mse1)

# Display the RMSE rounded to two decimals.
round(rmse1, 2)