In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA

In [None]:
# Load the raw readings from whichever copy of the dataset is available.
# Only one of the two files exists in a given environment, so read exactly one
# of them instead of attempting both (the missing one raises FileNotFoundError).
KAGGLE_PATH = Path('../input/electric-power-consumption-data-set/household_power_consumption.txt')
LOCAL_PATH = Path('household_power_consumption.csv')

if KAGGLE_PATH.exists():
    # when running on Kaggle Notebook (semicolon-separated text file)
    df_data = pd.read_csv(KAGGLE_PATH, sep=';')
else:
    # when running on Jupyter Notebook (comma-separated local copy)
    df_data = pd.read_csv(LOCAL_PATH)

In [None]:
# Preview the first rows of the freshly loaded frame.
df_data.head()

In [None]:
# (rows, columns) of the raw frame.
df_data.shape

In [None]:
# Preview the last rows of the raw frame.
df_data.tail()

In [None]:
# Column dtypes as inferred by read_csv, before any conversion.
df_data.dtypes

In [None]:
df_data['Datetime'] = df_data['Date']+" "+ df_data['Time']

In [None]:
df_data['Datetime'].head()

In [None]:
# The source file writes dates day-first (dd/mm/yyyy). Without dayfirst=True an
# ambiguous value such as "01/02/2007" can be read month-first, which silently
# moves readings to the wrong day and distorts the daily totals computed later.
df_data['Datetime'] = pd.to_datetime(df_data['Datetime'], dayfirst=True)

In [None]:
df_data.Datetime.head()

In [None]:
df_data= df_data.drop(columns = ['Date', 'Time'])

In [None]:
df_data.head()

In [None]:
df_data.dtypes

In [None]:
df_data = df_data.set_index(df_data['Datetime'])
df_data.head()

In [None]:
# dropping The columns:
df_data.drop(columns =['Datetime'], inplace=True)

In [None]:
df_data[df_data['Global_active_power']=='?']

In [None]:
df_data=df_data.replace('?', np.nan)

In [None]:
df_data.isnull().sum()

In [None]:
# except the columns called sub_metering3 column all other column datatypes is different so we have to change the datatype to Float

for column in list(df_data.columns)[:-1]:
  df_data[column] = pd.to_numeric(df_data[column], downcast ='float')

df_data.dtypes

In [None]:
df_data.isnull().sum()

In [None]:
# Resample the readings to one row per calendar day by summing each column.
# NOTE: sum() skips NaN, so a day with no valid readings gets a total of 0, not NaN.
df_data_daily = df_data.resample('D').sum()

In [None]:
# First rows of the daily totals.
df_data_daily.head()

In [None]:
# Check whether any missing values remain in the daily totals.
df_data_daily.isnull().sum()

In [None]:
# Shape of the subset of days whose Global_active_power total is exactly 0.
df_data_daily[df_data_daily['Global_active_power']==0.00].shape

In [None]:
# Days whose total is exactly 0 are days whose readings were missing (the daily
# sum turned them into 0), so fill them with the mean daily total.
# The mean is taken over the non-zero days only: averaging in the placeholder
# zeros would pull the fill value below the typical observed day.
daily_power = df_data_daily['Global_active_power']
observed_mean = daily_power[daily_power != 0.00].mean()
df_data_daily['Global_active_power'] = daily_power.replace(0.00, observed_mean)


In [None]:
# Last rows of the daily frame after imputation.
df_data_daily.tail()

In [None]:
# Line plot of the daily Global_active_power totals over the datetime index.
df_data_daily['Global_active_power'].plot()

In [None]:
# Correlation between daily active and reactive power.
df_data_daily[['Global_active_power', 'Global_reactive_power']].corr()

In [None]:
# Correlation between daily voltage and current intensity.
df_data_daily[['Voltage','Global_intensity']].corr()

In [None]:
# Full correlation matrix and the (rows, columns) of the daily frame, shown as a tuple.
df_data_daily.corr(), df_data_daily.shape

In [None]:
# Using Prophet Library
from fbprophet import Prophet

In [None]:
train_data=df_data_daily.head(len(df_data_daily)-365)

In [None]:
train_data.tail()

In [None]:
test_data=df_data_daily['2009-12-12':]

In [None]:
test_data.shape

In [None]:
univariate_train = train_data['Global_active_power']
univariate_train.head()

In [None]:
univariate_train = univariate_train.reset_index()

In [None]:
univariate_train.dtypes

In [None]:
univariate_train.columns =['ds', 'y']

In [None]:
univariate_train.head()

In [None]:
univariate_test= test_data[['Global_active_power']]

In [None]:
univariate_test = univariate_test.reset_index()

In [None]:
univariate_test.columns = ['ds', 'y']

In [None]:
univariate_test.head()

In [None]:
#Lets make the object of Prophet Model to make it work

model = Prophet()
model.fit(univariate_train)
forecast = model.predict(univariate_test)

In [None]:
forecast.head()

In [None]:
model.plot(forecast)
plt.show()

In [None]:
np.mean(np.abs((univariate_test['y']-forecast['yhat'])/univariate_test['y']))*100

In [None]:
univariate_test[univariate_test['y']==0.00]