In [5]:
# import necessary libraries
import numpy as np
import pandas as pd 
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [6]:
power = pd.read_csv("https://archive.ics.uci.edu/ml/machine-learning-databases/00235/household_power_consumption.zip", sep = ';', header=0, low_memory=False,na_values = '?', parse_dates={"Datetime":['Date','Time']},index_col=["Datetime"])
power_data = power.copy()
power_data.head()

Unnamed: 0_level_0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2006-12-16 17:24:00,4.216,0.418,234.84,18.4,0.0,1.0,17.0
2006-12-16 17:25:00,5.36,0.436,233.63,23.0,0.0,1.0,16.0
2006-12-16 17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
2006-12-16 17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
2006-12-16 17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0


In [8]:
power_data.replace({'?': np.nan},inplace=True)    #changing missing values with '?' to the NaN
power_data.isnull().sum()    #check NaN missing values

TypeError: ignored

In [None]:
# change columns with 'object' dtype to float
for column in power_data.select_dtypes(include=['object']).columns:
  if column != 'Time':
    power_data[[column]] = power_data[[column]].astype('float')

power_data.info()

In [None]:
#Filling missing data with respective column mean
power_data = power_data.fillna(power_data.mean())
power_data.isnull().sum()

In [None]:
#downsample data to daily data points
power_daily = power_data.resample('D').sum()
power_daily.info()

In [None]:
#plot of the daily global_active_power over time
plt.figure(figsize=(10,6))
plt.plot(power_daily.index, power_daily.Global_active_power, '--')
plt.grid()
plt.xlabel('Day')
plt.ylabel('G_A_P')

In [None]:
#Pearsons correlation between global active and reactive power
print(round(power_daily['Global_active_power'].corr(power_daily['Global_reactive_power']), 2))

In [None]:
#Pearsons correlation between Voltage and Global_intensity
print(round(power_daily['Global_intensity'].corr(power_daily['Voltage']), 2))

In [None]:
# reset index
power_daily = power_daily.reset_index()
power_daily.head()

In [None]:
# select columns for predictive model
model_data = power_daily[['Datetime', 'Global_active_power']]

#rename columns to 'ds' and 'y' respectively
model_data = model_data.rename(columns={'Datetime':'ds', 'Global_active_power':'y'})

model_data.head()

In [None]:
# assign train and test data
train = model_data[:-365]
test = model_data[-365:]
print('train shape', train.shape)
print('test shape', test.shape)

In [None]:
# fit train data to Fbprophet model
from fbprophet import Prophet
model = Prophet()
model.fit(train)

In [None]:
# predict for the next 365 days
#future = model.make_future_dataframe(periods=365, freq='D') 
forecast = model.predict(test) 
forecast.head()

In [None]:
#yhat is the prediction while yhat_lower and yhat_upper are the upper and lower boundaries
forecast[['ds', 'yhat', 'yhat_lower', 'yhat_upper', 'trend', 'trend_lower', 'trend_upper']]

In [None]:
mape = np.mean(np.abs(np.array(test['y']) - np.array(forecast.yhat[-365:]))/ np.array(test['y'])) *100
 
from sklearn.metrics import mean_squared_error
rmse = np.sqrt(mean_squared_error(np.array(test['y']),np.array(forecast['yhat'])))
 
print('mean_absolute_percentage_error', round(mape,2))
print('root mean_squared_error', round(rmse,2))

In [None]:
from fbprophet.plot import plot_yearly
plot_yearly(model)

In [None]:
 multi_model = power_daily.rename(columns = {'Datetime':'ds','Global_active_power':'y',
                                           'Global_reactive_power':'add1','Voltage':'add2',
                                           'Global_intensity':'add3','Sub_metering_1':'add4',
                                          'Sub_metering_2':'add5','Sub_metering_3':'add6'})
multi_model.head()

In [None]:
# assign train and test data
train2 = multi_model[:-365]
test2 = multi_model[-365:]
print('train shape', train2.shape)
print('test shape', test2.shape)

In [None]:
 #creat multivariate model
model2 = Prophet()
model2.add_regressor('add1')
model2.add_regressor('add2')
model2.add_regressor('add3')
model2.add_regressor('add4')
model2.add_regressor('add5')
model2.add_regressor('add6')

In [None]:
 
model2.fit(train2)
 
forecast2 = model2.predict(test2)

In [None]:
forecast2

In [None]:
 
#calculating mean absolute percentage error
mape2 = np.mean(np.abs(np.array(test2['y']) -np.array(forecast2['yhat']))/ np.array(test2['y'])) *100
round(mape2,2)

In [None]:
 
#calculate rmse
rmse2 = np.sqrt(mean_squared_error(np.array(test2['y']),np.array(forecast2['yhat'])))
round(rmse2,2)

In [None]:
 
# visualize each components (trends, monthly)
from fbprophet.plot import plot_weekly
plot_weekly(model2)