# Appliances Forecasting

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

from prophet import Prophet
from sklearn.metrics import mean_absolute_error, root_mean_squared_error

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Location of the hosted energy-data CSV
DATA_URL = 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/IBM-GPXX032NEN/images/data/energydata_complete.csv'

# Load the CSV into a DataFrame and preview the first rows
df = pd.read_csv(DATA_URL)
df.head()

Unnamed: 0,date,Appliances,lights,T1,RH_1,T2,RH_2,T3,RH_3,T4,...,T9,RH_9,T_out,Press_mm_hg,RH_out,Windspeed,Visibility,Tdewpoint,rv1,rv2
0,11-01-2016 17:00,60,30,19.89,47.596667,19.2,44.79,19.79,44.73,19.0,...,17.033333,45.53,6.6,733.5,92.0,7.0,63.0,5.3,13.275433,13.275433
1,11-01-2016 17:10,60,30,19.89,46.693333,19.2,44.7225,19.79,44.79,19.0,...,17.066667,45.56,6.48,733.6,92.0,6.666667,59.166667,5.2,18.606195,18.606195
2,11-01-2016 17:20,50,30,19.89,46.3,19.2,44.626667,19.79,44.933333,18.926667,...,17.0,45.5,6.37,733.7,92.0,6.333333,55.333333,5.1,28.642668,28.642668
3,11-01-2016 17:30,50,40,19.89,46.066667,19.2,44.59,19.79,45.0,18.89,...,17.0,45.4,6.25,733.8,92.0,6.0,51.5,5.0,45.41039,45.41039
4,11-01-2016 17:40,60,40,19.89,46.333333,19.2,44.53,19.79,45.0,18.89,...,17.0,45.4,6.13,733.9,92.0,5.666667,47.666667,4.9,10.084097,10.084097


We are analyzing appliance energy use recorded from January 2016 to May 2016, and we will forecast it for the next four months (June to September 2016).


workflow
1. preprocessing and EDA
datatype conversion
data cleaning: missing values, irregular dates (note: Prophet is able to handle missing values)

EDA
visualize the data using a line plot to see the trends, seasonalities 
decompose the data into trends, seasonalities and holidays to understand it more intricately
check for stationarity using ADF
#The goal is to identify the trend, seasonalities and holiday effects

modeling
create a base model, fit and predict
assess the model performance (using cross validation) and set it as the reference value for future tuning
create a future dataframe and assess the model components
visualize the predictions and actual values

#tune model
tune the model's hyperparameters for better performance
assess performance & visualize the actual vs predicted values

interpretation and communication
#interpret the components
trend - increasing or decreasing
seasonalities - what are the dominant cycles; yearly, weekly
assess holiday effect

#communicate uncertainty
highlight prediction intervals to communicate forecast uncertainties

#communicate business insights
translate the forecast into actionable recommendations




In [4]:
# lowercase column names for easier indexing
df.columns = df.columns.str.lower()

# shape of the frame
print('df shape\n',df.shape)

# per-column dtype and non-null summary.
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" to the output.
df.info()

df shape
 (19735, 29)
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19735 entries, 0 to 19734
Data columns (total 29 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   date         19735 non-null  object 
 1   appliances   19735 non-null  int64  
 2   lights       19735 non-null  int64  
 3   t1           19735 non-null  float64
 4   rh_1         19735 non-null  float64
 5   t2           19735 non-null  float64
 6   rh_2         19735 non-null  float64
 7   t3           19735 non-null  float64
 8   rh_3         19735 non-null  float64
 9   t4           19735 non-null  float64
 10  rh_4         19735 non-null  float64
 11  t5           19735 non-null  float64
 12  rh_5         19735 non-null  float64
 13  t6           19735 non-null  float64
 14  rh_6         19735 non-null  float64
 15  t7           19735 non-null  float64
 16  rh_7         19735 non-null  float64
 17  t8           19735 non-null  float64
 18  rh_8         19735 non-n

In [None]:
# Parse the date strings into datetime values; the day comes before the month
# in this file, hence dayfirst=True.
parsed_dates = pd.to_datetime(df['date'], dayfirst=True)
df['date'] = parsed_dates

# Number of distinct timestamps (displayed as the cell output)
df['date'].nunique()

19735

In [26]:
# Keep only the timestamp and the target series used for forecasting
forecast_columns = ['date', 'appliances']
data = df[forecast_columns]

# Line plot of the target over time
fig = px.line(
    data_frame=data,
    x='date',
    y='appliances',
    title='date vs appliances',
)
fig.show()

## Model Building 

create a base model, fit and predict
assess the model performance (using cross validation) and set it as the reference value for future tuning
create a future dataframe and assess the model components
visualize the predictions and actual values

### Base model

In [None]:
# Prophet requires the timestamp column to be named 'ds' and the target 'y'.
# rename() is a no-op for names that are already changed, so re-running this
# cell is safe.
data = data.rename(columns={'date': 'ds', 'appliances': 'y'})

# initialize and fit the baseline model with Prophet's default settings
model = Prophet()
model.fit(data)

# Forecast horizon.
# The readings shown in the preview are spaced 10 minutes apart, so the future
# frame uses the same step. The horizon runs to the end of September 2016, per
# the notebook's stated goal.
# NOTE(review): adjust FORECAST_END if a different horizon is intended.
FORECAST_FREQ = '10min'
FORECAST_END = pd.Timestamp('2016-09-30 23:50')

last_observed = data['ds'].max()
# date_range includes last_observed itself, hence the -1; clamp at 0 in case
# the data already extends past FORECAST_END.
n_future_steps = max(
    len(pd.date_range(start=last_observed, end=FORECAST_END, freq=FORECAST_FREQ)) - 1,
    0,
)

# create future dates (history rows are kept so in-sample fit can be inspected)
future_dates = model.make_future_dataframe(periods=n_future_steps,
                                           freq=FORECAST_FREQ,
                                           include_history=True
                                           )

# predict over history + future and preview the result
preds = model.predict(future_dates)
preds.head()

In [None]:
# Assess the model performance on the historical (in-sample) period.
# `preds` also contains future rows that have no observed value, so actuals and
# predictions are aligned on the timestamp before scoring; comparing the raw
# columns would fail because their lengths differ.
# NOTE(review): this is an in-sample fit check, not the cross-validated score
# described in the workflow.
evaluation = data.merge(preds[['ds', 'yhat']], on='ds', how='inner')

y = evaluation['y']          # observed values
y_vals = evaluation['yhat']  # model predictions for the same timestamps

# scikit-learn metrics take (y_true, y_pred) in that order
mae = mean_absolute_error(y, y_vals)
rmse = root_mean_squared_error(y, y_vals)

print(f'MAE:  {mae:.2f}')
print(f'RMSE: {rmse:.2f}')

In [None]:
# Show the fitted model's component_modes attribute
component_modes = model.component_modes
print(component_modes)

# Draw the decomposed components of the forecast
fig1 = model.plot_components(preds)
fig1.show()

In [None]:
# Plot the predicted vs actual values.
# Observed values cover the historical period only; predictions cover every
# timestamp in `preds`.
fig2 = go.Figure()
fig2.add_trace(go.Scatter(x=data['ds'], y=data['y'], mode='lines', name='actual'))
fig2.add_trace(go.Scatter(x=preds['ds'], y=preds['yhat'], mode='lines', name='predicted'))
fig2.update_layout(
    title='actual vs predicted appliances',
    xaxis_title='date',
    yaxis_title='appliances',
)
fig2.show()

In [None]:
# plot the predicted values plus the 