# Esperanza's Prophet Notebook - Using Recleaned Data

Please note - this uses the energy values instead of the cost values, since the cost values are a fairly static transformation.

## Imports

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns

import prophet as Prophet

In [None]:
# Prophet import gives: "Importing plotly failed. Interactive plots will not work."

## Comparing Data to New Cleaned Data 

In [None]:
# Read the recleaned, floor-combined dataset and split it on its `train`
# column (used as a boolean mask); each half gets a fresh 0-based index.
data = pd.read_csv('../data/temp/combined_floors_cleaned.csv')
is_training_row = data['train']
cleaned_training = data[is_training_row].reset_index(drop=True)
cleaned_test = data[~is_training_row].reset_index(drop=True)

In [None]:
cleaned_training.head(5)

In [None]:
cleaned_training.tail(5)

In [None]:
# Keep only the timestamp and the energy reading in both splits.
prophet_source_columns = ['time', 'energy']
training_reduced_prophet = cleaned_training.loc[:, prophet_source_columns]
test_reduced_prophet = cleaned_test.loc[:, prophet_source_columns]

In [None]:
# Rename to the column names the Prophet calls below work with:
# `ds` for the timestamp and `y` for the value being forecast.
prophet_column_names = {'time': 'ds', 'energy': 'y'}
training_reduced_prophet = training_reduced_prophet.rename(columns=prophet_column_names)
test_reduced_prophet = test_reduced_prophet.rename(columns=prophet_column_names)


## Work with Prophet

#### For missing analysis work, please look at my development notebooks.

In [None]:
# Prophet is incompatible with time-zone-aware timestamps, so cut the last
# six characters off every `ds` string.
# (presumably a "+HH:MM"-style UTC offset — TODO confirm against the CSV)
def _drop_offset_suffix(stamp):
    return stamp[:-6]


training_reduced_prophet['ds'] = training_reduced_prophet['ds'].apply(_drop_offset_suffix)
test_reduced_prophet['ds'] = test_reduced_prophet['ds'].apply(_drop_offset_suffix)

### Determining cap for Prophet model

In [None]:
training_reduced_prophet['y'].sort_values(ascending = False)

In [None]:
training_reduced_prophet['y'].mean()

In [None]:
training_reduced_prophet['y'].median()

In [None]:
# Add a constant `cap` column (50 for every row) to the training frame.
# NOTE(review): per the Prophet docs, `cap` is only read when the model is
# created with growth='logistic' (and the frame passed to predict() then
# needs a `cap` column too). The model in the "Training Model" section is
# built with the default constructor — confirm the cap actually takes effect.
training_reduced_prophet['cap'] = 50
# have not set a changing cap with HVAC because I'm not sure it did change over this time frame

### Training Model

In [None]:
prophet_model = Prophet.Prophet()

In [None]:
prophet_model.fit(training_reduced_prophet)
# got disabling yearly seasonality text - I think this makes sense to disable because our data < year

### Making Model Forecast

#### We want to predict every 1 hour from August 1, 2018 through the end of the data on January 7, 2019
It doesn't matter much if we forecast past that end date, as long as the forecast periods are right (hourly).

In [None]:
# Ask the fitted model for a frame extended by 365 * 24 hourly periods,
# then predict over that frame.
hours_ahead = 365 * 24
future_frame = prophet_model.make_future_dataframe(periods=hours_ahead, freq='1H')
prophet_forecast = prophet_model.predict(future_frame)

In [None]:
prophet_forecast.tail(5)

In [None]:
# This got us a little past the time I wanted to be at but it works

## Plot Analysis

In [None]:
fig2 = prophet_model.plot_components(prophet_forecast)

In [None]:
# These figures look almost the same as the ones without imputation

## Finding MSE

In [None]:
prophet_forecast_reduced = prophet_forecast.loc[:, ['ds', 'yhat']]

In [None]:
prophet_forecast['ds'].head(5)

In [None]:
test_reduced_prophet['ds'] = pd.to_datetime(test_reduced_prophet['ds'])

In [None]:
merged = prophet_forecast_reduced.merge(test_reduced_prophet, left_on = 'ds', right_on = 'ds')

In [None]:
merged.shape

In [None]:
test_reduced_prophet.shape

In [None]:
# Mean squared error over the rows that have both a forecast and an
# observation. Averaging over `merged` itself (rather than dividing by the
# length of the test frame) keeps the value correct even when the inner merge
# drops unmatched timestamps or matches a timestamp more than once.
((merged['y'] - merged['yhat']) ** 2).mean()

This gives an MSE of 47:
- Floor 2 original 5-min floored was: 50.64062665397791
- Floor 3 original 5-min floored was: 47.75016453897674
- Floor 4 original 5-min floored was: 274.2433927796554

This is an improvement across the board and a more consistent predictor, but the error is still not low.
We want to see whether tuning the parameters can improve it, so that the result can be used as a baseline.

# Attempt(s) 2: no cap and different parameters for changepoint_prior_scale

In [None]:
training_reduced_prophet = training_reduced_prophet.drop(['cap'], axis = 1)

In [None]:
flexibility_values = [1, 0.5, 0.25, 0.1, 0.05, 0.01, 0.005, 0.001]

In [None]:
mses = []

In [None]:
# Refit Prophet once per changepoint_prior_scale value and record the MSE of
# each fit against the test data.
for val in flexibility_values:
    prophet_model = Prophet.Prophet(changepoint_prior_scale=val)
    prophet_model.fit(training_reduced_prophet)

    prophet_forecast = prophet_model.make_future_dataframe(periods = 365 * 24, freq = '1H')
    prophet_forecast = prophet_model.predict(prophet_forecast)

    # Inner-join forecasts and observations on their shared timestamp.
    prophet_forecast_reduced = prophet_forecast.loc[:, ['ds', 'yhat']]
    merged = prophet_forecast_reduced.merge(test_reduced_prophet, on = 'ds')

    # Average over the matched rows only: dividing by the test-frame length
    # would mis-scale the error whenever the merge drops or duplicates rows.
    mses.append(((merged['y'] - merged['yhat']) ** 2).mean())
    

In [None]:
print(mses)

In [None]:
# All MSEs seem to cap at 46

# Attempt(s) 3: cap values with 1 for flexibility

In [None]:
cap_values = [10,20,30,40,50,60,70,80,90,100]

In [None]:
mses = []

In [None]:
# Refit Prophet once per carrying-capacity value and record the MSE of each
# fit against the test data.
for val in cap_values:
    training_reduced_prophet.loc[:, 'cap'] = val

    # Prophet only uses `cap` with logistic growth; with the default linear
    # growth the column is ignored and every cap gives the same forecast.
    prophet_model = Prophet.Prophet(growth='logistic', changepoint_prior_scale=1)
    prophet_model.fit(training_reduced_prophet)

    # A logistic-growth model also needs the cap on the frame it predicts on.
    future = prophet_model.make_future_dataframe(periods = 365 * 24, freq = '1H')
    future['cap'] = val
    prophet_forecast = prophet_model.predict(future)

    # Inner-join forecasts and observations on their shared timestamp.
    prophet_forecast_reduced = prophet_forecast.loc[:, ['ds', 'yhat']]
    merged = prophet_forecast_reduced.merge(test_reduced_prophet, on = 'ds')

    # Average over the matched rows only: dividing by the test-frame length
    # would mis-scale the error whenever the merge drops or duplicates rows.
    mses.append(((merged['y'] - merged['yhat']) ** 2).mean())
    

In [None]:
mses

In [None]:
# Cap value: seemingly no impact

In [None]:
fig = prophet_model.plot(prophet_forecast)

In [None]:
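# This is how the forecast fits - it's having a difficult time predicting the range, which it thinks expands dramatically

# or are those just the upper and lower possible trends?
# (Per the Prophet docs, the shaded band is the uncertainty interval, yhat_lower to yhat_upper;
# it widens with the horizon because the uncertainty about future trend changes accumulates.)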

# Attempt 4 - Accounting for Days on Break

In [None]:
# UCSD break windows as (first day, last day) pairs, both inclusive.
# Parsed once here so that row-wise use of is_school_break does not re-parse
# the same sixteen strings for every timestamp.
#https://blink.ucsd.edu/instructors/resources/academic/calendars/2016.html
#https://blink.ucsd.edu/instructors/resources/academic/calendars/2017.html
#https://blink.ucsd.edu/instructors/resources/academic/calendars/2018.html
_SCHOOL_BREAKS = tuple(
    (pd.to_datetime(first_day), pd.to_datetime(last_day))
    for first_day, last_day in (
        ('03/26/2017', '04/06/2017'),
        ('06/17/2017', '07/02/2017'),
        ('09/10/2017', '09/24/2017'),
        ('12/17/2017', '01/02/2018'),
        ('03/25/2018', '04/01/2018'),
        ('06/16/2018', '07/01/2018'),
        ('09/09/2018', '09/23/2018'),
        ('12/16/2018', '01/06/2019'),
    )
)


def is_school_break(ds):
    """Return True when *ds* falls on a day inside one of the break windows.

    Parameters
    ----------
    ds : str or datetime-like
        A timestamp in any form accepted by ``pd.to_datetime``.

    Returns
    -------
    bool
        True if the calendar day of *ds* lies between the first and last day
        (both inclusive) of any window in ``_SCHOOL_BREAKS``, else False.
    """
    # Compare on the calendar day: the window bounds are midnight timestamps,
    # so without normalising, every hour after 00:00 on the last day of a
    # break would wrongly be treated as outside the break.
    day = pd.to_datetime(ds).normalize()
    return any(first_day <= day <= last_day for first_day, last_day in _SCHOOL_BREAKS)

In [None]:
# Start again from just the timestamp and target (drops any leftover `cap`).
training_reduced_prophet = training_reduced_prophet.loc[:, ['ds', 'y']]

# is_school_break runs row by row, so evaluate it once and derive the
# complementary flag by negation instead of applying it a second time.
# NOTE(review): as written, `on_season` is True on break days and
# `off_season` is True otherwise — the names look inverted; confirm before
# interpreting the fitted components.
training_break_flags = training_reduced_prophet['ds'].apply(is_school_break)
training_reduced_prophet['on_season'] = training_break_flags
training_reduced_prophet['off_season'] = ~training_break_flags

In [None]:
# is_school_break runs row by row, so evaluate it once and derive the
# complementary flag by negation instead of applying it a second time.
# NOTE(review): as written, `on_season` is True on break days and
# `off_season` is True otherwise — the names look inverted; confirm.
test_break_flags = test_reduced_prophet['ds'].apply(is_school_break)
test_reduced_prophet['on_season'] = test_break_flags
test_reduced_prophet['off_season'] = ~test_break_flags

In [None]:
# this is taking a while to run

In [None]:
training_reduced_prophet.head(5)

In [None]:
test_reduced_prophet.head(5)

In [None]:
# Fit a model with two weekly seasonalities, each active only on the rows
# where its condition column is True.
# NOTE(review): Prophet's built-in weekly seasonality is left at its default
# here; the Prophet docs disable it (weekly_seasonality=False) when adding
# conditional weekly terms — consider whether that is wanted.
prophet_model = Prophet.Prophet(changepoint_prior_scale=1)
prophet_model.add_seasonality(name='weekly_on_season', period=7, fourier_order=3, condition_name='on_season')
prophet_model.add_seasonality(name='weekly_off_season', period=7, fourier_order=3, condition_name='off_season')

prophet_model.fit(training_reduced_prophet)

# make_future_dataframe only produces `ds`, but conditional seasonalities
# need their condition columns on the frame passed to predict() as well.
# The flags follow the same convention as the training frame
# (`on_season` = is_school_break, `off_season` = its negation).
future = prophet_model.make_future_dataframe(periods = 365 * 24, freq = '1H')
future_break_flags = future['ds'].apply(is_school_break)
future['on_season'] = future_break_flags
future['off_season'] = ~future_break_flags
prophet_forecast = prophet_model.predict(future)

# Inner-join forecasts and observations on their shared timestamp.
prophet_forecast_reduced = prophet_forecast.loc[:, ['ds', 'yhat']]
merged = prophet_forecast_reduced.merge(test_reduced_prophet, on = 'ds')

# Mean squared error over the matched rows only: dividing by the test-frame
# length would mis-scale the error whenever the merge drops or duplicates rows.
((merged['y'] - merged['yhat']) ** 2).mean()

In [None]:
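# on_season condition is defined on the training frame - not sure what the issue is here.
# Note: Prophet also requires the condition columns (on_season / off_season) on the frame passed to
# predict(), and make_future_dataframe does not add them.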