# Demo: Data Preparation for Time Series Forecasting with Prophet

## Notebook Setup

In [2]:
# Install the required libraries
!pip install pystan
!pip install prophet
!pip install hvplot
!pip install holoviews

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting cmdstanpy>=1.0.1
  Downloading cmdstanpy-1.0.7-py3-none-any.whl (80 kB)
[K     |████████████████████████████████| 80 kB 4.7 MB/s 
Installing collected packages: cmdstanpy
  Attempting uninstall: cmdstanpy
    Found existing installation: cmdstanpy 0.9.5
    Uninstalling cmdstanpy-0.9.5:
      Successfully uninstalled cmdstanpy-0.9.5
Successfully installed cmdstanpy-1.0.7
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [4]:
# Import the required libraries and dependencies
import pandas as pd
import hvplot.pandas
import datetime as dt
import holoviews as hv
from prophet import Prophet

%matplotlib inline

In [5]:
# Import the `files` module from `google.colab` to allow file uploads
from google.colab import files

In [6]:
# Load the HoloViews `bokeh` extension so that hvPlot charts are rendered
# with the Bokeh backend in the notebook output
hv.extension("bokeh")

Output hidden; open in https://colab.research.google.com to view.

## Plot the Data

In [7]:
# Upload "hourly_grid_prices.csv" into Colab through the file-upload prompt.
# This cell only uploads the file; the DataFrame is built from it in the
# next cell. The return value of `files.upload()` is kept in `uploaded`
# (presumably a mapping of file name to file contents; confirm in the Colab docs).
uploaded = files.upload()

Saving hourly_grid_prices.csv to hourly_grid_prices.csv


In [8]:
# Load the uploaded CSV into a DataFrame.
# - `index_col='day-hour'` together with `parse_dates=True` parses the
#   timestamp column and uses it as the index.
# - `infer_datetime_format` is deliberately not passed: it is deprecated
#   since pandas 2.0 (it has no effect there and only emits a warning).
# - `.dropna()` discards any rows that contain missing values.
hourly_prices = pd.read_csv(
    'hourly_grid_prices.csv',
    index_col='day-hour',
    parse_dates=True,
).dropna()


# Display the first and last five rows of the DataFrame
display(hourly_prices.head())
display(hourly_prices.tail())

Unnamed: 0_level_0,Price
day-hour,Unnamed: 1_level_1
2015-10-31 01:00:00,18.15
2015-10-31 02:00:00,20.73
2015-10-31 03:00:00,19.49
2015-10-31 04:00:00,17.75
2015-10-31 05:00:00,17.38


Unnamed: 0_level_0,Price
day-hour,Unnamed: 1_level_1
2020-10-14 20:00:00,19.23
2020-10-14 21:00:00,13.45
2020-10-14 22:00:00,15.24
2020-10-14 23:00:00,17.0
2020-10-15 00:00:00,18.73


In [10]:
# Load the HoloViews `bokeh` extension again inside this cell so that the
# hvPlot chart below renders in Colab
# NOTE(review): presumably Colab needs the extension loaded in the same cell
# that produces the plot; confirm before removing this repeated call.
hv.extension('bokeh')

# Plot the DataFrame (the index is `day-hour`, the only column is `Price`)
hourly_prices.hvplot()

Output hidden; open in https://colab.research.google.com to view.

## Prepare the Data

In [11]:
# Turn the `day-hour` index back into an ordinary column; the DataFrame
# gets a fresh default integer index and `hourly_prices` is left untouched
prophet_df = hourly_prices.reset_index(drop=False)


# Preview both ends of the DataFrame: first five rows, then last five rows
display(prophet_df.head(), prophet_df.tail())

Unnamed: 0,day-hour,Price
0,2015-10-31 01:00:00,18.15
1,2015-10-31 02:00:00,20.73
2,2015-10-31 03:00:00,19.49
3,2015-10-31 04:00:00,17.75
4,2015-10-31 05:00:00,17.38


Unnamed: 0,day-hour,Price
43363,2020-10-14 20:00:00,19.23
43364,2020-10-14 21:00:00,13.45
43365,2020-10-14 22:00:00,15.24
43366,2020-10-14 23:00:00,17.0
43367,2020-10-15 00:00:00,18.73


In [12]:
# Prepare the training data to be read into a Prophet model.
# Rename the columns to the names that Prophet recognizes: `ds` for the
# timestamps and `y` for the values.
# An explicit old-name -> new-name mapping is used instead of assigning a
# positional list to `.columns`, so a change in column order cannot silently
# mislabel the data, and re-running the cell is harmless (`rename` ignores
# keys that are no longer present).
prophet_df = prophet_df.rename(columns={'day-hour': 'ds', 'Price': 'y'})
prophet_df.head()

Unnamed: 0,ds,y
0,2015-10-31 01:00:00,18.15
1,2015-10-31 02:00:00,20.73
2,2015-10-31 03:00:00,19.49
3,2015-10-31 04:00:00,17.75
4,2015-10-31 05:00:00,17.38


In [13]:
# Drop any rows that contain NaN values, then review the last five rows.
# Note: this removes rows rather than merely checking for NaNs. The data was
# already passed through `.dropna()` when it was loaded, so this is expected
# to leave the DataFrame unchanged and acts as a safeguard.
prophet_df = prophet_df.dropna()
prophet_df.tail()

Unnamed: 0,ds,y
43363,2020-10-14 20:00:00,19.23
43364,2020-10-14 21:00:00,13.45
43365,2020-10-14 22:00:00,15.24
43366,2020-10-14 23:00:00,17.0
43367,2020-10-15 00:00:00,18.73
