In [1]:
!pip install --upgrade pip
!pip install tensorflow

Collecting pip
  Downloading pip-20.1.1-py2.py3-none-any.whl (1.5 MB)
[K     |████████████████████████████████| 1.5 MB 4.0 MB/s eta 0:00:01
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 20.0.2
    Uninstalling pip-20.0.2:
      Successfully uninstalled pip-20.0.2
Successfully installed pip-20.1.1
Collecting tensorflow
  Downloading tensorflow-2.2.0-cp36-cp36m-manylinux2010_x86_64.whl (516.2 MB)
[K     |██████████████████████████▌     | 427.9 MB 91.9 MB/s eta 0:00:012

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)



[K     |████████████████████████████████| 516.2 MB 7.4 kB/s  eta 0:00:01
[?25hCollecting tensorflow-estimator<2.3.0,>=2.2.0
  Downloading tensorflow_estimator-2.2.0-py2.py3-none-any.whl (454 kB)
[K     |████████████████████████████████| 454 kB 91.1 MB/s eta 0:00:01
Collecting termcolor>=1.1.0
  Downloading termcolor-1.1.0.tar.gz (3.9 kB)
Collecting keras-preprocessing>=1.1.0
  Downloading Keras_Preprocessing-1.1.2-py2.py3-none-any.whl (42 kB)
[K     |████████████████████████████████| 42 kB 242 kB/s  eta 0:00:01
Collecting astunparse==1.6.3
  Downloading astunparse-1.6.3-py2.py3-none-any.whl (12 kB)
Collecting grpcio>=1.8.6
  Downloading grpcio-1.29.0-cp36-cp36m-manylinux2010_x86_64.whl (3.0 MB)
[K     |████████████████████████████████| 3.0 MB 80.1 MB/s eta 0:00:01
[?25hCollecting absl-py>=0.7.0
  Downloading absl-py-0.9.0.tar.gz (104 kB)
[K     |████████████████████████████████| 104 kB 107.0 MB/s eta 0:00:01
[?25hCollecting google-pasta>=0.1.8
  Downloading google_pasta-0.2.0-p

In [2]:
import itertools
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import matplotlib as mpl

from scipy import signal
from scipy import stats

import statsmodels.api as sm
from statsmodels.tsa.arima_model import ARIMA
from statsmodels.tsa.arima_process import ArmaProcess
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.graphics.gofplots import qqplot_2samples

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from scipy import stats

import tensorflow as tf
keras = tf.keras



---
# User-defined Functions

In [3]:
def calc_rmse(actual, pred):
    """Return the root-mean-squared error between two sequences.

    Parameters
    ----------
    actual : array-like
        Observed values; forwarded as the first argument of
        ``mean_squared_error``.
    pred : array-like
        Predicted values; forwarded as the second argument.

    Returns
    -------
    The square root of whatever ``mean_squared_error(actual, pred)`` returns.
    """
    mse = mean_squared_error(actual, pred)
    return np.sqrt(mse)

def windowize_data(data, n_prev):
    """Build sliding input windows and their next-step targets.

    Parameters
    ----------
    data : numpy.ndarray
        Series to window. It is indexed with an integer index array, so it
        must support NumPy fancy indexing.
    n_prev : int
        Number of consecutive observations in each input window.

    Returns
    -------
    x : numpy.ndarray
        Window ``i`` holds ``data[i : i + n_prev]``. For 1-D ``data`` the
        shape is ``(len(data) - n_prev, n_prev, 1)``.
    y : numpy.ndarray
        ``data[n_prev:]`` -- the observation that follows each window.
    """
    window_count = len(data) - n_prev
    targets = data[n_prev:]
    # Broadcasting a column of window start positions against a row of
    # in-window offsets yields every gather index in one 2-D array:
    # row i is [i, i + 1, ..., i + n_prev - 1].
    window_starts = np.arange(window_count)[:, np.newaxis]
    in_window_offsets = np.arange(n_prev)
    gather_idx = in_window_offsets + window_starts
    # The trailing None appends a length-1 axis after the window axis.
    windows = data[gather_idx, None]
    return windows, targets

def split_and_windowize(data, n_prev, fraction_test=0.3):
    """Split a series chronologically, then window each part independently.

    Each part is windowed on its own, so each part spends its first
    ``n_prev`` observations as context for its first target. The total number
    of targets across both parts is therefore ``len(data) - 2 * n_prev``.

    Parameters
    ----------
    data : numpy.ndarray
        Series to split and window (must be accepted by ``windowize_data``).
    n_prev : int
        Number of observations in each input window.
    fraction_test : float, default 0.3
        Fraction of the total targets assigned to the test part.

    Returns
    -------
    x_train, x_test, y_train, y_test
        Windows and targets for the train and test parts, as produced by
        ``windowize_data``.
    """
    n_predictions = len(data) - 2 * n_prev

    n_test = int(fraction_test * n_predictions)
    n_train = n_predictions - n_test

    # n_train is a count of training *targets*. The training slice also needs
    # n_prev leading observations as context for its first window, so the
    # boundary sits at n_train + n_prev. Splitting at n_train (as before)
    # moved n_prev targets from the training part to the test part, so the
    # test share was larger than fraction_test.
    split_idx = n_train + n_prev

    x_train, y_train = windowize_data(data[:split_idx], n_prev)
    x_test, y_test = windowize_data(data[split_idx:], n_prev)
    return x_train, x_test, y_train, y_test


---
# Data Import and Processing

In [4]:
# Load the CAISO master table and drop the 'Unnamed: 0' column.
caiso = pd.read_csv('data/caiso_master.csv').drop('Unnamed: 0', axis=1)

# Parse every timestamp column, then strip tzinfo element by element so the
# stored values are timezone-naive.
for ts_col in ('INTERVAL_START_PT', 'INTERVAL_END_PT', 'date_hour_start', 'OPR_DT_PT'):
    parsed = pd.to_datetime(caiso[ts_col])
    caiso[ts_col] = parsed.apply(lambda stamp: stamp.replace(tzinfo=None))

# Index the frame by interval start.
caiso = caiso.set_index('INTERVAL_START_PT')

# Shorten two column names, then make sure the gas-price column is numeric.
caiso = caiso.rename(
    {'HH_$_million_BTU_not_seasonal_adj': 'HH_$_mill_BTU', 'total_mw': 'total_gen'},
    axis=1,
)
caiso['HH_$_mill_BTU'] = pd.to_numeric(caiso['HH_$_mill_BTU'])

In [6]:
# Pull the three $/MWh price columns out of the frame as plain arrays.
np_lmp_arr, sp_lmp_arr, zp_lmp_arr = (
    caiso[price_col].values
    for price_col in ('$_MWH_np15', '$_MWH_sp15', '$_MWH_zp26')
)

# Hourly periods derived from the frame's index (INTERVAL_START_PT).
datetime_arr = caiso.index.to_period('H')

In [10]:
# Each *_idx is the number of rows whose OPR_DT_PT does NOT satisfy
# `>= cutoff`, i.e. the total row count minus the rows on or after the cutoff.
opr_dates = caiso['OPR_DT_PT']
n_rows = len(caiso)

one_week_idx = n_rows - int((opr_dates >= '2020-05-24').sum())
two_week_idx = n_rows - int((opr_dates >= '2020-05-17').sum())
one_month_idx = n_rows - int((opr_dates >= '2020-05-01').sum())
two_month_idx = n_rows - int((opr_dates >= '2020-04-01').sum())
one_week_idx

11351

---
# ARIMA Univariate Functions

In [12]:
def uni_var_train_test_split(lmp_curve, date_rng, date_delim_idx):
    """Cut a price curve and its date range at the same position.

    Parameters
    ----------
    lmp_curve : sequence
        Price values; anything that supports slicing.
    date_rng : sequence
        Dates aligned element-for-element with ``lmp_curve``.
    date_delim_idx : int
        Position of the first element that belongs to the test part.

    Returns
    -------
    tuple
        ``(lmp_train, lmp_test, date_train, date_test)``.
    """
    train_part = slice(None, date_delim_idx)
    test_part = slice(date_delim_idx, None)
    return (
        lmp_curve[train_part],
        lmp_curve[test_part],
        date_rng[train_part],
        date_rng[test_part],
    )

def arima_uni_var_fit(lmp_train, date_rng, p, d, q):
    """Fit a univariate ARIMA(p, d, q) model and return the fitted result.

    Parameters
    ----------
    lmp_train : array-like
        Training series, passed to ``ARIMA`` as ``endog``.
    date_rng : array-like
        Dates for the training series, passed to ``ARIMA`` as ``dates``.
    p, d, q : int
        The three components of the ARIMA ``order`` tuple.

    Returns
    -------
    The object returned by ``ARIMA(...).fit()``.
    """
    arima_order = (p, d, q)
    unfit_model = ARIMA(endog=lmp_train, dates=date_rng, order=arima_order)
    return unfit_model.fit()

def arima_uni_var_predict(model, n_periods_fcst):
    """Forecast ``n_periods_fcst`` steps ahead with a fitted model.

    Parameters
    ----------
    model
        Fitted results object exposing ``forecast(steps=...)``.
    n_periods_fcst : int
        Number of out-of-sample steps to forecast.

    Returns
    -------
    The first element of what ``model.forecast`` returns.
    NOTE(review): for ``statsmodels.tsa.arima_model`` results this should be
    the array of point forecasts -- confirm against the installed version.
    """
    # The original body referenced `n_period_fcst`, which is not the parameter
    # name, so every call raised NameError.
    return model.forecast(steps=n_periods_fcst)[0]

def plot_pred_vs_actuals(y_true, y_pred, date_rng):
    """Plot predicted and actual values against a shared date range.

    Parameters
    ----------
    y_true : array-like
        Observed values, drawn as red points.
    y_pred : array-like
        Predicted values, drawn as a blue dotted line.
    date_rng : array-like
        X-axis values, aligned element-for-element with both series.

    Returns
    -------
    None
        The figure is created on a new 20x6 axes and left for the notebook
        backend to render.
    """
    fig, ax = plt.subplots(figsize=(20, 6))
    ax.plot(date_rng, y_pred, 'b.-', label='Predictions', lw=2)
    ax.plot(date_rng, y_true, 'r.', label='Actual')
    ax.set_title('Prediction vs Actual', fontsize=18, fontweight='bold')
    # The plotted values are prices, so the $/MWh unit belongs on the y-axis;
    # it was previously set as the x-axis label, which holds the dates.
    ax.set_xlabel('Date', fontsize=12)
    ax.set_ylabel('$/MWh', fontsize=12)
    ax.legend()

---
# NP-15

#### ARIMA Univariate Forecast

In [None]:
# Hold out everything from `one_week_idx` onward (rows with
# OPR_DT_PT >= 2020-05-24) as the test set. The named index replaces the
# hard-coded 11351 so the split follows the data if the CSV changes.
np_uni_train, np_uni_test, np_train_rng, np_test_rng = uni_var_train_test_split(
    np_lmp_arr, datetime_arr, one_week_idx
)

# AR order of 24 * 7 = 168 lags (one week of the hourly periods), first
# differencing, no MA terms.
np_uni_arima = arima_uni_var_fit(np_uni_train, np_train_rng, 24 * 7, 1, 0)

# Forecast exactly as many steps as there are held-out observations.
np_uni_arima_pred = arima_uni_var_predict(np_uni_arima, len(np_uni_test))

  Z_mat, R_mat, T_mat)
  large = s > cutoff


In [None]:
# Arguments follow calc_rmse's (actual, pred) signature.
rmse_np_uni_arima = round(calc_rmse(np_uni_test, np_uni_arima_pred), 6)
print(f"RMSE: {rmse_np_uni_arima}")

# The helper is defined as `plot_pred_vs_actuals`; the previous call to
# `plot_pred_vs_actual` raised NameError.
plot_pred_vs_actuals(np_uni_test, np_uni_arima_pred, np_test_rng)

In [None]:
#### LSTM Univariate

In [None]:
# Three stacked 32-unit LSTM layers followed by a single linear output unit.
# Every layer is added to `np_lstm_uni`; the last three calls previously
# targeted an undefined `lstm_endo`, so the model was never completed or
# compiled.
np_lstm_uni = keras.Sequential()
# NOTE(review): Keras expects input_shape=(timesteps, features) per sample.
# `batch_size` sits in the timesteps slot here -- confirm it holds the window
# length. Neither `batch_size` nor `n_features` is defined in the visible cells.
np_lstm_uni.add(keras.layers.LSTM(32, input_shape=(batch_size, n_features), return_sequences=True))
np_lstm_uni.add(keras.layers.LSTM(32, return_sequences=True))
# return_sequences=False: only the final timestep's output is passed on.
np_lstm_uni.add(keras.layers.LSTM(32, return_sequences=False))
# A single linear unit produces one value per input window.
np_lstm_uni.add(keras.layers.Dense(1, activation='linear'))
np_lstm_uni.compile(optimizer='adam', loss='mse')