In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import gpflow
from gpflow.utilities import print_summary
import time



In [2]:
# Forecast horizons to evaluate, presumably in minutes: the benchmark cells
# divide each value by 5 to turn it into a row offset on the 5-minute data.
SHIFTS = [5, 30, 60, 120]


In [3]:
# Main and validation tables (PV power readings, judging by the file names);
# the 'datetime' column is used as the index. Each file is read exactly once.
data = pd.read_csv('Data/pv_power_df_5day.csv', index_col='datetime')
data_val = pd.read_csv('Data/pv_power_df_5day_validation.csv', index_col='datetime')

# System metadata table, loaded with the default integer index.
uk_pv = pd.read_csv('Data/system_metadata_location_rounded.csv')


## Benchmarks

### Persistence Model
$$ \hat{I}_{t + \Delta_t} = I_t$$
Here the prediction is just the latest value

In [4]:
# Persistence benchmark: score the forecast "future value == current value"
# for every horizon in SHIFTS. Result maps horizon -> mean absolute error.
mae_persistence = dict.fromkeys(SHIFTS)
for horizon in SHIFTS:
    n_steps = int(horizon / 5)  # horizon expressed as a number of 5-unit rows
    # shift() is positional: row k is compared with row k - n_steps.
    # NOTE(review): if the frame has gaps between days, the first rows of a day
    # are compared with the end of the previous day - confirm this is acceptable.
    abs_error = (data - data.shift(n_steps)).abs()
    # Mean over rows for each column, then mean of those column means.
    mae_persistence[n_steps * 5] = abs_error.mean().mean()
mae_persistence

{5: 0.04054998338878003,
 30: 0.08339344848248201,
 60: 0.10811489112396469,
 120: 0.14434640090561332}

### Hourly Average Model
$$ \hat{I}_{t + \Delta_t} = \frac{1}{12} \sum_{i = 0}^{11} {I _ {t - 5i}}$$

Here the prediction is the average of the 12 most recent datapoints, including the current one (1h moving average)

In [5]:
# Hourly-average benchmark: the forecast is the mean of the 12 most recent rows
# (the current row included), scored for every horizon in SHIFTS.
# Result maps horizon -> mean absolute error.
mae_HA = dict.fromkeys(SHIFTS)

# The rolling mean does not depend on the horizon, so compute it once.
# The first 11 rows are NaN because the 12-row window is not yet full.
rolling_hour_mean = data.rolling(12).mean()

for horizon in SHIFTS:
    n_steps = int(horizon / 5)  # horizon expressed as a number of 5-unit rows
    # shift() is positional: row k is compared with the rolling mean at row k - n_steps.
    forecast = rolling_hour_mean.shift(n_steps)
    # Mean over rows for each column, then mean of those column means.
    mae_HA[n_steps * 5] = (data - forecast).abs().mean().mean()
mae_HA

{5: 0.06989158175457981,
 30: 0.09379304789247063,
 60: 0.11500846385924156,
 120: 0.14663319456508048}

### Yesterday Model
$$ \hat{I}_{t + \Delta_t} = I_{t + \Delta_t - 24h }$$
Here the prediction is yesterday's value at the same time

In [6]:
# Yesterday benchmark: compare each row with the row recorded one day
# (plus the horizon offset) earlier. Result maps horizon -> mean absolute error.
# NOTE(review): the offset used here is horizon + one day, i.e. the forecast for
# t + dt is the value at t - 24h, whereas the formula in the accompanying text
# reads I_{t + dt - 24h} (a horizon-independent offset of one day).
# Confirm which of the two is intended.
ROWS_PER_DAY = 97  # number of 5mins intervals in a day (no night)

mae_yday = dict.fromkeys(SHIFTS)
for horizon in SHIFTS:
    n_steps = int(horizon / 5)  # horizon expressed as a number of 5-unit rows
    day_earlier = data.shift(n_steps + ROWS_PER_DAY)
    # Mean over rows for each column, then mean of those column means.
    mae_yday[n_steps * 5] = (data - day_earlier).abs().mean().mean()
mae_yday

{5: 0.14687136285305438,
 30: 0.15290812851233923,
 60: 0.16201347570350266,
 120: 0.17956644732664687}