In [1]:
import json
import numpy as np
import matplotlib.pyplot as plt
%matplotlib notebook

from sklearn import linear_model

In [2]:
# Parse the whole GEFCom2017 qualifying-match JSON file into `alldata`.
with open("GEFCom2017//GEFCom2017-Qual//GEFCom2017Qual2005-2015.json", "r") as source:
    alldata = json.load(source)

# Train

In [14]:
# Show the top-level keys of the loaded JSON document.
alldata.keys()

dict_keys(['ISONE CA', 'ME', 'RI', 'VT', 'CT', 'NH', 'SEMASS', 'WCMASS', 'NEMASSBOST'])

In [50]:
# Select the entry every later cell reads its series from ("ISONE CA").
zone = alldata["ISONE CA"]

In [51]:
# Show which series are available for the selected zone.
zone.keys()

dict_keys(['Date', 'Hour', 'DA_DEMD', 'DEMAND', 'DA_LMP', 'DA_EC', 'DA_CC', 'DA_MLC', 'RT_LMP', 'RT_EC', 'RT_CC', 'RT_MLC', 'DryBulb', 'DewPnt', 'SYSLoad', 'RegCP'])

In [52]:
# Training window: `slc` hourly samples that end `cap` samples before the
# end of the series.
slc = 361 * 24
cap = 365 * 24
train_window = slice(-(slc + cap), -cap)

demand = np.array(zone["DEMAND"][train_window], dtype=np.float64)
hour = np.array(zone["Hour"][train_window], dtype=np.uint8)

# Day-of-week code for every sample: the code advances once per block of 24
# samples and cycles through `days_of_week`.
# Original note: "1/5/2015 was Sunday, so starts with 0".
# NOTE(review): 5 Jan 2015 was a Monday while 5 Jan 2014 was a Sunday; the
# note probably means 2014 -- confirm against zone["Date"][train_window][0].
days_of_week = [0, 1, 2, 3, 4, 5, 6]
day_position = (np.arange(slc) // 24) % 7
week = np.array(days_of_week, dtype=np.uint8)[day_position]

# Month number parsed from characters 5:7 of each date string
# (presumably "YYYY-MM-DD" -- TODO confirm the format).
month = np.array(
    [int(stamp[5:7]) for stamp in zone["Date"][train_window]],
    dtype=np.uint8,
)

# Running sample counter, starting at 0 at the first sample of the window.
trend = np.arange(slc)
# Interaction between the day-of-week code and the hour value.
weekhour = week * hour

In [53]:
# Build 10 x 9 = 90 temperature scenarios, each `slc` samples long, cut from
# the full dry-bulb series. Scenario k = year * 9 + shift starts
# int(year * 365.24 + shift + 0.5) days (times 24 samples) into the series;
# the + 0.5 before int() rounds the day count to the nearest whole day.
# NOTE(review): 365.24 is presumably the mean year length used to absorb leap
# days -- confirm.
raw_temp = np.array(zone["DryBulb"])
temps = [
    raw_temp[start:start + slc]
    for start in (
        int(year * 365.24 + shift + 0.5) * 24
        for year in range(10)
        for shift in range(9)
    )
]

In [54]:
# Fit one ordinary-least-squares model per temperature scenario in `temps`,
# always against the same `demand` vector. regs[k] is the model fitted with
# temps[k].
#
# The calendar columns do not depend on the scenario, so they are collected
# once outside the loop. The design matrix is assembled with np.column_stack
# rather than transposing through nested Python lists on every iteration.
calendar_columns = [trend, month, week, hour, weekhour]

regs = []
for temp in temps:
    temp2 = temp ** 2
    temp3 = temp ** 3
    powers = [temp, temp2, temp3]

    # Column order: calendar terms, temperature polynomial, polynomial x month,
    # polynomial x hour (14 columns in total).
    columns = (
        calendar_columns
        + powers
        + [power * month for power in powers]
        + [power * hour for power in powers]
    )
    # `variables` is left holding the design matrix of the LAST scenario once
    # the loop finishes.
    variables = np.column_stack(columns).astype(np.float64)

    # fit() returns the estimator itself, so only fitted models enter `regs`.
    regs.append(linear_model.LinearRegression().fit(variables, demand))

In [55]:
# Overlay the last two years (2 * 365 * 24 samples) of dry-bulb temperature
# for the system-wide entry (thick line) and three individual zones (thin).
plot_hours = 2 * 365 * 24
zone_line_widths = [
    ("ISONE CA", 3),
    ("WCMASS", 0.3),
    ("NEMASSBOST", 0.3),
    ("CT", 0.3),
]

plt.figure()
for zone_name, width in zone_line_widths:
    plt.plot(alldata[zone_name]["DryBulb"][-plot_hours:], linewidth=width)
# NOTE(review): the percentages in the labels are not computed in this
# notebook section -- confirm what they refer to.
plt.legend(["ISONE", "WCMASS (-18%)", "NEMASSBOST (-10%)", "CT (-8%)"])
plt.show()

<IPython.core.display.Javascript object>

In [56]:
# In-sample check of a single ensemble member against the training demand.
#
# regs[k] was fitted on the design matrix built from temps[k]. The leftover
# `variables` from the training loop belongs to the LAST scenario, so using it
# here would evaluate regs[85] on a different temperature series than the one
# it was trained on. Rebuild the matching design matrix instead.
fit_index = 85  # scenario year 9, shift 4 in the order `temps` was built (9 * 9 + 4)
fit_temp = temps[fit_index]
fit_powers = [fit_temp, fit_temp ** 2, fit_temp ** 3]
# Same column order as the training loop: calendar terms, temperature
# polynomial, polynomial x month, polynomial x hour.
fit_columns = (
    [trend, month, week, hour, weekhour]
    + fit_powers
    + [power * month for power in fit_powers]
    + [power * hour for power in fit_powers]
)
fit = regs[fit_index].predict(np.column_stack(fit_columns).astype(np.float64))

plt.figure()
plt.plot(fit, label="fit")
plt.plot(demand, label="demand")
plt.legend()
plt.show()

<IPython.core.display.Javascript object>

# Test

In [57]:
# Test window: `slc` hourly samples that end `cap` samples before the end of
# the series. This rebinds slc, cap, hour, week, month and trend, replacing
# the training-window values of the same names.
slc = 31 * 24
cap = (365 - 52) * 24 - slc
test_window = slice(-(slc + cap), -cap)

test_demand = np.array(zone["DEMAND"][test_window], dtype=np.float64)
hour = np.array(zone["Hour"][test_window], dtype=np.uint8)

# Day-of-week code for every sample, cycling through `days_of_week` once per
# block of 24 samples.
# Original note: "1/5/2015 was Sunday, so starts with 0".
# NOTE(review): this note was carried over from the training cell; the test
# window starts (365 - 52) days before the end of the series, not on 1/5 --
# confirm that the first test day really has code 0.
days_of_week = [0, 1, 2, 3, 4, 5, 6]
day_position = (np.arange(slc) // 24) % 7
week = np.array(days_of_week, dtype=np.uint8)[day_position]

# Month number parsed from characters 5:7 of each date string.
month = np.array(
    [int(stamp[5:7]) for stamp in zone["Date"][test_window]],
    dtype=np.uint8,
)

# NOTE(review): the counter restarts at 0 here, whereas the models were
# trained with a counter running over the training window -- confirm that a
# restarted trend is what the models should be evaluated with.
trend = np.arange(slc)

In [58]:
# Dry-bulb temperature over the same slice of the series as `test_demand`.
temp = np.array(zone["DryBulb"][-(slc + cap):-cap])

In [59]:
# Design matrix for the test window, with the same 14 columns in the same
# order as the training design matrix: calendar terms, temperature
# polynomial, polynomial x month, polynomial x hour.
weekhour = week * hour
temp2 = temp ** 2
temp3 = temp ** 3
tempmonth = temp * month
temp2month = temp2 * month
temp3month = temp3 * month
temphour = temp * hour
temp2hour = temp2 * hour
temp3hour = temp3 * hour

test_columns = [
    trend, month, week, hour, weekhour,
    temp, temp2, temp3,
    tempmonth, temp2month, temp3month,
    temphour, temp2hour, temp3hour,
]
# Stack the 1-D feature arrays as columns -> shape (n_samples, 14). This
# replaces a Python-level transpose through nested lists.
test_variables = np.column_stack(test_columns).astype(np.float64)

In [60]:
# One forecast of the test window per fitted model, in the order of `regs`.
preds = [model.predict(test_variables) for model in regs]

In [61]:
# Convert the per-model forecasts into one array so later cells can index it
# as preds[model, sample] and reduce along axis 0.
preds = np.array(preds)

In [62]:
# Ensemble mean and spread across models (axis 0) for every test sample,
# plotted against the observed demand with a +/- one standard deviation band.
pred = np.mean(preds, axis=0)
std = np.std(preds, axis=0)
band_style = {"linestyle": "--", "color": "black", "linewidth": 0.5}

plt.figure()
plt.plot(test_demand)
plt.plot(pred)
for band_edge in (std + pred, pred - std):
    plt.plot(band_edge, **band_style)
plt.show()

<IPython.core.display.Javascript object>

In [63]:
def pinball_loss(data, quant_preds, quantiles):
    """Return the mean pinball (quantile) loss of a set of quantile forecasts.

    Args:
        data: indexable sequence of observed values, one per time step.
        quant_preds: quant_preds[t][i] is the forecast of quantile
            quantiles[i] for time step t.
        quantiles: sequence of quantile levels.

    Returns:
        The loss summed over every (time step, quantile) pair and divided by
        len(data) * len(quantiles).

    Raises:
        ZeroDivisionError: if `data` or `quantiles` is empty.
    """
    total = 0
    for t, actual in enumerate(data):
        for i, level in enumerate(quantiles):
            predicted = quant_preds[t][i]
            if predicted > actual:
                # Forecast above the observation: weighted by (1 - level).
                penalty = (predicted - actual) * (1 - level)
            else:
                # Forecast at or below the observation: weighted by level.
                penalty = (actual - predicted) * level
            total += penalty
    n_terms = len(data) * len(quantiles)
    return total / n_terms

In [64]:
# Empirical quantile forecasts taken from the spread of the model ensemble.
# quant_preds[t][i] is the ensemble value at quantile quantiles[i] for test
# sample t.
quantiles = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]

# Position of each quantile in the sorted ensemble: level * ensemble size,
# rounded to the nearest integer by the + 0.5 before int().
# NOTE(review): a level whose rounded position equals the ensemble size would
# index past the end of the sorted values.
n_models = preds.shape[0]
ranks = [int(level * n_models + 0.5) for level in quantiles]

quant_preds = []
for sample_preds in preds.T:  # one row per test sample: all model forecasts
    ordered = np.sort(sample_preds)
    quant_preds.append([ordered[rank] for rank in ranks])

In [65]:
# Spot check: the quantile forecasts (one per level in `quantiles`) for test
# sample index 100.
print(quant_preds[100])

[14255.950348266728, 14461.526382665965, 14620.318581775315, 14740.760773824442, 14807.033519190321, 14853.191856414107, 14909.916836852948, 14976.784287987517, 15082.534233666594]


In [66]:
# Mean pinball loss of the ensemble quantile forecasts on the test window.
pinball_loss(test_demand, quant_preds, quantiles)

596.0334828805883