In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import lightgbm as lgb
%matplotlib inline
import torch
from deepseries.models import Wave2WaveV1
from deepseries.dataset import Property, TimeSeries, Seq2SeqDataLoader
from deepseries.nn.loss import MSELoss, RMSELoss
from deepseries.train import Learner
from deepseries.optim import ReduceCosineAnnealingLR
from torch.optim import Adam
from torch import nn
import matplotlib as mpl
from sklearn.metrics import mean_absolute_error
import chinese_calendar as calendar
import datetime as dt
# Project metadata keyed by contributor_id. The workbook is parsed once and
# both views are derived from that single frame.
info = pd.read_excel("../data/info.xlsx").set_index("contributor_id")
# 'huangzf' column as a Series indexed by contributor_id.
# NOTE(review): the name `recored` looks like a typo; it is kept because cells
# outside this view may reference it.
recored = info['huangzf']
info = info[['pjt_name', 'pjt_type']]
norm_score = pd.read_csv(r"../data/20200315_20200415.csv")

In [4]:
# Build the wide target matrix: one row per cid, one column per hourly timestamp.
power = (
    pd.read_csv('../data/df.csv', parse_dates=['data_time'])[['data_time', 'cid', 'value']]
    .set_index("data_time")
    .groupby("cid")
    .resample("1H")
    .sum()                      # hourly totals per cid
    .reset_index()
    .pivot(index='cid', columns='data_time', values='value')
    .apply(np.log1p)            # model works on log1p(value); predict() inverts with expm1
    .iloc[:, 10000:]            # drop the first 10000 time columns
)

In [5]:
# A point is usable only if it is neither exactly zero nor missing.
is_zero = power.values == 0
is_nan = power.isnull().values
is_valid = ~(is_zero | is_nan)

# Per-series standardisation; the statistics are computed over valid points only.
xy = np.ma.masked_array(power.values, mask=~is_valid)
series_mu = xy.mean(axis=1).data.reshape(-1, 1)
series_std = xy.std(axis=1).data.reshape(-1, 1)

# Masked entries become 0 after scaling, then a length-1 axis is inserted at
# position 1 so the array is (series, 1, time).
xy = ((xy - series_mu) / series_std).filled(0.)
xy = xy[:, np.newaxis, :].astype('float32')

# Split sizes and window lengths, in time steps (the data is resampled to "1H").
N_TEST = 24 * 30
N_VALID = 24 * 2
DROP_ZERO = True          # not referenced in the cells visible here
DEC_LEN = 24 * 2
ENC_LEN = 24 * 7
time_free_space = 24

In [6]:
def n_lag(series, n):
    """Shift `series` forward by `n` steps along its last axis, zero-padding the start.

    Parameters
    ----------
    series : np.ndarray
        Array whose last axis is the one being lagged (any number of leading axes).
    n : int
        Non-negative number of steps to lag by.

    Returns
    -------
    np.ndarray
        A new array with the shape and dtype of `series` in which position ``t``
        holds ``series[..., t - n]`` and the first ``n`` positions are zero.

    Raises
    ------
    ValueError
        If `n` is negative.
    """
    if n < 0:
        raise ValueError(f"n must be non-negative, got {n}")
    lag = np.zeros_like(series)
    if n == 0:
        # `series[..., :-0]` is an empty slice, so a zero lag needs a plain copy.
        lag[...] = series
        return lag
    lag[..., n:] = series[..., :-n]
    return lag

# Lagged copies of the normalised target, 7 and 14 days back (24 steps per day).
x_lag7, x_lag14 = (n_lag(xy, days * 24) for days in (7, 14))

# Validity mask with a length-1 axis inserted at position 1, so it can be
# concatenated with the lag features along that axis.
x_is_valid = is_valid[:, np.newaxis, :]

x_num_features = np.concatenate((x_lag7, x_lag14, x_is_valid), axis=1).astype("float32")

# Loss weights: ~1 for valid points, 1e-6 for invalid ones.
weights = 1e-6 + x_is_valid.astype("float32")

In [7]:
def periodic_feature(x, T):
    """Encode values with period `T` as a sine/cosine pair.

    Returns an array with a new leading axis of length 2: index 0 holds
    ``sin(2*pi*x/T)`` and index 1 holds ``cos(2*pi*x/T)``.
    """
    phase = x * np.pi * 2 / T
    return np.stack([np.sin(phase), np.cos(phase)], axis=0)


# Calendar encodings depend only on the timestamp, so each sin/cos pair is
# computed once from the column index and copied xy.shape[0] times along a new
# leading axis.
xy_weekday = np.tile(
    periodic_feature(power.columns.weekday.values, 7)[np.newaxis], (xy.shape[0], 1, 1))

xy_hour = np.tile(
    periodic_feature(power.columns.hour.values, 24)[np.newaxis], (xy.shape[0], 1, 1))

xy_month = np.tile(
    periodic_feature(power.columns.month.values, 12)[np.newaxis], (xy.shape[0], 1, 1))

def get_holiday_features(dts):
    """Build one-hot holiday indicators plus a 'sick' window flag for `dts`.

    Parameters
    ----------
    dts : DatetimeIndex or datetime-like sequence
        Timestamps to featurise.

    Returns
    -------
    pd.DataFrame
        One row per timestamp. The dummy columns come from element ``[1]`` of
        ``calendar.get_holiday_detail(ts)`` (presumably the holiday name —
        confirm against chinese_calendar), so which columns appear depends on
        the values present in `dts`. The extra 'sick' column is 1 for timestamps
        in [2020-02-01, 2020-03-01) and 0 otherwise.
    """
    # The window flag is derived from the argument itself, so the function
    # gives a consistent result for any timestamps it is called with.
    dts = pd.DatetimeIndex(dts)
    holidays = pd.get_dummies(pd.Series(dts).apply(lambda x: calendar.get_holiday_detail(x)[1]))
    in_window = (dts >= "2020-02-01") & (dts < "2020-03-01")
    holidays['sick'] = np.where(in_window, 1, 0)
    return holidays

# Holiday indicators as (1, n_columns, time), copied once per series.
holidays = get_holiday_features(power.columns)
holidays = np.tile(holidays.values.T[np.newaxis], (xy.shape[0], 1, 1))

# Features available for both encoder and decoder spans, stacked along axis 1.
xy_num_features = np.concatenate(
    [xy_weekday, xy_hour, xy_month, holidays], axis=1
).astype('float32')

In [8]:
# One integer id per series, shaped (n_series, 1). The count is taken from the
# data so it always agrees with the other feature arrays.
# NOTE(review): the model cell declares the embedding as (63, 4); keep the
# number of series within that size.
xy_cat_features = np.expand_dims(np.arange(xy.shape[0]), 1)

In [9]:
class ForwardSpliter:
    """Chronological train/validation splitter for encoder-decoder windows."""

    def split(self, time_idx, enc_len, dec_len, valid_size):
        """Split `time_idx` into a leading train part and a trailing validation part.

        Parameters
        ----------
        time_idx : sequence
            Ordered time positions (anything supporting ``len`` and slicing).
        enc_len : int
            Number of steps prepended to the validation part, so the first
            validation window has encoder context. These steps overlap the
            end of the train part.
        dec_len : int
            Accepted for interface compatibility; not used by the split.
        valid_size : int or float
            Number of trailing steps held out. A value below 1 is treated as a
            fraction of ``len(time_idx)`` and floored.

        Returns
        -------
        (train_idx, valid_idx)
            ``train_idx`` is everything except the last `valid_size` steps;
            ``valid_idx`` is the last ``valid_size + enc_len`` steps.

        Raises
        ------
        ValueError
            If `valid_size` is negative.
        """
        if valid_size < 0:
            raise ValueError(f"valid_size must be non-negative, got {valid_size}")
        n_steps = len(time_idx)
        if valid_size < 1:
            valid_size = int(np.floor(n_steps * valid_size))
        # Positive bounds avoid the `[:-0]` pitfall, where a hold-out of zero
        # steps would otherwise produce an empty training set.
        train_end = max(n_steps - valid_size, 0)
        valid_start = max(n_steps - valid_size - enc_len, 0)
        train_idx = time_idx[:train_end]
        valid_idx = time_idx[valid_start:]
        return train_idx, valid_idx
    
spliter = ForwardSpliter()
# Two-stage chronological split: first hold out the last N_TEST + N_VALID steps
# (plus ENC_LEN steps of context), then split that tail again so the last
# N_TEST steps (plus context) become the test span.
train_idx, valid_idx = spliter.split(np.arange(xy.shape[2]), ENC_LEN, DEC_LEN, N_TEST+N_VALID)
valid_idx, test_idx = spliter.split(valid_idx, ENC_LEN, DEC_LEN, N_TEST)

# Target series.
train_xy = TimeSeries(xy[:, :, train_idx])
valid_xy = TimeSeries(xy[:, :, valid_idx])

# Calendar/holiday features, passed to both encoder and decoder.
train_xy_features = TimeSeries(xy_num_features[:, :, train_idx])
valid_xy_features = TimeSeries(xy_num_features[:, :, valid_idx])
train_xy_cat = Property(xy_cat_features)

# Lag/validity features, passed to the encoder only.
train_x_features = TimeSeries(x_num_features[:, :, train_idx])
valid_x_features = TimeSeries(x_num_features[:, :, valid_idx])
valid_xy_cat = Property(xy_cat_features)

# Per-point loss weights.
train_weight = TimeSeries(weights[:, :, train_idx])
valid_weight = TimeSeries(weights[:, :, valid_idx])

# The training loader takes its `time_free_space` from the notebook-level
# constant of the same name instead of repeating the literal value.
train_frame = Seq2SeqDataLoader(train_xy, batch_size=16, enc_lens=ENC_LEN, dec_lens=DEC_LEN, use_cuda=True, mode='train', time_free_space=time_free_space,
                          enc_num_feats=[train_xy_features, train_x_features], dec_num_feats=[train_xy_features], weights=train_weight,
                               enc_cat_feats=[train_xy_cat], dec_cat_feats=[train_xy_cat])
valid_frame = Seq2SeqDataLoader(valid_xy, batch_size=64, enc_lens=ENC_LEN, dec_lens=DEC_LEN, use_cuda=True, mode='train', time_free_space=0,
                         time_interval=48, enc_num_feats=[valid_xy_features, valid_x_features], dec_num_feats=[valid_xy_features],
                               weights=valid_weight, dec_cat_feats=[valid_xy_cat], enc_cat_feats=[valid_xy_cat])

# NumPy views of the test span. Encoder features are the calendar features
# followed by the lag/validity features along axis 1.
test_xy = xy[:, :, test_idx]
test_xf = np.concatenate([xy_num_features[:, :, test_idx], x_num_features[:, :, test_idx]], axis=1)
test_yf = xy_num_features[:, :, test_idx]
# Series ids repeated across the decoder / encoder window lengths.
test_dec_cat = np.repeat(np.expand_dims(xy_cat_features, 2), DEC_LEN, axis=2)
test_enc_cat = np.repeat(np.expand_dims(xy_cat_features, 2), ENC_LEN, axis=2)

In [None]:
# NOTE(review): enc_num / dec_num presumably have to match the channel counts of
# the encoder / decoder numeric features built above; the number of holiday
# dummy columns depends on the data, so confirm these literals when it changes.
model = Wave2WaveV1(enc_num=9+8, dec_num=6+8, n_layers=8, n_blocks=3, 
                enc_cat=[(63, 4)], dec_cat=[(63, 4)], dropout=0.1, debug=False, hidden_size=512)
# Move the model to the GPU before constructing the optimizer, as the
# torch.optim documentation recommends.
model.cuda()
opt = Adam(model.parameters(), 0.002)
loss_fn = MSELoss()
lr_scheduler = ReduceCosineAnnealingLR(opt, 64)
learner = Learner(model, opt, loss_fn, './power_env', verbose=5000, lr_scheduler=lr_scheduler)
learner.fit(1500, train_frame, valid_frame, patient=64, start_save=1, early_stopping=False)

[[04/23/2020 16:30:21]] start training >>>>>>>>>>>  see log: tensorboard --logdir ./power_env\logs
[[04/23/2020 16:30:37]] epoch 1 / 1500, batch 100%, train loss 0.6934, valid loss 0.8635, cost 0.3 min
[[04/23/2020 16:30:52]] epoch 2 / 1500, batch 100%, train loss 0.2070, valid loss 0.8207, cost 0.2 min
[[04/23/2020 16:31:10]] epoch 3 / 1500, batch 100%, train loss 1.3414, valid loss 0.8928, cost 0.3 min
[[04/23/2020 16:31:23]] epoch 4 / 1500, batch 100%, train loss 0.5467, valid loss 0.9882, cost 0.2 min
[[04/23/2020 16:31:41]] epoch 5 / 1500, batch 100%, train loss 0.4340, valid loss 1.1661, cost 0.3 min
[[04/23/2020 16:32:01]] epoch 6 / 1500, batch 100%, train loss 0.5371, valid loss 1.1690, cost 0.3 min
[[04/23/2020 16:32:17]] epoch 7 / 1500, batch 100%, train loss 0.4245, valid loss 0.9368, cost 0.3 min
[[04/23/2020 16:32:36]] epoch 8 / 1500, batch 100%, train loss 0.9840, valid loss 0.8528, cost 0.3 min
[[04/23/2020 16:32:52]] epoch 9 / 1500, batch 100%, train loss 0.2079, valid 

[[04/23/2020 16:55:20]] epoch 78 / 1500, batch 100%, train loss 0.1569, valid loss 0.3653, cost 0.3 min
[[04/23/2020 16:55:38]] epoch 79 / 1500, batch 100%, train loss 0.4178, valid loss 0.3714, cost 0.3 min
[[04/23/2020 16:55:58]] epoch 80 / 1500, batch 100%, train loss 0.2783, valid loss 0.3677, cost 0.3 min
[[04/23/2020 16:56:16]] epoch 81 / 1500, batch 100%, train loss 0.1286, valid loss 0.3659, cost 0.3 min
[[04/23/2020 16:56:30]] epoch 82 / 1500, batch 100%, train loss 0.2962, valid loss 0.3639, cost 0.2 min
[[04/23/2020 16:56:44]] epoch 83 / 1500, batch 100%, train loss 0.5680, valid loss 0.3554, cost 0.2 min
[[04/23/2020 16:57:01]] epoch 84 / 1500, batch 100%, train loss 0.1947, valid loss 0.3652, cost 0.3 min
[[04/23/2020 16:57:13]] epoch 85 / 1500, batch 100%, train loss 0.3223, valid loss 0.3632, cost 0.2 min
[[04/23/2020 16:57:33]] epoch 86 / 1500, batch 100%, train loss 0.1556, valid loss 0.3615, cost 0.3 min
[[04/23/2020 16:57:49]] epoch 87 / 1500, batch 100%, train loss 

[[04/23/2020 17:16:52]] epoch 156 / 1500, batch 100%, train loss 0.5052, valid loss 0.3210, cost 0.3 min
[[04/23/2020 17:17:12]] epoch 157 / 1500, batch 100%, train loss 0.1774, valid loss 0.3771, cost 0.3 min
[[04/23/2020 17:17:32]] epoch 158 / 1500, batch 100%, train loss 0.1258, valid loss 0.4115, cost 0.3 min
[[04/23/2020 17:17:47]] epoch 159 / 1500, batch 100%, train loss 0.3019, valid loss 0.3321, cost 0.2 min
[[04/23/2020 17:18:09]] epoch 160 / 1500, batch 100%, train loss 0.3695, valid loss 0.3193, cost 0.4 min
[[04/23/2020 17:18:25]] epoch 161 / 1500, batch 100%, train loss 0.2669, valid loss 0.3323, cost 0.3 min
[[04/23/2020 17:18:44]] epoch 162 / 1500, batch 100%, train loss 0.3060, valid loss 0.3564, cost 0.3 min
[[04/23/2020 17:18:58]] epoch 163 / 1500, batch 100%, train loss 0.1231, valid loss 0.3642, cost 0.2 min
[[04/23/2020 17:19:12]] epoch 164 / 1500, batch 100%, train loss 0.2902, valid loss 0.3475, cost 0.2 min
[[04/23/2020 17:19:31]] epoch 165 / 1500, batch 100%, t

[[04/23/2020 17:38:29]] epoch 234 / 1500, batch 100%, train loss 0.0636, valid loss 0.3666, cost 0.3 min
[[04/23/2020 17:38:46]] epoch 235 / 1500, batch 100%, train loss 0.4061, valid loss 0.3566, cost 0.3 min
[[04/23/2020 17:39:04]] epoch 236 / 1500, batch 100%, train loss 0.3101, valid loss 0.3280, cost 0.3 min
[[04/23/2020 17:39:22]] epoch 237 / 1500, batch 100%, train loss 0.4092, valid loss 0.3196, cost 0.3 min
[[04/23/2020 17:39:40]] epoch 238 / 1500, batch 100%, train loss 0.1292, valid loss 0.3145, cost 0.3 min
[[04/23/2020 17:40:00]] epoch 239 / 1500, batch 100%, train loss 0.2674, valid loss 0.3142, cost 0.3 min
[[04/23/2020 17:40:16]] epoch 240 / 1500, batch 100%, train loss 0.5429, valid loss 0.3109, cost 0.3 min
[[04/23/2020 17:40:34]] epoch 241 / 1500, batch 100%, train loss 0.1703, valid loss 0.3158, cost 0.3 min
[[04/23/2020 17:40:49]] epoch 242 / 1500, batch 100%, train loss 0.1773, valid loss 0.3251, cost 0.3 min
[[04/23/2020 17:41:09]] epoch 243 / 1500, batch 100%, t

[[04/23/2020 18:00:12]] epoch 312 / 1500, batch 100%, train loss 0.1633, valid loss 0.2888, cost 0.3 min
[[04/23/2020 18:00:28]] epoch 313 / 1500, batch 100%, train loss 0.0879, valid loss 0.2892, cost 0.3 min
[[04/23/2020 18:00:44]] epoch 314 / 1500, batch 100%, train loss 0.1704, valid loss 0.2859, cost 0.3 min
[[04/23/2020 18:00:59]] epoch 315 / 1500, batch 100%, train loss 0.1624, valid loss 0.2851, cost 0.3 min
[[04/23/2020 18:01:13]] epoch 316 / 1500, batch 100%, train loss 0.2420, valid loss 0.2822, cost 0.2 min
[[04/23/2020 18:01:29]] epoch 317 / 1500, batch 100%, train loss 0.0917, valid loss 0.2820, cost 0.3 min
[[04/23/2020 18:01:42]] epoch 318 / 1500, batch 100%, train loss 0.3001, valid loss 0.2937, cost 0.2 min
[[04/23/2020 18:01:59]] epoch 319 / 1500, batch 100%, train loss 0.1528, valid loss 0.2915, cost 0.3 min
[[04/23/2020 18:02:17]] epoch 320 / 1500, batch 100%, train loss 0.0635, valid loss 0.2831, cost 0.3 min
[[04/23/2020 18:02:37]] epoch 321 / 1500, batch 100%, t

[[04/23/2020 18:21:36]] epoch 390 / 1500, batch 100%, train loss 0.1338, valid loss 0.4373, cost 0.3 min
[[04/23/2020 18:21:58]] epoch 391 / 1500, batch 100%, train loss 0.4019, valid loss 0.4143, cost 0.4 min
[[04/23/2020 18:22:11]] epoch 392 / 1500, batch 100%, train loss 0.2736, valid loss 0.3553, cost 0.2 min
[[04/23/2020 18:22:31]] epoch 393 / 1500, batch 100%, train loss 0.2192, valid loss 0.3123, cost 0.3 min
[[04/23/2020 18:22:48]] epoch 394 / 1500, batch 100%, train loss 0.0811, valid loss 0.2953, cost 0.3 min
[[04/23/2020 18:23:05]] epoch 395 / 1500, batch 100%, train loss 0.0790, valid loss 0.2886, cost 0.3 min
[[04/23/2020 18:23:26]] epoch 396 / 1500, batch 100%, train loss 0.2481, valid loss 0.2859, cost 0.3 min
[[04/23/2020 18:23:45]] epoch 397 / 1500, batch 100%, train loss 0.1130, valid loss 0.2877, cost 0.3 min
[[04/23/2020 18:24:04]] epoch 398 / 1500, batch 100%, train loss 0.3477, valid loss 0.2846, cost 0.3 min
[[04/23/2020 18:24:26]] epoch 399 / 1500, batch 100%, t

[[04/23/2020 18:43:43]] epoch 468 / 1500, batch 100%, train loss 0.0785, valid loss 0.2832, cost 0.3 min
[[04/23/2020 18:43:57]] epoch 469 / 1500, batch 100%, train loss 0.0342, valid loss 0.2775, cost 0.2 min
[[04/23/2020 18:44:16]] epoch 470 / 1500, batch 100%, train loss 0.0810, valid loss 0.2804, cost 0.3 min
[[04/23/2020 18:44:33]] epoch 471 / 1500, batch 100%, train loss 0.2057, valid loss 0.2796, cost 0.3 min
[[04/23/2020 18:44:53]] epoch 472 / 1500, batch 100%, train loss 0.6085, valid loss 0.2791, cost 0.3 min
[[04/23/2020 18:45:10]] epoch 473 / 1500, batch 100%, train loss 0.2453, valid loss 0.2854, cost 0.3 min
[[04/23/2020 18:45:28]] epoch 474 / 1500, batch 100%, train loss 0.0592, valid loss 0.2843, cost 0.3 min
[[04/23/2020 18:45:44]] epoch 475 / 1500, batch 100%, train loss 0.0958, valid loss 0.2840, cost 0.3 min
[[04/23/2020 18:46:00]] epoch 476 / 1500, batch 100%, train loss 0.2446, valid loss 0.2849, cost 0.3 min
[[04/23/2020 18:46:15]] epoch 477 / 1500, batch 100%, t

[[04/23/2020 19:06:33]] epoch 546 / 1500, batch 100%, train loss 0.0951, valid loss 0.2795, cost 0.3 min
[[04/23/2020 19:06:53]] epoch 547 / 1500, batch 100%, train loss 0.3571, valid loss 0.2842, cost 0.3 min
[[04/23/2020 19:07:08]] epoch 548 / 1500, batch 100%, train loss 0.1058, valid loss 0.2870, cost 0.3 min
[[04/23/2020 19:07:30]] epoch 549 / 1500, batch 100%, train loss 0.1060, valid loss 0.2904, cost 0.4 min
[[04/23/2020 19:07:43]] epoch 550 / 1500, batch 100%, train loss 0.3442, valid loss 0.3025, cost 0.2 min
[[04/23/2020 19:07:59]] epoch 551 / 1500, batch 100%, train loss 0.1805, valid loss 0.3048, cost 0.3 min
[[04/23/2020 19:08:19]] epoch 552 / 1500, batch 100%, train loss 0.1161, valid loss 0.3005, cost 0.3 min
[[04/23/2020 19:08:35]] epoch 553 / 1500, batch 100%, train loss 0.2549, valid loss 0.3019, cost 0.3 min
[[04/23/2020 19:08:52]] epoch 554 / 1500, batch 100%, train loss 0.2317, valid loss 0.2962, cost 0.3 min
[[04/23/2020 19:09:10]] epoch 555 / 1500, batch 100%, t

[[04/23/2020 19:29:00]] epoch 624 / 1500, batch 100%, train loss 0.1686, valid loss 0.2940, cost 0.3 min
[[04/23/2020 19:29:17]] epoch 625 / 1500, batch 100%, train loss 0.0554, valid loss 0.2876, cost 0.3 min
[[04/23/2020 19:29:37]] epoch 626 / 1500, batch 100%, train loss 0.4859, valid loss 0.2774, cost 0.3 min
[[04/23/2020 19:29:52]] epoch 627 / 1500, batch 100%, train loss 0.1602, valid loss 0.2914, cost 0.2 min
[[04/23/2020 19:30:10]] epoch 628 / 1500, batch 100%, train loss 0.1447, valid loss 0.2946, cost 0.3 min
[[04/23/2020 19:30:24]] epoch 629 / 1500, batch 100%, train loss 0.0923, valid loss 0.2942, cost 0.2 min
[[04/23/2020 19:30:38]] epoch 630 / 1500, batch 100%, train loss 0.0785, valid loss 0.2870, cost 0.2 min
[[04/23/2020 19:30:56]] epoch 631 / 1500, batch 100%, train loss 0.2551, valid loss 0.2835, cost 0.3 min
[[04/23/2020 19:31:16]] epoch 632 / 1500, batch 100%, train loss 0.1943, valid loss 0.2761, cost 0.3 min
[[04/23/2020 19:31:34]] epoch 633 / 1500, batch 100%, t

[[04/23/2020 19:51:17]] epoch 702 / 1500, batch 100%, train loss 0.1251, valid loss 0.2772, cost 0.3 min
[[04/23/2020 19:51:35]] epoch 703 / 1500, batch 100%, train loss 0.1607, valid loss 0.2790, cost 0.3 min
[[04/23/2020 19:51:53]] epoch 704 / 1500, batch 100%, train loss 0.2061, valid loss 0.2773, cost 0.3 min
[[04/23/2020 19:52:12]] epoch 705 / 1500, batch 100%, train loss 0.3564, valid loss 0.2753, cost 0.3 min
[[04/23/2020 19:52:34]] epoch 706 / 1500, batch 100%, train loss 0.4090, valid loss 0.2766, cost 0.4 min
[[04/23/2020 19:52:55]] epoch 707 / 1500, batch 100%, train loss 0.1673, valid loss 0.2799, cost 0.4 min
[[04/23/2020 19:53:13]] epoch 708 / 1500, batch 100%, train loss 0.0553, valid loss 0.2759, cost 0.3 min
[[04/23/2020 19:53:30]] epoch 709 / 1500, batch 100%, train loss 0.1297, valid loss 0.2811, cost 0.3 min
[[04/23/2020 19:53:43]] epoch 710 / 1500, batch 100%, train loss 0.3599, valid loss 0.2817, cost 0.2 min
[[04/23/2020 19:54:00]] epoch 711 / 1500, batch 100%, t

[[04/23/2020 20:13:52]] epoch 780 / 1500, batch 100%, train loss 0.1037, valid loss 0.2688, cost 0.3 min
[[04/23/2020 20:14:06]] epoch 781 / 1500, batch 100%, train loss 0.4001, valid loss 0.2670, cost 0.2 min
[[04/23/2020 20:14:25]] epoch 782 / 1500, batch 100%, train loss 0.0147, valid loss 0.2813, cost 0.3 min
[[04/23/2020 20:14:44]] epoch 783 / 1500, batch 100%, train loss 0.3448, valid loss 0.2853, cost 0.3 min
[[04/23/2020 20:15:06]] epoch 784 / 1500, batch 100%, train loss 0.1013, valid loss 0.2787, cost 0.4 min
[[04/23/2020 20:15:25]] epoch 785 / 1500, batch 100%, train loss 0.0689, valid loss 0.2775, cost 0.3 min
[[04/23/2020 20:15:42]] epoch 786 / 1500, batch 100%, train loss 0.0913, valid loss 0.2695, cost 0.3 min
[[04/23/2020 20:15:54]] epoch 787 / 1500, batch 100%, train loss 0.0921, valid loss 0.2667, cost 0.2 min
[[04/23/2020 20:16:13]] epoch 788 / 1500, batch 100%, train loss 0.1932, valid loss 0.2666, cost 0.3 min
[[04/23/2020 20:16:32]] epoch 789 / 1500, batch 100%, t

[[04/23/2020 20:36:48]] epoch 858 / 1500, batch 100%, train loss 0.0612, valid loss 0.2781, cost 0.3 min
[[04/23/2020 20:37:04]] epoch 859 / 1500, batch 100%, train loss 0.1371, valid loss 0.2712, cost 0.3 min
[[04/23/2020 20:37:23]] epoch 860 / 1500, batch 100%, train loss 0.3264, valid loss 0.2757, cost 0.3 min
[[04/23/2020 20:37:40]] epoch 861 / 1500, batch 100%, train loss 0.1223, valid loss 0.2774, cost 0.3 min
[[04/23/2020 20:37:56]] epoch 862 / 1500, batch 100%, train loss 0.0953, valid loss 0.2821, cost 0.3 min
[[04/23/2020 20:38:12]] epoch 863 / 1500, batch 100%, train loss 0.1594, valid loss 0.2802, cost 0.3 min
[[04/23/2020 20:38:26]] epoch 864 / 1500, batch 100%, train loss 0.1248, valid loss 0.2802, cost 0.2 min
[[04/23/2020 20:38:41]] epoch 865 / 1500, batch 100%, train loss 0.0425, valid loss 0.2769, cost 0.3 min
[[04/23/2020 20:38:59]] epoch 866 / 1500, batch 100%, train loss 0.0716, valid loss 0.2770, cost 0.3 min
[[04/23/2020 20:39:13]] epoch 867 / 1500, batch 100%, t

[[04/23/2020 20:59:24]] epoch 936 / 1500, batch 100%, train loss 0.3156, valid loss 0.2689, cost 0.2 min
[[04/23/2020 20:59:44]] epoch 937 / 1500, batch 100%, train loss 0.2168, valid loss 0.2653, cost 0.3 min
[[04/23/2020 21:00:00]] epoch 938 / 1500, batch 100%, train loss 0.0526, valid loss 0.2702, cost 0.3 min
[[04/23/2020 21:00:19]] epoch 939 / 1500, batch 100%, train loss 0.0491, valid loss 0.2691, cost 0.3 min
[[04/23/2020 21:00:33]] epoch 940 / 1500, batch 100%, train loss 0.4531, valid loss 0.2701, cost 0.2 min
[[04/23/2020 21:00:52]] epoch 941 / 1500, batch 100%, train loss 0.0381, valid loss 0.2790, cost 0.3 min
[[04/23/2020 21:01:09]] epoch 942 / 1500, batch 100%, train loss 0.0324, valid loss 0.2753, cost 0.3 min
[[04/23/2020 21:01:28]] epoch 943 / 1500, batch 100%, train loss 0.0790, valid loss 0.2825, cost 0.3 min
[[04/23/2020 21:01:49]] epoch 944 / 1500, batch 100%, train loss 0.1510, valid loss 0.2810, cost 0.3 min
[[04/23/2020 21:02:09]] epoch 945 / 1500, batch 100%, t

[[04/23/2020 21:21:38]] epoch 1013 / 1500, batch 100%, train loss 0.1445, valid loss 0.2827, cost 0.3 min
[[04/23/2020 21:21:55]] epoch 1014 / 1500, batch 100%, train loss 0.0558, valid loss 0.2780, cost 0.3 min
[[04/23/2020 21:22:16]] epoch 1015 / 1500, batch 100%, train loss 0.1884, valid loss 0.2667, cost 0.3 min
[[04/23/2020 21:22:32]] epoch 1016 / 1500, batch 100%, train loss 0.0752, valid loss 0.2739, cost 0.3 min
[[04/23/2020 21:22:46]] epoch 1017 / 1500, batch 100%, train loss 0.2905, valid loss 0.2694, cost 0.2 min
[[04/23/2020 21:23:04]] epoch 1018 / 1500, batch 100%, train loss 0.0606, valid loss 0.2573, cost 0.3 min
[[04/23/2020 21:23:22]] epoch 1019 / 1500, batch 100%, train loss 0.1953, valid loss 0.2630, cost 0.3 min
[[04/23/2020 21:23:37]] epoch 1020 / 1500, batch 100%, train loss 0.0423, valid loss 0.2602, cost 0.3 min
[[04/23/2020 21:23:53]] epoch 1021 / 1500, batch 100%, train loss 0.1438, valid loss 0.2616, cost 0.3 min
[[04/23/2020 21:24:10]] epoch 1022 / 1500, bat

[[04/23/2020 21:44:02]] epoch 1090 / 1500, batch 100%, train loss 0.0458, valid loss 0.2745, cost 0.2 min
[[04/23/2020 21:44:22]] epoch 1091 / 1500, batch 100%, train loss 0.2495, valid loss 0.2683, cost 0.3 min
[[04/23/2020 21:44:42]] epoch 1092 / 1500, batch 100%, train loss 0.1088, valid loss 0.2675, cost 0.3 min
[[04/23/2020 21:44:57]] epoch 1093 / 1500, batch 100%, train loss 0.2080, valid loss 0.2664, cost 0.3 min
[[04/23/2020 21:45:16]] epoch 1094 / 1500, batch 100%, train loss 0.1093, valid loss 0.2658, cost 0.3 min
[[04/23/2020 21:45:31]] epoch 1095 / 1500, batch 100%, train loss 0.1449, valid loss 0.2691, cost 0.2 min
[[04/23/2020 21:45:49]] epoch 1096 / 1500, batch 100%, train loss 0.1907, valid loss 0.2626, cost 0.3 min
[[04/23/2020 21:46:10]] epoch 1097 / 1500, batch 100%, train loss 0.4063, valid loss 0.2605, cost 0.4 min
[[04/23/2020 21:46:24]] epoch 1098 / 1500, batch 100%, train loss 0.1397, valid loss 0.2650, cost 0.2 min
[[04/23/2020 21:46:42]] epoch 1099 / 1500, bat

[[04/24/2020 09:25:33]] epoch 1167 / 1500, batch 100%, train loss 0.5044, valid loss 0.2877, cost 0.3 min
[[04/24/2020 09:25:48]] epoch 1168 / 1500, batch 100%, train loss 0.0924, valid loss 0.2801, cost 0.2 min
[[04/24/2020 09:26:07]] epoch 1169 / 1500, batch 100%, train loss 0.0777, valid loss 0.2802, cost 0.3 min
[[04/24/2020 09:26:20]] epoch 1170 / 1500, batch 100%, train loss 0.1355, valid loss 0.2725, cost 0.2 min
[[04/24/2020 09:26:36]] epoch 1171 / 1500, batch 100%, train loss 0.3390, valid loss 0.2699, cost 0.3 min
[[04/24/2020 09:26:59]] epoch 1172 / 1500, batch 100%, train loss 0.2015, valid loss 0.2651, cost 0.4 min
[[04/24/2020 09:27:18]] epoch 1173 / 1500, batch 100%, train loss 0.0533, valid loss 0.2692, cost 0.3 min


In [None]:
# GPU tensors for the test span; these rebind the NumPy versions of the same names.
test_xy = torch.as_tensor(xy[:, :, test_idx]).cuda()
test_yf = torch.as_tensor(xy_num_features[:, :, test_idx]).cuda()
# Encoder inputs: calendar features followed by lag/validity features on axis 1.
test_xf = torch.as_tensor(
    np.concatenate((xy_num_features, x_num_features), axis=1)[:, :, test_idx]
).cuda()

def plot(x_true, y_true, y_pred):
    """Draw one figure per series in `power.index`.

    Each figure shows the encoder history `x_true[idx]`, followed on the same
    time axis by the predicted (`y_pred[idx]`) and actual (`y_true[idx]`) values.
    """
    history_len = x_true.shape[1]
    enc_ticks = np.arange(history_len)
    dec_ticks = history_len + np.arange(y_pred.shape[1])
    for idx, name in enumerate(power.index):
        _, ax = plt.subplots(figsize=(12, 3))
        ax.plot(enc_ticks, x_true[idx])
        ax.plot(dec_ticks, y_pred[idx], label='pred')
        ax.plot(dec_ticks, y_true[idx], label='true')
        ax.set_title(name)
        ax.legend()

def wmape(y_hat, y, period=24):
    """Weighted MAPE per consecutive window of `period` samples.

    Parameters
    ----------
    y_hat : array-like
        Predictions, aligned with `y`.
    y : array-like
        Actual values; must expose ``.shape`` and support slicing.
    period : int, default 24
        Window length in samples (24 corresponds to one day of hourly data).

    Returns
    -------
    list
        One score per full window: ``sum(|y - y_hat|) / sum(y)``. Trailing
        samples that do not fill a window are ignored. A window whose actuals
        sum to zero is not guarded, so it yields inf or nan.
    """
    scores = []
    for w in range(y.shape[0] // period):
        window = slice(w * period, (w + 1) * period)
        actual = y[window]
        scores.append(np.abs(actual - y_hat[window]).sum() / np.sum(actual))
    return scores

def metric(y_true, y_pred):
    """Per-series `wmape` scores as a DataFrame.

    Row `i` of `y_true` / `y_pred` is scored for the `i`-th entry of
    `power.index`. The result has one column per series and one row per
    window returned by `wmape`.
    """
    return pd.DataFrame({
        name: wmape(y_pred[row], y_true[row])
        for row, name in enumerate(power.index)
    })

def wmape_dataframe(y_hat, y):
    """Weighted MAPE per consecutive window of 96 samples.

    Returns a list with ``sum(|y - y_hat|) / sum(y)`` for every full 96-sample
    window; leftover samples at the end are ignored.
    """
    n_windows = int(y.shape[0] / 96)
    bounds = [(96 * w, 96 * (w + 1)) for w in range(n_windows)]
    return [
        np.abs(y[lo:hi] - y_hat[lo:hi]).sum() / np.sum(y[lo:hi])
        for lo, hi in bounds
    ]

def metric_dataframe(y_true, y_pred):
    """Per-series `wmape_dataframe` scores for DataFrame inputs.

    Positional row `i` of `y_true` / `y_pred` is scored for the `i`-th entry of
    `power.index`; the result has one column per series.
    """
    return pd.DataFrame({
        name: wmape_dataframe(y_pred.iloc[row], y_true.iloc[row])
        for row, name in enumerate(power.index)
    })

def predict(learner, xy, x_feats, y_feats, epoch):
    """Roll the checkpointed model over `xy` one day (24 steps) at a time.

    Parameters
    ----------
    learner : Learner
        Learner whose checkpoint `epoch` is loaded and whose model is used.
    xy : torch.Tensor
        Normalised target; windows are sliced along axis 2.
    x_feats : torch.Tensor
        Encoder numeric features, aligned with `xy` on axis 2.
    y_feats : torch.Tensor
        Decoder numeric features, aligned with `xy` on axis 2.
    epoch : int
        Checkpoint to load via ``learner.load``.

    Returns
    -------
    (x_true, y_true, preds)
        De-normalised arrays (inverse of the log1p / standardisation applied to
        the series): the first ENC_LEN steps of `xy`, the remaining steps of
        `xy`, and the stitched predictions. The first window contributes all
        DEC_LEN predicted steps; each later window contributes only its last 24.

    Notes
    -----
    NOTE(review): the data loaders pass categorical features during training,
    but none are passed to the model here — confirm the model handles that.
    """
    learner.load(epoch)
    # Run inference on the learner's own model so the weights just loaded are
    # the ones used, independent of any notebook-level `model` variable.
    net = learner.model
    net.eval()
    net.cuda()
    preds = []
    days = int(xy.shape[2] / 24 - ENC_LEN / 24 - DEC_LEN/24 + 1)
    # Gradients are not needed for inference; skipping them saves GPU memory.
    with torch.no_grad():
        for day in range(days):
            step = day * 24
            step_pred = net(
                xy[:, :, step: step+ENC_LEN],
                enc_num=x_feats[:, :, step: step+ENC_LEN],
                dec_num=y_feats[:, :, step+ENC_LEN: step+ENC_LEN+DEC_LEN], dec_len=DEC_LEN).cpu().detach().numpy()
            if step == 0:
                preds.append(step_pred)
            else:
                # Consecutive windows overlap; keep only the newest 24 steps.
                preds.append(step_pred[:, :, -24:])
    preds = np.concatenate(preds, axis=2)
    preds = np.expm1(preds.squeeze() * series_std + series_mu)

    x_true = np.expm1(xy[:, :, :ENC_LEN].cpu().numpy().squeeze() * series_std + series_mu)
    y_true = np.expm1(xy[:, :, ENC_LEN:].cpu().numpy().squeeze() * series_std + series_mu)

    return x_true, y_true, preds

In [None]:
# Baseline ("v1") forecasts: restrict to the series present in `power`, ordered
# like `power.index`.
# NOTE(review): this path starts with "./data/" while the first cell reads a
# file of the same name from "../data/" — confirm which location is intended.
norm_data = (
    pd.read_csv("./data/20200315_20200415.csv")
    .drop(['Unnamed: 0', 'model_name'], axis=1)
    .loc[lambda frame: frame.contributor_id.isin(power.index)]
    .reset_index(drop=True)
    .set_index("contributor_id")
    .loc[power.index]
    .reset_index()
)
# Hourly totals per contributor for both the baseline forecast and the actuals.
norm_data = (
    norm_data
    .assign(data_time=lambda frame: pd.to_datetime(frame.data_time))
    .set_index("data_time")
    .groupby("contributor_id")
    .resample('1H')[['forecast_pwr', 'value']]
    .sum()
    .reset_index()
)
# Wide (contributor x time) tables, skipping the first 48 time columns.
norm_true, norm_pred = (
    norm_data.pivot(index='contributor_id', columns='data_time', values=column).iloc[:, 48:]
    for column in ('value', 'forecast_pwr')
)


x_true, y_true, y_pred  = predict(learner, test_xy, test_xf, test_yf, 1014)
# Mean per-series score of this model ("wave") next to the baseline ("v1").
wave_scores = metric(y_true, y_pred).mean().rename("wave")
v1_scores = metric(norm_true.values, norm_pred.values).mean().rename("v1")
scores = pd.DataFrame([wave_scores, v1_scores]).T.dropna()

In [None]:
# Draw one figure per series: encoder history, then predicted vs. actual values.
plot(x_true, y_true, y_pred)