In [26]:
import torch
import pandas as pd
import torch.nn as nn
from torch.utils import data
import numpy as np

import os

In [6]:
# Dataset and checkpoint locations. Each can be overridden through an
# environment variable (SC_DATA_DIR / SC_MODEL_DIR) so the notebook can run on
# a machine other than the author's; the original absolute paths stay the
# defaults, so behaviour is unchanged when the variables are not set.
DATA_DIR = os.environ.get('SC_DATA_DIR', '/Users/aplle/Code/MathModeling/SC/dataset1')
MODEL_DIR = os.environ.get('SC_MODEL_DIR', '/Users/aplle/Code/MathModeling/SC/models')

In [14]:
# Load the three splits from DATA_DIR in a fixed order: train, eval, test.
train_df, eval_df, test_df = (
    pd.read_csv(f"{DATA_DIR}/{split}.csv") for split in ("train", "eval", "test")
)

# Normalisation constants used by preprocess_df.
POWER_DIVISOR = 1000.0  # 'Power (kW)' is divided by this value
YEAR_MIN = 2016         # year mapped to 0.0 after scaling
YEAR_MAX = 2018         # year mapped to 1.0 after scaling

def is_leap(yr:int):
    """Return True when `yr` is a leap year under the Gregorian rule.

    A year is a leap year if it is divisible by 4, except for century years,
    which are leap years only when divisible by 400.
    """
    divisible_by_4 = yr % 4 == 0
    century_year = yr % 100 == 0
    return divisible_by_4 and (not century_year or yr % 400 == 0)

def get_ndays(yr:int):
    """Return the number of days in year `yr`: 366 for leap years, else 365."""
    if is_leap(yr):
        return 366
    return 365

def preprocess_df(df):
    """Return a cleaned, scaled copy of a raw split; the input is left untouched.

    The original implementation modified `df` in place, so running it a second
    time on the same frame failed with a KeyError on 'Power (kW)'. This version
    builds a new frame, which makes the call safe to repeat on the raw data.

    Steps, in order:
      * drop rows whose 'Power (kW)' is missing, rename that column to 'Power'
        and divide it by POWER_DIVISOR;
      * drop the 'Day' and 'Span' columns;
      * divide 'Days_from_NYD' by the number of days in the row's year
        (uses the raw 'Year', so this happens before 'Year' is rescaled);
      * rescale 'Year' so YEAR_MIN -> 0 and YEAR_MAX -> 1;
      * turn 'Time' ('%H:%M:%S' strings) into a fraction of the day
        (seconds are ignored);
      * divide 'Month' by 12 and 'Weekday' by 6;
      * map 'Region' names to the integer codes 0..3.

    Args:
        df: raw frame with the columns 'Power (kW)', 'Days_from_NYD', 'Year',
            'Time', 'Day', 'Span', 'Month', 'Weekday' and 'Region'.

    Returns:
        A new DataFrame with the transformed columns, in the same column order
        the in-place version produced.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    out = (
        df.dropna(subset=['Power (kW)'])
          .rename(columns={'Power (kW)': 'Power'})
          .drop(columns=['Day', 'Span'])
    )

    out['Power'] = out['Power'] / POWER_DIVISOR

    # Must run before 'Year' is normalised: get_ndays needs the calendar year.
    out['Days_from_NYD'] = out['Days_from_NYD'] / out['Year'].apply(get_ndays)

    out['Year'] = out['Year'] - YEAR_MIN
    out['Year'] = out['Year'] / (YEAR_MAX - YEAR_MIN)

    clock = pd.to_datetime(out['Time'], format='%H:%M:%S')
    out['Time'] = (clock.dt.hour * 60 + clock.dt.minute) / (24 * 60)

    out['Month'] = out['Month'] / 12.0

    # NOTE(review): dividing by 6 presumably assumes Weekday is 0..6 -- confirm.
    out['Weekday'] = out['Weekday'] / 6.0

    # NOTE(review): a region name that is not listed here becomes NaN, and
    # groupby("Region") later skips NaN keys silently -- verify the data only
    # contains these four names.
    out['Region'] = out['Region'].map({
        'Commercial': 0,
        'Office': 1,
        'Public': 2,
        'Residential': 3
    })

    return out

# The training split defines the reference column layout.
train_df = preprocess_df(train_df)
_train_columns = train_df.columns.copy()

# Eval and test are preprocessed the same way and then aligned to the training
# columns; any column they lack is created and filled with 0.
eval_df, test_df = (
    preprocess_df(split).reindex(columns=_train_columns, fill_value=0)
    for split in (eval_df, test_df)
)


In [28]:
# Show the preprocessed training frame (the rendering below is truncated to
# the first and last rows).
train_df

Unnamed: 0,Time,Power,Year,Month,Weekday,Region,Days_from_NYD
0,0.000000,0.39240,0.0,0.083333,0.666667,3,0.000000
1,0.003472,0.38900,0.0,0.083333,0.666667,3,0.000000
2,0.006944,0.36740,0.0,0.083333,0.666667,3,0.000000
3,0.010417,0.37117,0.0,0.083333,0.666667,3,0.000000
4,0.013889,0.35280,0.0,0.083333,0.666667,3,0.000000
...,...,...,...,...,...,...,...
826898,0.986111,0.16480,1.0,0.166667,0.333333,2,0.158904
826899,0.989583,0.14880,1.0,0.166667,0.333333,2,0.158904
826900,0.993056,0.15440,1.0,0.166667,0.333333,2,0.158904
826901,0.996528,0.15240,1.0,0.166667,0.333333,2,0.158904


In [11]:
class PowerTCNDataset(data.Dataset):
    """Sliding-window samples for next-step power prediction, built per region.

    Rows are grouped by the 'Region' column. Within a group, every row index
    ``t >= hist_len`` yields one sample:

      * ``x_hist``: the ``hist_len`` 'Power' values immediately before row ``t``;
      * ``x_time``: the ``time_cols`` values of row ``t``;
      * ``x_loc`` : the group's region key, as an integer;
      * ``y``     : the 'Power' value of row ``t``.

    Windows never span two regions. Groups with ``hist_len`` rows or fewer
    contribute no samples.

    The windows are produced with ``Tensor.unfold`` (one vectorised call per
    region) rather than a Python loop over rows followed by ``torch.tensor`` on
    a list of ndarrays, which is a very slow conversion path. The resulting
    attributes have the same shapes and dtypes as before.

    Args:
        df: frame with a 'Region' column, a 'Power' column and all `time_cols`.
        hist_len: number of past power values per sample (positive int).
        time_cols: list of column names used as time features.

    Attributes:
        x_hist: float32 tensor [N, hist_len].
        x_time: float32 tensor [N, len(time_cols)].
        x_loc: int64 tensor [N].
        y: float32 tensor [N].
    """

    def __init__(self, df, hist_len, time_cols):
        self.hist_len = hist_len
        self.time_cols = time_cols

        hist_parts, time_parts, loc_parts, target_parts = [], [], [], []

        for loc, g in df.groupby("Region"):
            n_samples = len(g) - hist_len
            if n_samples <= 0:
                # Not enough rows for even one full history window.
                continue

            # astype() returns a fresh, writable array, which from_numpy needs.
            power = torch.from_numpy(g["Power"].to_numpy().astype(np.float32))
            time_feat = torch.from_numpy(g[time_cols].to_numpy().astype(np.float32))

            # unfold yields len(g) - hist_len + 1 windows; the last one has no
            # following target row, so only the first n_samples are kept.
            hist_parts.append(power.unfold(0, hist_len, 1)[:n_samples])
            time_parts.append(time_feat[hist_len:])
            loc_parts.append(torch.full((n_samples,), int(loc), dtype=torch.long))
            target_parts.append(power[hist_len:])

        if hist_parts:
            # cat copies the strided views into contiguous storage.
            self.x_hist = torch.cat(hist_parts)
            self.x_time = torch.cat(time_parts)
            self.x_loc = torch.cat(loc_parts)
            self.y = torch.cat(target_parts)
        else:
            # No region had enough rows: expose a well-formed empty dataset.
            self.x_hist = torch.empty((0, hist_len), dtype=torch.float32)
            self.x_time = torch.empty((0, len(time_cols)), dtype=torch.float32)
            self.x_loc = torch.empty((0,), dtype=torch.long)
            self.y = torch.empty((0,), dtype=torch.float32)

    def __len__(self):
        """Number of samples across all regions."""
        return self.x_loc.shape[0]

    def __getitem__(self, idx):
        """Return ``(x_hist [1, hist_len], x_time [d_time], x_loc, y)`` for sample `idx`."""
        return (
            self.x_hist[idx].unsqueeze(0),  # add the channel axis: [1, k]
            self.x_time[idx],               # [d_time]
            self.x_loc[idx],
            self.y[idx]
        )

In [18]:
# Number of past power readings fed to the model for each prediction.
# NOTE(review): the Time column steps by ~0.003472 of a day (5 minutes) in the
# frame shown above, so 288 steps would be one day of history -- confirm.
HIST_LEN = 288

# Columns handed to the model as time features for the predicted row.
TIME_COLS = ["Time", "Year", "Month", "Weekday", "Days_from_NYD"]

train_ds, eval_ds = (
    PowerTCNDataset(split, HIST_LEN, TIME_COLS) for split in (train_df, eval_df)
)

# Only the training loader shuffles; evaluation keeps the dataset order.
train_loader = data.DataLoader(train_ds, shuffle=True, batch_size=64)
eval_loader = data.DataLoader(eval_ds, shuffle=False, batch_size=64)

In [22]:
from torch.nn.utils.parametrizations import weight_norm

class TCNBlock(nn.Module):
    def __init__(self, in_ch, out_ch, kernel_size, dilation, dropout):
        super().__init__()
        padding = (kernel_size - 1) * dilation

        self.conv1 = weight_norm(
            nn.Conv1d(in_ch, out_ch, kernel_size,
                      padding=padding, dilation=dilation)
        )
        self.conv2 = weight_norm(
            nn.Conv1d(out_ch, out_ch, kernel_size,
                      padding=padding, dilation=dilation)
        )

        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)

        self.downsample = (
            nn.Conv1d(in_ch, out_ch, 1)
            if in_ch != out_ch else None
        )

    def forward(self, x):
        out = self.conv1(x)
        out = out[..., :-self.conv1.padding[0]]  # causal crop
        out = self.relu(out)
        out = self.dropout(out)

        out = self.conv2(out)
        out = out[..., :-self.conv2.padding[0]]
        out = self.relu(out)
        out = self.dropout(out)

        res = x if self.downsample is None else self.downsample(x)
        return out + res

In [23]:
class PowerTCN(nn.Module):
    """TCN encoder over the power history, fused with time and region features.

    A stack of `layers` TCNBlocks (dilation doubling per layer: 1, 2, 4, ...)
    encodes the history; the features at the last time step are concatenated
    with the time features and a learned region embedding, and an MLP maps the
    result to one scalar per sample.

    Args:
        hist_len: accepted for interface compatibility; not used by the model.
        time_dim: number of time features per sample.
        num_locations: number of distinct region ids for the embedding table.
        loc_emb_dim: size of the region embedding.
        channels: channel width of every TCN block.
        layers: number of TCN blocks.
        kernel_size: kernel width of the TCN convolutions.
        dropout: dropout probability inside the TCN blocks (the MLP uses 0.1).
    """

    def __init__(
        self,
        hist_len,
        time_dim,
        num_locations=4,
        loc_emb_dim=6,
        channels=64,
        layers=7,
        kernel_size=3,
        dropout=0.1,
    ):
        super().__init__()

        # The first block lifts the single input channel to `channels`; all
        # later blocks keep the width.
        self.tcn = nn.Sequential(*[
            TCNBlock(
                1 if depth == 0 else channels,
                channels,
                kernel_size,
                dilation=2 ** depth,
                dropout=dropout,
            )
            for depth in range(layers)
        ])
        self.loc_emb = nn.Embedding(num_locations, loc_emb_dim)

        fused_dim = channels + time_dim + loc_emb_dim
        self.mlp = nn.Sequential(
            nn.Linear(fused_dim, 64),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )

    def forward(self, x_hist, x_time, x_loc):
        """
        x_hist: [B, 1, k]
        x_time: [B, d_time]
        x_loc : [B]

        Returns a tensor of shape [B].
        """
        last_step = self.tcn(x_hist)[:, :, -1]   # [B, C, k] -> [B, C]
        loc_vec = self.loc_emb(x_loc)            # [B, loc_emb_dim]

        fused = torch.cat((last_step, x_time, loc_vec), dim=1)
        return self.mlp(fused).squeeze(1)


In [24]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = "cpu"
if torch.cuda.is_available():
    device = "cuda"

model = PowerTCN(time_dim=len(TIME_COLS), hist_len=HIST_LEN)
model = model.to(device)


In [31]:
def train_epoch(loader, opt, loss_fn, train=True, net=None, dev=None):
    """Run one pass over `loader` and return the mean per-sample loss.

    Args:
        loader: iterable of ``(x_hist, x_time, x_loc, y)`` batches.
        opt: optimizer; only stepped when `train` is True.
        loss_fn: callable ``loss_fn(pred, y)`` returning a scalar tensor.
            The running total weights each batch loss by the batch size,
            which assumes a mean-reduced loss.
        train: True puts the network in training mode and updates weights;
            False puts it in eval mode and disables gradient tracking.
        net: network to run. Defaults to the notebook-level `model`, which is
            what this function always used before.
        dev: device the batches are moved to. Defaults to the notebook-level
            `device`.

    Returns:
        Sum of (batch loss * batch size) divided by the number of samples
        actually iterated. Counting the samples seen, rather than using
        ``len(loader.dataset)``, keeps the mean correct when the loader drops
        the last batch or uses a sampler.

    Raises:
        ValueError: if the loader yields no samples.
    """
    net = model if net is None else net
    dev = device if dev is None else dev

    if train:
        net.train()
    else:
        net.eval()

    total_loss = 0.0
    n_seen = 0

    with torch.set_grad_enabled(train):
        for x_hist, x_time, x_loc, y in loader:
            x_hist = x_hist.to(dev)
            x_time = x_time.to(dev)
            x_loc = x_loc.to(dev)
            y = y.to(dev)

            pred = net(x_hist, x_time, x_loc)
            batch_loss = loss_fn(pred, y)

            if train:
                opt.zero_grad()
                batch_loss.backward()
                opt.step()

            total_loss += batch_loss.item() * len(y)
            n_seen += len(y)

    if n_seen == 0:
        raise ValueError("train_epoch received a loader that yields no samples")

    return total_loss / n_seen


In [32]:
import torch.optim as optim

def train(model, train_loader, eval_loader, epochs=50, lr=1e-3, weight_decay=1e-4, checkpoint_path="best_tcn.pt"):
    """Train with AdamW and Huber loss, checkpointing the best validation epoch.

    After every epoch the validation loss is computed; whenever it improves on
    the best value so far, ``model.state_dict()`` is written to
    ``MODEL_DIR/checkpoint_path``. Saving is best-effort: MODEL_DIR is created
    if needed, and a failure to save emits a RuntimeWarning instead of being
    discarded silently or aborting the run.

    NOTE(review): train_epoch is called here without a network argument, so it
    operates on the notebook-level `model`. The `model` parameter of this
    function is used for the optimizer and the checkpoint only; pass the same
    object as the notebook-level `model`.

    Args:
        model: network whose parameters are optimised and checkpointed.
        train_loader: loader of training batches.
        eval_loader: loader of validation batches.
        epochs: number of epochs to run.
        lr: AdamW learning rate.
        weight_decay: AdamW weight decay.
        checkpoint_path: file name (relative to MODEL_DIR) for the best
            weights, or None to disable checkpointing.

    Returns:
        The lowest validation loss observed (``inf`` if `epochs` is 0).
    """
    import warnings

    best_eval = float("inf")

    opt = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)
    loss_fn = nn.HuberLoss(delta=1.0)

    for epoch in range(epochs):
        train_loss = train_epoch(train_loader, opt, loss_fn, train=True)
        eval_loss = train_epoch(eval_loader, opt, loss_fn, train=False)

        print(f"Epoch {epoch:03d} | train {train_loss:.4f} | val {eval_loss:.4f}")

        if eval_loss < best_eval:
            best_eval = eval_loss
            if checkpoint_path is not None:
                target = os.path.join(MODEL_DIR, checkpoint_path)
                try:
                    # A missing directory was the most likely silent failure.
                    os.makedirs(os.path.dirname(target) or ".", exist_ok=True)
                    torch.save(model.state_dict(), target)
                except Exception as exc:
                    # Keep training, but make the lost checkpoint visible.
                    warnings.warn(
                        f"could not save checkpoint to {target!r}: {exc}",
                        RuntimeWarning,
                    )

    return best_eval


In [33]:
# Start training with the default hyper-parameters and checkpoint name.
train(model=model, train_loader=train_loader, eval_loader=eval_loader)

KeyboardInterrupt: 