In [1]:
from datastreams import generate_trend_drift, generate_seasonal_drift, generate_ar1_drift
from models import *
import numpy as np
import pandas as pd
from driftdetector import DriftDetector
from tqdm import tqdm

In [10]:
def increase_lr_only(optimizer, drifted, decay_state, factor=2.0, max_lr=0.01):
    if drifted:
        for i, pg in enumerate(optimizer.param_groups):
            base_lr = decay_state['base_lrs'][i]
            pg['lr'] = min(base_lr * factor, max_lr)

def increase_and_decay_lr(optimizer, drifted, decay_state, factor=2.0, decay_steps=10):
    base_lrs = decay_state['base_lrs']
    
    if drifted:
        for i, pg in enumerate(optimizer.param_groups):
            pg['lr'] = base_lrs[i] * factor
        decay_state['current_decay_step'] = decay_steps

    elif decay_state['current_decay_step'] > 0:
        decay_state['current_decay_step'] -= 1
        for i, pg in enumerate(optimizer.param_groups):
            boosted_lr = base_lrs[i] * factor
            ratio = decay_state['current_decay_step'] / decay_steps
            pg['lr'] = base_lrs[i] + (boosted_lr - base_lrs[i]) * ratio

def do_nothing(optimizer, drifted, decay_state):
    pass

In [29]:
def train_model(model, detector, data, seq_len, optimizer, loss_fn, device, drift_action):
    model.train()
    all_loss = []
    buff = []
    pred = None

    # For drift action state
    base_lrs = [pg['lr'] for pg in optimizer.param_groups]
    decay_state = {
        'base_lrs': base_lrs,
        'current_decay_step': 0
    }

    step = 0
    mae_values = []
    mae_values_full = []

    for x in data:
        step += 1
        buff.append(x)
        if len(buff) < seq_len:
            continue

        buff_array = np.array(buff).reshape(1, -1)
        x_ten = torch.tensor(buff_array, dtype=torch.float32).to(device)

        # Inference
        pred = model(x_ten)
        target = torch.tensor(x).view(1, -1).float().to(device)

        # Full MAE tracking
        mae_full = torch.abs(pred - target).mean().item()
        mae_values_full.append(mae_full)

        # Post-warmup MAE tracking
        if step >= 200:
            mae_values.append(mae_full)

        # Training step
        if pred is not None:
            optimizer.zero_grad()
            loss = loss_fn(pred, target)
            loss.backward()
            optimizer.step()
            all_loss.append(loss.item())

            # Drift handling
            if detector.update(loss.item()):
                drift_action(optimizer, drifted=True, decay_state=decay_state)

        drift_action(optimizer, drifted=False, decay_state=decay_state)
        buff.pop(0)

    avg_loss = sum(all_loss) / len(all_loss) if all_loss else 0.0
    avg_mae_post_warmup = sum(mae_values) / len(mae_values) if mae_values else 0.0
    avg_mae_full = sum(mae_values_full) / len(mae_values_full) if mae_values_full else 0.0

    return model, avg_loss, avg_mae_post_warmup, avg_mae_full


In [30]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [31]:
streams = [generate_trend_drift, generate_seasonal_drift, generate_ar1_drift]
n_dims = [1, 3, 5]
n_pointss = [20_000, 50_000]
detector_types = ["ADWIN", "PageHinkley"]
drift_actions = [increase_lr_only, increase_and_decay_lr]
seq_lens = [16, 32, 64]

In [32]:
hidden_sizess = [[16], [32, 32], [64, 64]]

In [33]:
len(streams) * len(n_dims) * len(n_pointss) * len(detector_types) * len(drift_actions) * len(seq_lens) * len(hidden_sizess)

648

In [34]:
for stream_func in streams:
    for n_dim in tqdm(n_dims):
        for n_points in n_pointss:
            data = stream_func(n_points, n_dim)
            for detector_type in detector_types:
                detector = DriftDetector(method=detector_type)
                for drift_action in drift_actions:
                    for seq_len in seq_lens:
                        for hidden_sizes in hidden_sizess:
                            model = TimeSeriesMLP(
                                input_size=n_dim * seq_len,
                                hidden_sizes=hidden_sizes,
                                output_size=n_dim,
                            )
                            model.to(device)
                            optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
                            loss_fn = nn.MSELoss()
                            model, loss, mae_full, mae_warmup = train_model(
                                model,
                                detector,
                                data,
                                seq_len,
                                optimizer,
                                loss_fn,
                                device=device,
                                drift_action=drift_action
                            )
                            # save stream, n_dim, n_points, detector_type, seq_len, hidden_sizes, train_loss, test_loss to CVS file
                            with open("results.csv", "a") as f:
                                f.write(
                                    f"{stream_func.__name__};{n_dim};{n_points};{detector_type};{seq_len};{hidden_sizes};{loss};{mae_full};{mae_warmup}\n"
                                )

  + slope_post * np.log1p(t - drift_point)
  0%|          | 0/3 [03:31<?, ?it/s]


KeyboardInterrupt: 

In [19]:
data.shape

(20000,)

In [24]:
model.train()
all_loss = []
buff = []
pred = None

# For drift action state
base_lrs = [pg['lr'] for pg in optimizer.param_groups]
decay_state = {
    'base_lrs': base_lrs,
    'current_decay_step': 0
}

step = 0
mae_values = []
mae_values_full = []

for x in data:
    step += 1
    buff.append(x)
    if len(buff) < seq_len:
        continue

    buff_array = np.array(buff).reshape(1, -1)
    x_ten = torch.tensor(buff_array, dtype=torch.float32).to(device)

    # Inference
    pred = model(x_ten)
    target = torch.tensor(x).view(1, -1).float().to(device)

    # Full MAE tracking
    mae_full = torch.abs(pred - target).mean().item()
    mae_values_full.append(mae_full)

    # Post-warmup MAE tracking
    if step >= 200:
        mae_values.append(mae_full)

    # Training step
    if pred is not None:
        optimizer.zero_grad()
        loss = loss_fn(pred, target)
        loss.backward()
        optimizer.step()
        all_loss.append(loss.item())

        # Drift handling
        if detector.update(loss.item()):
            drift_action(optimizer, drifted=True, decay_state=decay_state)

    drift_action(optimizer, drifted=False, decay_state=decay_state)
    buff.pop(0)

avg_loss = sum(all_loss) / len(all_loss) if all_loss else 0.0
avg_mae_post_warmup = sum(mae_values) / len(mae_values) if mae_values else 0.0
avg_mae_full = sum(mae_values_full) / len(mae_values_full) if mae_values_full else 0.0

KeyboardInterrupt: 

In [25]:
pred

tensor([[6.8481]], device='cuda:0', grad_fn=<AddmmBackward0>)

In [26]:
target

tensor([[6.8757]], device='cuda:0')

In [27]:
loss

tensor(0.0006, device='cuda:0', grad_fn=<MseLossBackward0>)

In [28]:
mae_values

[0.012820243835449219,
 0.01114654541015625,
 0.008396625518798828,
 0.013548851013183594,
 0.009900093078613281,
 0.0029697418212890625,
 0.011647224426269531,
 0.012702465057373047,
 0.014444828033447266,
 0.010861873626708984,
 0.0011267662048339844,
 0.0035500526428222656,
 0.005248069763183594,
 0.01036691665649414,
 0.008610725402832031,
 0.0018968582153320312,
 0.004134178161621094,
 0.00984048843383789,
 0.01055908203125,
 0.006355762481689453,
 0.0049724578857421875,
 0.0058078765869140625,
 0.007554531097412109,
 0.010727882385253906,
 0.011745929718017578,
 0.01009988784790039,
 0.0021390914916992188,
 0.0053005218505859375,
 0.008620262145996094,
 0.005178928375244141,
 0.003590106964111328,
 0.0051021575927734375,
 0.009943485260009766,
 0.010922908782958984,
 0.0022993087768554688,
 0.008151531219482422,
 0.0146942138671875,
 0.018681764602661133,
 0.009224414825439453,
 0.004824638366699219,
 0.0022449493408203125,
 0.008544445037841797,
 0.019044876098632812,
 0.0190184