In [None]:
from datastreams import generate_ar1_time_series, generate_var_time_series, generate_hybrid_time_series, generate_crypto_time_series
from models import *
import numpy as np
import pandas as pd
from driftdetector import DriftDetector
from tqdm import tqdm

In [None]:
def default_drift_action(model, optimizer, magnitude, warning):
    # Reset RNN hidden state if present
    if hasattr(model, 'reset_hidden'):
        model.reset_hidden(batch_size=1)
    # Halve the learning rate
    for param_group in optimizer.param_groups:
        param_group['lr'] *= 0.5
    # Freeze early layers (example: all except final linear)
    for name, param in model.named_parameters():
        if 'fc' in name or 'network' in name:
            param.requires_grad = True
        else:
            param.requires_grad = False

In [None]:
def make_xy(data, seq_len):
    x = []
    y = []
    for i in range(seq_len, data.shape[0]):
        x.append(data[i-seq_len:i])
    for i in range(data.shape[0] - 1):
        y.append(data[i+1])

    x = np.array(x)
    x = x[:-1]   
    y = np.array(y)
    y = y[seq_len:] 
    return x, y

In [None]:
def array2generator(arr: np.ndarray):
    for element in arr:
        yield element

In [None]:
def train_model(model, detector, x, y, optimizer, loss_fn, n_epochs, device):
    all_loss = []
    for epoch in range(n_epochs):
        epoch_loss = 0
        model.train()
        for x_batch, y_batch in zip(x, y):
            x_ten = torch.tensor(x_batch, dtype=torch.float32).unsqueeze(0).to(device)
            y_ten = torch.tensor(y_batch, dtype=torch.float32).unsqueeze(0).to(device)
            pred = model(x_ten)
            optimizer.zero_grad()
            loss = loss_fn(pred, y_ten)
            loss.backward()
            optimizer.step()
            epoch_loss += loss.item()
            drift, warning, magnitude = detector.update(loss.item())
            if drift:
                default_drift_action(model, optimizer, magnitude, warning)
        all_loss.append(epoch_loss / len(x))
    return model, all_loss

In [None]:
def eval_model(model, x, y, loss_fn, device):
    total_loss = 0
    model.eval()
    with torch.no_grad():
        for x_batch, y_batch in zip(x, y):
            x_ten = torch.tensor(x_batch, dtype=torch.float32).unsqueeze(0).to(device)
            y_ten = torch.tensor(y_batch, dtype=torch.float32).unsqueeze(0).to(device)
            pred = model(x_ten)
            loss = loss_fn(pred, y_ten)
            total_loss += loss.item()
    return total_loss / len(x)

In [None]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
streams = [generate_ar1_time_series, generate_var_time_series, generate_hybrid_time_series, generate_crypto_time_series]
n_dims = [1, 3, 5]
n_pointss = [2000, 5000]
detector_types = ["ADWIN", "PageHinkley"]
seq_lens = [16, 32]

In [None]:
hidden_sizess = [[16], [32], [64]]

In [None]:
len(n_dims) * len(n_pointss) * len(detector_types) * len(seq_lens) * len(hidden_sizess)

In [None]:
for stream_func in streams:
    for n_dim in tqdm(n_dims):
        for n_points in n_pointss:
            if stream_func == generate_crypto_time_series:
                data, _ = stream_func("../data/btc_data.csv", n_points)
            else:
                data, _ = stream_func(n_points, n_dim)
            for detector_type in detector_types:
                detector = DriftDetector(method=detector_type)
                for seq_len in seq_lens:
                    for hidden_sizes in hidden_sizess:
                        if stream_func == generate_crypto_time_series:
                            n_dim = 1
                        x_train, y_train = make_xy(data[:int(data.shape[0] * 0.7)], seq_len)
                        x_test, y_test = make_xy(data[int(data.shape[0] * 0.7):], seq_len)
                        model = TimeSeriesMLP(
                            input_size=n_dim * seq_len,
                            hidden_sizes=hidden_sizes,
                            output_size=n_dim,
                        )
                        model.to(device)
                        optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
                        loss_fn = nn.MSELoss()
                        model, train_loss = train_model(
                            model,
                            detector,
                            x_train,
                            y_train,
                            optimizer,
                            loss_fn,
                            n_epochs=1,
                            device=device,
                        )
                        test_loss = eval_model(
                            model,
                            x_test,
                            y_test,
                            loss_fn,
                            device
                        )
                        # save stream, n_dim, n_points, detector_type, seq_len, hidden_sizes, train_loss, test_loss to CVS file
                        with open("results.csv", "a") as f:
                            f.write(
                                f"{stream_func.__name__},{n_dim},{n_points},{detector_type},{seq_len},{hidden_sizes},{train_loss[-1]},{test_loss}\n"
                            )

In [None]:
df = pd.read_csv("results.csv")
df

Unnamed: 0,timeseries,n_dim,n_points,detector,window_size,hidden_layer,train_loss,test_loss
0,generate_ar1_time_series,1,2000,ADWIN,16,[16],0.245260,0.643221
1,generate_ar1_time_series,1,2000,ADWIN,16,[32],0.248427,0.679030
2,generate_ar1_time_series,1,2000,ADWIN,16,[64],0.242088,0.630990
3,generate_ar1_time_series,1,2000,ADWIN,32,[16],0.257208,0.813712
4,generate_ar1_time_series,1,2000,ADWIN,32,[32],0.245249,0.693684
...,...,...,...,...,...,...,...,...
74,generate_ar1_time_series,5,5000,PageHinkley,16,[64],0.305211,0.631470
75,generate_ar1_time_series,5,5000,PageHinkley,32,[16],0.325427,0.752865
76,generate_ar1_time_series,5,5000,PageHinkley,32,[32],0.311813,0.730067
77,generate_ar1_time_series,5,5000,PageHinkley,32,[64],0.299909,0.693432


In [169]:
df[df["n_dim"] == 1]

Unnamed: 0,timeseries,n_dim,n_points,detector,window_size,hidden_layer,train_loss,test_loss
0,generate_ar1_time_series,1,2000,ADWIN,16,[16],0.24526,0.643221
1,generate_ar1_time_series,1,2000,ADWIN,16,[32],0.248427,0.67903
2,generate_ar1_time_series,1,2000,ADWIN,16,[64],0.242088,0.63099
3,generate_ar1_time_series,1,2000,ADWIN,32,[16],0.257208,0.813712
4,generate_ar1_time_series,1,2000,ADWIN,32,[32],0.245249,0.693684
5,generate_ar1_time_series,1,2000,ADWIN,32,[64],0.23623,0.628164
12,generate_ar1_time_series,1,2000,PageHinkley,16,[16],0.238956,0.52928
13,generate_ar1_time_series,1,2000,PageHinkley,16,[32],0.242879,0.572346
14,generate_ar1_time_series,1,2000,PageHinkley,16,[64],0.239004,0.549534
15,generate_ar1_time_series,1,2000,PageHinkley,32,[16],0.249238,0.597112
