In [None]:
%load_ext autoreload
%autoreload 2
    
import numpy as np
from matplotlib import pyplot as plt
import iisignature
import copy
import torch
import torch.nn as nn
from sklearn.metrics import accuracy_score, roc_auc_score
from torch.utils.data import Dataset, TensorDataset, DataLoader
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
import os
import pickle

In [None]:
# Prefer the GPU when PyTorch can see one; otherwise run on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

In [None]:
def generate_multidim_ou_process(n_samples, n_steps, mu=0, theta=0.15, sigma=0.3, n_features=1):
    """
    Simulate independent Ornstein-Uhlenbeck paths with a unit time step.

    Every feature of every sample starts at `mu` and follows
    X[t] = X[t-1] + theta * (mu - X[t-1]) * dt + sigma * sqrt(dt) * N(0, 1).
    Noise comes from the global `np.random` state, so seed it for reproducibility.

    Returns a float32 array of shape (n_samples, n_steps, n_features).
    """
    dt = 1.0
    paths = np.zeros((n_samples, n_steps, n_features), dtype=np.float32)
    paths[:, 0, :] = mu
    for step in range(1, n_steps):
        previous = paths[:, step - 1, :]
        drift = theta * (mu - previous) * dt
        noise = sigma * np.random.normal(size=(n_samples, n_features)) * np.sqrt(dt)
        paths[:, step, :] = previous + (drift + noise)
    return paths

def generate_ou_process(n_samples, n_steps, mu=0, theta=0.15, sigma=0.3, time_feature = False):
    """
    Simulate one-dimensional Ornstein-Uhlenbeck paths with a unit time step.

    Each path starts at `mu` and follows
    X[t] = X[t-1] + theta * (mu - X[t-1]) * dt + sigma * sqrt(dt) * N(0, 1).
    Noise comes from the global `np.random` state, so seed it for reproducibility.

    Parameters
    ----------
    n_samples : number of independent paths.
    n_steps : number of time points per path.
    mu, theta, sigma : long-run mean, mean-reversion speed and volatility.
    time_feature : when True, channel 0 holds a linear 0..1 time grid and the
        process values are stored in channel 1.

    Returns
    -------
    float32 array of shape (n_samples, n_steps, 2) if `time_feature`,
    otherwise (n_samples, n_steps, 1).
    """
    dt = 1.0
    n_channels = 2 if time_feature else 1
    # The process values always live in the last channel.
    value_col = n_channels - 1

    X = np.zeros((n_samples, n_steps, n_channels), dtype=np.float32)
    if time_feature:
        X[:, :, 0] = np.linspace(0, 1, n_steps)
    X[:, 0, value_col] = mu

    for t in range(1, n_steps):
        # Mean reversion must read the previous *process* value. The old code
        # always read channel 0, which is the time grid when time_feature=True.
        prev = X[:, t-1, value_col]
        dX = theta * (mu - prev) * dt + sigma * np.random.normal(size=n_samples) * np.sqrt(dt)
        X[:, t, value_col] = prev + dX
    return X

In [None]:
def AddTimeline(X):
    '''
    Prepend a normalised time channel to every path.

    X: array of shape (samples, steps, features)

    Returns an array of shape (samples, steps, features + 1): feature 0 runs
    linearly from 0 to 1 over the steps, the remaining features are X unchanged.
    '''
    n_paths, n_steps, _ = X.shape
    # One shared time grid, broadcast to (samples, steps, 1).
    grid = np.linspace(0, 1, n_steps).reshape(1, n_steps, 1)
    time_channel = np.broadcast_to(grid, (n_paths, n_steps, 1))
    # The timeline is inserted as the first feature.
    return np.concatenate((time_channel, X), axis=2)

In [None]:
def sig_data(X, sig_level) :
    """
    Compute the truncated path signature of every sample.

    X: array of shape (samples, steps, features)
    sig_level: truncation level passed to iisignature

    Returns a float32 array of shape
    (samples, iisignature.siglength(features, sig_level)).
    Prints a warning when the paths have a single feature.
    """
    n_paths, _, n_channels = X.shape
    if n_channels == 1:
        print('Warning: only 1 feature detected, adding timeline might be needed')
    width = iisignature.siglength(n_channels, sig_level)
    signatures = np.zeros((n_paths, width), dtype=np.float32)
    for idx, path in enumerate(X):
        signatures[idx] = iisignature.sig(path, sig_level)
    return signatures

def logsig_data(X, sig_level) :
    """
    Compute the truncated log-signature of every sample.

    X: array of shape (samples, steps, features)
    sig_level: truncation level passed to iisignature

    Returns a float32 array of shape
    (samples, iisignature.logsiglength(features, sig_level)).
    Prints a warning when the paths have a single feature.
    """
    (n_samples, n_steps, n_features) = X.shape
    if (n_features == 1) :
        print('Warning: only 1 feature detected, adding timeline might be needed')
    s = iisignature.prepare(n_features, sig_level)
    # The output width must use the same dimension as the prepared object and
    # the paths. The previous `n_features + 1` allocated rows of the wrong
    # length, so the per-sample assignment below could not fit.
    sig_length = iisignature.logsiglength(n_features, sig_level)
    Y = np.zeros((n_samples, sig_length), dtype=np.float32)
    for i in range(n_samples):
        Y[i] = iisignature.logsig(X[i, :, :], s)
    return Y

def RandomizedSignatureBatch(X_batch, k, activation='tanh', seed=None):
    '''
    Compute a randomized signature for a batch of paths.

    The random matrices A (features, k, k), the biases b (features, k) and the
    initial states are drawn from a standard normal using `seed`; A and b are
    shared by all samples. The state is updated step by step as
        Z[j] = Z[j-1] + sum_i activation(Z[j-1] @ A[i].T + b[i]) * dX[j-1, i]

    X_batch : (samples, timesteps, features)

    activation : 'linear' (x / sqrt(k)), 'tanh', 'relu' or a callable

    seed : random seed

    Returns Z_batch : (samples, steps, k)
    k-dimensional randomized signature at each time step for each sample.
    Raises ValueError for an unknown activation.
    '''
    X_batch = np.asarray(X_batch)
    n_paths, n_steps, n_channels = X_batch.shape
    rng = np.random.default_rng(seed)

    # Draw order (A, b, initial state) determines the result for a given seed.
    A = rng.standard_normal(size=(n_channels, k, k))
    b = rng.standard_normal(size=(n_channels, k))

    Z_batch = np.zeros((n_paths, n_steps, k))
    Z_batch[:, 0, :] = rng.standard_normal(size=(n_paths, k))

    if activation == 'linear':
        sigma = lambda v: v / np.sqrt(k)
    elif activation == 'tanh':
        sigma = np.tanh
    elif activation == 'relu':
        sigma = lambda v: np.maximum(v, 0)
    elif callable(activation):
        sigma = activation
    else:
        raise ValueError("Unsupported activation")

    increments = np.diff(X_batch, axis=1)

    for step in range(1, n_steps):
        state = Z_batch[:, step - 1, :]
        delta = np.zeros_like(state)
        for channel, (weight, bias) in enumerate(zip(A, b)):
            # Keep the increment as a column so it broadcasts over the k state dims.
            channel_dx = increments[:, step - 1, channel:channel + 1]
            delta += sigma(state @ weight.T + bias) * channel_dx
        Z_batch[:, step, :] = state + delta

    return Z_batch

In [None]:
def Gen_OU_1_Dataset(train_samples, random_seed) :
    """
    Build a two-class dataset of one-dimensional OU paths (20 steps each).

    Class 1 paths use theta=0.2, sigma=0.4; class 0 paths use theta=0.15,
    sigma=0.3; both have mu=0. The global NumPy RNG is seeded with
    `random_seed`, so the output is reproducible.

    Returns (train, train_labels): `train` has shape (2 * (train_samples // 2), 20, 1)
    with the class-1 paths first, and `train_labels` is an int vector of the
    same length.
    """
    np.random.seed(random_seed)
    steps = 20
    per_class = train_samples // 2
    train_1 = generate_ou_process(per_class, steps, 0, 0.2, 0.4)
    train_2 = generate_ou_process(per_class, steps, 0, 0.15, 0.3)
    train = np.concatenate([train_1, train_2], axis=0)
    # Size the labels from the data actually generated: for an odd
    # `train_samples` the old code produced one more label than paths.
    train_labels = np.zeros(len(train), dtype=int)
    train_labels[:per_class] = 1
    return train, train_labels

In [None]:
def Gen_OU_2_Dataset(train_samples, random_seed) :
    """
    Build a two-class dataset of 10-dimensional OU paths (20 steps each).

    Both classes are generated with mu=0, theta=0.15, sigma=0.3. In every
    class-1 sample one feature (index `i % 10` for sample `i`) is replaced by
    an OU path with theta=0.3, sigma=0.6. The global NumPy RNG is seeded with
    `random_seed`, so the output is reproducible.

    Returns (train, train_labels): `train` has shape (2 * (train_samples // 2), 20, 10)
    with the class-1 paths first, and `train_labels` is an int vector of the
    same length.
    """
    np.random.seed(random_seed)
    steps = 20
    features = 10
    per_class = train_samples // 2
    train_1 = generate_multidim_ou_process(per_class, steps, 0, 0.15, 0.3, features)
    for i in range(len(train_1)):
        # Overwrite a single, rotating feature with the "anomalous" dynamics.
        train_1[i, :, i % features] = generate_ou_process(1, steps, 0, 0.3, 0.6).reshape(steps)
    train_2 = generate_multidim_ou_process(per_class, steps, 0, 0.15, 0.3, features)
    train = np.concatenate([train_1, train_2], axis=0)
    # Size the labels from the data actually generated: for an odd
    # `train_samples` the old code produced one more label than paths.
    train_labels = np.zeros(len(train), dtype=int)
    train_labels[:per_class] = 1
    return train, train_labels

In [None]:
class NN(nn.Module):
    """
    Fully connected binary classifier.

    For every width in `hidden_layers` the network stacks
    Linear -> BatchNorm1d -> LeakyReLU(0.01) -> Dropout(dropout_rate),
    followed by a single-unit Linear layer whose output goes through a sigmoid.
    With an empty `hidden_layers` the model is a plain logistic regression.
    """

    def __init__(self, input_dim, hidden_layers, dropout_rate):
        super().__init__()

        blocks = []
        width = input_dim
        for next_width in hidden_layers:
            dense = nn.Linear(width, next_width)
            nn.init.kaiming_normal_(dense.weight, nonlinearity='leaky_relu', a=0.01)
            blocks += [
                dense,
                nn.BatchNorm1d(next_width),
                nn.LeakyReLU(negative_slope=0.01),
                nn.Dropout(p=dropout_rate),
            ]
            width = next_width

        self.hidden = nn.Sequential(*blocks)
        self.output = nn.Linear(width, 1)
        # Xavier initialisation can be used for the last layer.
        nn.init.xavier_normal_(self.output.weight, gain=nn.init.calculate_gain('sigmoid'))

    def forward(self, x):
        """Return the predicted probability of class 1, shape (batch, 1)."""
        return torch.sigmoid(self.output(self.hidden(x)))

In [None]:
class DualMetricEarlyStopping:
    """
    Early-stopping rule driven by two metrics (accuracy and ROC AUC).

    Training should stop once neither metric has improved, relative to the
    value recorded `patience` epochs earlier, by at least `min_delta`.
    """

    def __init__(self, patience, min_delta):
        """
        patience: int — how many epochs to wait before stopping
        min_delta: float — minimal relative improvement (0.01 = 1%)
        """
        self.patience = patience
        self.min_delta = min_delta
        self.history = []

    def step(self, accuracy, roc_auc):
        """
        Record this epoch's metrics and return True when training should stop.

        Always returns False until more than `patience` epochs were recorded.
        """
        self.history.append((accuracy, roc_auc))
        if len(self.history) <= self.patience:
            return False

        # Baseline epoch followed by the `patience` most recent epochs.
        window = self.history[-self.patience-1:]
        ref_acc, ref_auc = window[0]

        def beats_baseline(acc, auc):
            # Relative gains; the floor avoids dividing by a zero baseline.
            acc_gain = (acc - ref_acc) / max(ref_acc, 1e-6)
            auc_gain = (auc - ref_auc) / max(ref_auc, 1e-6)
            return acc_gain >= self.min_delta or auc_gain >= self.min_delta

        return not any(beats_baseline(acc, auc) for acc, auc in window[1:])
        
def train_NN(model_params, train_params, X, y):
    """
    Train an `NN` binary classifier on (X, y) and report its best hold-out metrics.

    Parameters
    ----------
    model_params : dict with keys "hidden_layers" (list of layer widths) and
        "dropout_rate".
    train_params : dict with keys "num_epochs", "lr", "batch_size",
        "test_size", "weight_decay", "patience", "min_delta" and
        "standard_scaler" (bool; standardise features using training statistics).
    X : 2-D array (samples, features).
    y : 1-D array of 0/1 labels.

    The data is split once (stratified, random_state=42). The model is trained
    with AdamW + BCELoss, and the learning rate is multiplied by 0.8 every
    `num_epochs // 20` epochs (at least every epoch). After each epoch the
    hold-out accuracy and ROC AUC are computed and passed to
    `DualMetricEarlyStopping`.

    Returns
    -------
    dict with the best "accuracy" and "roc_auc" seen on the hold-out split and
    the 1-based epochs at which they were reached ("best_acc_epoch",
    "best_roc_auc_epoch").
    """
    num_epochs = train_params["num_epochs"]
    lr = train_params["lr"]
    batch_size = train_params["batch_size"]
    test_size = train_params["test_size"]
    weight_decay = train_params["weight_decay"]
    patience = train_params["patience"]
    min_delta = train_params["min_delta"]
    hidden_layers = model_params["hidden_layers"]

    device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, shuffle=True, stratify=y, random_state=42
    )

    if train_params["standard_scaler"]:
        # Fit on the training split only so no hold-out statistics leak in.
        scaler = StandardScaler()
        X_train = scaler.fit_transform(X_train)
        X_test = scaler.transform(X_test)

    X_train = torch.tensor(X_train, dtype=torch.float32).to(device)
    y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1).to(device)
    X_test = torch.tensor(X_test, dtype=torch.float32).to(device)
    y_test = torch.tensor(y_test, dtype=torch.float32).unsqueeze(1).to(device)

    train_dataset = TensorDataset(X_train, y_train)
    test_dataset = TensorDataset(X_test, y_test)

    # BatchNorm1d cannot compute batch statistics from a single sample in
    # training mode, so drop a trailing one-sample batch when the model has
    # hidden (batch-normalised) layers.
    drop_last = len(hidden_layers) > 0 and len(train_dataset) % batch_size == 1
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=drop_last)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    model = NN(X.shape[1], hidden_layers, model_params["dropout_rate"]).to(device)

    criterion = nn.BCELoss()
    optimizer = optim.AdamW(model.parameters(), lr=lr, weight_decay=weight_decay)

    # step_size must be >= 1: `num_epochs // 20` is 0 for short runs, which
    # makes StepLR divide by zero on the first scheduler.step().
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=max(1, num_epochs // 20), gamma=0.8)

    best_acc = 0
    best_acc_epoch = -1
    best_roc_auc = 0
    best_roc_auc_epoch = -1

    early_stopper = DualMetricEarlyStopping(patience, min_delta)

    for epoch in range(num_epochs):
        model.train()
        for batch_X, batch_y in train_loader:
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        scheduler.step()

        model.eval()
        with torch.no_grad():
            batch_scores = []
            batch_labels = []
            for batch_X, batch_y in test_loader:
                batch_scores.append(model(batch_X).cpu().numpy())
                batch_labels.append(batch_y.cpu().numpy())

            all_outputs = np.concatenate(batch_scores).ravel()
            all_labels = np.concatenate(batch_labels).ravel()

            accuracy = (all_labels == (all_outputs > 0.5)).mean()
            roc_auc = roc_auc_score(all_labels, all_outputs)

            if best_acc < accuracy:
                best_acc = accuracy
                best_acc_epoch = epoch+1

            if best_roc_auc < roc_auc:
                best_roc_auc = roc_auc
                best_roc_auc_epoch = epoch+1

        # `loss` is the loss of the last training batch of this epoch.
        print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}, Roc-auc: {100 * roc_auc:.2f}%, Accuracy: {100*accuracy:.2f}, LR:{scheduler.get_last_lr()[0]:.6f}")

        if early_stopper.step(accuracy, roc_auc):
            print(f"Early stopping at epoch {epoch+1}")
            break

    best_metrics = {
        "accuracy" : best_acc,
        "roc_auc" : best_roc_auc,
        "best_acc_epoch" : best_acc_epoch,
        "best_roc_auc_epoch" : best_roc_auc_epoch
    }
    return best_metrics

In [None]:
# 50,000 labelled one-dimensional OU paths; the generator seeds NumPy with 123.
X, y = Gen_OU_1_Dataset(train_samples=50_000, random_seed=123)

In [None]:
# sig_data warns on single-feature input, so prepend the time channel first,
# then take the signature of every path truncated at level 8.
X_t = AddTimeline(X)
X_sig = sig_data(X=X_t, sig_level=8)

In [None]:
# Raw path tensor shape, then signature feature-matrix shape (one per line).
print(X.shape, X_sig.shape, sep="\n")

In [None]:
# Template for the network; "hidden_layers" is overwritten for every architecture.
model_params = dict(
    hidden_layers=[],
    dropout_rate=0.2,
)

# Optimisation and evaluation settings consumed by train_NN.
train_params = dict(
    num_epochs=500,
    lr=0.001,
    batch_size=64,
    test_size=0.05,
    weight_decay=1e-4,
    standard_scaler=True,
    patience=50,
    min_delta=0.01,
)

# Architectures to compare: "<depth> hl" has `depth` hidden layers whose widths
# halve down to 64, e.g. "3 hl" -> [256, 128, 64].
models = {
    f"{depth} hl": [2 ** exponent for exponent in range(5 + depth, 5, -1)]
    for depth in range(1, 6)
}

In [None]:
# Input lengths used by the sweep below: 10, 30, ..., 510.
print(np.arange(start=10, stop=520, step=20))

In [None]:
# Sweep over architectures and input lengths: each model is trained on the
# first `sig_len` signature coordinates. Results are re-pickled after every
# run so a long sweep can be interrupted without losing finished work.
metrics_path = 'testing_var_sig_data/ou1_metrics_sig.pk1'
os.makedirs(os.path.dirname(metrics_path), exist_ok=True)

metrics = {}
for key, val in models.items():
    print(key, val)
    model_params_ = copy.deepcopy(model_params)
    model_params_["hidden_layers"] = val
    metrics[key] = {}
    for sig_len in np.arange(10, 520, 20):
        sig_len = int(sig_len)
        print(model_params_)
        # Feed the truncated signature. The previous code passed the raw paths
        # `X.reshape(len(X), -1)` for every `sig_len`, so the input never
        # changed across the sweep and `X_sig` was unused.
        metrics[key][sig_len] = train_NN(model_params_, train_params, X_sig[:, :sig_len], y)
        print(sig_len)
        print(metrics[key][sig_len])
        with open(metrics_path, 'wb') as f:
            pickle.dump(metrics, f)

In [None]:
def plot_model_metrics(metric_dict):
    """
    Plot accuracy and ROC AUC against input length, one line per model.

    metric_dict: {model_name: {input_length: {"accuracy": ..., "roc_auc": ...}}}

    Draws two side-by-side panels (accuracy on the left, ROC AUC on the right)
    with a shared 0..1 y-range and shows the figure.
    """
    # (metric key, marker, panel title, y-axis label) for each panel.
    panels = (
        ('accuracy', 'o', 'Accuracy vs Input Length', 'Accuracy'),
        ('roc_auc', 's', 'ROC AUC vs Input Length', 'ROC AUC'),
    )

    fig, axes = plt.subplots(1, 2, figsize=(12, 5))
    for ax, (metric_key, marker, title, ylabel) in zip(axes, panels):
        for model_name, per_length in metric_dict.items():
            lengths = sorted(per_length.keys())
            values = [per_length[length][metric_key] for length in lengths]
            ax.plot(lengths, values, marker=marker, label=model_name)
        ax.set_title(title)
        ax.set_xlabel('Input Length (Features)')
        ax.set_ylabel(ylabel)
        ax.set_ylim(0, 1)
        ax.set_yticks(np.arange(0.05, 1.05, 0.05))
        ax.grid(True)
        ax.legend()

    fig.tight_layout()
    plt.show()

In [None]:
# Compare all architectures across the input-length sweep.
plot_model_metrics(metric_dict=metrics)