## VAE Hawkes Process Estimation - Tutorial

In [2]:
import os

import numpy as np
import pandas as pd

from dl.mlp_model import MLPTrainer
import variables.prep_var as prep
from hawkes.simulation import hawkes_simulations, hawkes_simulation
from hawkes.hyperparameters import hyper_params_simulation
from hawkes.discretisation import discretise
from dl.linear_model import linear_model
from tools.utils import write_csv, write_parquet, read_parquet, timer, parquet_to_csv, read_csv
from preprocessing.dataset import split_data, create_datasets, create_data_loaders

%load_ext autoreload
%autoreload 2

### Preprocessing

In [2]:
# Training/validation/testing dataset generation
#
# Simulation settings described for this dataset:
#   Intensity decay parameter (β) ~ U(p = 1, q = 3)
#   Branching ratio (η)           ~ U(a = 0.05, b = 0.8)
#   Expected activity (E)         = 500
#   Time horizon (T)              = 100
#   Interval length (∆)           = 1
#   Number of processes           = 100_000
# NOTE(review): none of these values are passed explicitly in the calls below,
# so they presumably come from the helpers' defaults or project configuration
# (e.g. variables.prep_var) — confirm before relying on them.

# Hawkes process hyper-parameter generation. The filename given here is the
# same one the data-preparation cell later loads with read_parquet.
params, alpha, beta, mu = hyper_params_simulation(filename="hawkes_hyperparams.parquet")

# Hawkes process simulations, driven by the alpha, beta and mu generated above.
simulated_events_seqs = hawkes_simulations(alpha, beta, mu, filename='hawkes_simulations.parquet')

# Discretise the simulated Hawkes processes. The filename given here is the
# same one the data-preparation cell later loads as x.
discret_simulated_events_seqs = discretise(simulated_events_seqs, filename='binned_hawkes_simulations.parquet')

In [6]:
# Data preparation

# Load the inputs (x) and targets (y) from the parquet files named in the
# dataset-generation cell: binned simulations and hyper-parameters.
x = read_parquet("binned_hawkes_simulations.parquet")
y = read_parquet('hawkes_hyperparams.parquet')

# Only the columns at positions 0 and 2 of y are kept as targets.
# NOTE(review): presumably these are η and µ (the training cell returns
# val_eta / val_mu) — confirm against the column order of the hyper-parameter
# file, since positional selection silently breaks if that order changes.
train_x, train_y, val_x, val_y, test_x, test_y = split_data(x, y.iloc[:, [0, 2]])
# Wrap each split into a dataset, then into a data loader, for the training
# and testing cells.
train_dataset, val_dataset, test_dataset = create_datasets(train_x, train_y, val_x, val_y, test_x, test_y)
train_loader, val_loader, test_loader = create_data_loaders(train_dataset, val_dataset, test_dataset)

In [75]:
# Model training
# Builds a fresh MLPTrainer and trains it on train_loader, passing val_loader,
# val_x and val_y for validation. The call is unpacked into the model, the
# train/validation loss values and the validation outputs
# (val_y_pred, val_eta, val_mu).
# NOTE(review): the saved output of this cell shows the run stopping at epoch
# 319/500 with a traceback, so in that session these names may never have been
# bound — re-run to completion before using them downstream.
model, train_losses, val_losses, val_y_pred, val_eta, val_mu = MLPTrainer().train_model(train_loader, val_loader, val_x, val_y)
%load_ext tensorboard

Layer (type:depth-idx)                   Input Shape               Output Shape              Param #                   Param %                   Kernel Shape              Mult-Adds                 Trainable
MLP                                      [10000, 100]              [10000, 2]                --                             --                   --                        --                        True
├─Linear: 1-1                            [10000, 100]              [10000, 100]              10,100                     14.25%                   --                        101,000,000               True
├─ModuleList: 1-2                        --                        --                        --                             --                   --                        --                        True
│    └─Sequential: 2-1                   [10000, 100]              [10000, 100]              --                             --                   --                        --              

Epoch 319/500 - train_loss: 0.2649, val_loss: 0.2900:  64%|[32m██████▍   [0m| 319/500 [53:48<30:31, 10.12s/it]  

Unexpected exception formatting exception. Falling back to standard exception



Traceback (most recent call last):
  File "C:\Users\Nicolas Girard\AppData\Roaming\Python\Python311\site-packages\IPython\core\interactiveshell.py", line 3505, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "C:\Users\Nicolas Girard\AppData\Local\Temp\ipykernel_6260\3997827302.py", line 3, in <module>
    model, train_losses, val_losses, val_y_pred, val_eta, val_mu = MLPTrainer().train_model(train_loader, val_loader, val_x, val_y)
                                                                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nicolas Girard\Documents\VAE_HAWKES_PROCESS_ESTIMATION\src\tools\utils.py", line 376, in wrapper
    result = func(*_args, **kwargs)
             ^^^^^^^^^^^^^^^^^^^^^^
  File "c:\Users\Nicolas Girard\Documents\VAE_HAWKES_PROCESS_ESTIMATION\src\dl\mlp_model.py", line 319, in train_model
    pbar.update(1)
                   
  File "c:\Users\Nicolas Girard\AppData\Local\Programs\Python\P

In [23]:
# Model testing (currently disabled: every statement in this cell is commented out)
# NOTE(review): the saved output under this cell ("Test set - Test loss: ...")
# comes from an earlier run made while these lines were active.

# test_y_pred, test_loss, test_eta, test_mu = MLPTrainer().test_model(test_loader, test_y)
# %load_ext tensorboard

# α and β estimation with the linear model (also disabled)

# param_pred, alpha_pred, beta_pred = linear_model(train_x, train_y, val_x)

Test set - Test loss: 5.0181, Estimated branching ratio (η): -0.0096, Estimated baseline intensity (µ): -0.0436
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [None]:
# Tests and comparison: effect of the parameters β, η, ∆ and E
#
# Baseline settings described for these tests:
#   Intensity decay parameter (β) ~ U(p = 1, q = 3)
#   Branching ratio (η)           ~ U(a = 0.2, b = 0.6)
#   Expected activity (E)         = 500
#   Time horizon (T)              = 100
#   Interval length (∆)           = 1
#   Number of processes           = 200
#   Number of tests               = 100
#
# Values to test for each parameter. The columns have different lengths, so
# each column reads as its own list rather than as paired rows — TODO confirm.
#
# Intensity Decay Parameter (β) | Branching Ratio (η) | Interval Length (∆) | Expected Activity (E)
#
#          [0.5,2.5]                   [0.1,0.4]               0.25                   50
#         [1.75,3.75]                  [0.3,0.6]               0.5                    100
#            [3,5]                     [0.5,0.8]                1                     250
#           [0.5,3]                    [0.1,0.6]                2                     500
#           [1.5,4]                    [0.2,0.7]                5                     1000
#           [2.5,5]                    [0.3,0.8]
#           [0.5,4]                    [0.05,0.6]
#           [1.5,5]                    [0.05,0.7]
#           [0.5,5]                    [0.05,0.8]
#
# NOTE(review): the calls below pass none of the settings above explicitly, so
# they presumably have to be configured elsewhere before running this cell.
# NOTE(review): the three filenames are the same ones used by the
# dataset-generation cell; if the helpers write to them, running this cell
# would replace the training data files — confirm, or use distinct filenames.

# Hawkes process hyper-parameter generation
params, alpha, beta, mu = hyper_params_simulation(filename="hawkes_hyperparams.parquet")

# Hawkes process simulations, driven by the alpha, beta and mu generated above.
simulated_events_seqs = hawkes_simulations(alpha, beta, mu, filename='hawkes_simulations.parquet')

# Discretise the simulated Hawkes processes
discret_simulated_events_seqs = discretise(simulated_events_seqs, filename='binned_hawkes_simulations.parquet')