## VAE Hawkes Process Estimation - Tutorial

In [113]:
import os

import numpy as np
import pandas as pd

from DL.mlp_model import MLPTrainer
from VARIABLES import preprocessing_var as prep
from HAWKES.hawkes import hawkes_simulations, hawkes_simulation
from HAWKES.hyperparameters import hyper_params_simulation
from HAWKES.discretisation import discretise
from DL.linear_model import linear_model
from UTILS.utils import write_csv, write_parquet, read_parquet, timer, parquet_to_csv, read_csv
from PREPROCESSING.dataset import split_data, create_datasets, create_data_loaders

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Preprocessing

In [114]:
# Training/Validation/Testing dataset generation
#
# Three chained steps: draw Hawkes hyper-parameters, simulate processes from
# them, then discretise the simulated event sequences. Each helper is given a
# parquet file name; the data-preparation cell later reads two of these names
# back with read_parquet.
#
# Intended simulation settings.
# NOTE(review): none of these values are passed explicitly in the calls below;
# presumably they come from project configuration — confirm.
# Intensity Decay Parameter (β) = U(p = 1, q = 3)
# Branching Ratio (η) = U(a = 0.05, b = 0.8)
# Expected Activity (E) = 500
# Time Horizon (T) = 100
# Interval Length (∆) = 1
# Number of processes = 100_000

# Hawkes process hyper-parameters generation
params, alpha, beta, mu = hyper_params_simulation(filename="hawkes_hyperparams.parquet")

# Hawkes processes simulations
simulated_events_seqs = hawkes_simulations(alpha, beta, mu, filename='hawkes_simulations.parquet')

# Discretise the Hawkes processes
discret_simulated_events_seqs = discretise(simulated_events_seqs, filename='binned_hawkes_simulations.parquet')

In [130]:
# Data preparation: reload the stored inputs and targets, split them, then
# wrap each split as a dataset and a data loader.

# Inputs come from the binned simulations, targets from the hyper-parameter file.
x, y = (
    read_parquet("binned_hawkes_simulations.parquet"),
    read_parquet("hawkes_hyperparams.parquet"),
)

# Only columns 0 and 2 of the hyper-parameter frame are used as targets.
(
    train_x, train_y,
    val_x, val_y,
    test_x, test_y,
) = split_data(x, y.iloc[:, [0, 2]])

train_dataset, val_dataset, test_dataset = create_datasets(
    train_x, train_y,
    val_x, val_y,
    test_x, test_y,
)

train_loader, val_loader, test_loader = create_data_loaders(
    train_dataset,
    val_dataset,
    test_dataset,
)

In [133]:
# Trained model
#
# Runs MLPTrainer.train_model on the training and validation loaders, also
# passing the validation frames val_x / val_y. The call returns six values:
# the model, the training and validation losses, the validation predictions,
# and val_eta / val_mu.
# NOTE(review): val_eta / val_mu are presumably the validation estimates of the
# branching ratio and baseline intensity — confirm against MLPTrainer.

model, train_losses, val_losses, val_y_pred, val_eta, val_mu = MLPTrainer().train_model(train_loader, val_loader, val_x, val_y)
# NOTE(review): the tensorboard extension is loaded only after train_model has
# returned; if it is meant for monitoring the run, load it in the setup cell.
%load_ext tensorboard

  action_fn=lambda data: sys.getsizeof(data.storage()),
  return super().__sizeof__() + self.nbytes()


Layer (type:depth-idx)                   Input Shape               Output Shape              Param #                   Param %                   Kernel Shape              Mult-Adds                 Trainable
MLP                                      [128, 100]                [128, 2]                  --                             --                   --                        --                        True
├─ModuleList: 1-11                       --                        --                        (recursive)               (recursive)               --                        --                        True
│    └─Linear: 2-1                       [128, 100]                [128, 100]                10,100                     16.61%                   --                        1,292,800                 True
├─ReLU: 1-2                              [128, 100]                [128, 100]                --                             --                   --                        --              

Training Progress:   0%|[32m          [0m| 0/500 [00:00<?, ?it/s]

In [118]:
# Tested model

test_y_pred, test_loss, test_eta, test_mu = MLPTrainer().test_model(test_loader, test_y)
%load_ext tensorboard

Test set - Test loss: 4.8846, Estimated branching ratio (η): -0.1058, Estimated baseline intensity (µ): 0.0142
The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [126]:
# α and β estimation (linear model)
#
# NOTE(review): this call currently fails with
#   ValueError: operands could not be broadcast together with shapes (100000,) (40000,)
# `params` is the unsplit output of hyper_params_simulation(), while val_y_pred
# and val_x come from the validation split, so the two sizes presumably
# originate from those arguments. Confirm inside linear_model and, if so, pass
# only the rows of `params` that belong to the validation split.

param_pred, alpha_pred, beta_pred = linear_model(val_y_pred, train_x, val_x, params)

ValueError: operands could not be broadcast together with shapes (100000,) (40000,) 

In [None]:
# Tests and comparison (Testing Effects of Parameters = β, η, ∆, E)
#
# Regenerates hyper-parameters, simulations and binned counts for the
# parameter-effect experiments described below.
# NOTE(review): the three calls are identical to the training-data cell and
# reuse the same parquet file names, and none of the settings listed here are
# passed explicitly. Presumably the settings are changed in project
# configuration before running — confirm, and confirm that reusing the
# training file names is intended.

# Baseline settings for the experiments
# Intensity Decay Parameter (β) = U(p = 1, q = 3)
# Branching Ratio (η) = U(a = 0.2, b = 0.6)
# Expected Activity (E) = 500
# Time Horizon (T) = 100
# Interval Length (∆) = 1
# Number of processes = 200
# Number of tests = 100

# Values listed for each parameter (one column per parameter)
# Intensity Decay Parameter (β) | Branching Ratio (η) | Interval Length (∆) | Expected Activity (E)
#
#          [0.5,2.5]                   [0.1,0.4]               0.25                   50
#         [1.75,3.75]                  [0.3,0.6]               0.5                    100
#            [3,5]                     [0.5,0.8]                1                     250
#           [0.5,3]                    [0.1,0.6]                2                     500
#           [1.5,4]                    [0.2,0.7]                5                     1000
#           [2.5,5]                    [0.3,0.8]
#           [0.5,4]                    [0.05,0.6]
#           [1.5,5]                    [0.05,0.7]
#           [0.5,5]                    [0.05,0.8]

# Hawkes process hyper-parameters generation
params, alpha, beta, mu = hyper_params_simulation(filename="hawkes_hyperparams.parquet")

# Hawkes processes simulations
simulated_events_seqs = hawkes_simulations(alpha, beta, mu, filename='hawkes_simulations.parquet')

# Discretise the Hawkes processes
discret_simulated_events_seqs = discretise(simulated_events_seqs, filename='binned_hawkes_simulations.parquet')

In [121]:
# Display a stored prediction table from a previous run (presumably the
# validation predictions of a training run — confirm).
# NOTE(review): the alias `eval` shadows Python's built-in eval() for the rest
# of the kernel session; a distinct alias would be safer, but other cells may
# already rely on this name. This import also sits outside the top import cell.
import VARIABLES.evaluation_var as eval

# NOTE(review): the file name is hard-coded to one timestamped run, so this cell
# only works where that exact file exists under LOGDIRUN/TRAINING/RUN_NAME.
read_parquet('2023_04_27_14_29_36_MICSHPEG8GIRARD_PRED.parquet', folder=os.path.join(eval.LOGDIRUN, 'TRAINING', eval.RUN_NAME))

Unnamed: 0,val_eta_true,val_mu_true,val_eta_pred,val_mu_pred
0,0.289340,3.578383,0.236507,3.864950
1,0.116476,4.999648,0.221661,3.946149
2,0.724130,1.420453,0.559577,2.149976
3,0.710604,1.188248,0.663752,1.674449
4,0.691518,1.293628,0.635288,1.804378
...,...,...,...,...
39995,0.339272,3.285681,0.475098,2.559999
39996,0.509901,2.245607,0.385080,3.052341
39997,0.691510,1.614110,0.703228,1.481999
39998,0.252738,3.516221,0.329935,3.353953
