## VAE Hawkes Process Estimation - Tutorial

In [118]:
from HAWKES.hawkes import hawkes_simulations, hawkes_simulation
from HAWKES.hyperparameters import hyper_params_simulation
from HAWKES.discretisation import discretise
from UTILS.utils import write_parquet, read_parquet, timer

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Preprocessing

In [119]:
# Training/Validation/Testing dataset generation
#
# Pipeline: draw Hawkes hyper-parameters -> simulate event sequences -> bin them.
# Each step is given a parquet filename; the next cells read two of these files
# back with read_parquet, so the helpers presumably persist their result there
# (TODO confirm in HAWKES.*).
#
# Simulation settings as noted by the author. None of them is passed explicitly
# in the calls below, so they are presumably defaults inside the helpers
# (TODO confirm):

# Intensity Decay Parameter (β) = U(p = 1, q = 3)
# Branching Ratio (η) = U(a = 0.05, b = 0.8)
# Expected Activity (E) = 500
# Time Horizon (T) = 100
# Interval Length (∆) = 1
# Number of processes = 160_000

# Hawkes process hyper-parameters generation
# NOTE(review): no random seed is set in this cell; check whether the helper
# seeds its own generator, otherwise a re-run will not reproduce the dataset.
params, alpha, beta, mu = hyper_params_simulation(filename="hawkes_hyperparams.parquet")

# Hawkes processes simulations (driven by the alpha, beta, mu drawn above)
simulated_events_seqs = hawkes_simulations(alpha, beta, mu, filename='hawkes_simulations.parquet')

# Discretise the simulated Hawkes processes
discret_simulated_events_seqs = discretise(simulated_events_seqs, filename='binned_hawkes_simulations.parquet')

In [120]:
# Load the raw simulated event sequences written as "hawkes_simulations.parquet".
# Author's note: this frame contains no zero entries because every simulation
# produced more than 100 events (the frame has 100 columns, 0..99).
# NOTE(review): in the recorded output the last column (99) holds values of
# roughly 15-25, well below the stated time horizon T = 100. It looks like only
# the first 100 events of each process are kept rather than all events up to T;
# confirm in hawkes_simulations.

df = read_parquet("hawkes_simulations.parquet")
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,0.201718,0.549272,0.669291,0.763347,1.340586,2.058666,2.103951,2.295464,2.343328,2.353256,...,20.320431,20.688574,20.870411,20.979383,21.097797,21.144234,22.376024,22.549595,22.559690,22.737068
1,0.263952,0.980489,1.865918,1.877182,1.969191,2.350872,2.376977,2.504217,2.843203,2.876901,...,13.959504,13.994565,14.268679,14.379191,14.409444,14.787704,14.910306,14.930773,15.162373,15.177721
2,0.757565,1.112966,1.237346,1.632213,1.996376,2.208894,2.511039,2.891257,4.220501,5.127620,...,21.033575,21.111570,21.172806,21.241169,21.293085,21.413635,21.878988,22.047525,22.173967,22.390015
3,0.811603,0.834645,1.284420,1.368183,1.441455,1.972347,2.458578,2.890171,3.103446,3.365217,...,22.464075,22.594969,22.711527,22.921701,23.122763,23.201223,23.221540,23.391586,23.422445,23.542561
4,0.161384,0.192304,0.383672,0.474002,0.571019,0.735096,1.154819,1.838846,2.269749,2.273406,...,15.223903,15.910003,16.070644,16.151670,16.341576,17.028374,17.086119,17.091005,17.154428,17.292027
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159995,0.272602,0.283193,0.315043,0.757112,0.846662,0.858606,0.924352,1.335818,1.726262,1.760118,...,20.470461,20.581747,20.582932,20.784786,20.943747,21.231852,21.334955,21.708195,21.775961,21.888159
159996,0.444650,0.526150,0.585518,0.618196,0.625249,0.958345,1.073279,1.229367,1.259306,1.406495,...,16.942986,17.406807,18.143621,18.185246,18.187489,18.511551,18.680811,20.037138,20.116808,20.132349
159997,0.502019,0.559399,0.632138,0.807985,1.077994,1.378767,1.426690,1.581105,1.585015,1.585537,...,15.064438,15.121795,15.356409,15.357042,15.568507,15.618413,15.862170,15.881324,15.988467,16.033043
159998,0.098122,0.398326,1.674030,2.545313,3.021523,3.334394,4.393425,4.442160,4.651843,4.987930,...,23.508976,23.591951,23.595551,24.222773,24.231295,24.410946,24.549894,25.025036,25.511477,25.637531


In [121]:
# Load the discretised sequences written as "binned_hawkes_simulations.parquet".
# The recorded output shows 100 columns (0..99), presumably one event count per
# interval of length ∆ = 1 (TODO confirm in discretise).
# Note: `df` is rebound here, so the raw event-time frame loaded in the previous
# cell is no longer reachable under this name.
df = read_parquet("binned_hawkes_simulations.parquet")
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,4.0,1.0,17.0,1.0,7.0,3.0,7.0,4.0,5.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2.0,3.0,6.0,4.0,7.0,7.0,7.0,8.0,7.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,1.0,4.0,3.0,0.0,1.0,3.0,3.0,8.0,2.0,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2.0,4.0,2.0,5.0,5.0,7.0,2.0,3.0,2.0,6.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,6.0,2.0,3.0,3.0,4.0,7.0,4.0,12.0,5.0,11.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159995,7.0,7.0,4.0,7.0,8.0,3.0,7.0,2.0,5.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
159996,6.0,7.0,7.0,2.0,4.0,3.0,3.0,5.0,10.0,3.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
159997,4.0,8.0,8.0,11.0,12.0,3.0,4.0,8.0,3.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
159998,2.0,1.0,1.0,2.0,4.0,7.0,13.0,8.0,5.0,5.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [122]:
# Count, for every row of the binned frame, how many entries are exactly 0:
# build the element-wise "equals zero" mask and total it across the columns.
# NOTE(review): the recorded output shows roughly 75-84 zero entries out of 100
# per row; confirm this is expected and not an artefact of how the simulated
# sequences are truncated before binning.
num_zeros = df.eq(0).sum(axis="columns")
print(num_zeros)

0         79
1         84
2         78
3         76
4         82
          ..
159995    78
159996    80
159997    83
159998    75
159999    77
Length: 160000, dtype: int64
