## VAE Hawkes Process Estimation - Tutorial

In [118]:
from HAWKES.hawkes import hawkes_simulations, hawkes_simulation
from HAWKES.hyperparameters import hyper_params_simulation
from HAWKES.discretisation import discretise
from UTILS.utils import write_parquet, read_parquet, timer

# Enable IPython's autoreload extension; mode 2 re-imports modules before each
# cell execution, so edits to the imported project modules are picked up
# without restarting the kernel.
# NOTE(review): re-running this cell in a live kernel prints the
# "already loaded" notice seen in the saved output; it is harmless.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Preprocessing

In [71]:
# Training / validation / testing dataset generation.
#
# Intended simulation settings (as recorded by the author):
#   Intensity decay parameter  β ~ U(1, 3)
#   Branching ratio            η ~ U(0.05, 0.8)
#   Expected activity          E = 500
#   Time horizon               T = 100
#   Interval length            ∆ = 1
#   Number of processes        160_000
# NOTE(review): none of these values is passed explicitly in this cell;
# presumably they are the defaults of the HAWKES helpers — confirm.

# Step 1: generate the Hawkes hyper-parameters (params, alpha, beta, mu).
# The parquet filename is handed to the helper (presumably the output path — confirm).
params, alpha, beta, mu = hyper_params_simulation(filename="hawkes_hyperparams.parquet")

# Step 2: simulate the Hawkes processes from the generated alpha, beta and mu.
simulated_events_seqs = hawkes_simulations(alpha, beta, mu, filename='hawkes_simulations.parquet')

# Step 3: discretise the simulated Hawkes processes
# (presumably into bins of length ∆ over [0, T] — confirm in HAWKES.discretisation).
discret_simulated_events_seqs = discretise(simulated_events_seqs, filename='binned_hawkes_simulations.parquet')

In [77]:
# Load the simulated event sequences from "hawkes_simulations.parquet"
# (the filename passed to hawkes_simulations in the preprocessing cell).
# Author's note: the full dataset contains no 0 entries because every process
# produced more than 100 simulated events (100 is also the time horizon T).
# NOTE(review): presumably shorter sequences would be zero-padded up to the
# 100 stored columns — confirm against hawkes_simulations.

df = read_parquet("hawkes_simulations.parquet")
# Bare last expression: show the frame with the notebook's rich display.
df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,90,91,92,93,94,95,96,97,98,99
0,0.103629,0.160154,0.467746,0.551615,0.903304,0.971267,1.052583,1.113889,1.427181,1.470723,...,13.915068,14.060807,14.219495,14.295300,14.302876,14.304322,14.328323,14.358667,14.935146,15.147425
1,0.766224,1.111592,1.169891,1.405977,2.267317,2.421949,2.661689,3.850107,4.134073,4.422656,...,22.942625,23.000401,23.262337,23.265650,23.351070,23.574991,23.577011,23.653563,23.658110,23.743011
2,0.073781,0.121855,0.921330,1.535974,2.109995,2.221177,2.957004,3.015258,3.166239,3.578259,...,16.342535,16.439287,17.879131,18.008806,18.055981,18.234209,18.322327,18.744061,18.848660,18.928692
3,0.124446,0.278202,0.770830,0.827540,1.055858,1.231543,1.256835,1.303067,1.382225,1.411622,...,20.733007,20.866299,21.065048,21.380539,21.750008,23.105583,23.279594,23.541523,23.595928,23.621233
4,0.208216,0.285793,0.632931,0.806604,0.850676,0.906225,0.973882,1.323920,1.439582,2.439882,...,17.309759,17.908943,18.066502,18.177301,18.394859,18.669397,18.856340,19.158443,19.629421,19.714268
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
159995,0.168257,0.503672,0.509493,0.658226,0.928102,0.949315,1.016766,1.075959,1.247155,1.417213,...,20.190525,20.296770,20.321655,20.444820,20.765930,20.983368,21.182665,21.267517,21.636131,22.679684
159996,0.085979,0.847283,1.047570,1.054174,1.534695,1.610753,2.052204,2.268371,2.335621,2.503865,...,23.633549,23.791418,23.792921,23.869625,24.100950,24.367620,25.568586,25.570606,25.621109,25.664053
159997,0.381597,0.682949,0.714562,0.768450,0.978447,1.547891,1.929200,2.323916,2.379158,2.483685,...,20.196228,20.414068,20.668736,20.837576,20.864161,21.108917,21.449650,21.516291,21.832479,22.000639
159998,1.580631,1.955691,2.340539,2.343436,2.584889,4.036587,4.194083,4.610888,4.643360,4.689805,...,21.980633,22.046173,22.888506,23.271973,23.375751,24.268978,24.401814,24.736656,25.161446,25.331141


In [75]:
# For every row of `df`, count how many entries are exactly 0.
# NOTE(review): the saved output of this cell reports 16,000,000 rows with up
# to 100 zeros per row, which does not match the frame displayed by the loading
# cell, and this cell's execution counter is lower than that cell's. It was
# presumably run against an earlier `df`; re-run top to bottom to confirm.
num_zeros = df.eq(0).sum(axis="columns")
print(num_zeros)

0            84
1            77
2            81
3            79
4            80
           ... 
15999995    100
15999996    100
15999997    100
15999998    100
15999999    100
Length: 16000000, dtype: int64
