## VAE Hawkes Process Estimation - Tutorial

In [113]:
from HAWKES.hawkes import hawkes_simulations, hawkes_simulation
from HAWKES.hyperparameters import hyper_params_simulation
from HAWKES.discretisation import discretise
from UTILS.utils import write_parquet, read_parquet

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Preprocessing

In [114]:
# Training/validation/testing dataset generation
#
# Simulation settings, as recorded by the notebook author:
#   Intensity decay parameter (β) ~ U(p = 1, q = 3)
#   Branching ratio (η)           ~ U(a = 0.05, b = 0.8)
#   Expected activity (E)         = 500
#   Time horizon (T)              = 100
#   Interval length (∆)           = 1
#   Number of processes           = 160_000
# NOTE(review): none of these values are passed explicitly in the calls below;
# presumably they are the project's defaults - confirm against the HAWKES/VARIABLES modules.

# Generate the Hawkes process hyper-parameters (only a filename is supplied here)
params, alpha, beta, mu = hyper_params_simulation(filename="hawkes_hyperparams.parquet")

# Simulate the Hawkes processes from the generated mu / alpha / beta
train_simulated_events_seqs = hawkes_simulations(mu, alpha, beta, filename='hawkes_simulations.parquet')

# Discretise the Hawkes processes
# (left disabled here; a `discretise` function is defined and timed in a later cell)
# train_discret_simulated_events_seqs = discretise(train_simulated_events_seqs, filename='train_binned_hawkes_simulations.parquet')

In [117]:
import numpy as np
import VARIABLES.hawkes_var as hwk
import Hawkes as hk

def discretise(jump_times: np.ndarray, filename: str = 'binned_hawkes_simulations.parquet') -> np.ndarray:
    """Bin the jump times of each process into per-interval event counts.

    The interval [0, hwk.TIME_HORIZON] is split into
    ``int(hwk.TIME_HORIZON // hwk.DISCRETISE_STEP)`` equal-width bins, and one
    histogram is computed per entry of ``jump_times``.

    Args:
        jump_times: Sequence of per-process jump-time arrays; one output row is
            produced per entry.
        filename: Forwarded to ``write_parquet`` together with the counts.

    Returns:
        A float32 array of shape ``(len(jump_times), num_bins)`` holding the
        number of jumps falling in each bin.

    NOTE(review): the first bin starts at 0, so if the rows of ``jump_times``
    are zero-padded the padding values are counted in that first bin -
    confirm how the simulated sequences are padded.
    """
    # Number of bins covering the time horizon
    num_bins = int(hwk.TIME_HORIZON // hwk.DISCRETISE_STEP)

    # The bin edges are the same for every process, so build them once
    bin_edges = np.linspace(0, hwk.TIME_HORIZON, num_bins + 1)

    # One row per process, one column per bin
    counts = np.zeros((len(jump_times), num_bins), dtype=np.float32)

    # Histogram of each process's jump times over the shared bin edges
    for row, process_jumps in enumerate(jump_times):
        counts[row], _ = np.histogram(process_jumps, bins=bin_edges)

    # Label columns by bin index. Using the number of bins (rather than the
    # time horizon) keeps the labels in step with the array width whenever
    # the discretisation step differs from 1.
    column_labels = [str(bin_index) for bin_index in range(num_bins)]
    write_parquet(counts, columns=column_labels, filename=filename)

    return counts

# Benchmark discretise: 1 run of 5 loops. Each loop also executes the
# write_parquet call inside discretise, so the timing includes that call.
%timeit -r 1 -n 5 discretise(train_simulated_events_seqs)

2.24 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 5 loops each)


In [111]:
# NOTE(review): `hawkes_simulations2`, `train_alpha`, `train_beta` and `train_mu`
# are not defined or imported in any cell visible in this notebook, and the
# argument order (alpha, beta, mu) differs from the earlier
# `hawkes_simulations(mu, alpha, beta, ...)` call - confirm this cell still
# runs on a fresh kernel (Restart & Run All).
hawkes_simulations2(train_alpha, train_beta, train_mu)

array([[3.0946476, 3.0970595, 3.0978591, 3.0988655, 3.0998588],
       [1.7390517, 0.       , 0.       , 0.       , 0.       ],
       [0.       , 0.       , 0.       , 0.       , 0.       ]],
      dtype=float32)

In [116]:
# NOTE(review): the simulation cell passes filename='hawkes_simulations.parquet'
# (no 'train_' prefix) - confirm this is the file that is meant to be read here.
read_parquet("train_hawkes_simulations.parquet")

Unnamed: 0,0,1,2,3,4
0,0.031156,0.169415,0.194617,0.210404,0.24105
1,0.01062,0.020434,0.03499,0.133899,0.14968
2,0.016305,0.017539,0.025224,0.079798,0.085918
