## VAE Hawkes Process Estimation - Tutorial

In [130]:
from HAWKES.hawkes import hawkes_simulations, hawkes_simulation
from HAWKES.hyperparameters import hyper_params_simulation
from HAWKES.discretisation import discretise
from UTILS.utils import write_parquet, read_parquet

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Preprocessing

In [131]:
# Training/Validation/Testing dataset generation

# Target configuration of the generated dataset.
# NOTE(review): none of these values is passed explicitly in the calls below,
# so they are presumably defaults defined inside the HAWKES package -- confirm.
# Intensity Decay Parameter (β) = U(p = 1, q = 3)
# Branching Ratio (η) = U(a = 0.05, b = 0.8)
# Expected Activity (E) = 500
# Time Horizon (T) = 100
# Interval Length (∆) = 1
# Number of processes = 160_000

# Hawkes process hyper-parameters generation
# (the `filename` argument presumably names the parquet file the result is saved to -- verify)
params, alpha, beta, mu = hyper_params_simulation(filename="hawkes_hyperparams.parquet")

# Hawkes processes simulations, driven by the mu / alpha / beta produced above
simulated_events_seqs = hawkes_simulations(mu, alpha, beta, filename='hawkes_simulations.parquet')

# Discretise the simulated Hawkes processes
discret_simulated_events_seqs = discretise(simulated_events_seqs, filename='binned_hawkes_simulations.parquet')

In [127]:
import time


def _run_timed(stage, *args, **kwargs):
    """Call ``stage(*args, **kwargs)`` and return ``(result, elapsed_seconds)``.

    The elapsed time is the difference between two ``time.perf_counter()``
    readings taken immediately before and after the call.
    """
    tic = time.perf_counter()
    outcome = stage(*args, **kwargs)
    toc = time.perf_counter()
    return outcome, toc - tic


# Reference point for the end-to-end duration of the three stages
start_total = time.perf_counter()

# Stage 1: Hawkes process hyper-parameters generation
(params, alpha, beta, mu), time_params = _run_timed(
    hyper_params_simulation, filename="hawkes_hyperparams.parquet"
)

# Stage 2: Hawkes processes simulations
simulated_events_seqs, time_simulations = _run_timed(
    hawkes_simulations, mu, alpha, beta, filename='hawkes_simulations.parquet'
)

# Stage 3: discretisation of the simulated events
discret_simulated_events_seqs, time_discretisation = _run_timed(
    discretise, simulated_events_seqs, filename='binned_hawkes_simulations.parquet'
)

# End-to-end duration (includes the small bookkeeping overhead between stages)
time_total = time.perf_counter() - start_total

# Report per-stage and total durations
print(f"Hyper-parameters generation: {time_params:.3f} seconds")
print(f"Hawkes process simulations: {time_simulations:.3f} seconds")
print(f"Discretisation of the events: {time_discretisation:.3f} seconds")
print(f"Total execution time: {time_total:.3f} seconds")


Hyper-parameters generation: 0.002 seconds
Hawkes process simulations: 0.015 seconds
Discretisation of the events: 0.002 seconds
Total execution time: 0.019 seconds


In [158]:
import numpy as np

# Number of time bins used for the column labels.
# The original cell read this from `hwk.TIME_HORIZON`, but no `hwk` name is
# imported anywhere in this notebook, so the cell raised NameError on a fresh
# kernel (Restart & Run All). 100 matches the "Time Horizon (T) = 100" setting
# listed in the preprocessing cell; keep the two values in sync.
TIME_HORIZON = 100

# Column labels "0", "1", ..., str(TIME_HORIZON - 1) as a NumPy string array.
# Starting from int32 keeps the resulting fixed-width string dtype small.
columns = np.arange(TIME_HORIZON, dtype=np.int32).astype(str)
columns

array(['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12',
       '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23',
       '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34',
       '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45',
       '46', '47', '48', '49', '50', '51', '52', '53', '54', '55', '56',
       '57', '58', '59', '60', '61', '62', '63', '64', '65', '66', '67',
       '68', '69', '70', '71', '72', '73', '74', '75', '76', '77', '78',
       '79', '80', '81', '82', '83', '84', '85', '86', '87', '88', '89',
       '90', '91', '92', '93', '94', '95', '96', '97', '98', '99'],
      dtype='<U11')

In [178]:
# `time` is imported here so the cell does not rely on an earlier cell having
# been executed first.
import time

import numpy as np
import pandas as pd

# Simulation parameters
process_num = 160000
time_horizon = 100
# Generate simulated data: one row per process, one column per time bin
data = np.random.randn(process_num, time_horizon)

# Micro-benchmark of three ways to build the column labels "0" .. "99".
# All three variants use the same `time_horizon` so the timings are comparable.
# (Variant 2 previously read `hwk.TIME_HORIZON`, a name that is never imported
# in this notebook and therefore failed on a fresh kernel.)

# Variant 1: default integer dtype, then cast to str
start_1 = time.perf_counter()
columns = np.arange(time_horizon).astype(str)
end_1 = time.perf_counter()
total_1 = end_1 - start_1

# Variant 2: explicit int32, then cast to str
start_2 = time.perf_counter()
columns = np.arange(time_horizon, dtype=np.int32).astype(str)
end_2 = time.perf_counter()
total_2 = end_2 - start_2

# Variant 3: printf-style formatting through np.char.mod
start_3 = time.perf_counter()
columns = np.char.mod('%d', np.arange(time_horizon))
end_3 = time.perf_counter()
total_3 = end_3 - start_3


# Build the DataFrame; `columns` holds the labels from variant 3 (last assignment)
df = pd.DataFrame(data, columns=columns, dtype=np.float32)

# Report the timing of each variant (single-shot measurements, so expect noise)
print(f"Total execution time n°1: {total_1:.6f} seconds")
print(f"Total execution time n°2: {total_2:.6f} seconds")
print(f"Total execution time n°3: {total_3:.6f} seconds")

Total execution time n°1: 0.000093 seconds
Total execution time n°2: 0.000062 seconds
Total execution time n°3: 0.000092 seconds


In [129]:
# Load the parquet file whose name was passed to hawkes_simulations(...) in the
# preprocessing cell (presumably written by that call -- verify). The bare
# expression is the cell's last statement, so its value is rendered as output.
read_parquet("hawkes_simulations.parquet")

Unnamed: 0,0,1,2,3,4
0,0.008824,0.015973,0.030029,0.03204,0.070994
1,0.002417,0.031233,0.037525,0.062274,0.079062
2,0.013975,0.019324,0.034401,0.035873,0.038397
