## VAE Hawkes Process Estimation - Tutorial

In [9]:
import os
import sys

import numpy as np
import pandas as pd
import Hawkes as hk
from matplotlib import pyplot as plt

from PREPROCESSING.hawkes import hawkes_simulations
from PREPROCESSING.hyperparameters import hyper_params_simulation
from PREPROCESSING.discretisation import discretise
import VARIABLES.variables as var

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


### Preprocessing

In [88]:
# Training dataset generation

# Settings this dataset is meant to be generated with.
# NOTE(review): none of these values are passed explicitly in this cell;
# presumably they are read from VARIABLES.variables - confirm before running.
# Intensity Decay Parameter (β) = U(p = 1, q = 3)
# Branching Ratio (η) = U(a = 0.05, b = 0.8)
# Expected Activity (E) = 500
# Time Horizon (T) = 100
# Interval Length (∆) = 1
# Training processes = 100_000

# Generate the Hawkes process hyper-parameters
train_params, train_alpha, train_beta, train_mu = hyper_params_simulation(filename="train_hawkes_hyperparams.csv")
# Simulate one Hawkes process per (mu, alpha, beta) triple
train_simulated_events_seqs = hawkes_simulations(train_mu, train_alpha, train_beta, filename='train_hawkes_simulations.csv')
# Discretise the simulated Hawkes processes
train_discret_simulated_events_seqs = discretise(train_simulated_events_seqs, filename='train_binned_hawkes_simulations.csv')

In [None]:
# Validation dataset generation

# Settings this dataset is meant to be generated with.
# NOTE(review): none of these values are passed explicitly in this cell;
# presumably they are read from VARIABLES.variables - confirm before running.
# Intensity Decay Parameter (β) = U(p = 1, q = 3)
# Branching Ratio (η) = U(a = 0.05, b = 0.8)
# Expected Activity (E) = 500
# Time Horizon (T) = 100
# Interval Length (∆) = 1
# Validation processes = 30_000

# Generate the Hawkes process hyper-parameters
val_params, val_alpha, val_beta, val_mu = hyper_params_simulation(filename="val_hawkes_hyperparams.csv")
# Simulate one Hawkes process per (mu, alpha, beta) triple
val_simulated_events_seqs = hawkes_simulations(val_mu, val_alpha, val_beta, filename='val_hawkes_simulations.csv')
# Discretise the simulated Hawkes processes
val_discret_simulated_events_seqs = discretise(val_simulated_events_seqs, filename='val_binned_hawkes_simulations.csv')

In [None]:
# Testing dataset generation

# Settings this dataset is meant to be generated with.
# NOTE(review): none of these values are passed explicitly in this cell;
# presumably they are read from VARIABLES.variables - confirm before running.
# Intensity Decay Parameter (β) = U(p = 1, q = 3)
# Branching Ratio (η) = U(a = 0.05, b = 0.8)
# Expected Activity (E) = 500
# Time Horizon (T) = 100
# Interval Length (∆) = 1
# Testing processes = 20_000

# Generate the Hawkes process hyper-parameters
test_params, test_alpha, test_beta, test_mu = hyper_params_simulation(filename="test_hawkes_hyperparams.csv")
# Simulate one Hawkes process per (mu, alpha, beta) triple
test_simulated_events_seqs = hawkes_simulations(test_mu, test_alpha, test_beta, filename='test_hawkes_simulations.csv')
# Discretise the simulated Hawkes processes
test_discret_simulated_events_seqs = discretise(test_simulated_events_seqs, filename='test_binned_hawkes_simulations.csv')

In [10]:
from UTILS.utils import write_csv
import Hawkes as hk

# Hyper-parameters used as inputs for the timing comparison at the end of this cell.
# NOTE(review): this repeats the call from the training-data cell with the same
# filename, so it rebinds train_params / train_alpha / train_beta / train_mu;
# presumably it also regenerates the values and rewrites the CSV - confirm that
# this is intended.
train_params, train_alpha, train_beta, train_mu = hyper_params_simulation(filename="train_hawkes_hyperparams.csv")

def hawkes_simulation(params=None):
    """Simulate a single Hawkes process and return the simulator and its output.

    Args:
        params: Dict of process parameters handed to ``set_parameter``.
            When omitted, a fresh ``{"mu": 0.1, "alpha": 0.5, "beta": 10.0}``
            is built on every call, so the default dict is never shared
            between simulator objects (the simulator receives the dict
            object itself, not a copy made here).

    Returns:
        Tuple ``(hawkes_process, T)``: the configured simulator and the value
        returned by ``simulate`` for the interval
        ``[var.TIME_ITV_START, var.TIME_HORIZON]``
        (presumably the event times - confirm against the Hawkes package docs).
    """
    if params is None:
        params = {"mu": 0.1, "alpha": 0.5, "beta": 10.0}

    # Create a Hawkes process with the configured kernel, baseline and parameters
    hawkes_process = (
        hk.simulator()
        .set_kernel(var.KERNEL)
        .set_baseline(var.BASELINE)
        .set_parameter(params)
    )
    # Simulate the process over the configured time interval
    T = hawkes_process.simulate([var.TIME_ITV_START, var.TIME_HORIZON])

    # Plotting the number of events and the intensity over time is left
    # disabled (it does not work with many iterations):
    # hawkes_process.plot_N()
    # hawkes_process.plot_l()

    return hawkes_process, T

def hawkes_simulations(mu, alpha, beta, filename='hawkes_simulations.csv'):
    """Simulate ``var.PROCESS_NUM`` Hawkes processes and save them as a CSV.

    NOTE: defining this in the notebook shadows the ``hawkes_simulations``
    imported from ``PREPROCESSING.hawkes`` in the first cell.

    Args:
        mu, alpha, beta: Indexable sequences; element ``k`` of each provides
            the parameters of the k-th simulated process.
        filename: Name of the CSV file, appended to ``var.FILEPATH``.

    Returns:
        ``np.float64`` array of shape ``(var.PROCESS_NUM, var.TIME_HORIZON)``.
        Row ``k`` holds the first ``var.TIME_HORIZON`` values returned by the
        k-th simulation; rows with fewer values are right-padded with zeros.
    """
    horizon = var.TIME_HORIZON
    # Pre-allocate a zero-filled array; the zeros double as padding for short rows
    simulated_events_seqs = np.zeros((var.PROCESS_NUM, horizon), dtype=np.float64)

    for k in range(var.PROCESS_NUM):
        # Simulate one Hawkes process with the k-th parameter triple
        _, T = hawkes_simulation(params={"mu": mu[k], "alpha": alpha[k], "beta": beta[k]})

        # Keep at most `horizon` values and write only as many as exist.
        # Assigning to the full row would raise for shorter sequences and
        # would silently broadcast a single event across the whole row.
        # NOTE(review): this truncates by number of values using the time
        # horizon as the row width - confirm that is the intended layout.
        events = np.asarray(T, dtype=np.float64)[:horizon]
        simulated_events_seqs[k, :events.shape[0]] = events

    # Write the array to CSV (default integer column names, no index column)
    df = pd.DataFrame(simulated_events_seqs)
    df.to_csv(f"{var.FILEPATH}{filename}", index=False)

    return simulated_events_seqs

def hawkes_simulations2(mu, alpha, beta, filename='hawkes_simulations.csv'):
    """Simulate ``var.PROCESS_NUM`` Hawkes processes and save them via ``write_csv``.

    Variant of ``hawkes_simulations`` that writes the output with the
    project's ``write_csv`` helper instead of a pandas DataFrame.

    Args:
        mu, alpha, beta: Indexable sequences; element ``k`` of each provides
            the parameters of the k-th simulated process.
        filename: Name of the CSV file, appended to ``var.FILEPATH``.

    Returns:
        ``np.float64`` array of shape ``(var.PROCESS_NUM, var.TIME_HORIZON)``.
        Row ``k`` holds the first ``var.TIME_HORIZON`` values returned by the
        k-th simulation; rows with fewer values are right-padded with zeros.
    """
    horizon = var.TIME_HORIZON
    # Pre-allocate a zero-filled array; the zeros double as padding for short rows
    simulated_events_seqs = np.zeros((var.PROCESS_NUM, horizon), dtype=np.float64)

    for k in range(var.PROCESS_NUM):
        # Simulate one Hawkes process with the k-th parameter triple
        _, T = hawkes_simulation(params={"mu": mu[k], "alpha": alpha[k], "beta": beta[k]})

        # Keep at most `horizon` values and write only as many as exist.
        # Assigning to the full row would raise for shorter sequences and
        # would silently broadcast a single event across the whole row.
        events = np.asarray(T, dtype=np.float64)[:horizon]
        simulated_events_seqs[k, :events.shape[0]] = events

    # One record per process, each carrying its row under the "seq" key
    processes = [{"seq": seq} for seq in simulated_events_seqs]

    # Write the records to a CSV file using the project helper
    write_csv(processes, filepath=f"{var.FILEPATH}{filename}")

    return simulated_events_seqs


%timeit hawkes_simulations(train_alpha, train_beta, train_mu)
%timeit hawkes_simulations2(train_alpha, train_beta, train_mu)