## VAE Hawkes Process Estimation - Tutorial

In [1]:
import os
import sys

import numpy as np
import pandas as pd
import Hawkes as hk
from matplotlib import pyplot as plt

from PREPROCESSING.hawkes import hawkes_simulations, hawkes_simulation
from PREPROCESSING.hyperparameters import hyper_params_simulation
from PREPROCESSING.discretisation import discretise
import VARIABLES.variables as var

%load_ext autoreload
%autoreload 2

### Preprocessing

In [2]:
# Training dataset generation

# Intensity Decay Parameter (β) = U(p = 1, q = 3)
# Branching Ratio (η) = U(a = 0.05, b = 0.8)
# Expected Activity (E) = 500
# Time Horizon (T) = 100
# Interval Length (∆) = 1
# Training processes = 100_000

# Hawkes process hyper-parameters generation
train_params, train_alpha, train_beta, train_mu = hyper_params_simulation(filename="train_hawkes_hyperparams.csv")

# Hawkes processes simulations
train_simulated_events_seqs = hawkes_simulations(train_mu, train_alpha, train_beta, filename='train_hawkes_simulations.csv')

# Discrétiser les processus de Hawkes
train_discret_simulated_events_seqs = discretise(train_simulated_events_seqs, filename='train_binned_hawkes_simulations.csv')

In [None]:
# Validating dataset generation

# Intensity Decay Parameter (β) = U(p = 1, q = 3)
# Branching Ratio (η) = U(a = 0.05, b = 0.8)
# Expected Activity (E) = 500
# Time Horizon (T) = 100
# Interval Length (∆) = 1
# Training processes = 30_000

# Hawkes process hyper-parameters generation
val_params, val_alpha, val_beta, val_mu = hyper_params_simulation(filename="val_hawkes_hyperparams.csv")
# Hawkes processes simulations
val_simulated_events_seqs = hawkes_simulations(val_mu, val_alpha, val_beta, filename='val_hawkes_simulations.csv')
# Discrétiser les processus de Hawkes
val_discret_simulated_events_seqs = discretise(val_simulated_events_seqs, filename='val_binned_hawkes_simulations.csv')

In [None]:
# Testing dataset generation

# Intensity Decay Parameter (β) = U(p = 1, q = 3)
# Branching Ratio (η) = U(a = 0.05, b = 0.8)
# Expected Activity (E) = 500
# Time Horizon (T) = 100
# Interval Length (∆) = 1
# Testing processes = 20_000

# Hawkes process hyper-parameters generation
test_params, test_alpha, test_beta, test_mu = hyper_params_simulation(filename="test_hawkes_hyperparams.csv")
# Hawkes processes simulations
test_simulated_events_seqs = hawkes_simulations(test_mu, test_alpha, test_beta, filename='test_hawkes_simulations.csv')
# Discrétiser les processus de Hawkes
test_discret_simulated_events_seqs = discretise(test_simulated_events_seqs, filename='test_binned_hawkes_simulations.csv')

In [14]:
from mpi4py import MPI
from typing import Tuple
from UTILS.utils import write_csv

def hyper_params_simulation(filename: str = "hawkes_hyperparams.csv") -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:

    # Generated random vectors of size PROCESS_NUM
    epsilon = np.random.normal(var.EXPECTED_ACTIVITY, var.STD, var.PROCESS_NUM)
    eta = np.random.uniform(var.MIN_ITV_ETA, var.MAX_ITV_ETA, var.PROCESS_NUM)
    beta = np.random.uniform(var.MIN_ITV_BETA, var.MAX_ITV_BETA, var.PROCESS_NUM)

    # Calculated alpha/mu vectors from beta/eta vectors (alpha = eta because of library exponential formula)
    alpha = eta
    mu = (epsilon / var.TIME_HORIZON) * (1 - eta)

    # Created dictionaries list containing the parameters
    params = list(map(lambda a, b, m: {"alpha": a, "beta": b, "mu": m}, alpha, beta, mu)) 

    # Written parameters to a CSV file 
    write_csv(params, filename=filename) 

    return np.array([alpha, beta, mu], dtype=np.float64).T, alpha, beta, mu


def hyper_params_simulation2(root: int = 0, filename: str = "hawkes_hyperparams.csv") -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:

    # Initialized MPI
    comm = MPI.COMM_WORLD
    rank = comm.Get_rank()
    size = comm.Get_size()

    # Generated random vectors on root process
    if rank == 0:
        epsilon = np.random.normal(var.EXPECTED_ACTIVITY, var.STD, var.PROCESS_NUM)
        eta = np.random.uniform(var.MIN_ITV_ETA, var.MAX_ITV_ETA, var.PROCESS_NUM)
        beta = np.random.uniform(var.MIN_ITV_BETA, var.MAX_ITV_BETA, var.PROCESS_NUM)

    # Broadcast random vectors to all processes
    epsilon = comm.bcast(epsilon, root=root)
    eta = comm.bcast(eta, root=root)
    beta = comm.bcast(beta, root=root)

    # Divided vectors indices among processes
    indices = np.array_split(range(var.PROCESS_NUM), size)

    # Scattered indices to all processes
    indices = comm.scatter(indices, root=root)
    
    # Calculated alpha/mu vectors in parallel
    alpha = np.zeros(var.PROCESS_NUM, dtype=np.float64)
    mu = np.zeros(var.PROCESS_NUM, dtype=np.float64)
    
    alpha[indices] = eta[indices]
    mu[indices] = (epsilon[indices] / var.TIME_HORIZON) * (1 - eta[indices])

    # Reduced alpha/mu vectors from all processes to root process
    comm.Reduce(alpha, np.zeros(var.PROCESS_NUM, dtype=np.float64), op=MPI.SUM, root=root)
    comm.Reduce(mu, np.zeros(var.PROCESS_NUM, dtype=np.float64), op=MPI.SUM, root=root)

    # Written CSV file on the root process
    if rank == 0:
        params = [{"alpha": a, "beta": b, "mu": m} for a, b, m in zip(alpha, beta, mu)]
        write_csv(params, filename=filename)

        return np.array([alpha, beta, mu], dtype=np.float64).T, alpha, beta, mu
    

%timeit hyper_params_simulation()
%timeit hyper_params_simulation2()

341 µs ± 25.9 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)
4 0


ValueError: expecting 1 items, got 4

In [13]:
import numpy as np
from tqdm import tqdm

# données d'entrée
X = np.array([[1, 2], [3, 4], [5, 6], [7, 8], [9, 10]])

# valeur moyenne cible pour chaque colonne
mean = np.array([2, 5])

# écart-type de chaque colonne
std = np.array([0.5, 1])

# bruit aléatoire
noise = np.random.normal(0, 0.1, size=(X.shape[0], X.shape[1]))

# ajouter le bruit à X
X_noisy = X + noise

# mettre à l'échelle X_noisy avec la moyenne et l'écart-type cibles
X_scaled = (X_noisy - np.mean(X_noisy, axis=0)) / np.std(X_noisy, axis=0) * std + mean

with tqdm(total=len(X_scaled), desc="Computing mean") as pbar:
    # Calcul de la moyenne
    mean = 0
    for val in X_scaled:
        mean += val
        pbar.update(1)
        
    mean /= len(X_scaled)
    # Affichage de la description et de la moyenne
    tqdm.write(f"Mean computed: {mean}")


Computing mean: 100%|██████████| 5/5 [00:00<00:00, 1450.01it/s]

Mean computed: [2. 5.]



