### This notebook fits tick's EM model (`HawkesEM`) to the real-world datasets to get a feeling for what their kernels look like. Because the EM algorithm takes a single (long) path, we concatenate the paths of each real-world dataset into one.

In [1]:
import pickle
import numpy as np
import matplotlib.pyplot as plt

from tick.plot import plot_basis_kernels, plot_hawkes_kernels
from tick.hawkes import SimuHawkes, HawkesKernelTimeFunc, HawkesBasisKernels, HawkesEM

In [2]:
def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    with open(path, "rb") as f:
        return pickle.load(f)


# Evaluation datasets, one pickle file per dataset.
mimic = _load_pickle("../../data/evaluation/hawkes/mimic_II.pkl")
mooc = _load_pickle("../../data/evaluation/hawkes/mooc.pkl")
stack = _load_pickle("../../data/evaluation/hawkes/stackOverflow.pkl")
retweet = _load_pickle("../../data/evaluation/hawkes/retweet.pkl")

In [3]:
def get_number_of_marks(types_data):
    """
    Return how many mark ids are needed to index every event type.

    Marks are treated as zero-based integer ids, so the count is the largest
    id found in any path plus one (ids that never occur are still counted).

    Parameters
    ----------
    types_data : sequence of array-like
        One array of mark ids per path.

    Returns
    -------
    int
        Largest mark id over all paths, plus one.

    Raises
    ------
    ValueError
        If ``types_data`` is empty or contains no events at all.
    """
    # Only the maximum of the concatenated ids is needed; de-duplicating
    # (and thereby sorting) them first would not change the result.
    return int(np.concatenate(types_data).max()) + 1


def merge_to_single_path(dataset):
    """
    Merge all paths of a dataset into one long path.

    The paths are laid end to end: every path is shifted by the time at which
    the previously merged path ended. The same shift is applied to all marks
    of a path, so the relative timing between events of different marks
    within a path is preserved.

    Parameters
    ----------
    dataset : mapping
        Must provide ``dataset['types']`` and ``dataset['timestamps']``: two
        parallel sequences holding, for each path, the mark ids and the event
        times (event ``i`` of a path has mark ``types[i]`` and happens at
        ``timestamps[i]``).

    Returns
    -------
    list of np.ndarray
        One float array of merged timestamps per mark id, indexed by mark.
        Marks that never occur get an empty array.

    Notes
    -----
    Assumes the times of each path are non-negative and measured from the
    start of that path (TODO confirm against the data); the merged arrays are
    only sorted if that holds and every path is sorted itself.
    """
    all_types = dataset['types']
    all_times = dataset['timestamps']

    num_marks = get_number_of_marks(all_types)
    chunks_per_mark = [[] for _ in range(num_marks)]

    # End time of everything merged so far; shared by all marks so that
    # events of different marks stay aligned on one common time axis.
    offset = 0.0
    for path_types, path_times in zip(all_types, all_times):
        path_types = np.asarray(path_types)
        path_times = np.asarray(path_times, dtype=float)
        if path_times.size == 0:
            # Nothing to add, and the end of the merged path does not move.
            continue

        shifted = path_times + offset
        for mark in np.unique(path_types):
            chunks_per_mark[int(mark)].append(shifted[path_types == mark])

        # The next path starts where this one ended.
        offset = shifted.max()

    return [
        np.concatenate(chunks) if chunks else np.array([], dtype=float)
        for chunks in chunks_per_mark
    ]

In [4]:
# Fit tick's EM estimator to the merged MIMIC-II path.
em = HawkesEM(kernel_support=1, kernel_size=100, n_threads=8, verbose=False, tol=1e-3)
timestamps = merge_to_single_path(mimic)

# Only the first few marks are fitted (and plotted).
num_marks_to_consider = 6
em.fit(timestamps[:num_marks_to_consider])

# Build the figure without displaying it yet (show=False): plt.show() may
# release the figure (the notebook inline backend closes it after display),
# after which a resize is no longer displayed and plt.savefig would write a
# new, empty figure.
fig = plot_hawkes_kernels(em, show=False)
fig.set_size_inches(15, 10)  # Adjust the size as needed
fig.tight_layout()  # Adjust the layout to prevent overlap

# Save through the figure object, before showing it.
fig.savefig("mimic.png")
plt.show()

In [5]:
# Fit tick's EM estimator to the merged MOOC path.
em = HawkesEM(kernel_support=0.04, kernel_size=100, n_threads=8, verbose=False, tol=1e-3)
timestamps = merge_to_single_path(mooc)

# Only the first few marks are fitted (and plotted).
num_marks_to_consider = 6
em.fit(timestamps[:num_marks_to_consider])

# Build the figure without displaying it yet (show=False): plt.show() may
# release the figure (the notebook inline backend closes it after display),
# after which a resize is no longer displayed and plt.savefig would write a
# new, empty figure.
fig = plot_hawkes_kernels(em, show=False)
fig.set_size_inches(15, 10)  # Adjust the size as needed
fig.tight_layout()  # Adjust the layout to prevent overlap

# Save through the figure object, before showing it.
fig.savefig("mooc.png")
plt.show()

In [6]:
# Fit tick's EM estimator to the merged StackOverflow path.
em = HawkesEM(kernel_support=30, kernel_size=100, n_threads=8, verbose=False, tol=1e-3)
timestamps = merge_to_single_path(stack)

# Only the first few marks are fitted (and plotted).
num_marks_to_consider = 6
em.fit(timestamps[:num_marks_to_consider])

# Build the figure without displaying it yet (show=False): plt.show() may
# release the figure (the notebook inline backend closes it after display),
# after which a resize is no longer displayed and plt.savefig would write a
# new, empty figure.
fig = plot_hawkes_kernels(em, show=False)
fig.set_size_inches(15, 10)  # Adjust the size as needed
fig.tight_layout()  # Adjust the layout to prevent overlap

# Save through the figure object, before showing it.
fig.savefig("stack.png")
plt.show()

In [7]:
# Fit tick's EM estimator to the merged Retweet path.
em = HawkesEM(kernel_support=150, kernel_size=100, n_threads=8, verbose=False, tol=1e-3)
timestamps = merge_to_single_path(retweet)

# Only the first few marks are fitted (and plotted).
num_marks_to_consider = 3
em.fit(timestamps[:num_marks_to_consider])

# Build the figure without displaying it yet (show=False): plt.show() may
# release the figure (the notebook inline backend closes it after display),
# after which a resize is no longer displayed and plt.savefig would write a
# new, empty figure.
fig = plot_hawkes_kernels(em, show=False)
fig.set_size_inches(15, 10)  # Adjust the size as needed
fig.tight_layout()  # Adjust the layout to prevent overlap

# Save through the figure object, before showing it.
fig.savefig("retweet.png")
plt.show()