In [1]:
import math
from matplotlib import cm
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import  TwoSlopeNorm
import sys, os
import logging
#logging.basicConfig(level=logging.INFO)

In [2]:
current = os.path.dirname(os.path.realpath(sys.argv[0]))
parent = os.path.dirname(current)
sys.path.append(parent)

from MSig import Motif, NullModel

In [3]:
#multivar_time_series = np.random.uniform(0, 1, (100, 1000))
#np.savetxt("../data/synthetic/multivar_time_series.csv", multivar_time_series, delimiter=",")

In [4]:
#read the data
multivar_time_series = np.loadtxt("../data/synthetic/multivar_time_series.csv", delimiter=",")
delta_thresholds = [0.1 ,0.3]
number_vars = np.arange(1, 11, 1)
motif_lenghts = np.arange(2, 11,1)
number_matches = [1,10,20,30,40,50,60,70,80,90,100]
dtypes = [float] * len(multivar_time_series)
model = NullModel(multivar_time_series, dtypes=dtypes ,model="kde")

In [5]:
#heatmap 2d number of variables vs number of matches

fig, axes = plt.subplots(len(motif_lenghts), len(delta_thresholds), figsize=(12, 15), sharex=True, sharey=True)

for i, m in enumerate(motif_lenghts):
    for j, delta in enumerate(delta_thresholds):
        max_possible_matches = len(multivar_time_series[0]) - m + 1
        data = np.zeros((len(number_vars), len(number_matches)))
        for vars_index, k in enumerate(number_vars):
            vars = np.arange(k)
            motif_subsequence = multivar_time_series[vars, 0:0 + m]
            pvalues =[]
            for n_matches in number_matches:
                motif = Motif(motif_subsequence, vars, np.repeat(delta, k), n_matches)
                p = motif.set_pattern_probability(model, vars_indep=True)
                pvalue = motif.set_significance(max_possible_matches, number_vars, idd_correction=False)
                pvalues.append(pvalue)
            data[vars_index, :] = pvalues
        
        sns.heatmap(data, cmap="coolwarm", ax=axes[i, j], cbar=False, norm=TwoSlopeNorm(vcenter=0.01, vmin=0, vmax=1))
        bbox_props = dict(boxstyle="round,pad=0.3", fc="white", ec="black", lw=0.5)
        axes[i, j].text(0.925, 1.2, f'$s$: {m}', ha="center", va="center", size=15, bbox=bbox_props, transform=axes[i, j].transAxes)
        if i == 0:
            axes[i, j].set_title(f'$\delta$: {delta}', size=30)

        #set label only in the first and last ytick
        axes[i, j].set_xticklabels(number_matches, size=15)
        axes[i, j].set_yticks([0+0.5, len(number_vars) - 1 + 0.5], labels=[number_vars[0], number_vars[-1]], size=15)  


#add cbar in the middle right of the figure with the norm
cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.7])
cbar = fig.colorbar(cm.ScalarMappable(norm=TwoSlopeNorm(vmin=0, vcenter=0.01, vmax=1),cmap='coolwarm'), cax=cbar_ax, ticks=[0, 0.01, 0.2, 0.4, 0.6, 0.8, 1] ,label='$p$-value' )
cbar.ax.tick_params(labelsize=15)
cbar.ax.yaxis.label.set_size(30)

fig.supxlabel('Number of Matches ($k$)', size=30)
fig.supylabel('Number of Variables ($q$)', size=30)

plt.tight_layout(rect=[0, 0, 0.9, 0.95])
plt.savefig('../results/synthetic/p_value_heatmap_grid_numberofvariables_numbermatches.pdf', bbox_inches='tight')
plt.close()

  plt.tight_layout(rect=[0, 0, 0.9, 0.95])


In [8]:
#tamanho do padrao vs numero matches
fig, axes = plt.subplots(len(number_vars), len(delta_thresholds), figsize=(12, 15), sharex=True, sharey=True)

for i, q in enumerate(number_vars):
    for j, coherence in enumerate(delta_thresholds):
        max_possible_matches = len(multivar_time_series[0]) - q + 1
        data = np.zeros((len(motif_lenghts), len(number_matches)))
        for m_index, m in enumerate(motif_lenghts):
            vars = np.arange(q)
            motif_subsequence = multivar_time_series[vars, 0:0 + m]
            pvalues =[]
            for n_matches in number_matches:
                motif = Motif(motif_subsequence, vars, np.repeat(coherence, q), n_matches)
                p = motif.set_pattern_probability(model, vars_indep=True)
                pvalue = motif.set_significance(max_possible_matches, number_vars, idd_correction=False)
                pvalues.append(pvalue)
            data[m_index, :] = pvalues

        sns.heatmap(data, cmap="coolwarm", ax=axes[i, j], cbar=False, norm=TwoSlopeNorm(vcenter=0.01, vmin=0, vmax=1))

        #add a legend saying the value of k and the coherence top right
        bbox_props = dict(boxstyle="round,pad=0.3", fc="white", ec="black", lw=0.5)
        axes[i, j].text(0.925, 1.2, f'q: {q}', ha="center", va="center", size=15, bbox=bbox_props, transform=axes[i, j].transAxes)
        if i == 0:
            axes[i, j].set_title(f'$\delta$: {coherence}', size=30)

        #set label only in the first and last ytick
        axes[i, j].set_xticklabels(number_matches, size=15)
        axes[i, j].set_yticks([0 +0.5, len(motif_lenghts) - 1 + 0.5], labels=[motif_lenghts[0], motif_lenghts[-1]], size=15)



#add cbar in the middle right of the figure
cbar_ax = fig.add_axes([0.92, 0.15, 0.02, 0.7])
cbar = fig.colorbar(cm.ScalarMappable(norm=TwoSlopeNorm(vmin=0, vcenter=0.01, vmax=1),cmap='coolwarm'), cax=cbar_ax, ticks=[0, 0.01, 0.2, 0.4, 0.6, 0.8, 1], label='$p$-value')
cbar.ax.tick_params(labelsize=15)
cbar.ax.yaxis.label.set_size(30)

#add x y labels in the center of the axis
fig.supxlabel('Number of Matches ($k$)', size=30)
fig.supylabel('Motif Length ($s$)', size=30)

plt.tight_layout(rect=[0, 0, 0.9, 0.95])


plt.savefig('../results/synthetic/p_value_heatmap_grid_motiflength_vs_numbermatches.pdf', bbox_inches='tight')
plt.close()

  plt.tight_layout(rect=[0, 0, 0.9, 0.95])
