In [1]:
# Import msig from parent directory
import sys
import os
sys.path.insert(0, os.path.dirname(os.getcwd()))

from msig import Motif, NullModel
import numpy as np


In [2]:
# Toy dataset: four series of 15 observations each
# (one integer-valued, one real-valued, two categorical).
ts1 = [1, 3, 3, 5, 5, 2, 3, 3, 5, 5, 3, 3, 5, 4, 4]
ts2 = [4.3, 4.5, 2.6, 3.0, 3.0, 1.7, 4.9, 2.9, 3.3, 1.9, 4.9, 2.5, 3.1, 1.8, 0.3]
ts3 = ["A", "D", "B", "D", "A", "A", "A", "C", "C", "B", "D", "D", "C", "A", "A"]
ts4 = ["T", "L", "T", "Z", "Z", "T", "L", "T", "Z", "T", "L", "T", "Z", "L", "L"]

# Cast every series to its own dtype, then stack them as rows.
# Because the rows have mixed dtypes, np.stack promotes the result to a single
# string dtype, so the numeric values are held as text inside `data`.
data = np.stack([
    np.asarray(series, dtype=kind)
    for series, kind in zip((ts1, ts2, ts3, ts4), (int, float, str, str))
])

# One row per variable, one column per observation: m=4, n=15.
m, n = data.shape
m, n

(4, 15)

In [3]:
# Build the null model from the full dataset, passing one dtype per variable
# (int, float, str, str) and selecting the "empirical" model.
model = NullModel(data, dtypes=[int, float, str, str], model="empirical")

# Motif under study: length $p=3$, three matches (starting at indices 1, 6 and 10),
# spanning the first, second and fourth variables (row indices 0, 1 and 3),
# and satisfying a maximum deviation threshold of $\delta = 0.5$.
# NOTE(review): `vars` shadows the Python builtin of the same name; it is kept
# because the motif-creation cell refers to it by this name.
vars = np.array([0, 1, 3])

# Take the first match (columns 1..3) restricted to the selected variables.
first_match = slice(1, 4)
multivar_sequence = data[vars, first_match]
multivar_sequence

array([['3', '3', '5'],
       ['4.5', '2.6', '3.0'],
       ['L', 'T', 'Z']], dtype='<U32')

In [4]:
# The pattern covers variables [0, 1, 3], so exactly three deviation thresholds
# are supplied, in that order: 0 for variable 0, 0.5 for variable 1 and
# 0 for variable 3.
delta_thresholds = np.array([0, 0.5, 0])
motif = Motif(multivar_sequence, vars, delta_thresholds, n_matches=3)

# Pattern probability w.r.t. the null model, requested with vars_indep=True.
probability = motif.set_pattern_probability(model, vars_indep=True)
probability


0.0020170565623167215

In [5]:
# Motif length = number of columns in the extracted pattern.
s = multivar_sequence.shape[1]

# A window of length s can start at n - s + 1 positions in a series of n
# observations; this is the maximum number of possible matches.
max_possible_matches = n - s + 1

pvalue = motif.set_significance(
    max_possible_matches,
    data_n_variables=m,
    idd_correction=False,
)
pvalue

2.3117896159300944e-06