# Transition matrix generation

In [17]:
def cartesian_product(a, b):
    """Return all ordered pairs ``(x, y)`` with ``x`` taken from *a* and ``y`` from *b*.

    The pairs are tuples, listed in ``itertools.product`` order (the element
    from *b* varies fastest).
    """
    from itertools import product

    return list(product(a, b))


def combine_to_list(c):
    """Run every combination in *c* through ``flatten`` and collect the results.

    ``flatten`` is the project helper from ``SynBPS.simulation.simulation_helpers``;
    in the notebook output each (possibly nested) tuple comes back as one flat
    list of states. The order of *c* is preserved and a new list is returned.
    """
    from SynBPS.simulation.simulation_helpers import flatten

    return [flatten(combination) for combination in c]

def modify_to_absorption(c):
    """Placeholder for an absorption rewrite of the rows in *c*.

    Not implemented: the argument is ignored and a new empty list is always
    returned. The original note indicates the intent was to scan each row for
    an ``E`` entry.
    """
    return []


def modify_rules(parent, states, absorbing_state="E"):
    """Apply the absorption rule to every row of a condition table.

    Once the absorbing state appears in a row, that entry and every later
    entry of the row are replaced by the absorbing state, so a sequence can
    never leave it.

    Parameters
    ----------
    parent : list of sequences
        Rows of states (one sequence of states per row).
    states : iterable
        Leading states to keep. The output is grouped by these, in this
        order; rows whose first element is not in *states* are dropped, and a
        state listed twice contributes its rows twice.
    absorbing_state : optional
        Symbol that marks absorption. Defaults to ``"E"`` (the value that was
        previously hard-coded); pass e.g. ``"!"`` when the state space uses a
        different end marker.

    Returns
    -------
    list of list
        Newly built rows; the rows in *parent* are not modified.
    """
    condprob = []

    for parentstate in states:
        for row in parent:
            # only rows that start with the current parent state belong here
            if row[0] != parentstate:
                continue

            newrow = []
            absorbed = False
            for value in row:
                # the flag latches: everything from the first hit onwards is absorbed
                absorbed = absorbed or value == absorbing_state
                newrow.append(absorbing_state if absorbed else value)

            condprob.append(newrow)

    return condprob

def generate_condprob(parent, states, mode="max_entropy", n_transitions=5):
    """Attach a randomly generated probability to every row of a condition table.

    Rows are grouped by their first element (one group per entry of *states*,
    in that order). For each group a probability vector is drawn according to
    *mode*, normalised to sum to 1 and rounded to 5 decimals.

    Parameters
    ----------
    parent : list of sequences
        Rows of states without a probability column.
    states : iterable
        Leading states that define the groups.
    mode : {"max_entropy", "med_entropy", "min_entropy"}
        ``max_entropy``: every row of a group gets a uniform random weight.
        ``med_entropy``: *n_transitions* randomly chosen rows of a group get a
        random weight, all others 0.
        ``min_entropy``: one randomly chosen row of a group gets probability 1.
    n_transitions : int
        Number of non-zero rows per group in ``med_entropy`` mode. Values
        larger than the group are clamped to the group size.

    Returns
    -------
    list of list
        New rows ``[*row, probability]``; the rows in *parent* are left
        untouched, so the same table can be reused.

    Raises
    ------
    ValueError
        If *mode* is not one of the three supported names.

    Notes
    -----
    Normalisation spans all rows sharing the same first element. For rows
    with more than one conditioning column, the probabilities belonging to a
    single full context (all columns but the last of an input row) therefore
    need not sum to 1 and can all be 0.
    """
    import random

    import numpy as np

    if mode not in ("max_entropy", "med_entropy", "min_entropy"):
        raise ValueError(
            "mode must be 'max_entropy', 'med_entropy' or 'min_entropy', "
            "got {!r}".format(mode)
        )

    condprob = []

    for parentstate in states:
        # all rows starting with the current parent state
        subset = [row for row in parent if row[0] == parentstate]
        n_rows = len(subset)
        if n_rows == 0:
            # nothing to weight; also avoids sampling from an empty population
            continue

        if mode == "max_entropy":
            vec = np.random.random(n_rows)
        else:
            vec = np.zeros(n_rows)
            if mode == "med_entropy":
                # clamp so small groups do not make random.sample raise
                n_selected = min(n_transitions, n_rows)
                for i in random.sample(range(n_rows), n_selected):
                    vec[i] = np.round(np.random.random(1)[0], decimals=8)
            else:
                vec[random.sample(range(n_rows), 1)[0]] = 1

        # normalise; an all-zero vector is left as zeros instead of becoming NaN
        total = np.sum(vec)
        if total > 0:
            vec = vec / total
        probabilities = np.round(vec, decimals=5).tolist()

        # build fresh rows rather than appending to the caller's rows
        condprob.extend(list(row) + [p] for row, p in zip(subset, probabilities))

    return condprob


def create_homc(states, h0, h=2, mode="max_entropy", n_transitions=5):
    """Build a higher-order Markov chain of order *h* with random transition tables.

    For every order ``k = 1..h`` a condition table over ``k + 1`` states is
    generated (cartesian product of the state space, absorption rule, random
    probabilities) and wrapped in a pomegranate ``ConditionalProbabilityTable``
    whose parent is the distribution of order ``k - 1``. Only the tables that
    the requested order needs are generated; each table has
    ``len(states) ** (k + 1)`` rows.

    Parameters
    ----------
    states : list
        State space, including any absorbing state.
    h0 : object
        Initial (order-0) distribution, passed unchanged to pomegranate's
        ``DiscreteDistribution``.
    h : int
        Order of the chain, 1 to 4.
    mode : str
        Entropy mode forwarded to ``generate_condprob``.
    n_transitions : int
        Forwarded to ``generate_condprob`` (used by ``med_entropy``).

    Returns
    -------
    MarkovChain or int
        The pomegranate ``MarkovChain``; for ``h > 4`` a message is printed
        and ``0`` is returned (unchanged legacy behaviour).

    Raises
    ------
    ValueError
        If *h* is smaller than 1 or not a whole number.
    """
    if h > 4:
        print("h > 4 not supported yet - please create an issue on github")
        return 0
    if h < 1 or h != int(h):
        raise ValueError("h must be a whole number between 1 and 4, got {!r}".format(h))

    from SynBPS.simulation.homc_helpers import cartesian_product, combine_to_list, modify_rules, generate_condprob
    from pomegranate import DiscreteDistribution, ConditionalProbabilityTable, MarkovChain

    distributions = [DiscreteDistribution(h0)]

    # rows of the previous order; each pass extends every row by one more state
    rows = states
    for _ in range(int(h)):
        rows = combine_to_list(cartesian_product(rows, states))

        # modify_rules returns new rows, so `rows` stays free of probabilities
        table = generate_condprob(modify_rules(rows, states), states, mode, n_transitions)

        # each table is conditioned on the distribution one order below it
        distributions.append(ConditionalProbabilityTable(table, [distributions[-1]]))

    return MarkovChain(distributions)

In [25]:
def GenerateInitialProb(D=["a","b"], p0_type="regular"):
    """Draw an initial probability vector over the states in *D*.

    With ``p0_type == "min_entropy"`` the result is a one-hot Python list: a
    single randomly chosen state gets probability 1. For any other value one
    uniform(0, 1) weight is drawn per state and the weights are divided by
    their sum, which yields a numpy array that sums to 1.
    """
    import numpy as np
    import pandas as pd  # kept from the original implementation; not used below

    if p0_type == "min_entropy":
        one_hot = np.zeros(len(D))
        hot_index = np.random.randint(0, len(D), 1)[0]
        one_hot[hot_index] = 1
        return one_hot.tolist()

    # one independent uniform draw per state
    weights = [np.random.uniform(0, 1, 1)[0] for _ in D]

    # list / numpy scalar -> numpy array of normalised weights
    return weights / np.sum(weights)

## Dummy data test

In [27]:
states = ["a","b"]
mode = "med_entropy"
n_transitions = 1



In [81]:
h0 = GenerateInitialProb(D=states, p0_type="regular")
h0

array([0.81051236, 0.18948764])

In [30]:
######################################
# P1

#for each link
c = cartesian_product(states, states)
d = combine_to_list(c)

#final steps
g = modify_rules(d, states)
p1_input = generate_condprob(g, states, mode, n_transitions)

######################################
# P2

#for each link
c = cartesian_product(states, states)
d = combine_to_list(c)

e = cartesian_product(c, states)
f = combine_to_list(e)

#final steps
g = modify_rules(f, states)
p2_input = generate_condprob(g, states, mode, n_transitions)

In [32]:
def DiscreteDistribution(h0):
    """Dummy stand-in used in this notebook: return *h0* unchanged."""
    return h0
    
def ConditionalProbabilityTable(p1_input, p0):
    """Dummy stand-in used in this notebook: return the table *p1_input* unchanged.

    The parent argument *p0* is accepted but ignored.
    """
    return p1_input

In [33]:

######### Creating the HOMC from the matrices/tables

p0 = DiscreteDistribution(h0)
 
p1 = ConditionalProbabilityTable(p1_input, [p0])
 
p2 = ConditionalProbabilityTable(p2_input, [p1])

In [34]:
def MarkovChain(array):
    """Dummy stand-in for a Markov chain object: hand *array* back unchanged."""
    return array

In [35]:

HOMC = MarkovChain([p0, p1, p2])

In [36]:
HOMC

[array([0.91251687, 0.08748313]),
 [['a', 'a', 1.0], ['a', 'b', 0.0], ['b', 'a', 0.0], ['b', 'b', 1.0]],
 [['a', 'a', 'a', 0.0],
  ['a', 'a', 'b', 0.0],
  ['a', 'b', 'a', 0.0],
  ['a', 'b', 'b', 1.0],
  ['b', 'a', 'a', 0.0],
  ['b', 'a', 'b', 0.0],
  ['b', 'b', 'a', 0.0],
  ['b', 'b', 'b', 1.0]]]

# Moving from list to dict representation

In [55]:
#for each link
c = cartesian_product(states, states)
c

[('a', 'a'), ('a', 'b'), ('b', 'a'), ('b', 'b')]

In [56]:
d = combine_to_list(c)
d

[['a', 'a'], ['a', 'b'], ['b', 'a'], ['b', 'b']]

In [57]:
g = modify_rules(d, states)
g

[['a', 'a'], ['a', 'b'], ['b', 'a'], ['b', 'b']]

In [58]:
p1_input = generate_condprob(g, states, mode, n_transitions)
p1_input

[['a', 'a', 1.0], ['a', 'b', 0.0], ['b', 'a', 0.0], ['b', 'b', 1.0]]

## How do we do the same thing with tuples?

In [None]:
def generate_condprob(parent, states, mode="max_entropy", n_transitions=5):
    """Attach a randomly generated probability to every row of a condition table.

    Rows are grouped by their first element (one group per entry of *states*,
    in that order). For each group a probability vector is drawn according to
    *mode*, normalised to sum to 1 and rounded to 5 decimals.

    Parameters
    ----------
    parent : list of sequences
        Rows of states without a probability column. Lists and tuples both
        work.
    states : iterable
        Leading states that define the groups.
    mode : {"max_entropy", "med_entropy", "min_entropy"}
        ``max_entropy``: every row of a group gets a uniform random weight.
        ``med_entropy``: *n_transitions* randomly chosen rows of a group get a
        random weight, all others 0.
        ``min_entropy``: one randomly chosen row of a group gets probability 1.
    n_transitions : int
        Number of non-zero rows per group in ``med_entropy`` mode. Values
        larger than the group are clamped to the group size.

    Returns
    -------
    list of list
        New rows ``[*row, probability]``; the rows in *parent* are left
        untouched, so the same table can be reused.

    Raises
    ------
    ValueError
        If *mode* is not one of the three supported names.

    Notes
    -----
    Normalisation spans all rows sharing the same first element. For rows
    with more than one conditioning column, the probabilities belonging to a
    single full context (all columns but the last of an input row) therefore
    need not sum to 1 and can all be 0.
    """
    import random

    import numpy as np

    if mode not in ("max_entropy", "med_entropy", "min_entropy"):
        raise ValueError(
            "mode must be 'max_entropy', 'med_entropy' or 'min_entropy', "
            "got {!r}".format(mode)
        )

    condprob = []

    for parentstate in states:
        # all rows starting with the current parent state
        subset = [row for row in parent if row[0] == parentstate]
        n_rows = len(subset)
        if n_rows == 0:
            # nothing to weight; also avoids sampling from an empty population
            continue

        if mode == "max_entropy":
            vec = np.random.random(n_rows)
        else:
            vec = np.zeros(n_rows)
            if mode == "med_entropy":
                # clamp so small groups do not make random.sample raise
                n_selected = min(n_transitions, n_rows)
                for i in random.sample(range(n_rows), n_selected):
                    vec[i] = np.round(np.random.random(1)[0], decimals=8)
            else:
                vec[random.sample(range(n_rows), 1)[0]] = 1

        # normalise; an all-zero vector is left as zeros instead of becoming NaN
        total = np.sum(vec)
        if total > 0:
            vec = vec / total
        probabilities = np.round(vec, decimals=5).tolist()

        # build fresh rows rather than appending to the caller's rows
        condprob.extend(list(row) + [p] for row, p in zip(subset, probabilities))

    return condprob

In [49]:
markov_chain = [[['a', 'a', 1.0],
                 ['a', 'b', 0.0],
                 ['b', 'a', 0.0],
                 ['b', 'b', 1.0]],

                [['a', 'a', 'a', 0.0],
                 ['a', 'a', 'b', 0.0],
                 ['a', 'b', 'a', 0.0],
                 ['a', 'b', 'b', 1.0],
                 ['b', 'a', 'a', 0.0],
                 ['b', 'a', 'b', 0.0],
                 ['b', 'b', 'a', 0.0],
                 ['b', 'b', 'b', 1.0]]]

{('a', 'a', 1.0): {'a': 'b', ('a', 'b', 0.0): {'b': 'a', ('b', 'a', 0.0): {'b': 'b'}}}, ('a', 'a', 'a', 0.0): {'a': 'a', ('a', 'a', 'b', 0.0): {'a': 'b', ('a', 'b', 'a', 0.0): {'a': 'b', ('a', 'b', 'b', 1.0): {'b': 'a', ('b', 'a', 'a', 0.0): {'b': 'a', ('b', 'a', 'b', 0.0): {'b': 'b', ('b', 'b', 'a', 0.0): {'b': 'b'}}}}}}}}


In [54]:
test = {"a":0.1,
       ("a","b"):0.9}

test[("a","b")]

0.9

## A simple dict-based data format

In [69]:
markov_chain = {
    1: {
        ('a'): {'a': 1.0, 'b': 0.0},
        ('b'): {'a': 0.0, 'b': 1.0}
    },
    2: {
        ('a', 'a'): {'a': 0.0, 'b': 0.0},
        ('a', 'b'): {'a': 0.0, 'b': 1.0},
        ('b', 'a'): {'a': 0.0, 'b': 0.0},
        ('b', 'b'): {'a': 0.0, 'b': 1.0}
    }
}

# transitions from a sequence with A only:
markov_chain[1][("a")]

# transitions from a sequence A A:
markov_chain[2][("a","a")]

{'a': 0.0, 'b': 0.0}

In [79]:
# Existing format created by the transition matrix algorithms:

markov_chain = [[['a', 'a', 1.0],
                 ['a', 'b', 0.0],
                 ['b', 'a', 0.0],
                 ['b', 'b', 1.0]],

                [['a', 'a', 'a', 0.0],
                 ['a', 'a', 'b', 0.0],
                 ['a', 'b', 'a', 0.0],
                 ['a', 'b', 'b', 1.0],
                 ['b', 'a', 'a', 0.0],
                 ['b', 'a', 'b', 0.0],
                 ['b', 'b', 'a', 0.0],
                 ['b', 'b', 'b', 1.0]]]

# A function to convert it into a nested dictionary:

def transform_markov_chain(markov_chain_list):
    """Convert list-based transition tables into a nested lookup dictionary.

    Each table is a list of rows ``[*context, next_state, probability]``. The
    result maps ``order -> context tuple -> next_state -> probability``, where
    the order is the number of context columns, taken from the table's first
    row.

    Empty tables carry no order and are skipped. If a table contains the same
    ``(context, next_state)`` pair more than once, the last row wins; if two
    tables have the same order, the later table replaces the earlier one.
    """
    markov_chain_dict = {}

    for table in markov_chain_list:
        if len(table) == 0:
            # no first row to read the order from
            continue

        order = len(table[0]) - 2
        transitions = {}
        for row in table:
            context = tuple(row[:-2])
            transitions.setdefault(context, {})[row[-2]] = row[-1]

        markov_chain_dict[order] = transitions

    return markov_chain_dict

# Output

HOMC = transform_markov_chain(markov_chain)
#print(HOMC)

# Retrieve probability of a => a and a => b:
HOMC[1][("a",)]

# Retrieve probability of a => a => a and a => a => b
HOMC[2][("a","a")]

{'a': 0.0, 'b': 0.0}

In [76]:
"""
HOMC = {1: {('a',): {'a': 1.0, 
                      'b': 0.0}, 
             ('b',): {'a': 0.0, 
                      'b': 1.0}},
 
         2: {('a', 'a'): {'a': 0.0, 
                          'b': 0.0},
              ('a', 'b'): {'a': 0.0, 
                           'b': 1.0},
              ('b', 'a'): {'a': 0.0, 
                           'b': 0.0},
              ('b', 'b'): {'a': 0.0, 
                           'b': 1.0}}}

HOMC[1][("a",)]

HOMC[2][("a","a")]
"""

{'a': 0.0, 'b': 0.0}

## Sampling from a dict-based data format

In [86]:
import numpy as np

In [199]:
# state space for the initial probabilities
D = ["a","b","c","d"]

# statespace for conditional prob
D_abs = D.copy()
D_abs.append("!")

mode = ["min_entropy","med_entropy","max_entropy"][0]
n_transitions = 2

# event log placeholder
sigma = []

# Unconditional probabilities, excluding the absorbing state
P0 = GenerateInitialProb(D=D, 
                         p0_type="regular")

# copy for use in higher order probability tables
states = D_abs.copy()

######################################
# P1

#for each link
c = cartesian_product(states, states)
d = combine_to_list(c)

#final steps
g = modify_rules(d, states)
P1 = generate_condprob(g, states, mode, n_transitions)

######################################
# P2

#for each link
c = cartesian_product(states, states)
d = combine_to_list(c)

e = cartesian_product(c, states)
f = combine_to_list(e)

#final steps
g = modify_rules(f, states)
P2 = generate_condprob(g, states, mode, n_transitions)

######################################

# final probability tables
HOMC = [P1, P2]

HOMC

[[['a', 'a', 1.0],
  ['a', 'b', 0.0],
  ['a', 'c', 0.0],
  ['a', 'd', 0.0],
  ['a', '!', 0.0],
  ['b', 'a', 0.0],
  ['b', 'b', 0.0],
  ['b', 'c', 1.0],
  ['b', 'd', 0.0],
  ['b', '!', 0.0],
  ['c', 'a', 0.0],
  ['c', 'b', 0.0],
  ['c', 'c', 0.0],
  ['c', 'd', 1.0],
  ['c', '!', 0.0],
  ['d', 'a', 0.0],
  ['d', 'b', 0.0],
  ['d', 'c', 1.0],
  ['d', 'd', 0.0],
  ['d', '!', 0.0],
  ['!', 'a', 0.0],
  ['!', 'b', 0.0],
  ['!', 'c', 1.0],
  ['!', 'd', 0.0],
  ['!', '!', 0.0]],
 [['a', 'a', 'a', 0.0],
  ['a', 'a', 'b', 0.0],
  ['a', 'a', 'c', 1.0],
  ['a', 'a', 'd', 0.0],
  ['a', 'a', '!', 0.0],
  ['a', 'b', 'a', 0.0],
  ['a', 'b', 'b', 0.0],
  ['a', 'b', 'c', 0.0],
  ['a', 'b', 'd', 0.0],
  ['a', 'b', '!', 0.0],
  ['a', 'c', 'a', 0.0],
  ['a', 'c', 'b', 0.0],
  ['a', 'c', 'c', 0.0],
  ['a', 'c', 'd', 0.0],
  ['a', 'c', '!', 0.0],
  ['a', 'd', 'a', 0.0],
  ['a', 'd', 'b', 0.0],
  ['a', 'd', 'c', 0.0],
  ['a', 'd', 'd', 0.0],
  ['a', 'd', '!', 0.0],
  ['a', '!', 'a', 0.0],
  ['a', '!', 'b', 0.

In [200]:
states

['a', 'b', 'c', 'd', '!']

### Convert to dictionary

In [201]:
P_k = transform_markov_chain(HOMC)

# Show the order of the HOMC
print(P_k.keys())

# Retrieve probability of a => a and a => b:
P_k[1][("a",)] #order = 1

# Retrieve probability of a => a => a and a => a => b
P_k[2][("a","a")] #order = 2

dict_keys([1, 2])


{'a': 0.0, 'b': 0.0, 'c': 1.0, 'd': 0.0, '!': 0.0}

### first event sampling

In [202]:
e_t = np.random.choice(D, #len(D), #
                       size=1, 
                       replace=False, 
                       p=P0)[0]
# add first event to trace
sigma.append(e_t)
sigma

['d']

### second event sampling

In [203]:
# this should be conditional on the length of the sequence;
# if length K-1, use K'th order probability table
# if length is > K-1, subset sequence to the last K-1 events, and use K'th order probability table

#max(P_k.keys())

### Full sampling procedure

In [204]:
sigma = []

In [207]:
# Sample one trace (sigma) from the dict-based chain P_k.
# Relies on notebook globals defined in earlier cells: np, D, P0 and P_k.
sigma = []

# hard cap of 50 iterations, so the trace can never grow without bound
for i in range(0,50):
    #stop the loop when "!" has been reached
    if "!" in set(sigma):
        break
    
    # determine tracelength
    # NOTE: tracelen is read once per pass, before anything is appended, and the
    # three conditions below are mutually exclusive, so exactly one branch runs.
    tracelen = len(sigma)
    print(tracelen)
    
    if tracelen == 0:
        #sample first event from P0
        e_t = np.random.choice(D,
                           size=1, 
                           replace=False, 
                           p=P0)[0]
        # add first event to trace
        sigma.append(e_t)
    
    # trace length between 1 and the highest order stored in P_k:
    # use the table whose order equals the current trace length
    if tracelen > 0 and tracelen <= max(P_k.keys()):
        # retrieve the order to subset P_k from
        order = len(sigma)
    
        # retrieve the probability distribution
        # (the whole trace so far is the lookup key)
        prob_dist = P_k[order][tuple(sigma)]
    
        # Extract elements and their associated probabilities
        elements = list(prob_dist.keys())
        probabilities = list(prob_dist.values())
        
        # Use np.random.choice with the probabilities
        # NOTE(review): np.random.choice expects p to sum to 1; a context whose
        # probabilities are all 0.0 (see the prob_dist printed in the next cell)
        # will presumably raise here - confirm against the numpy docs.
        e_t = np.random.choice(elements,
                               size=1, 
                               replace=False, 
                               p=probabilities)
        
        # add sampled event to trace
        sigma.append(e_t[0])
    
    # trace is longer than the highest order in P_k: end the trace
    # (the sliding-window variant is kept below as an inert string for reference)
    if tracelen > max(P_k.keys()):
        """
        #in this case we keep using the last transition matrix
        order = max(P_k.keys())
    
        #we use the last K events as a sliding window
        sigma_last = sigma[-order:].copy()
    
         # retrieve the probability distribution
        prob_dist = P_k[order][tuple(sigma_last)]
    
        # Extract elements and their associated probabilities
        elements = list(prob_dist.keys())
        probabilities = list(prob_dist.values())
        
        # Use np.random.choice with the probabilities
        e_t = np.random.choice(elements,
                               size=1, 
                               replace=False, 
                               p=probabilities)
        
        # add sampled event to trace
        sigma.append(e_t[0])
        """

        #### Alternate approach:
        # pro: this is much more intuitive
        # con: sequences _could_ be longer with a sliding window, and this might use up more memory
        
        # truncate/end everything after K
        e_t = "!"
        
        # add sampled event to trace
        sigma.append(e_t)
        
# display the finished trace
sigma

0
1
2
3


['a', 'a', 'c', '!']

In [206]:
probabilities
prob_dist

{'a': 0.0, 'b': 0.0, 'c': 0.0, 'd': 0.0, '!': 0.0}

In [190]:
sigma

['a', 'b']

In [189]:
P2

[['a', 'a', 'a', 0.0],
 ['a', 'a', 'b', 0.0],
 ['a', 'a', '!', 0.0],
 ['a', 'b', 'a', 0.75945],
 ['a', 'b', 'b', 0.0],
 ['a', 'b', '!', 0.0],
 ['a', '!', 'a', 0.0],
 ['a', '!', 'b', 0.24055],
 ['a', '!', '!', 0.0],
 ['b', 'a', 'a', 0.60869],
 ['b', 'a', 'b', 0.0],
 ['b', 'a', '!', 0.39131],
 ['b', 'b', 'a', 0.0],
 ['b', 'b', 'b', 0.0],
 ['b', 'b', '!', 0.0],
 ['b', '!', 'a', 0.0],
 ['b', '!', 'b', 0.0],
 ['b', '!', '!', 0.0],
 ['!', 'a', 'a', 0.0],
 ['!', 'a', 'b', 0.0],
 ['!', 'a', '!', 0.0],
 ['!', 'b', 'a', 0.0],
 ['!', 'b', 'b', 0.0],
 ['!', 'b', '!', 0.55676],
 ['!', '!', 'a', 0.0],
 ['!', '!', 'b', 0.44324],
 ['!', '!', '!', 0.0]]

# Process with memory function (alg 7)

In [1]:
# -*- coding: utf-8 -*-
"""
Created on Fri Nov  5 20:10:31 2021

@author: Mike
"""

def Process_with_memory(D = ["a","b","c","d","e"], 
                                    mode = ["min_entropy",
                                            "max_entropy",
                                            "med_entropy"][2], 
                                    num_traces=2, 
                                    sample_len=100,
                                    K=2,
                                    num_transitions=5):
    """Generate an event log by sampling traces from a random higher-order Markov chain.

    Part 1 builds the model: the state space *D* is extended with the
    absorbing state ``"!"``, initial probabilities are drawn with
    ``GenerateInitialProb`` and a chain of order *K* is created with
    ``create_homc``. Part 2 samples from the chain until *num_traces* traces
    that end in the absorbing state have been collected.

    Parameters
    ----------
    D : list
        State names, without the absorbing state. Not modified.
    mode : str
        Entropy mode, used for both the initial probabilities and the chain.
    num_traces : int
        Number of traces to collect; values <= 0 yield an empty log.
    sample_len : int
        Sample length of the first attempt per trace. Attempt ``t > 1`` asks
        for ``int(sample_len) * t * 10`` events.
    K : int
        Order of the chain.
    num_transitions : int
        Forwarded to ``create_homc`` as ``n_transitions``.

    Returns
    -------
    (list of list of str, object)
        The event log, with the absorbing state renamed to ``"END"``, and the
        chain object returned by ``create_homc``.

    Notes
    -----
    A trace is only kept when it holds more than one event. If the chain can
    never produce such a trace, the outer loop does not terminate.
    """
    from SynBPS.simulation.alg2_initial_probabilities import GenerateInitialProb
    from SynBPS.simulation.homc_helpers import create_homc

    max_trials = 10

    ##### Part 1: Generate the transition probabilities

    # state space including the absorbing state (copy, so D stays untouched)
    D_abs = list(D) + ["!"]

    # initial probabilities as a {state: probability} mapping
    probabilities = GenerateInitialProb(D_abs, p0_type=mode)
    P0 = dict(zip(D_abs, probabilities))

    # create the markov chain
    HOMC = create_homc(D_abs, P0, h=K, mode=mode, n_transitions=num_transitions)

    ##### Part 2: Draw from the distributions

    # event-log container
    Theta = []

    # "<" instead of "!=" so a negative num_traces cannot loop forever
    while len(Theta) < num_traces:
        Q = []

        # Up to max_trials attempts, each with a longer sample. The previous
        # version drew an 11th (largest) sample and always threw it away.
        for trial in range(1, max_trials + 1):
            if trial > 1:
                Q = HOMC.sample(int(sample_len) * (trial * 10))
            else:
                Q = HOMC.sample(sample_len)

            if "!" in Q:
                # keep everything up to and including the first absorption
                Q = Q[:Q.index('!') + 1]
                break
        else:
            # for/else: reached only when no attempt hit the absorbing state
            Q = []
            print("Sequence did not reach absorbing state after 10 trials. Trying again.")

        # recode the name of the termination event
        Q = [w.replace('!', 'END') for w in Q]

        # traces consisting of the termination event alone are discarded
        if len(Q) > 1:
            Theta.append(Q)

    print("generated traces:", len(Theta))
    return Theta, HOMC



In [4]:
Event_log, HOMC = Process_with_memory(D = ["a","b","c","d","e"], 
                                    mode = ["min_entropy",
                                            "max_entropy",
                                            "med_entropy"][2], 
                                    num_traces=2, 
                                    sample_len=100,
                                    K=2,
                                    num_transitions=5)

generated traces: 2


In [14]:
Event_log

[['b', 'b', 'b', 'b', 'b', 'b', 'END'], ['d', 'a', 'END']]

In [8]:
HOMC.sample(2)

['b', 'd']

In [10]:
#HOMC.distributions

# Functionality needed:

1. distribution:
    - Representing the HOMC using conditional probability tables (P0, P1, P2, P....)
    - This will be done using nested dictionaries, from which probabilities are more easily looked up
2. Sampling
    - Predicting from P0 when no sequence
    - Predicting from P1 when one event
    - Predicting from P2 when two events
    - Predicting from P_K when the sequence hasn't ended yet (sliding window: predict from the last K-1 events)