In [41]:
import xml.etree.ElementTree as ET
import datetime
from itertools import product

def _parse_xes_timestamp(raw):
    """Convert the text of an XES ``date`` attribute into a naive ``datetime``.

    A trailing UTC designator (``Z``) or numeric offset (``+hh:mm`` /
    ``-hh:mm``) is dropped, not applied, so the wall-clock time written in the
    file is returned unchanged. Fractional seconds are accepted.

    Raises:
        ValueError: if what remains is not ``YYYY-MM-DDTHH:MM:SS[.ffffff]``.
    """
    # Only search for an offset after the "T": the date part itself contains
    # "-" characters, so splitting the whole string on "-" would destroy it.
    date_part, separator, time_part = raw.partition("T")
    for marker in ("+", "-", "Z"):
        time_part = time_part.split(marker)[0]
    naive = date_part + separator + time_part

    fmt = "%Y-%m-%dT%H:%M:%S.%f" if "." in time_part else "%Y-%m-%dT%H:%M:%S"
    return datetime.datetime.strptime(naive, fmt)


def read_from_file(filename):
    """Read an XES event log into nested dictionaries.

    The file is streamed with ``ElementTree.iterparse``. The ``value`` of the
    first ``string`` element that directly follows a ``trace`` opening tag is
    used as the case id. Inside every ``event`` element the ``string``,
    ``int`` and ``date`` children are stored under their ``key`` attribute.

    Args:
        filename: path of the XES file to read.

    Returns:
        dict: ``{case_id: {event_index: {attribute_key: value}}}`` where
        ``event_index`` counts from 0 within each case, ``int`` attributes are
        converted with ``int()`` and ``date`` attributes become naive
        ``datetime.datetime`` objects (timezone suffix discarded).

    Raises:
        ValueError: if a ``date`` or ``int`` attribute cannot be converted.
    """
    _EVENT_START = 'start'
    _EVENT_END = 'end'

    TAG_STRING = 'string'
    TAG_DATE = 'date'
    TAG_INT = 'int'
    TAG_EVENT = 'event'
    TAG_TRACE = 'trace'
    KEY = 'key'
    VALUE = 'value'

    log = {}

    prev_elem = None
    curr_case = None
    is_event = False
    event_no = 0

    # The context manager releases the file handle even when parsing fails.
    with open(filename, "rb") as f:
        for tree_event, curr_elem in ET.iterparse(f, events=[_EVENT_START, _EVENT_END]):
            # The very first element has no predecessor to compare against.
            if prev_elem is None:
                prev_elem = curr_elem
                continue

            # Tags are matched with endswith() so namespaced tags
            # ("{uri}trace") are recognised as well.
            if prev_elem.tag.endswith(TAG_TRACE) and curr_elem.tag.endswith(TAG_STRING):
                event_no = 0
                curr_case = curr_elem.get(VALUE)
                log[curr_case] = {}

            # Open or close the event window.
            if curr_elem.tag.endswith(TAG_EVENT):
                if tree_event == _EVENT_START:
                    log[curr_case][event_no] = {}
                    is_event = True

                if tree_event == _EVENT_END:
                    event_no += 1
                    is_event = False

            if is_event:
                # Children are visited on both their start and end events; the
                # second visit simply stores the same value again.
                attributes = log[curr_case][event_no]
                temp_key = curr_elem.get(KEY)
                temp_value = curr_elem.get(VALUE)

                if curr_elem.tag.endswith(TAG_STRING):
                    attributes[temp_key] = temp_value
                if curr_elem.tag.endswith(TAG_INT):
                    attributes[temp_key] = int(temp_value)
                if curr_elem.tag.endswith(TAG_DATE):
                    attributes[temp_key] = _parse_xes_timestamp(temp_value)

            prev_elem = curr_elem

    return log


def dependency_graph(log):
    """Count how often one activity is directly followed by another.

    Args:
        log: mapping ``case -> {event_key: event}`` where every event is a
            mapping containing a ``'concept:name'`` entry. Events are visited
            in the iteration order of each case mapping.

    Returns:
        dict: ``{activity: {next_activity: count}}``. Activities that are
        never followed by anything do not appear as outer keys.
    """
    follows = {}

    # Register every activity first so the outer key order follows the order
    # of first appearance in the log.
    for events in log.values():
        for event in events.values():
            follows.setdefault(event['concept:name'], {})

    # Walk each case as a sequence of consecutive (before, after) names.
    for events in log.values():
        names = [event['concept:name'] for event in events.values()]
        for before, after in zip(names, names[1:]):
            if before is None:
                # An unnamed predecessor never contributes an edge.
                continue
            successors = follows[before]
            successors[after] = successors.get(after, 0) + 1

    # Keep only the activities that have at least one successor.
    return {name: successors for name, successors in follows.items() if successors}

class Place:
    """A named place that carries a token count."""

    def __init__(self, name):
        """Create an empty place called ``name`` with no recorded edges."""
        self.name = name
        self.in_edges, self.out_edges = set(), set()
        self.tokens = 0

    def add_mark(self):
        """Put one more token on this place."""
        self.tokens = self.tokens + 1

    def remove_mark(self):
        """Take one token away; the count is not clamped at zero."""
        self.tokens = self.tokens - 1
        
        
class Transition:
    """A named transition with an identifier and a cached enabled flag."""

    def __init__(self, name, id):
        """Store ``name`` and ``id``; the transition starts out not enabled."""
        self.name, self.id = name, id
        self.in_edges, self.out_edges = set(), set()
        # Starts as False; code outside this class may overwrite the flag.
        self.is_enabled = False
        

class Edge:
    """A directed arc from ``source`` to ``target``."""

    def __init__(self, source, target):
        """Remember both endpoints exactly as given."""
        self.source, self.target = source, target
        


class PetriNet():
    """A small place/transition net with token-game helpers.

    Places are stored by name, transitions by id, and arcs as a flat list of
    ``Edge`` objects whose endpoints are a place name and a transition id.
    Arcs are matched to a transition purely by comparing ids, so place names
    and transition ids must not collide.
    """

    def __init__(self):
        """Create an empty net."""
        self.places = {}
        self.transitions = {}
        self.edges = []
        self.name_to_id = {}

    def add_place(self, name):
        """Add (or replace) the place called ``name``; it starts with 0 tokens."""
        self.places[name] = Place(name)

    def add_transition(self, name, id):
        """Add (or replace) a transition and remember its name -> id mapping."""
        self.transitions[id] = Transition(name, id)
        self.name_to_id[name] = id

    def add_edge(self, source, target):
        """Append an arc ``source -> target`` and return the net for chaining."""
        self.edges.append(Edge(source, target))
        return self

    def get_tokens(self, place):
        """Return the current token count of ``place``."""
        return self.places[place].tokens

    def is_enabled(self, transition):
        """Tell whether ``transition`` can fire under the current marking.

        A transition is enabled when every place with an arc into it holds at
        least one token (trivially true when there are no such arcs). The
        result is also cached on the ``Transition`` object.

        Raises:
            KeyError: if the transition id or one of its input places is unknown.
        """
        enabled = all(
            self.places[edge.source].tokens > 0
            for edge in self.edges
            if edge.target == transition
        )
        self.transitions[transition].is_enabled = enabled
        return enabled

    def add_marking(self, place):
        """Put one token on ``place``."""
        self.places[place].add_mark()

    def fire_transition(self, transition):
        """Fire ``transition`` if the current marking enables it.

        One token is removed per incoming arc and one token is added per
        outgoing arc.

        Returns:
            bool: ``True`` when the transition fired, ``False`` when it was
            not enabled (the marking is then left untouched).
        """
        # Re-evaluate against the current marking. The flag cached on the
        # Transition is only as fresh as the last is_enabled() call and goes
        # stale as soon as any other transition fires.
        if not self.is_enabled(transition):
            return False

        # Consume one token from every input place.
        for edge in self.edges:
            if edge.target == transition:
                self.places[edge.source].remove_mark()

        # Produce one token on every output place.
        for edge in self.edges:
            if edge.source == transition:
                self.places[edge.target].add_mark()

        return True

    def transition_name_to_id(self, transition_name):
        """Return the id registered for ``transition_name``."""
        return self.name_to_id[transition_name]
    
class Alpha:
    """Bundle of the facts derived from a dependency graph.

    Attributes are filled from ``get_activity_details`` (start activities,
    end activities, all activities) and ``get_relation`` (causal, parallel);
    the graph itself is kept as ``dg``.
    """

    def __init__(self, dg):
        starts, ends, everything = get_activity_details(dg)
        self.start_activities = starts
        self.end_activities = ends
        self.activities = everything
        self.dg = dg
        relations = get_relation(dg)
        self.causal, self.parallel = relations

def get_activity_details(dg):
    """Derive start, end and all activities from a dependency graph.

    Args:
        dg: mapping ``{activity: {successor: count}}``.

    Returns:
        tuple: ``(starting, ending, all_tasks)`` where
        ``starting`` lists the keys of ``dg`` that never occur as a successor,
        ``ending`` lists the successors that are not keys of ``dg``, and
        ``all_tasks`` lists every activity exactly once. All three lists are
        in order of first appearance, so repeated runs over the same graph
        give the same order regardless of string hash randomisation.
    """
    # Activities with at least one outgoing edge (dict keys are unique).
    sources = list(dg)

    # Activities with at least one incoming edge, de-duplicated in order.
    targets = list(dict.fromkeys(
        successor for successors in dg.values() for successor in successors
    ))

    source_set, target_set = set(sources), set(targets)

    # Start activities are never a successor of anything.
    starting = [activity for activity in sources if activity not in target_set]

    # End activities never have a successor of their own.
    ending = [activity for activity in targets if activity not in source_set]

    # dict.fromkeys keeps first-seen order while removing duplicates.
    all_tasks = list(dict.fromkeys(sources + targets))

    return starting, ending, all_tasks


def get_relation(dg):
    """Split the directly-follows edges into causal and parallel relations.

    An edge ``ai -> aj`` is parallel when the reverse edge ``aj -> ai`` is
    also present in ``dg``; otherwise it is causal.

    Args:
        dg: mapping ``{activity: {successor: count}}``.

    Returns:
        tuple: ``(causal, parallel)`` where ``causal`` maps an activity to the
        set of its causal successors (activities without any are omitted) and
        ``parallel`` maps each ordered parallel pair ``(ai, aj)`` to ``""``.
    """
    causal = {}
    parallel = {}

    for ai in sorted(dg):
        causal_successors = set()

        for aj in sorted(dg[ai]):
            # aj may have no entry of its own in dg (an activity without
            # successors); in that case there cannot be a reverse edge.
            if ai in dg.get(aj, ()):
                parallel[(ai, aj)] = ""
            else:
                causal_successors.add(aj)

        if causal_successors:
            causal[ai] = causal_successors

    return causal, parallel


def check_related(a, elem_1, elem_2):
    """Tell whether any activity of ``elem_1`` is related to any of ``elem_2``.

    Two activities count as related when they are parallel or when one is a
    causal successor of the other, in either direction.

    Args:
        a: object exposing ``parallel`` (mapping keyed by ordered activity
            pairs) and ``causal`` (mapping ``activity -> set of successors``;
            a mapping keyed by ordered pairs is accepted as well).
        elem_1: iterable of activities.
        elem_2: iterable of activities.

    Returns:
        bool: ``True`` as soon as one related combination is found.
    """
    for first, second in product(elem_1, elem_2):
        # Test both orientations: causality is directional, and a relation in
        # either direction makes the two activities related.
        for source, target in ((first, second), (second, first)):
            edge = (source, target)
            if edge in a.parallel or edge in a.causal:
                return True
            # causal is normally keyed by activity with a set of successors,
            # so the pair itself is never a key there.
            if target in a.causal.get(source, ()):
                return True
    return False


def identifiy_places(pairs, p):
    """Return ``True`` when no other pair in ``pairs`` covers ``p``.

    A pair covers ``p`` when it differs from ``p`` and both of ``p``'s sets
    are subsets of the corresponding sets of that pair.
    """
    return not any(
        p != other and p[0].issubset(other[0]) and p[1].issubset(other[1])
        for other in pairs
    )




def alpha(log):
    """Build a Petri net from an event log.

    Steps, in order: compute the dependency graph of ``log``, derive the
    activity and relation sets through ``Alpha``, grow a list of candidate
    (input-set, output-set) pairs, keep the pairs that no other pair covers,
    and finally create one transition per activity, a marked ``'start'``
    place, an ``'end'`` place and one place per kept pair.

    Args:
        log: event log passed straight to ``dependency_graph`` (presumably
            the structure returned by ``read_from_file``; confirm with callers).

    Returns:
        PetriNet: the constructed net, with one token on ``'start'``.
    """
    
    dg = dependency_graph(log)
    p = PetriNet()
    a = Alpha(dg)    
    
    # Seed the candidates with one ({source}, {target}) pair per causal edge.
    pairs = []
    for key, element in a.causal.items():
        for item in element: pairs.append(({key}, {item}))

    # Try to merge candidates into larger ones. New pairs are appended to the
    # list while it is being scanned.
    # NOTE(review): range(0, len(pairs)) is evaluated once, so pairs appended
    # during the loop are only ever used as p2, never as p1 - confirm that
    # this reaches every merge that is wanted.
    for i in range(0, len(pairs)):
            p1 = pairs[i]
            for j in range(i, len(pairs)):
                p2 = pairs[j]
                
                if p1 == p2: continue
                    
                # Only consider p2 when it contains p1's input set or p1's output set
                is_subset = (p1[0].issubset(p2[0]) or p1[1].issubset(p2[1]))
                if not is_subset: continue
                
                # Skip the merge when the two input sets, or the two output
                # sets, contain activities that check_related reports as related.
                is_related = (check_related(a, p1[0], p2[0]) or check_related(a, p1[1], p2[1]))
                
                if is_related: continue
                
                # NOTE(review): the merged pair is not re-checked against
                # a.causal, i.e. nothing here verifies that every input is
                # causally followed by every output - confirm this is intended.
                new_pair = (p1[0] | p2[0], p1[1] | p2[1])
                if new_pair not in pairs: pairs.append(new_pair)
    
    # Keep only the pairs that identifiy_places accepts (no other pair covers
    # them). filter() is lazy; it is consumed once in the loop at the bottom.
    # The lambda parameter p shadows the PetriNet p only inside the lambda.
    places_to_add = filter(lambda p: identifiy_places(pairs, p), pairs)

    # Each activity corresponds to a transition
    # Ids are 0, -1, -2, ... following the order of a.activities.
    for i in range(len(a.activities)): p.add_transition(a.activities[i], -i)
        
    # Add source
    p.add_place('start')
    p.add_marking('start')
    for s in a.start_activities: p.add_edge('start', p.transition_name_to_id(s))

    # Add sink
    p.add_place('end')
    for e in a.end_activities: p.add_edge(p.transition_name_to_id(e), 'end')
    
    # One place per kept pair. Its id is the number of places created so far,
    # so the first one gets id 2 ('start' and 'end' already exist).
    for pair in places_to_add:
        new_id = len(p.places)
        p.add_place(new_id)
        
        for in_edge in pair[0]: p.add_edge(p.transition_name_to_id(in_edge), new_id)
        for out_edge in pair[1]: p.add_edge(new_id, p.transition_name_to_id(out_edge))
    
    return p

In [26]:
# Scratch check: a tuple of activity names can be looked up as a dict key.
pair = ('calculate capacity', 'check credit')

# Six ordered pairs, each mapped to an empty-string placeholder.
parallel = dict.fromkeys(
    [
        ('calculate capacity', 'check credit'),
        ('calculate capacity', 'check system'),
        ('check credit', 'check system'),
        ('check credit', 'calculate capacity'),
        ('check system', 'check credit'),
        ('check system', 'calculate capacity'),
    ],
    "",
)

# Tuples hash by value, so an equal tuple finds the key.
pair in parallel

True

In [42]:
# Read the XES log from the working directory and build a Petri net from it.
mined_model = alpha(read_from_file("loan-process.xes"))

def check_enabled(pn, transition_names=None):
    """Print, one per line, whether each named transition of ``pn`` is enabled.

    A blank line is printed after the last transition so that successive
    calls show up as separate groups.

    Args:
        pn: net exposing ``transition_name_to_id(name)`` and ``is_enabled(id)``.
        transition_names: names to report, in print order. Defaults to the
            seven activities of the loan-process example.
    """
    if transition_names is None:
        transition_names = ["register application", "check credit", "calculate capacity", "check system", "accept", "reject", "send decision e-mail"]

    for name in transition_names:
        print(pn.is_enabled(pn.transition_name_to_id(name)))
    print("")


# Replay one trace on the mined net: before each step print which transitions
# are enabled, then fire the transition of the current activity.
trace = ["register application", "check credit", "check system", "calculate capacity", "accept", "send decision e-mail"]
for a in trace:
    check_enabled(mined_model)
    # The return value of fire_transition is not inspected here.
    mined_model.fire_transition(mined_model.transition_name_to_id(a))


True
False
False
False
False
False
False

False
True
True
True
False
False
False

False
False
True
True
False
False
False

False
False
True
False
False
False
False

False
False
False
False
True
True
False

False
False
False
False
False
False
True



In [38]:
def get_relation(dg):
    """Split the directly-follows edges into causal and parallel pairs.

    An edge ``ai -> aj`` is parallel when the reverse edge ``aj -> ai`` is
    also present in ``dg``; otherwise it is causal.

    NOTE: this cell reuses the name of the ``get_relation`` defined next to
    ``alpha`` but returns ``causal`` keyed by ordered pairs instead of
    ``activity -> set``. Whichever cell ran last is the one in effect, and
    ``alpha`` iterates the ``activity -> set`` shape.

    Args:
        dg: mapping ``{activity: {successor: count}}``.

    Returns:
        tuple: ``(causal, parallel)``, two dicts mapping ordered pairs
        ``(ai, aj)`` to ``""``.
    """
    causal = {}
    parallel = {}

    for ai in sorted(dg):
        for aj in sorted(dg[ai]):
            # aj may have no entry of its own in dg (an activity without
            # successors); in that case there cannot be a reverse edge.
            relation = parallel if ai in dg.get(aj, ()) else causal
            relation[(ai, aj)] = ""

    return causal, parallel

# Rebuild the dependency graph from the log and split its edges with the
# get_relation defined in this cell.
dg = dependency_graph(read_from_file("loan-process.xes"))
causal, parallel = get_relation(dg)
# Left disabled: printing the causal pairs as well.
#print(causal)
#{ ('calculate capacity', 'check credit') : ""
# Show the ordered pairs that were classified as parallel.
print(parallel)

{('calculate capacity', 'check credit'): '', ('calculate capacity', 'check system'): '', ('check credit', 'calculate capacity'): '', ('check credit', 'check system'): '', ('check system', 'calculate capacity'): '', ('check system', 'check credit'): ''}


In [37]:
# Scratch check: a tuple works as a dictionary key.
di = {('calculate capacity', 'check credit'): "red"}
di

{('calculate capacity', 'check credit'): 'red'}

In [5]:
# Directly-follows counts of the loan-process log, listed alphabetically by
# source activity and then by target activity.
dg = dependency_graph(read_from_file("loan-process.xes"))

for ai in sorted(dg):
    for aj in sorted(dg[ai]):
        print(f"{ai} -> {aj} : {dg[ai][aj]}")

accept -> send decision e-mail : 20
calculate capacity -> accept : 13
calculate capacity -> check credit : 6
calculate capacity -> check system : 46
calculate capacity -> reject : 35
check credit -> accept : 1
check credit -> calculate capacity : 54
check credit -> check system : 38
check credit -> reject : 7
check system -> accept : 6
check system -> calculate capacity : 42
check system -> check credit : 4
check system -> reject : 38
register application -> calculate capacity : 4
register application -> check credit : 90
register application -> check system : 6
reject -> send decision e-mail : 80


name:  accept                id:  0
name:  send decision e-mail  id:  -1
name:  check credit          id:  -2
name:  calculate capacity    id:  -3
name:  reject                id:  -4
name:  check system          id:  -5
name:  register application  id:  -6

start -> -6      start -> register application
-1 -> end        send decision e-mail -> end
-4 -> 2          reject -> 2
0 -> 2           accept -> 2
2 -> -1          2 ->  send decision e-mail
-5 -> 3          check system -> 3
-3 -> 3          calculate capacity -> 3
-2 -> 3          check credit -> 3
3 -> 0           3 -> accept
3 -> -4          3 -> reject
-6 -> 4          register application -> 4
4 -> -5          4 -> check system
4 -> -3          4 -> calculate capacity
4 -> -2          4 -> check credit