In [10]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Data creation


In [11]:
class Activity:
    def __init__(self, name, id):
        self.name = name
        self.id = id

    def __print__(self):
        print(f"ProcessID {self.id}: {self.name}")

In [12]:
class Process:
    def __init__(self, name, id, activities):
        self.name = name
        self.id = id
        self.activities = activities

    def __print__(self):
        print(f'Process {self.name} consists of following steps (ordered): \n')
        for a in self.activities:
            print(a.__print__())


In [13]:
class ProcessList:
    def __init__(self, name, processes):
        self.name = name
        self.processList = processes
        self.sequenceList = self.fillSequenceList()
        self.uniqueActivities = self.getUniqueActivities()
        self.initActivities = self.getInitActivities()
        self.finalActivities = self.getFinalActivities()
        self.FootprintMatrix = None
    
    def fillSequenceList(self):
        sequenceList = []
        for process in self.processList:
            sequence = []
            for activity in process.activities:
                sequence.append(activity.name)
            sequenceList.append(sequence)
        return sequenceList

    def getUniqueActivities(self):
        T_L = {}
        for process in self.processList:
            for activity in process.activities:
                if activity.name not in T_L:
                    T_L[activity.name] = activity
        self.uniqueActivities = list(T_L.values())
        return list(T_L.values())
    
    def getInitActivities(self):
        T_I = {}
        for process in self.processList:
            initActivity = process.activities[0]
            if initActivity.name not in T_I:
                T_I[initActivity.name] = initActivity.name
        self.initActivities = list(T_I.values())

        return self.initActivities
    
    def getFinalActivities(self):
        T_O = {}
        for process in self.processList:
            finalActivity = process.activities[-1]
            if finalActivity.name not in T_O:
                T_O[finalActivity.name] = finalActivity.name
        self.finalActivities = list(T_O.values())
        return list(T_O.values())
    #! FIX TO WORK AS LIST OF ACTIVITIES

    def generateFootprintMatrix(self):
        #ToDo Implement
        
        # Define the sequence
        sequence = self.sequenceList

        # Determine the maximum length of the sub-arrays
        max_length = max(len(sub_list) for sub_list in sequence)

        # Create a numpy array with the maximum length and pad with zeros
        # Use empty strings ('') for padding as we are dealing with strings
        padded_sequence = np.array([sub_list + [''] * (max_length - len(sub_list)) for sub_list in sequence])
        sequence = padded_sequence

        allTokens = sequence.flatten()
        allTokens = np.unique(allTokens)

        print(sequence)
        print(allTokens )

        FootprintMatrix = np.ones((len(allTokens),len(allTokens)))*4

                # the 4 patterns
                # direct succession     1
                # causality             2
                # parallel              3
                # choice                4

        # fill the matrix
        for s in sequence:
            for i in range(len(s)-1):
                # direct succession     1
                x = s[i]
                y = s[i+1]
                x_index = np.where(allTokens == x)[0][0]
                y_index = np.where(allTokens == y)[0][0]
                FootprintMatrix[x_index][y_index] = 1

                # causality             2
                if FootprintMatrix[y_index][x_index] == 4:
                    FootprintMatrix[y_index][x_index] = 2 

                # parallel              3
                if FootprintMatrix[x_index][y_index] == 2:
                    FootprintMatrix[x_index][y_index] = 3

                # choice                4
                if FootprintMatrix[x_index][y_index] == 0:
                    FootprintMatrix[x_index][y_index] = 4

        # delete first row and column
        FootprintMatrix = np.delete(FootprintMatrix,0,0)
        FootprintMatrix = np.delete(FootprintMatrix,0,1)

        def convertFootprintMatrixToVanAalstNotation(FootprintMatrix):
            FootprintMatrix = np.where(FootprintMatrix == 1, '>', FootprintMatrix)
            FootprintMatrix = np.where(FootprintMatrix == '2.0', '->', FootprintMatrix)
            FootprintMatrix = np.where(FootprintMatrix == 3, '||', FootprintMatrix)
            FootprintMatrix = np.where(FootprintMatrix == '4.0', '#', FootprintMatrix)
            return FootprintMatrix

        FootprintMatrix = convertFootprintMatrixToVanAalstNotation(FootprintMatrix)
        self.FootprintMatrix = FootprintMatrix
        return FootprintMatrix      




                
          


    
    def printAlphaMinerOutput(self):
        # Step 1
        self.getUniqueActivities()
        # Step 2
        self.getInitActivities()
        # Step 3
        self.getFinalActivities()
        # Step 4
        # self.getListOfPairs()



        print("Unique activities found:")
        print(",".join(a.name for a in self.uniqueActivities))
        print("Initial activities found:")
        print(",".join(a for a in self.initActivities))
        print("Final activities found:")
        print(",".join(a for a in self.finalActivities))




    def __print__(self):
        print(f'This process list contains {len(self.processList)} executions of process: {self.name}')

In [14]:
def createData(sequences, path='data/defaultData'):
    data = []
    i = 0
    for sequence in sequences:
        d = []
        for element in sequence:
            d.append(Activity(element, i))
        p = Process("Example", i, d)
        data.append(p)
        i += 1
    return data

In [15]:
sequence = [['a','c','b'],['b','c','d'],['a','c','e'],['b','c','e']]
sequence = [['a','b','c','d','f'],['a','b','d','c','f'],['a','c','b','d','f'],['a','e','f']]
data = createData(sequence)
print(data)
processesList = ProcessList("Example",data)


[<__main__.Process object at 0x103d5c790>, <__main__.Process object at 0x103eb7d90>, <__main__.Process object at 0x11ebb0f90>, <__main__.Process object at 0x11ebb3a90>]


# Alpha algorithm

[x] 1. Scan the entire event log, check all unique activities.  
 Set $T_L$

[X] 2. Initial activities. Set $T_I$

[X] 3. Find final activities. Set $T_O$

[X] 4. Footprint Matrix
[ ] Calculate pairs

[ ] 5. Delete non-maximal pairs


In [16]:
(processesList.printAlphaMinerOutput())


Unique activities found:
a,b,c,d,f,e
Initial activities found:
a
Final activities found:
f


In [17]:
print(processesList.sequenceList)

[['a', 'b', 'c', 'd', 'f'], ['a', 'b', 'd', 'c', 'f'], ['a', 'c', 'b', 'd', 'f'], ['a', 'e', 'f']]


Step 4 -> Create the FootprintMatrix


In [18]:
# Define the sequence
sequence = [['a','b','c','d','f'],['a','b','d','c','f'],['a','c','b','d','f'],['a','e','f']]

# Determine the maximum length of the sub-arrays
max_length = max(len(sub_list) for sub_list in sequence)

# Create a numpy array with the maximum length and pad with zeros
# Use empty strings ('') for padding as we are dealing with strings
padded_sequence = np.array([sub_list + [''] * (max_length - len(sub_list)) for sub_list in sequence])
sequence = padded_sequence

allTokens = sequence.flatten()
allTokens = np.unique(allTokens)

print(sequence)
print(allTokens )

FootprintMatrix = np.ones((len(allTokens),len(allTokens)))*4

        # the 4 patterns
        # direct succession     1
        # causality             2
        # parallel              3
        # choice                4

# fill the matrix
for s in sequence:
    for i in range(len(s)-1):
        # direct succession     1
        x = s[i]
        y = s[i+1]
        x_index = np.where(allTokens == x)[0][0]
        y_index = np.where(allTokens == y)[0][0]
        FootprintMatrix[x_index][y_index] = 1

        # causality             2
        if FootprintMatrix[y_index][x_index] == 4:
            FootprintMatrix[y_index][x_index] = 2 

        # parallel              3
        if FootprintMatrix[x_index][y_index] == 2:
            FootprintMatrix[x_index][y_index] = 3

        # choice                4
        if FootprintMatrix[x_index][y_index] == 0:
            FootprintMatrix[x_index][y_index] = 4

# delete first row and column
FootprintMatrix = np.delete(FootprintMatrix,0,0)
FootprintMatrix = np.delete(FootprintMatrix,0,1)

def convertFootprintMatrixToVanAalstNotation(FootprintMatrix):
    FootprintMatrix = np.where(FootprintMatrix == 1, '>', FootprintMatrix)
    FootprintMatrix = np.where(FootprintMatrix == '2.0', '->', FootprintMatrix)
    FootprintMatrix = np.where(FootprintMatrix == 3, '||', FootprintMatrix)
    FootprintMatrix = np.where(FootprintMatrix == '4.0', '#', FootprintMatrix)
    return FootprintMatrix



FootprintMatrix = convertFootprintMatrixToVanAalstNotation(FootprintMatrix)
print(FootprintMatrix)

print(processesList.generateFootprintMatrix())


[['a' 'b' 'c' 'd' 'f']
 ['a' 'b' 'd' 'c' 'f']
 ['a' 'c' 'b' 'd' 'f']
 ['a' 'e' 'f' '' '']]
['' 'a' 'b' 'c' 'd' 'e' 'f']
[['#' '>' '>' '#' '>' '#']
 ['->' '#' '>' '>' '#' '#']
 ['->' '>' '#' '>' '#' '>']
 ['#' '->' '>' '#' '#' '>']
 ['->' '#' '#' '#' '#' '>']
 ['#' '#' '->' '->' '->' '#']]
[['a' 'b' 'c' 'd' 'f']
 ['a' 'b' 'd' 'c' 'f']
 ['a' 'c' 'b' 'd' 'f']
 ['a' 'e' 'f' '' '']]
['' 'a' 'b' 'c' 'd' 'e' 'f']
[['#' '>' '>' '#' '>' '#']
 ['->' '#' '>' '>' '#' '#']
 ['->' '>' '#' '>' '#' '>']
 ['#' '->' '>' '#' '#' '>']
 ['->' '#' '#' '#' '#' '>']
 ['#' '#' '->' '->' '->' '#']]
