# Third approach Frequency-based Encoding 

Finding frequent sequences and Association Rule Mining for Sequential Patterns

In [11]:
import pandas as pd
import pm4py
import logview
from sequential.seq2pat import Seq2Pat
from efficient_apriori import apriori

Filtered log includes only:
- Start activities such as Create Purchase Order Item, Create Purchase Requisition Item, SRM Created, Vendor creates debit memo
- End activities Clear Invoice, Delete Purchase Order Item
- Only cases between 01.01.2018 and 31.12.2018


In [None]:
log2019_filtered = pm4py.read_xes(r"")

parsing log, completed traces ::   0%|          | 0/169380 [00:00<?, ?it/s]

In [3]:
log19df_filtered= pm4py.convert_to_dataframe(log2019_filtered)
log19df_filtered = log19df_filtered.sort_values(['case:concept:name', 'time:timestamp'], ignore_index=True)
log2019_filtered = pm4py.format_dataframe(log19df_filtered, case_id='case:concept:name', activity_key='concept:name', timestamp_key='time:timestamp')

In [4]:
from logview.utils import LogViewBuilder

log_view_filtered = LogViewBuilder.build_log_view(log2019_filtered)

In [5]:
from logview.predicate import *

query_pb = Query('Remove Payment Block', [EqToConstant('concept:name', 'Remove Payment Block')])
result_set_query_pb, complement_pb = log_view_filtered.evaluate_query('rs_Remove payment block', log2019_filtered, query_pb)

In [6]:
unique_events = sorted(log2019_filtered['concept:name'].unique())

In [7]:
result_set_query_pb_sorted = result_set_query_pb.sort_values(by=['case:concept:name', 'time:timestamp'])
complement_pb_sorted = complement_pb.sort_values(by=['case:concept:name', 'time:timestamp'])

In [13]:
def prepare_traces_as_sequences(df):
    sequences = df.groupby('case:concept:name')['concept:name'].apply(list).tolist()
    return sequences

In [14]:
sequences_with_remove = prepare_traces_as_sequences(result_set_query_pb_sorted)
sequences_without_remove = prepare_traces_as_sequences(complement_pb_sorted)

In [15]:
seq2pat_with = Seq2Pat(sequences_with_remove)
sequential_patterns_with_remove = seq2pat_with.get_patterns(min_frequency=2)

In [16]:
seq2pat_without = Seq2Pat(sequences_without_remove)
sequential_patterns_without_remove = seq2pat_without.get_patterns(min_frequency=2)

Comparisson

In [17]:
patterns_with_remove = {tuple(p[:-1]) for p in sequential_patterns_with_remove}
patterns_without_remove = {tuple(p[:-1]) for p in sequential_patterns_without_remove}

unique_to_with_remove = patterns_with_remove - patterns_without_remove
unique_to_without_remove = patterns_without_remove - patterns_with_remove

unique_to_with_remove_list = list(unique_to_with_remove)
unique_to_without_remove_list = list(unique_to_without_remove)

print("Unique Patterns in With-Remove Dataset:")
for i, pattern in enumerate(unique_to_with_remove_list[:5]): 
    print(f"Pattern: {pattern}")

print("\nUnique Patterns in Without-Remove Dataset:")
for i, pattern in enumerate(unique_to_without_remove_list[:5]): 
    print(f"Pattern: {pattern}")

Unique Patterns in With-Remove Dataset:
Pattern: ('Change Quantity', 'Remove Payment Block', 'Vendor creates invoice', 'Clear Invoice', 'Record Invoice Receipt', 'Remove Payment Block', 'Clear Invoice')
Pattern: ('Create Purchase Requisition Item', 'Create Purchase Order Item', 'Record Invoice Receipt', 'Record Goods Receipt', 'Cancel Subsequent Invoice')
Pattern: ('Vendor creates debit memo', 'Vendor creates invoice', 'Record Goods Receipt', 'Cancel Invoice Receipt', 'Record Invoice Receipt', 'Record Invoice Receipt', 'Remove Payment Block')
Pattern: ('Change Quantity', 'Vendor creates invoice', 'Vendor creates invoice', 'Record Invoice Receipt', 'Record Goods Receipt', 'Remove Payment Block', 'Record Invoice Receipt')
Pattern: ('Receive Order Confirmation', 'Vendor creates invoice', 'Record Invoice Receipt', 'Vendor creates debit memo', 'Cancel Subsequent Invoice', 'Remove Payment Block', 'Clear Invoice', 'Clear Invoice')

Unique Patterns in Without-Remove Dataset:
Pattern: ('Clear I

Including support values

In [18]:
patterns_with_remove_1 = {tuple(p[:len(p)-1]): p[-1] for p in sequential_patterns_with_remove}
patterns_without_remove_1 = {tuple(p[:len(p)-1]): p[-1] for p in sequential_patterns_without_remove}

common_patterns_1 = {k: (patterns_with_remove_1[k], patterns_without_remove_1[k]) for k in patterns_with_remove_1 if k in patterns_without_remove_1}

unique_to_with_remove_1 = {k: patterns_with_remove_1[k] for k in patterns_with_remove_1 if k not in patterns_without_remove_1}
unique_to_without_remove_1 = {k: patterns_without_remove_1[k] for k in patterns_without_remove_1 if k not in patterns_with_remove_1}

print("Common Patterns with Support in Both Datasets:")
for i, (pattern, (support_with, support_without)) in enumerate(common_patterns_1.items()):
    if i < 5: 
        print(f"Pattern: {pattern}, Support in With-Remove: {support_with}, Support in Without-Remove: {support_without}")

print("\nUnique Patterns in With-Remove Dataset:")
for i, (pattern, support) in enumerate(unique_to_with_remove_1.items()):
    if i < 5:  
        print(f"Pattern: {pattern}, Support: {support}")

print("\nUnique Patterns in Without-Remove Dataset:")
for i, (pattern_without, support_without) in enumerate(unique_to_without_remove_1.items()):
    if i < 5: 
        print(f"Pattern: {pattern_without}, Support: {support_without}")

Common Patterns with Support in Both Datasets:
Pattern: ('Record Invoice Receipt', 'Clear Invoice'), Support in With-Remove: 43292, Support in Without-Remove: 117793
Pattern: ('Create Purchase Order Item', 'Vendor creates invoice'), Support in With-Remove: 43269, Support in Without-Remove: 117032
Pattern: ('Record Goods Receipt', 'Clear Invoice'), Support in With-Remove: 43212, Support in Without-Remove: 117595
Pattern: ('Vendor creates invoice', 'Clear Invoice'), Support in With-Remove: 43205, Support in Without-Remove: 117559
Pattern: ('Create Purchase Order Item', 'Record Goods Receipt'), Support in With-Remove: 43150, Support in Without-Remove: 117734

Unique Patterns in With-Remove Dataset:
Pattern: ('Remove Payment Block', 'Clear Invoice'), Support: 43346
Pattern: ('Record Invoice Receipt', 'Remove Payment Block'), Support: 43299
Pattern: ('Record Invoice Receipt', 'Remove Payment Block', 'Clear Invoice'), Support: 43284
Pattern: ('Vendor creates invoice', 'Remove Payment Block')

Association Rule Mining for Sequential Patterns

In [19]:
transactions_with = [list(pattern) for pattern in patterns_with_remove_1.keys()]  # For 'With Remove Payment Block'
transactions_without = [list(pattern) for pattern in patterns_without_remove_1.keys()]  # For 'Without Remove Payment Block'

print("Association Rules for Transactions WITH 'Remove Payment Block")
itemsets_with, rules_with = apriori(transactions_with, min_support=0.01, min_confidence=0.7)

print("\nFrequent Itemsets (With):")
for itemset, support in itemsets_with.items():
    print(f"{itemset}: {support}")

print("\nAssociation Rules (With):")
for rule in rules_with:
    print(rule)

with open('rules_with_remove_payment_block.txt', 'w') as f:
    f.write("Frequent Itemsets (With):\n")
    for itemset, support in itemsets_with.items():
        f.write(f"{itemset}: {support}\n")
    f.write("\nAssociation Rules (With):\n")
    for rule in rules_with:
        f.write(str(rule) + '\n')


print("Association Rules for Transactions WITHOUT 'Remove Payment Block")
itemsets_without, rules_without = apriori(transactions_without, min_support=0.01, min_confidence=0.7)

print("\nFrequent Itemsets (Without):")
for itemset, support in itemsets_without.items():
    print(f"{itemset}: {support}")

print("\nAssociation Rules (Without):")
for rule in rules_without:
    print(rule)

with open('rules_without_remove_payment_block.txt', 'w') as f:
    f.write("Frequent Itemsets (Without):\n")
    for itemset, support in itemsets_without.items():
        f.write(f"{itemset}: {support}\n")
    f.write("\nAssociation Rules (Without):\n")
    for rule in rules_without:
        f.write(str(rule) + '\n')

Association Rules for Transactions WITH 'Remove Payment Block

Frequent Itemsets (With):
1: {('Remove Payment Block',): 141259, ('Clear Invoice',): 142871, ('Record Invoice Receipt',): 192614, ('Create Purchase Order Item',): 74242, ('Vendor creates invoice',): 171422, ('Record Goods Receipt',): 169955, ('Receive Order Confirmation',): 13195, ('Change Quantity',): 75325, ('Create Purchase Requisition Item',): 9095, ('Change Price',): 46159, ('Vendor creates debit memo',): 83438, ('Cancel Invoice Receipt',): 78356, ('Change Delivery Indicator',): 32431, ('Change Approval for Purchase Order',): 16680, ('Cancel Subsequent Invoice',): 12944, ('Record Service Entry Sheet',): 14352, ('Cancel Goods Receipt',): 13508, ('Release Purchase Order',): 12219, ('Reactivate Purchase Order Item',): 7273, ('SRM: Awaiting Approval',): 8403, ('SRM: Change was Transmitted',): 8913, ('SRM: Document Completed',): 9702, ('SRM: In Transfer to Execution Syst.',): 11153, ('SRM: Ordered',): 10948, ('SRM: Complete

comparisson

In [21]:
rules_with_set = set(rules_with)
rules_without_set = set(rules_without)

intersection_rules = rules_with_set & rules_without_set
difference_with = rules_with_set - rules_without_set
difference_without = rules_without_set - rules_with_set

print("\nIntersection of Rules:")
for rule in intersection_rules:
    print(rule)

print("\nRules in 'With Remove' but not in 'Without Remove':")
for rule in difference_with:
    print(rule)

print("\nRules in 'Without Remove' but not in 'With Remove':")
for rule in difference_without:
    print(rule)

# with open('intersection_rules.txt', 'w') as f:
#     f.write("Intersection of Rules:\n")
#     for rule in intersection_rules:
#         f.write(str(rule) + '\n')

# with open('difference_with_rules.txt', 'w') as f:
#     f.write("Rules in 'With Remove' but not in 'Without Remove':\n")
#     for rule in difference_with:
#         f.write(str(rule) + '\n')

# with open('difference_without_rules.txt', 'w') as f:
#     f.write("Rules in 'Without Remove' but not in 'With Remove':\n")
#     for rule in difference_without:
#         f.write(str(rule) + '\n')


Intersection of Rules:
{Cancel Invoice Receipt, Vendor creates invoice} -> {Record Invoice Receipt} (conf: 0.780, supp: 0.065, lift: 1.445, conv: 2.092)
{Clear Invoice, Record Goods Receipt, Vendor creates invoice} -> {Record Invoice Receipt} (conf: 0.735, supp: 0.131, lift: 1.361, conv: 1.735)
{Clear Invoice, Record Goods Receipt, Vendor creates debit memo, Vendor creates invoice} -> {Record Invoice Receipt} (conf: 0.766, supp: 0.030, lift: 1.419, conv: 1.967)
{Clear Invoice, Create Purchase Order Item, Vendor creates debit memo} -> {Record Invoice Receipt} (conf: 0.728, supp: 0.022, lift: 1.349, conv: 1.694)
{Clear Invoice, Vendor creates debit memo, Vendor creates invoice} -> {Record Invoice Receipt} (conf: 0.764, supp: 0.050, lift: 1.416, conv: 1.952)
{Cancel Invoice Receipt, Change Quantity, Clear Invoice} -> {Record Invoice Receipt} (conf: 0.736, supp: 0.011, lift: 1.362, conv: 1.740)
{Release Purchase Order} -> {Change Approval for Purchase Order} (conf: 0.826, supp: 0.043, lif