In [1]:
from sentence_transformers import SentenceTransformer, util
import pm4py
from pm4py.objects.bpmn.obj import BPMN
import torch
import torch.nn.functional as F
import numpy as np
import pandas as pd

In [2]:
# Inputs of this run, named so they are easy to spot and swap:
# the BPMN diagram file and the sentence-embedding checkpoint.
BPMN_PATH = 'V_k09_text.bpmn'
EMBEDDING_MODEL_NAME = "Maite89/Roberta_finetuning_semantic_similarity_stsb_multi_mt"

# Parsed BPMN diagram (queried below through get_nodes() / get_flows()).
bpmn = pm4py.read_bpmn(BPMN_PATH)
# Sentence encoder; model.encode(text) supplies the embeddings compared by cosine similarity.
model = SentenceTransformer(EMBEDDING_MODEL_NAME)
# Relations taken from the textual process description, one
# (source, target, relation type) triple per row. For rows typed
# 'XOR_condition' the source slot holds the branch condition text rather than
# an activity.
relations = [
    ('order is read from the automatic order management system', 'first product from the order is checked if in stock', 'Flow'),
    ('first product from the order is checked if in stock', 'withdrawn from the warehouse', 'XOR_gateway'),
    ('product is in stock', 'withdrawn from the warehouse', 'XOR_condition'),
    ('first product from the order is checked if in stock', 'reordered from the wholesaler', 'XOR_gateway'),
    ('product is not in stock', 'reordered from the wholesaler', 'XOR_condition'),
    ('reordered from the wholesaler', 'wait more then 10 days for the arrival of the product', 'XOR_gateway'),
    ('necessary to wait more then 10 days', 'delivery delay penalty is demanded from the wholesaler', 'XOR_condition'),
    ('wait less than or equal to 10 days', 'ordered product arrives', 'XOR_condition'),
    ('ordered product arrives', 'registered in the stock management system', 'Flow'),
    ('registered in the stock management system', 'whole order is ready for shipment', 'Flow'),
    ('whole order is ready for shipment', 'shipment process is created', 'XOR_gateway'),
    ('order is ready for shipment', 'shipment process is created', 'XOR_condition'),
    ('shipment process is created', 'courier is requested', 'parallel_gateway'),
    ('shipment process is created', 'products are packed', 'parallel_gateway'),
    ('products are packed', 'order is shipped', 'Flow'),
    ('courier is requested', 'order is shipped', 'Flow'),
    ('whole order is ready for shipment', 'next product from the order is selected', 'XOR_gateway'),
    ('order is not ready for shipment', 'next product from the order is selected', 'XOR_condition'),
    ('next product from the order is selected', 'first product from the order is checked if in stock', 'Flow'),
]

# Build the frame in a single call. DataFrame.append was removed in pandas 2.0
# (and copied the whole frame on every iteration before that).
df_text = pd.DataFrame(relations, columns=['Source', 'Target', 'Relation'])

# Activity mentions in the text: every target, plus every source that is an
# activity (i.e. not the condition text of an 'XOR_condition' row).
Activities_text = []
for source, target, relation_type in relations:
    Activities_text.append(target)
    if relation_type != 'XOR_condition':
        Activities_text.append(source)

# De-duplicate while keeping first-seen order. list(set(...)) yields a
# different order on every kernel start (string hash randomisation), which
# makes the downstream match table hard to compare between runs.
queries = list(dict.fromkeys(Activities_text))

In [4]:
# Labels of the BPMN nodes that take part in the matching: activities and
# events, with start and end events filtered out first.
Activities_BPMN = [
    bpmn_node.name
    for bpmn_node in bpmn.get_nodes()
    if not isinstance(bpmn_node, (BPMN.EndEvent, BPMN.StartEvent))
    and isinstance(bpmn_node, (BPMN.Activity, BPMN.Event))
]

# entity match
# Pair every textual activity (queries) with the BPMN label whose embedding
# has the highest cosine similarity. A BPMN label is held by at most one
# query: when two queries pick the same label, the higher score keeps it and
# the other row is marked "No match found".
# NOTE(review): a displaced query is not retried against its second-best
# label, so it stays unmatched.
NO_MATCH = "No match found"
MATCH_THRESHOLD = 0.5  # best similarity below this is reported as no match

# Encode each BPMN label once. The embeddings do not depend on the query, so
# re-encoding them inside the query loop was pure repeated work.
activity_embeddings = [torch.FloatTensor(model.encode(activity)) for activity in Activities_BPMN]

match_rows = []   # one {'text', 'BPMN', 'score'} record per output row
claimed_row = {}  # position in Activities_BPMN -> index in match_rows of the query holding it
for query in queries:
    if not activity_embeddings:
        # No BPMN labels to compare with (np.argmax would raise on an empty list).
        match_rows.append({'text': query, 'BPMN': NO_MATCH, 'score': 0})
        continue

    query_embedding = torch.FloatTensor(model.encode(query))
    scores = [
        F.cosine_similarity(query_embedding, activity_embedding, dim=0).item()
        for activity_embedding in activity_embeddings
    ]
    max_score_pos = int(np.argmax(scores))
    max_score = scores[max_score_pos]

    matched_label = Activities_BPMN[max_score_pos]
    if max_score < MATCH_THRESHOLD:
        matched_label = NO_MATCH
    elif max_score_pos in claimed_row:
        holder = match_rows[claimed_row[max_score_pos]]
        if max_score > holder['score']:
            # The new query is strictly closer: it takes the label over.
            holder['BPMN'] = NO_MATCH
            claimed_row[max_score_pos] = len(match_rows)
        else:
            matched_label = NO_MATCH
    else:
        claimed_row[max_score_pos] = len(match_rows)
    match_rows.append({'text': query, 'BPMN': matched_label, 'score': max_score})

# Positions of the BPMN labels that found a textual partner, in claim order.
temp_list = list(claimed_row)

# BPMN labels nobody claimed are listed too, with a placeholder text and score 0.
for position, activity in enumerate(Activities_BPMN):
    if position not in claimed_row:
        match_rows.append({'text': NO_MATCH, 'BPMN': activity, 'score': 0})

# Built in one call: DataFrame.append was removed in pandas 2.0.
df = pd.DataFrame(match_rows, columns=['text', 'BPMN', 'score'])


# Every sequence flow as a mutable [source node, target node, flow name] triple.
temp = [[flow.source, flow.target, flow.get_name()] for flow in bpmn.get_flows()]

# A flow that ends in a merging gateway (more than one incoming arc) is
# redirected: follow the first outgoing arc, repeatedly, until the target is
# no longer a gateway.
for rel in temp:
    if isinstance(rel[1], BPMN.Gateway) and len(rel[1].get_in_arcs()) > 1:
        while isinstance(rel[1], BPMN.Gateway):
            rel[1] = rel[1].get_out_arcs()[0].target

# Drop self-loops created by the redirection and flows that start at a
# merging gateway (those were already bridged by the step above).
temp1 = [
    rel
    for rel in temp
    if not (rel[0] == rel[1])
    and not (isinstance(rel[0], BPMN.Gateway) and len(rel[0].get_in_arcs()) > 1)
]

# Translate the remaining flows into (Source, Target, Relation) rows.
bpmn_rows = []
for source_node, target_node, flow_name in temp1:
    if isinstance(target_node, BPMN.EndEvent) or isinstance(source_node, BPMN.StartEvent):
        continue
    if isinstance(source_node, BPMN.ExclusiveGateway):
        # The node feeding the gateway (source of its first incoming arc)
        # becomes the relation source; the flow name is kept as the condition.
        gateway_input = source_node.get_in_arcs()[0].source.name
        bpmn_rows.append({'Source': gateway_input, 'Target': target_node.name, 'Relation': 'XOR_gateway'})
        bpmn_rows.append({'Source': flow_name, 'Target': target_node.name, 'Relation': 'XOR_condition'})
    elif isinstance(target_node, BPMN.ExclusiveGateway):
        # Flow into an exclusive gateway: represented by the gateway's outgoing flows.
        continue
    elif isinstance(source_node, BPMN.ParallelGateway):
        gateway_input = source_node.get_in_arcs()[0].source.name
        bpmn_rows.append({'Source': gateway_input, 'Target': target_node.name, 'Relation': 'parallel_gateway'})
    elif isinstance(target_node, BPMN.ParallelGateway):
        # Flow into a parallel gateway: represented by the gateway's outgoing flows.
        continue
    else:
        bpmn_rows.append({'Source': source_node.name, 'Target': target_node.name, 'Relation': 'Flow'})

# Built in one call: DataFrame.append was removed in pandas 2.0.
df_BPMN = pd.DataFrame(bpmn_rows, columns=['Source', 'Target', 'Relation'])

# replace
# Rewrite the text relations with the BPMN labels found by the entity match,
# then score them against the relations extracted from the diagram.
NO_MATCH = "No match found"

# First BPMN entry recorded for each text label (the same row the original
# filtered-frame lookup selected with `.tolist()[0]`), built once instead of
# re-filtering `df` up to three times per field.
text_to_bpmn = {}
for text_label, bpmn_label in zip(df['text'], df['BPMN']):
    text_to_bpmn.setdefault(text_label, bpmn_label)


def _bpmn_label_for(label):
    """Return the BPMN label matched to `label`, or `label` unchanged if it has no match."""
    if label == NO_MATCH or label not in text_to_bpmn:
        return label
    matched = text_to_bpmn[label]
    return label if matched == NO_MATCH else matched


# Built in one call: DataFrame.append was removed in pandas 2.0.
df_text_new = pd.DataFrame(
    [
        {
            'Source': _bpmn_label_for(row.Source),
            'Target': _bpmn_label_for(row.Target),
            'Relation': row.Relation,
        }
        for row in df_text.itertuples(index=False)
    ],
    columns=['Source', 'Target', 'Relation'],
)

# Manual overrides for this particular description: these row numbers are the
# 'XOR_condition' rows of `relations`, whose condition text is replaced by a
# short label. They are tied to the row order of `relations` and must be
# revised whenever that list changes.
# NOTE(review): presumably 'Yes'/'No' are the flow names used in the diagram - confirm.
CONDITION_OVERRIDES = {2: 'Yes', 4: 'No', 6: 'Yes', 7: 'No', 11: 'Yes', 17: 'No'}
for row_index, condition_label in CONDITION_OVERRIDES.items():
    df_text_new.loc[row_index, 'Source'] = condition_label

relation_columns = ['Source', 'Target', 'Relation']
list_text = list(df_text_new[relation_columns].itertuples(index=False, name=None))
list_BPMN = list(df_BPMN[relation_columns].itertuples(index=False, name=None))

# Relations present, identically, on both sides.
shared_relations = set(list_text) & set(list_BPMN)

# precision: share of text relations confirmed by the diagram;
# recall: share of diagram relations recovered from the text.
# Empty inputs give 0.0 instead of a ZeroDivisionError.
precision = len(shared_relations) / len(list_text) if list_text else 0.0
recall = len(shared_relations) / len(list_BPMN) if list_BPMN else 0.0
print('precision:', precision)
print('recall:', recall)


precision: 0.6842105263157895
recall: 0.6190476190476191


In [5]:
# Description transformation results: the text-derived relations after activity
# names were swapped for their matched BPMN labels and the condition rows were
# overridden with 'Yes'/'No'. Left as a bare expression so the notebook renders
# the frame as this cell's output.
df_text_new

Unnamed: 0,Source,Target,Relation
0,Read order from the automatic order management...,Check if product is in stock,Flow
1,Check if product is in stock,Withdraw the Product from warehouse,XOR_gateway
2,Yes,Withdraw the Product from warehouse,XOR_condition
3,Check if product is in stock,Reorder from wholesaler,XOR_gateway
4,No,Reorder from wholesaler,XOR_condition
5,Reorder from wholesaler,wait more then 10 days for the arrival of the ...,XOR_gateway
6,Yes,Demand delivery delay penalty from the wholesaler,XOR_condition
7,No,Waiting until arrives,XOR_condition
8,Waiting until arrives,Register in stock management system,Flow
9,Register in stock management system,Check if the whole order is ready for shipment,Flow
