In [3]:
!ollama list-remote

Error: unknown command "list-remote" for "ollama"


In [9]:
import os
# NOTE(review): OLLAMA_DEVICE does not appear to be one of Ollama's documented
# environment variables, and a variable set inside this kernel is not visible to an
# Ollama server process that is already running — TODO confirm this really forces
# CPU inference (presumably the per-request option `num_gpu=0` is what is needed).
os.environ["OLLAMA_DEVICE"] = "cpu"

In [5]:
# Final chain-of-thought prompt, with CSV export of the extracted mappings
import os
import xml.etree.ElementTree as ET
from itertools import combinations
import ollama
import csv
import re

# -----------------------------
# Force CPU usage to avoid memory problems
# NOTE(review): OLLAMA_DEVICE does not appear to be a documented Ollama variable and
# is set in the client process only — TODO confirm it has any effect on the server.
# -----------------------------
os.environ["OLLAMA_DEVICE"] = "cpu"

# -----------------------------
# Helper functions
# -----------------------------
def extract_task_names(file_path):
    """Return the ``name`` attributes of the task-like elements in a BPMN file.

    The file is parsed as XML and searched, one element type at a time, for
    elements in the BPMN 2.0 model namespace. Results are therefore grouped by
    element type (in the order of the tuple below) rather than by document
    order. Elements whose ``name`` is missing or empty are skipped.

    Args:
        file_path: Path of the BPMN (XML) file to read.

    Returns:
        list[str]: The collected names, or an empty list when the file does not
        exist or is not well-formed XML (a message is printed in both cases).
    """
    bpmn_ns = {'bpmn': 'http://www.omg.org/spec/BPMN/20100524/MODEL'}
    task_tags = (
        'task', 'userTask', 'manualTask', 'serviceTask', 'scriptTask',
        'businessRuleTask', 'sendTask', 'receiveTask', 'callActivity',
    )

    # Only parsing can raise the two handled errors; the search below cannot.
    try:
        document_root = ET.parse(file_path).getroot()
    except ET.ParseError as parse_error:
        print(f"Errore parsing BPMN file {file_path}: {parse_error}")
        return []
    except FileNotFoundError:
        print(f"File non trovato: {file_path}")
        return []

    return [
        element.attrib['name']
        for tag in task_tags
        for element in document_root.findall(f".//bpmn:{tag}", bpmn_ns)
        if element.attrib.get('name')
    ]

# One mapping line looks like:  "Task A" -> "Task B" [MC, similarity: 0.83]
# Labels are matched with [^"\n]+ (not .+?) so a capture can never run across
# another quoted string that happens to precede the arrow on the same line.
_MAPPING_LINE_RE = re.compile(
    r'"([^"\n]+)"\s*->\s*"([^"\n]+)"\s*\[(VB|MC|HR),\s*similarity:\s*([0-9.]+)\]'
)


def extract_mappings_from_response(text):
    """Extract 1:1 task mappings from the LLM response text.

    Every occurrence of ``"A" -> "B" [VB|MC|HR, similarity: x.xx]`` anywhere in
    ``text`` is returned, in order of appearance. Lines in any other shape
    (for example 1:N mappings written with a bracketed list) are ignored.
    Labels that themselves contain a double quote are not supported.

    Args:
        text: Raw response text; ``None`` or an empty string yields no mappings.

    Returns:
        list[dict]: One dict per mapping with the keys ``task_model_1``,
        ``task_model_2``, ``similarity`` (kept as the matched string) and
        ``category``.
    """
    if not text:
        return []

    return [
        {
            "task_model_1": source_label,
            "task_model_2": target_label,
            "similarity": score,
            "category": category,
        }
        for source_label, target_label, category, score in _MAPPING_LINE_RE.findall(text)
    ]

def compare_bpmn_files(file1_path, file2_path, output_dir="results"):
    """Compare two BPMN models with the `llama3:latest` model and save the result.

    Task names are extracted from both files and inserted into a chain-of-thought
    prompt that is sent through ``ollama.chat``. The raw response is written to
    ``{output_dir}/comparison_{name_1}_vs_{name_2}.txt`` and the mapping lines
    found in it are written to ``{output_dir}/initial_mapping_{name_1}_vs_{name_2}.csv``
    (``name_1``/``name_2`` are the file base names, extension included).

    Args:
        file1_path: Path of the first BPMN file.
        file2_path: Path of the second BPMN file.
        output_dir: Directory for the output files; created if it does not exist.

    Returns:
        None. The comparison is skipped (with a printed message) when either file
        yields no task names. Any exception raised by the model call or by the
        file writes is caught and printed rather than propagated.
    """
    os.makedirs(output_dir, exist_ok=True)

    name_1 = os.path.basename(file1_path)
    name_2 = os.path.basename(file2_path)

    tasks1 = extract_task_names(file1_path)
    tasks2 = extract_task_names(file2_path)

    if not tasks1 or not tasks2:
        print(f"Skipping comparison for {name_1} vs {name_2} due to missing tasks.")
        return

    # chr(10) is "\n": each task name goes on its own line of the prompt
    # (presumably written this way because older Python versions reject
    # backslashes inside f-string expressions).
    # NOTE(review): some dash/arrow characters in this literal look mis-encoded
    # — confirm the notebook's file encoding.
    prompt = f"""
# Context and Role Specification 
You are a process analysis expert specialized in BPMN comparison, process similarity, and model alignment.

# Scope and Task Definition 
Compare two BPMN models and identify correspondences (mappings) between their components.
Then perform a structured self-critique and correction phase before producing final metrics.

Input:
BPMN Model 1: {name_1}
{chr(10).join(tasks1)}

BPMN Model 2: {name_2}
{chr(10).join(tasks2)}

#  Procedure Design and Output Structuring 
1) Identify all **1:1 correspondences** between elements of the two models.
   (If 1:N or N:M exist, mention them briefly, but focus on 1:1.)
2) For each mapping, compute a **similarity score (0‚Äì1)** using cosine similarity between vector embeddings of the element labels and context features.  
   This score combines lexical, semantic, and functional resemblance.
3) Classify each mapping as:
   - VB (Verbatim): similarity > 0.90
   - MC (Modified Copy): similarity 0.65‚Äì0.90
   - HR (High Revision): similarity < 0.65
4) Output mappings **grouped by category** (VB first, then MC, then HR) in this format:
   "Element A" -> "Element B" [VB/MC/HR, similarity: x.xx]
   Include a short justification (1‚Äì2 lines) for each mapping.
5) After listing all mappings, perform a **self-critique and correction** phase:
   - Identify potential errors or ambiguities (misclassifications, vague tasks, naming issues).
   - Explain reasoning limits (missing context, unclear actor, inconsistent wording).
   - Propose corrected or adjusted mappings when needed.
   - Clearly separate **revised mappings** from initial ones.
6) Conclude with **final metrics**:
   - Total tasks in each model
   - Count + % of VB, MC, HR
   - Ambiguous / duplicated / missing tasks
   - Global similarity score (weighted average of mapping similarities using cosine similarity)

Expected output sections:
A) Initial mappings (VB ‚Üí MC ‚Üí HR)
B) Self-critique and revised mappings
C) Final metrics and global similarity

# Example Integration (Few-Shot In-Context Demonstration) 
## 1:1 Examples
"Record Payment" -> "Record Payment" [VB, similarity: 1.00]  
Explanation: identical label and meaning.

"Approve Order" -> "Approve Purchase Order" [MC, similarity: 0.83]  
Explanation: same intent, minor lexical addition.

"Create Contract" -> "Draft Agreement" [HR, similarity: 0.47]  
Explanation: significantly revised meaning and terminology.

## 1:N Example
"Send Invoice" -> ["Generate Invoice", "Email Invoice"] [similarity: 0.58]  
Explanation: original task split into multiple steps; functionally equivalent but more granular.

# === Step 5: Verification, Termination, and Iterative Refinement ===
After producing the initial mappings:
- Verify internal consistency across VB, MC, HR classifications.
- Re-evaluate any ambiguous or borderline mappings.
- If errors or mismatches are found, iteratively refine similarity scores and category labels (based on cosine similarity).
- Terminate once mappings, critique, and metrics are coherent and internally justified.

# === Final Instruction ===
Execute all steps in order: mappings by category ‚Üí self-critique and corrections ‚Üí metrics ‚Üí verification and refinement.
Return the complete structured analysis in English.
"""

    try:
        response = ollama.chat(model='llama3:latest', messages=[{'role': 'user', 'content': prompt}])
        content = response['message']['content']

        # Save the raw model response to a text file
        filename = f"{output_dir}/comparison_{name_1}_vs_{name_2}.txt"
        with open(filename, "w", encoding="utf-8") as f:
            f.write(content)

        print(f"‚úÖ Comparison saved: {filename}")

        # === Save the extracted mappings to CSV ===
        # The whole response is scanned, so any mapping line restated in later
        # sections (e.g. revised mappings) would also match — verify if only the
        # initial mappings are wanted in this file.
        mappings = extract_mappings_from_response(content)
        csv_name = f"{output_dir}/initial_mapping_{name_1}_vs_{name_2}.csv"

        with open(csv_name, "w", newline="", encoding="utf-8") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=["task_model_1", "task_model_2", "similarity", "category"])
            writer.writeheader()
            writer.writerows(mappings)

        print(f"üìÑ Initial mapping CSV saved: {csv_name}")

    except Exception as e:
        # Best-effort: report the failure and let the caller continue.
        print(f"Errore durante la comparazione {name_1} vs {name_2}: {e}")

def batch_compare_bpmn_files(bpmn_files, batch_size=2):
    """Compare BPMN files pairwise inside consecutive, non-overlapping batches.

    The list is cut into slices of ``batch_size`` files and ``compare_bpmn_files``
    is called for every unordered pair within each slice. Files that land in
    different slices are never compared with each other, and a slice holding a
    single file produces no comparison.

    Args:
        bpmn_files: Sequence of BPMN file paths.
        batch_size: Number of files per slice (default 2).
    """
    batch_starts = range(0, len(bpmn_files), batch_size)
    for offset in batch_starts:
        chunk = bpmn_files[offset:offset + batch_size]
        for left_path, right_path in combinations(chunk, 2):
            compare_bpmn_files(left_path, right_path)

# -----------------------------
# List of BPMN files (relative paths)
# -----------------------------
bpmn_files = [
    "Cologne.bpmn",
    "Frankfurt.bpmn",
    "IIS_Erlangen.bpmn",
    "Fu_Berlin.bpmn",
    "Hohenheim.bpmn",
    "Potsdam.bpmn",
    "Muenster.bpmn",
    "Tu_Munich.bpmn",
    "Wuerzburg.bpmn"
]

# Run the batch (small batch_size to avoid memory errors).
# NOTE(review): with batch_size=2 only consecutive pairs (1st-2nd, 3rd-4th, ...) are
# compared and the ninth file ("Wuerzburg.bpmn") has no partner, so this run yields
# four comparisons rather than all pairs — confirm that this is intended.
batch_compare_bpmn_files(bpmn_files, batch_size=2)


‚úÖ Comparison saved: results/comparison_Cologne.bpmn_vs_Frankfurt.bpmn.txt
üìÑ Initial mapping CSV saved: results/initial_mapping_Cologne.bpmn_vs_Frankfurt.bpmn.csv
‚úÖ Comparison saved: results/comparison_IIS_Erlangen.bpmn_vs_Fu_Berlin.bpmn.txt
üìÑ Initial mapping CSV saved: results/initial_mapping_IIS_Erlangen.bpmn_vs_Fu_Berlin.bpmn.csv
‚úÖ Comparison saved: results/comparison_Hohenheim.bpmn_vs_Potsdam.bpmn.txt
üìÑ Initial mapping CSV saved: results/initial_mapping_Hohenheim.bpmn_vs_Potsdam.bpmn.csv
‚úÖ Comparison saved: results/comparison_Muenster.bpmn_vs_Tu_Munich.bpmn.txt
üìÑ Initial mapping CSV saved: results/initial_mapping_Muenster.bpmn_vs_Tu_Munich.bpmn.csv


In [13]:
#final error-guided prompt
import os
import xml.etree.ElementTree as ET
from itertools import combinations
import ollama

# -----------------------------
# Force CPU usage to avoid memory problems
# NOTE(review): OLLAMA_DEVICE does not appear to be a documented Ollama variable and
# is set in the client process only — TODO confirm it has any effect on the server.
# -----------------------------
os.environ["OLLAMA_DEVICE"] = "cpu"

# -----------------------------
# Load the unified CSV
# The file is read as raw text; compare_bpmn_files embeds it verbatim in its prompt.
# -----------------------------
initial_mapping_csv_path = "results/mappings_dataset.csv"
try:
    with open(initial_mapping_csv_path, "r", encoding="utf-8") as f:
        initial_mapping_csv = f.read()
except FileNotFoundError:
    print(f"‚ö†Ô∏è CSV unificato non trovato: {initial_mapping_csv_path}")
    # Sentinel text used in place of the CSV content when the file is missing.
    initial_mapping_csv = "NO_MAPPING_CSV_FOUND"

# Placeholders for ground truth and refinement rules when they are not provided separately.
# NOTE(review): both are still placeholder strings, yet the prompt asks the model to
# compute precision/recall/F1 against the ground truth — TODO supply the real data.
ground_truth_mappings = "GROUND_TRUTH_NOT_PROVIDED"
refinement_rules = "REFINEMENT_RULES_NOT_PROVIDED"

# -----------------------------
# Helper functions
# -----------------------------
def extract_task_names(file_path):
    """Return the ``name`` attributes of the task-like elements in a BPMN file.

    The file is parsed as XML and searched, one element type at a time, for
    elements in the BPMN 2.0 model namespace. Results are therefore grouped by
    element type (in the order of the tuple below) rather than by document
    order. Elements whose ``name`` is missing or empty are skipped.

    Args:
        file_path: Path of the BPMN (XML) file to read.

    Returns:
        list[str]: The collected names, or an empty list when the file does not
        exist or is not well-formed XML (a message is printed in both cases).
    """
    bpmn_ns = {'bpmn': 'http://www.omg.org/spec/BPMN/20100524/MODEL'}
    task_tags = (
        'task', 'userTask', 'manualTask', 'serviceTask', 'scriptTask',
        'businessRuleTask', 'sendTask', 'receiveTask', 'callActivity',
    )

    # Only parsing can raise the two handled errors; the search below cannot.
    try:
        document_root = ET.parse(file_path).getroot()
    except ET.ParseError as parse_error:
        print(f"Errore parsing BPMN file {file_path}: {parse_error}")
        return []
    except FileNotFoundError:
        print(f"File non trovato: {file_path}")
        return []

    return [
        element.attrib['name']
        for tag in task_tags
        for element in document_root.findall(f".//bpmn:{tag}", bpmn_ns)
        if element.attrib.get('name')
    ]

def compare_bpmn_files(file1_path, file2_path, output_dir="results"):
    """Run the error-guided critique for one pair of BPMN models and save the result.

    Task names are extracted from both files and placed in the prompt together
    with the module-level ``initial_mapping_csv``, ``ground_truth_mappings`` and
    ``refinement_rules`` texts. The prompt is sent to ``llama3:latest`` through
    ``ollama.chat`` and the raw response is written to
    ``{output_dir}/comparison_{name_1}_vs_{name_2}.txt`` (``name_1``/``name_2``
    are the file base names, extension included).

    Previously the extracted task lists were never used, so every pair received
    exactly the same prompt; the two models are now part of the input data.

    Args:
        file1_path: Path of the first BPMN file.
        file2_path: Path of the second BPMN file.
        output_dir: Directory for the output file; created if it does not exist.

    Returns:
        None. The comparison is skipped (with a printed message) when either file
        yields no task names. Any exception raised by the model call or by the
        file write is caught and printed rather than propagated.
    """
    os.makedirs(output_dir, exist_ok=True)

    name_1 = os.path.basename(file1_path)
    name_2 = os.path.basename(file2_path)

    tasks1 = extract_task_names(file1_path)
    tasks2 = extract_task_names(file2_path)

    if not tasks1 or not tasks2:
        print(f"Skipping comparison for {name_1} vs {name_2} due to missing tasks.")
        return

    # -----------------------------
    # Prompt built from the unified CSV plus the two models under comparison.
    # chr(10) is "\n": each task name goes on its own line.
    # NOTE(review): the sentence scoping the analysis to these two models assumes
    # the unified CSV mixes rows from several model pairs — confirm against the
    # actual layout of results/mappings_dataset.csv.
    # -----------------------------
    prompt = f"""
# Context and Role
You are a process alignment expert specialized in BPMN comparison, semantic similarity, and self-evaluation.

# Input Data
BPMN Model 1: {name_1}
{chr(10).join(tasks1)}

BPMN Model 2: {name_2}
{chr(10).join(tasks2)}

Focus the analysis on the mappings that involve the tasks of these two models.

Initial Mapping CSV:
{initial_mapping_csv}

Ground Truth Mappings:
{ground_truth_mappings}

Refinement Rules from Training:
{refinement_rules}

# Scope and Task Definition
1) Apply the refinement rules to guide detection of errors and corrections.
2) Identify errors in the CSV:
   - False positives (mapping present but not in ground truth)
   - False negatives (mapping missing but present in ground truth)
   - Category errors (VB vs MC vs HR)
3) Propose corrected mappings based on the refinement rules.
4) Compute evaluation metrics:
   - Precision = TP / (TP + FP)
   - Recall = TP / (TP + FN)
   - F1 = harmonic mean of Precision and Recall

# Procedure Design and Output Structuring
A) SelfCritique (detected errors + revised mappings using refinement rules)
B) Evaluation (metrics vs. ground truth)

# Verification, Termination, and Iterative Refinement
Terminate once mappings, critique, and metrics are coherent and internally justified.
Return the output in English and structured JSON-like blocks.
"""

    try:
        response = ollama.chat(model='llama3:latest', messages=[{'role': 'user', 'content': prompt}])
        content = response['message']['content']

        # Save the raw model response to a text file
        filename = f"{output_dir}/comparison_{name_1}_vs_{name_2}.txt"
        with open(filename, "w", encoding="utf-8") as f:
            f.write(content)

        print(f"‚úÖ Comparison saved: {filename}")

    except Exception as e:
        # Best-effort: report the failure and let the caller continue.
        print(f"Errore durante la comparazione {name_1} vs {name_2}: {e}")

def batch_compare_bpmn_files(bpmn_files, batch_size=2):
    """Compare BPMN files pairwise inside consecutive, non-overlapping batches.

    The list is cut into slices of ``batch_size`` files and ``compare_bpmn_files``
    is called for every unordered pair within each slice. Files that land in
    different slices are never compared with each other, and a slice holding a
    single file produces no comparison.

    Args:
        bpmn_files: Sequence of BPMN file paths.
        batch_size: Number of files per slice (default 2).
    """
    batch_starts = range(0, len(bpmn_files), batch_size)
    for offset in batch_starts:
        chunk = bpmn_files[offset:offset + batch_size]
        for left_path, right_path in combinations(chunk, 2):
            compare_bpmn_files(left_path, right_path)

# -----------------------------
# List of BPMN files (relative paths)
# -----------------------------
bpmn_files = [
    "IIS_Erlangen - Copia (1).bpmn",
    "Fu_Berlin (2).bpmn",
    "Hohenheim - Copia (2).bpmn",
    "Potsdam - Copia (1).bpmn",
    "Muenster - Copia (1).bpmn",
    "Tu_Munich - Copia (1).bpmn",
    "Wuerzburg (1).bpmn"
]

# Run the batch (small batch_size to avoid memory errors).
# NOTE(review): with batch_size=2 only consecutive pairs are compared and the seventh
# file ("Wuerzburg (1).bpmn") has no partner, so this run yields three comparisons
# rather than all pairs — confirm that this is intended.
batch_compare_bpmn_files(bpmn_files, batch_size=2)


‚úÖ Comparison saved: results/comparison_IIS_Erlangen - Copia (1).bpmn_vs_Fu_Berlin (2).bpmn.txt
‚úÖ Comparison saved: results/comparison_Hohenheim - Copia (2).bpmn_vs_Potsdam - Copia (1).bpmn.txt
‚úÖ Comparison saved: results/comparison_Muenster - Copia (1).bpmn_vs_Tu_Munich - Copia (1).bpmn.txt
