# Template Definition

In [4]:
# Root directory handed to process_subfolders_with_chain(): each immediate
# subfolder is expected to hold one specification (`text.txt` or a .docx).
# NOTE(review): empty here — presumably blanked before sharing; set it to a real path before running.
ROOT_FOLDER = ""

In [5]:
from langchain_core.prompts import ChatPromptTemplate
# Chat-style prompt: a system message with the modelling rules plus a user
# message carrying the `{text}` placeholder. Literal braces in the PlantUML
# examples are escaped as `{{}}` so the template engine does not treat them
# as variables.
# NOTE(review): the next cell assigns a PromptTemplate to the same name, so when
# the notebook runs top to bottom this chat variant is never used by the chains.
# NOTE(review): the prompt wording contains typos ("UMl", "molteplicity",
# "form text", "futher", "explaination"); it is left untouched because the
# text is sent to the model verbatim and editing it would change the experiment.
prompt_template = ChatPromptTemplate([
    ("system", """You are a helpful assistant expert of conceptual modeling, information extraction and UML modeling.
                  
                  You will be asked by the user to create a plant UMl model from specification text. Do so in the most
                  clear way possible, avoid class properties and assign molteplicity. 

                  Do not include attributes for classes. For example the class Book would be:

                  class Book{{}}

                  Use only bi-directional arc for relations and no description. For example a relation between
                  the class Book and the class Page, if the Book can have from one to many pages and the 
                  pages could have exactly one book, would be:

                  Book "1..1" -- "1..*" Page

                  Adapt the cardinality to each case. If the cardinality would be "0..*", the default one, omit it.

                  The plantuml has to be the class diagram. In generating the diagram perform this steps in order

                  1. Extract class from text
                  2. Extract relations form text
                  3. Assign the relation to the corresponding class
                  4. Add cardinality to the relations

                  Put everything in this order: first all classes and then all relations. In our example would be:

                  class Book{{}}
                  class Page{{}}

                  Book "1..1" -- "1..*" Page


                  Output plantuml without futher text or explaination.
                  """),
    ("user", "Transform into plant uml this specification text: {text}")
])


# Scratch note left as a bare string expression; being the last expression of
# the cell it is only echoed as the cell output and is not bound to any name.
""" Step 1. Class
    Step 2. Association
    Step 3. Cardinaliy one by one"""

' Step 1. Class\n    Step 2. Association\n    Step 3. Cardinaliy one by one'

In [6]:
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate
# Single-string (completion-style) version of the prompt. It rebinds
# `prompt_template`, so every chain built later in the notebook picks up this
# template. Differences visible against the chat variant above: the example
# output is wrapped in @startuml/@enduml, the "omit 0..*" rule is absent, and
# the specification is injected through `{text}` between '#' separator lines.
# Literal braces are escaped as `{{}}`.
# NOTE(review): typos in the prompt text are intentionally preserved because
# the string is sent to the model verbatim.
prompt_template = PromptTemplate.from_template(
"""
You will be asked by the user to create a plant UMl model from specification text. Do so in the most
clear way possible, avoid class properties and assign molteplicity. 

Do not include attributes for classes. For example the class Book would be:

class Book{{}}

Use only bi-directional arc for relations and no description. For example a relation between
the class Book and the class Page, if the Book can have from one to many pages and the 
pages could have exactly one book, would be:

Book "1..1" -- "1..*" Page

Adapt the cardinality to each case.

The plantuml has to be the class diagram. In generating the diagram perform this steps in order

1. Extract class from text
2. Extract relations form text
3. Assign the relation to the corresponding class
4. Add cardinality to the relations

Put everything in this order: first all classes and then all relations. In our example would be:

@startuml

class Book{{}}
class Page{{}}

Book "1..1" -- "1..*" Page

@enduml

Output plantuml without futher text or explaination.

##############

The specification text is:

{text}

##############

The uml output is:
"""
)

In [7]:
import sys
import threading
from time import sleep
try:
    import thread
except ImportError:
    import _thread as thread

def quit_function(fn_name):
    """Report a timeout on stderr and interrupt the main thread.

    Used as the callback of the timer started by `exit_after`, so it runs on
    the timer thread.

    Args:
        fn_name (str): Name of the function that exceeded its time budget.
    """
    print('{0} took too long'.format(fn_name), file=sys.stderr)
    sys.stderr.flush()  # make sure the message is visible before the interrupt
    thread.interrupt_main()  # raises KeyboardInterrupt in the main thread


def exit_after(s):
    """Decorator factory that interrupts a call running longer than `s` seconds.

    A `threading.Timer` is armed before the wrapped function starts and is
    cancelled as soon as the function returns or raises. If the timer fires
    first, `quit_function` raises `KeyboardInterrupt` in the main thread; the
    process itself is not terminated, so callers can catch the interrupt.

    Note: the interrupt is always delivered to the main thread, so the
    mechanism only aborts the call when the wrapped function runs there.

    Args:
        s (float): Time budget in seconds.

    Returns:
        A decorator that wraps a function with the timeout guard while
        preserving its name, docstring and other metadata.
    """
    import functools  # local import: keeps this cell self-contained

    def outer(fn):
        @functools.wraps(fn)  # keep fn.__name__/__doc__ on the wrapper
        def inner(*args, **kwargs):
            timer = threading.Timer(s, quit_function, args=[fn.__name__])
            timer.start()
            try:
                return fn(*args, **kwargs)
            finally:
                # Always disarm the timer, including when fn raises.
                timer.cancel()
        return inner
    return outer

In [8]:
import os
from tqdm import tqdm
from docx import Document

from langchain_core.tracers.context import tracing_v2_enabled

@exit_after(360)
def run_chain(chain, text):
    """Invoke `chain` on one specification text under the 360-second guard.

    Args:
        chain: Object exposing `invoke(dict)`; it receives `{"text": text}`.
        text (str): Specification text substituted into the prompt.

    Returns:
        Whatever `chain.invoke` returns.
    """
    payload = {"text": text}
    return chain.invoke(payload)

def _ensure_text_file(subfolder_path):
    """Return the path of `text.txt` in `subfolder_path`, or None if unavailable.

    When `text.txt` is missing, the first .docx file (alphabetical order, so
    the choice is deterministic) is converted by joining its paragraphs with
    newlines and the result is saved as `text.txt`.
    """
    text_file_path = os.path.join(subfolder_path, "text.txt")
    if os.path.isfile(text_file_path):
        return text_file_path

    docx_files = sorted(f for f in os.listdir(subfolder_path) if f.endswith(".docx"))
    if not docx_files:
        return None

    doc = Document(os.path.join(subfolder_path, docx_files[0]))
    text_content = "\n".join(paragraph.text for paragraph in doc.paragraphs)
    with open(text_file_path, "w", encoding="utf-8") as text_file:
        text_file.write(text_content)
    return text_file_path


def process_subfolders_with_chain(root_folder_path, chain, type=''):
    """
    Explores subfolders of the root folder (depth 1), processes each subfolder's `text.txt`
    with the provided LangChain chain, and saves the result in a new file in the same folder.

    If a subfolder has no `text.txt` but contains a .docx file, the text is
    extracted from the .docx and written to `text.txt` first. Subfolders with
    neither are skipped. When the chain times out or fails, an empty result
    file is still written so the run can continue.

    Args:
        root_folder_path (str): Path to the root folder.
        chain: A LangChain chain instance to process text inputs.
        type (str): Suffix of the output file, written as `result_{type}.txt`
            (callers pass the model name).
    """
    for subfolder_name in tqdm(os.listdir(root_folder_path)):
        subfolder_path = os.path.join(root_folder_path, subfolder_name)

        # Only immediate subfolders are processed
        if not os.path.isdir(subfolder_path):
            continue

        text_file_path = _ensure_text_file(subfolder_path)
        if text_file_path is None:
            continue

        with open(text_file_path, "r", encoding="utf-8") as file:
            text = file.read()

        with tracing_v2_enabled():
            try:
                result = run_chain(chain, text)
            except KeyboardInterrupt:
                # Raised by the run_chain timeout guard (which already logged
                # the timeout); record an empty result and move on.
                result = ""
            except Exception as exc:
                # Best effort: keep the batch going, but do not hide the failure.
                print(f"{subfolder_name}: chain failed ({exc!r})", file=sys.stderr)
                result = ""

        # Save the result to a new file in the same subfolder
        result_file_path = os.path.join(subfolder_path, f"result_{type}.txt")
        with open(result_file_path, "w", encoding="utf-8") as result_file:
            result_file.write(result)

In [9]:
def delete_result_txt_files(root_folder_path, name_contains="mlx-community_Llama-3.2-3B-Instruct-4bit"):
    """
    Deletes the .txt files whose name contains `name_contains` in the subfolders
    of the root folder (depth 1). Files directly inside the root folder are not touched.

    Args:
        root_folder_path (str): Path to the root folder.
        name_contains (str): Substring a file name must contain to be deleted.
            Defaults to the model marker that was previously hard-coded, so
            existing calls behave as before.
    """
    for subfolder_name in os.listdir(root_folder_path):
        subfolder_path = os.path.join(root_folder_path, subfolder_name)

        # Only immediate subfolders are inspected
        if not os.path.isdir(subfolder_path):
            continue

        for file_name in os.listdir(subfolder_path):
            if name_contains in file_name and file_name.endswith(".txt"):
                os.remove(os.path.join(subfolder_path, file_name))

In [14]:
#delete_result_txt_files(ROOT_FOLDER)

# Zero Shot Open-AI

In [10]:
from dotenv import load_dotenv
# Load variables from a .env file into the environment; the assert stops the
# notebook right here when load_dotenv() returns a falsy value.
# NOTE(review): presumably the .env holds the provider API keys — confirm.
assert load_dotenv()

In [13]:
# OpenAI model names; the cells below index or slice this list to choose what to run.
MODEL_OPEN_AI = ["o3-mini", "gpt-4o-mini", "gpt-4o", "gpt-4-turbo", "gpt-3.5-turbo"]

In [11]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from IPython.display import display_markdown

In [10]:
def open_ai_zero(model):
    """Run the zero-shot prompt with one OpenAI chat model over ROOT_FOLDER.

    Args:
        model (str): OpenAI model name; also passed as `type`, so results are
            written to `result_{model}.txt` in each subfolder.
    """
    llm = ChatOpenAI(model=model)
    zero_shot_chain = prompt_template | llm | StrOutputParser()
    process_subfolders_with_chain(ROOT_FOLDER, zero_shot_chain, type=model)

In [11]:
def open_ai_make_example(model):
    """Run the prompt once on the fixed Alpha Insurance sample specification.

    Args:
        model (str): OpenAI model name.

    Returns:
        The output of the prompt | model | StrOutputParser chain.
    """
    sample_text = """Alpha Insurance is an insurance company that provides its clients with various types of insurance policies. 
As soon as a customer addresses Alpha Insurance, a broker is assigned to follow the customer’s file. The broker is registered in the system, so that when a customer calls, based on the contract, the help desk can immediately trace who is the customer's first account manager. After the broker is assigned to the customer, the latter indicates which type(s) of insurance policy they would like to sign for, so the broker could, depending on the case, either assess the customer’s profile on the spot or send the customer’s file for analysis to the head office. After the customer’s profile has been assessed and the customer has been deemed trustworthy , a preliminary contract/offer on an insurance product is made to the customer either in person or by email. (Such offers can also be extended to already existing customers.) If the customer agrees to the offer, the contract is signed by both parties. After the signing of the contract, the client enjoys the coverage and is invoiced (monthly or yearly – depending on the choice made in the contract) according to the price of the insurance product they bought.

In case the insured event happens, a customer should send a claim for compensation. Then the company opens one or several claim cases (e.g. in case of an accident, often material damage & physical damage are handled separately). Once the case file is complete, it is sent for assessment by different estimators based on their area of expertise. According to the reports issued by the estimators, it is decided whether the claim case is approved. In case of approval the compensation decision is registered that stipulates which costs are eligible for (partial) refund.  For the supplied documents, the sum of compensation is calculated and the compensation is paid to the customer’s account.  The estimators’ reports must be stored in the database for at least one year after the payment of compensation for legal purposes. 

"""
    llm = ChatOpenAI(model=model)
    example_chain = prompt_template | llm | StrOutputParser()
    return example_chain.invoke({"text": sample_text})

In [14]:
# Smoke test: run the sample specification through the first entry of
# MODEL_OPEN_AI and render the returned text as raw Markdown.
display_markdown(open_ai_make_example(MODEL_OPEN_AI[0]), raw=True)

@startuml

class InsuranceCompany{}
class Customer{}
class InsurancePolicy{}
class Broker{}
class HeadOffice{}
class AccountManager{}
class HelpDesk{}
class Offer{}
class Contract{}
class Invoice{}
class ClaimCase{}
class Estimator{}
class Report{}
class CompensationDecision{}
class Payment{}

InsuranceCompany "1..1" -- "0..*" Customer
InsuranceCompany "1..1" -- "1..*" InsurancePolicy
Customer "1..1" -- "1..1" Broker
HeadOffice "1..1" -- "0..*" Broker
Customer "1..1" -- "1..1" AccountManager
HelpDesk "1..1" -- "1..1" AccountManager
Customer "1..1" -- "0..1" Offer
Offer "1..1" -- "0..1" Contract
Contract "1..1" -- "1..1" AccountManager
Customer "1..1" -- "0..*" Invoice
Customer "1..1" -- "0..*" ClaimCase
InsuranceCompany "1..1" -- "0..*" ClaimCase
ClaimCase "1..1" -- "1..*" Estimator
Estimator "1..1" -- "1..*" Report
ClaimCase "1..1" -- "0..1" CompensationDecision
CompensationDecision "1..1" -- "1..1" Payment
Customer "1..1" -- "0..1" Payment

@enduml

In [18]:
# Batch run over ROOT_FOLDER; the [1:2] slice restricts it to the second entry
# of MODEL_OPEN_AI only.
for model in MODEL_OPEN_AI[1:2]:
    print(f"Zero shot with {model}")
    open_ai_zero(model)

Zero shot with gpt-4o-mini


100%|███████████████████████████████████████████| 48/48 [00:55<00:00,  1.15s/it]


# Zero Shot Open LLM

In [17]:
import torch, gc

## Deepseek

In [21]:
from langchain_deepseek import ChatDeepSeek

In [42]:
# DeepSeek model names to evaluate; "deepseek-reasoner" is currently disabled.
MODEL_DEEPSEEK = ["deepseek-chat"]#, "deepseek-reasoner"]

In [43]:
def deepseek_make_example(model):
    """Run the prompt once on the fixed Alpha Insurance sample specification.

    The client is created with temperature 0, no token limit, no timeout and
    up to 2 retries.

    Args:
        model (str): DeepSeek model name.

    Returns:
        The output of the prompt | model | StrOutputParser chain.
    """
    sample_text = """Alpha Insurance is an insurance company that provides its clients with various types of insurance policies. 
As soon as a customer addresses Alpha Insurance, a broker is assigned to follow the customer’s file. The broker is registered in the system, so that when a customer calls, based on the contract, the help desk can immediately trace who is the customer's first account manager. After the broker is assigned to the customer, the latter indicates which type(s) of insurance policy they would like to sign for, so the broker could, depending on the case, either assess the customer’s profile on the spot or send the customer’s file for analysis to the head office. After the customer’s profile has been assessed and the customer has been deemed trustworthy , a preliminary contract/offer on an insurance product is made to the customer either in person or by email. (Such offers can also be extended to already existing customers.) If the customer agrees to the offer, the contract is signed by both parties. After the signing of the contract, the client enjoys the coverage and is invoiced (monthly or yearly – depending on the choice made in the contract) according to the price of the insurance product they bought.

In case the insured event happens, a customer should send a claim for compensation. Then the company opens one or several claim cases (e.g. in case of an accident, often material damage & physical damage are handled separately). Once the case file is complete, it is sent for assessment by different estimators based on their area of expertise. According to the reports issued by the estimators, it is decided whether the claim case is approved. In case of approval the compensation decision is registered that stipulates which costs are eligible for (partial) refund.  For the supplied documents, the sum of compensation is calculated and the compensation is paid to the customer’s account.  The estimators’ reports must be stored in the database for at least one year after the payment of compensation for legal purposes. 

"""
    client_options = dict(
        model=model,
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
    llm = ChatDeepSeek(**client_options)
    example_chain = prompt_template | llm | StrOutputParser()
    return example_chain.invoke({"text": sample_text})

In [45]:
def deepseek_zero(model):
    """Run the zero-shot prompt with one DeepSeek model over ROOT_FOLDER.

    Args:
        model (str): DeepSeek model name; also passed as `type`, so results
            are written to `result_{model}.txt` in each subfolder.
    """
    llm = ChatDeepSeek(model=model)
    zero_shot_chain = prompt_template | llm | StrOutputParser()
    process_subfolders_with_chain(ROOT_FOLDER, zero_shot_chain, type=model)

In [44]:
%%time
# Smoke test with the first DeepSeek model; the cell is timed by the %%time magic above.
display_markdown(deepseek_make_example(MODEL_DEEPSEEK[0]), raw=True)

@startuml

class AlphaInsurance{}
class Customer{}
class Broker{}
class InsurancePolicy{}
class Contract{}
class Offer{}
class Claim{}
class ClaimCase{}
class Estimator{}
class CompensationDecision{}
class Report{}

AlphaInsurance "1..1" -- "1..*" Customer
Customer "1..1" -- "1..1" Broker
Customer "1..1" -- "0..*" InsurancePolicy
Broker "1..1" -- "0..*" InsurancePolicy
InsurancePolicy "1..1" -- "1..1" Contract
Contract "1..1" -- "0..*" Offer
Customer "1..1" -- "0..*" Claim
Claim "1..1" -- "1..*" ClaimCase
ClaimCase "1..1" -- "1..*" Estimator
ClaimCase "1..1" -- "1..1" CompensationDecision
Estimator "1..1" -- "1..*" Report

@enduml

CPU times: user 95.4 ms, sys: 14.3 ms, total: 110 ms
Wall time: 18.1 s


In [46]:
# Batch run over ROOT_FOLDER for every entry of MODEL_DEEPSEEK.
# NOTE(review): the saved output also shows a "deepseek-reasoner" run with many
# timeouts, which the current list would not produce — presumably left over
# from an earlier execution with a longer list.
for model in MODEL_DEEPSEEK:
    print(f"Zero shot with {model}")
    deepseek_zero(model)

Zero shot with deepseek-chat


100%|███████████████████████████████████████████| 48/48 [08:40<00:00, 10.83s/it]


Zero shot with deepseek-reasoner


run_chain took too long                       | 6/48 [12:21<1:26:04, 122.96s/it]
run_chain took too long                       | 8/48 [18:21<1:39:12, 148.82s/it]
run_chain took too long                       | 9/48 [24:22<2:10:18, 200.47s/it]
run_chain took too long                      | 11/48 [30:22<1:58:29, 192.14s/it]
run_chain took too long                      | 14/48 [44:57<2:17:39, 242.93s/it]
run_chain took too long                      | 17/48 [54:11<1:49:02, 211.06s/it]
run_chain took too long                    | 22/48 [1:10:49<1:26:27, 199.51s/it]
run_chain took too long█                   | 24/48 [1:22:09<1:41:46, 254.44s/it]
run_chain took too long█▊                  | 25/48 [1:28:09<1:47:48, 281.23s/it]
run_chain took too long██▌                 | 26/48 [1:34:10<1:50:49, 302.24s/it]
run_chain took too long████▉               | 29/48 [1:40:11<1:05:03, 205.46s/it]
run_chain took too long█████▊              | 30/48 [1:46:11<1:11:02, 236.79s/it]
run_chain took too long█████

## Sonnet

In [30]:
from langchain_anthropic import ChatAnthropic

In [36]:
# Anthropic model names to evaluate.
MODEL_ANTHROPIC = ["claude-3-7-sonnet-20250219"]

In [34]:
def anthropic_make_example(model):
    """Run the prompt once on the fixed Alpha Insurance sample specification.

    The client is created with temperature 0, at most 1024 output tokens, no
    timeout and up to 2 retries.

    Args:
        model (str): Anthropic model name.

    Returns:
        The output of the prompt | model | StrOutputParser chain.
    """
    sample_text = """Alpha Insurance is an insurance company that provides its clients with various types of insurance policies. 
As soon as a customer addresses Alpha Insurance, a broker is assigned to follow the customer’s file. The broker is registered in the system, so that when a customer calls, based on the contract, the help desk can immediately trace who is the customer's first account manager. After the broker is assigned to the customer, the latter indicates which type(s) of insurance policy they would like to sign for, so the broker could, depending on the case, either assess the customer’s profile on the spot or send the customer’s file for analysis to the head office. After the customer’s profile has been assessed and the customer has been deemed trustworthy , a preliminary contract/offer on an insurance product is made to the customer either in person or by email. (Such offers can also be extended to already existing customers.) If the customer agrees to the offer, the contract is signed by both parties. After the signing of the contract, the client enjoys the coverage and is invoiced (monthly or yearly – depending on the choice made in the contract) according to the price of the insurance product they bought.

In case the insured event happens, a customer should send a claim for compensation. Then the company opens one or several claim cases (e.g. in case of an accident, often material damage & physical damage are handled separately). Once the case file is complete, it is sent for assessment by different estimators based on their area of expertise. According to the reports issued by the estimators, it is decided whether the claim case is approved. In case of approval the compensation decision is registered that stipulates which costs are eligible for (partial) refund.  For the supplied documents, the sum of compensation is calculated and the compensation is paid to the customer’s account.  The estimators’ reports must be stored in the database for at least one year after the payment of compensation for legal purposes. 

"""
    llm = ChatAnthropic(
        model=model,
        temperature=0,
        max_tokens=1024,
        timeout=None,
        max_retries=2,
    )
    example_chain = prompt_template | llm | StrOutputParser()
    return example_chain.invoke({"text": sample_text})

In [40]:
def anthropic_zero(model):
    """Run the zero-shot prompt with one Anthropic model over ROOT_FOLDER.

    Args:
        model (str): Anthropic model name; also passed as `type`, so results
            are written to `result_{model}.txt` in each subfolder.
    """
    llm = ChatAnthropic(model=model)
    zero_shot_chain = prompt_template | llm | StrOutputParser()
    process_subfolders_with_chain(ROOT_FOLDER, zero_shot_chain, type=model)

In [38]:
# Smoke test with the first Anthropic model; the returned text is rendered as raw Markdown.
display_markdown(anthropic_make_example(MODEL_ANTHROPIC[0]), raw=True)

```
@startuml

class Customer{}
class Broker{}
class InsurancePolicy{}
class Contract{}
class Offer{}
class Invoice{}
class Claim{}
class ClaimCase{}
class Estimator{}
class CompensationDecision{}
class Payment{}
class Report{}

Customer "1..*" -- "1..1" Broker
Customer "1..*" -- "0..*" InsurancePolicy
Customer "1..*" -- "0..*" Contract
Customer "1..*" -- "0..*" Offer
Customer "1..*" -- "0..*" Invoice
Customer "1..1" -- "0..*" Claim
Claim "1..1" -- "1..*" ClaimCase
ClaimCase "1..*" -- "1..*" Estimator
Estimator "1..*" -- "1..*" Report
ClaimCase "1..1" -- "0..1" CompensationDecision
CompensationDecision "1..1" -- "0..1" Payment
Contract "1..1" -- "1..*" InsurancePolicy

@enduml
```

In [41]:
# Batch run over ROOT_FOLDER for every entry of MODEL_ANTHROPIC.
for model in MODEL_ANTHROPIC:
    print(f"Zero shot with {model}")
    anthropic_zero(model)

Zero shot with claude-3-7-sonnet-20250219


100%|███████████████████████████████████████████| 48/48 [01:47<00:00,  2.23s/it]


## Ollama

In [18]:
from langchain_ollama import ChatOllama

In [19]:
# Ollama model names to evaluate; currently empty, so the Ollama batch loop below does nothing.
MODEL_OLLAMA = [] #["llama3.2:3b-text-fp16"]

In [20]:
def ollama_zero(model):
    """Run the zero-shot prompt with one Ollama model over ROOT_FOLDER.

    The client is created with temperature 0 and `timeout=3`.

    Args:
        model (str): Ollama model name; also passed as `type`, so results are
            written to `result_{model}.txt` in each subfolder.
    """
    llm = ChatOllama(model=model, temperature=0, timeout=3)
    zero_shot_chain = prompt_template | llm | StrOutputParser()
    process_subfolders_with_chain(ROOT_FOLDER, zero_shot_chain, type=model)

In [21]:
def ollama_make_example(model):
    """Run the prompt once on the fixed Alpha Insurance sample specification.

    The client is created with temperature 0 and `timeout=3`.

    Args:
        model (str): Ollama model name.

    Returns:
        The output of the prompt | model | StrOutputParser chain.
    """
    sample_text = """Alpha Insurance is an insurance company that provides its clients with various types of insurance policies. 
As soon as a customer addresses Alpha Insurance, a broker is assigned to follow the customer’s file. The broker is registered in the system, so that when a customer calls, based on the contract, the help desk can immediately trace who is the customer's first account manager. After the broker is assigned to the customer, the latter indicates which type(s) of insurance policy they would like to sign for, so the broker could, depending on the case, either assess the customer’s profile on the spot or send the customer’s file for analysis to the head office. After the customer’s profile has been assessed and the customer has been deemed trustworthy , a preliminary contract/offer on an insurance product is made to the customer either in person or by email. (Such offers can also be extended to already existing customers.) If the customer agrees to the offer, the contract is signed by both parties. After the signing of the contract, the client enjoys the coverage and is invoiced (monthly or yearly – depending on the choice made in the contract) according to the price of the insurance product they bought.

In case the insured event happens, a customer should send a claim for compensation. Then the company opens one or several claim cases (e.g. in case of an accident, often material damage & physical damage are handled separately). Once the case file is complete, it is sent for assessment by different estimators based on their area of expertise. According to the reports issued by the estimators, it is decided whether the claim case is approved. In case of approval the compensation decision is registered that stipulates which costs are eligible for (partial) refund.  For the supplied documents, the sum of compensation is calculated and the compensation is paid to the customer’s account.  The estimators’ reports must be stored in the database for at least one year after the payment of compensation for legal purposes. 

"""
    llm = ChatOllama(model=model, temperature=0, timeout=3)
    example_chain = prompt_template | llm | StrOutputParser()
    return example_chain.invoke({"text": sample_text})
    

In [22]:
#display_markdown(ollama_make_example(MODEL_OLLAMA[0]), raw=True)

In [23]:
# Batch run over ROOT_FOLDER for every entry of MODEL_OLLAMA (a no-op while the list is empty).
for model in MODEL_OLLAMA:
    print(f"Zero shot with {model}")
    ollama_zero(model)
    # Free memory between models: collect Python garbage, then ask torch to
    # release its cached MPS memory.
    gc.collect()
    torch.mps.empty_cache()

## Huggingface

In [24]:
from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace, HuggingFaceEndpoint
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

  Referenced from: <9DBE5D5C-AC87-30CA-96DA-F5BC116EDA2B> /Users/marcocalamo/anaconda3/lib/python3.11/site-packages/torchvision/image.so
  warn(


In [25]:
# Hugging Face repo ids to evaluate; currently empty, so the Hugging Face batch loop below does nothing.
MODEL_HUGGINGFACE = [] #["Qwen/Qwen2.5-3B-Instruct", "microsoft/Phi-3-mini-4k-instruct", "google/gemma-2-27b-it"]

In [26]:
def huggingface_zero(model):
    """Run the zero-shot prompt with one Hugging Face endpoint model over ROOT_FOLDER.

    Args:
        model (str): Hugging Face repo id. Slashes are replaced by underscores
            before the id is passed as `type`, so the result file name stays
            a plain file name.
    """
    endpoint_options = {
        "repo_id": model,
        "task": "text-generation",
        "max_new_tokens": 2048,
        "do_sample": False,
        "repetition_penalty": 1.03,
        "temperature": 0.01,
    }
    endpoint = HuggingFaceEndpoint(**endpoint_options)
    chat_model = ChatHuggingFace(llm=endpoint, verbose=True)

    zero_shot_chain = prompt_template | chat_model | StrOutputParser()
    result_suffix = model.replace("/", "_")
    process_subfolders_with_chain(ROOT_FOLDER, zero_shot_chain, type=result_suffix)

In [27]:
def huggingface_make_example(model):
    """Run the prompt once on the fixed Alpha Insurance sample specification.

    Unlike `huggingface_zero`, the model is built with
    `HuggingFacePipeline.from_model_id` (temperature 0.1, at most 1024 new tokens).

    Args:
        model (str): Hugging Face model id.

    Returns:
        The output of the prompt | chat model | StrOutputParser chain.
    """
    sample_text = """Alpha Insurance is an insurance company that provides its clients with various types of insurance policies. 
        As soon as a customer addresses Alpha Insurance, a broker is assigned to follow the customer’s file. The broker is registered in the system, so that when a customer calls, based on the contract, the help desk can immediately trace who is the customer's first account manager. After the broker is assigned to the customer, the latter indicates which type(s) of insurance policy they would like to sign for, so the broker could, depending on the case, either assess the customer’s profile on the spot or send the customer’s file for analysis to the head office. After the customer’s profile has been assessed and the customer has been deemed trustworthy , a preliminary contract/offer on an insurance product is made to the customer either in person or by email. (Such offers can also be extended to already existing customers.) If the customer agrees to the offer, the contract is signed by both parties. After the signing of the contract, the client enjoys the coverage and is invoiced (monthly or yearly – depending on the choice made in the contract) according to the price of the insurance product they bought.
        
        In case the insured event happens, a customer should send a claim for compensation. Then the company opens one or several claim cases (e.g. in case of an accident, often material damage & physical damage are handled separately). Once the case file is complete, it is sent for assessment by different estimators based on their area of expertise. According to the reports issued by the estimators, it is decided whether the claim case is approved. In case of approval the compensation decision is registered that stipulates which costs are eligible for (partial) refund.  For the supplied documents, the sum of compensation is calculated and the compensation is paid to the customer’s account.  The estimators’ reports must be stored in the database for at least one year after the payment of compensation for legal purposes. 
        
        """
    local_pipeline = HuggingFacePipeline.from_model_id(
        model_id=model,
        task="text-generation",
        pipeline_kwargs={"temperature": 0.1, "max_new_tokens": 1024},
    )
    chat_model = ChatHuggingFace(llm=local_pipeline, verbose=True)

    example_chain = prompt_template | chat_model | StrOutputParser()
    return example_chain.invoke({"text": sample_text})


In [28]:
#display_markdown(huggingface_make_example(MODEL_HUGGINGFACE[0]), raw=True)

In [29]:
# Batch run over ROOT_FOLDER for every entry of MODEL_HUGGINGFACE (a no-op while the list is empty).
for model in MODEL_HUGGINGFACE:
    print(f"Zero shot with {model}")
    huggingface_zero(model)
    # Free memory between models: collect Python garbage, then ask torch to
    # release its cached MPS memory.
    gc.collect()
    torch.mps.empty_cache()

## Mlx-LLM

In [2]:
from langchain_community.llms import MLXPipeline
from langchain_community.chat_models import ChatMLX

In [3]:
# MLX model ids (mlx-community repos); the cells below pick entries by index or
# slice. Commented-out entries are disabled candidates.
MODEL_MLX = ["mlx-community/phi-4-8bit",
             "mlx-community/Falcon3-10B-Instruct-8bit", 
             "mlx-community/Qwen2.5-14B-Instruct-4bit",
             "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
             "mlx-community/DeepSeek-R1-Distill-Qwen-7B-8bit",
             "mlx-community/Llama-3.2-3B-Instruct",
             "mlx-community/gemma-2-9b-8bit",
             #"mlx-community/gemma-2-27b-it-4bit",
            # "mlx-community/Mamba-Codestral-7B-v0.1-8bit",
            #"mlx-community/CodeLlama-13b-Instruct-hf-4bit-MLX"
            ]

In [12]:
def mlx_zero(model):
    """Run the zero-shot prompt with one MLX model over ROOT_FOLDER.

    The pipeline is created with `max_tokens=15_000` and `temp=0.1`.

    Args:
        model (str): MLX model id. Slashes are replaced by underscores before
            the id is passed as `type`, so the result file name stays a plain
            file name.
    """
    generation_kwargs = {"max_tokens": 15_000, "temp": 0.1}
    mlx_pipeline = MLXPipeline.from_model_id(
        model_id=model,
        pipeline_kwargs=generation_kwargs,
    )
    chat_model = ChatMLX(llm=mlx_pipeline)
    zero_shot_chain = prompt_template | chat_model | StrOutputParser()
    result_suffix = model.replace("/", "_")
    process_subfolders_with_chain(ROOT_FOLDER, zero_shot_chain, type=result_suffix)

In [13]:
def mlx_make_example(model):
    """Run the prompt once on the fixed Alpha Insurance sample specification.

    The pipeline is created with `max_tokens=2000` and `temp=0.7`.

    Args:
        model (str): MLX model id.

    Returns:
        The output of the prompt | chat model | StrOutputParser chain.
    """
    sample_text = """Alpha Insurance is an insurance company that provides its clients with various types of insurance policies. 
As soon as a customer addresses Alpha Insurance, a broker is assigned to follow the customer’s file. The broker is registered in the system, so that when a customer calls, based on the contract, the help desk can immediately trace who is the customer's first account manager. After the broker is assigned to the customer, the latter indicates which type(s) of insurance policy they would like to sign for, so the broker could, depending on the case, either assess the customer’s profile on the spot or send the customer’s file for analysis to the head office. After the customer’s profile has been assessed and the customer has been deemed trustworthy , a preliminary contract/offer on an insurance product is made to the customer either in person or by email. (Such offers can also be extended to already existing customers.) If the customer agrees to the offer, the contract is signed by both parties. After the signing of the contract, the client enjoys the coverage and is invoiced (monthly or yearly – depending on the choice made in the contract) according to the price of the insurance product they bought.

In case the insured event happens, a customer should send a claim for compensation. Then the company opens one or several claim cases (e.g. in case of an accident, often material damage & physical damage are handled separately). Once the case file is complete, it is sent for assessment by different estimators based on their area of expertise. According to the reports issued by the estimators, it is decided whether the claim case is approved. In case of approval the compensation decision is registered that stipulates which costs are eligible for (partial) refund.  For the supplied documents, the sum of compensation is calculated and the compensation is paid to the customer’s account.  The estimators’ reports must be stored in the database for at least one year after the payment of compensation for legal purposes. 

"""
    generation_kwargs = {"max_tokens": 2000, "temp": 0.7}
    mlx_pipeline = MLXPipeline.from_model_id(
        model_id=model,
        pipeline_kwargs=generation_kwargs,
    )
    chat_model = ChatMLX(llm=mlx_pipeline)
    example_chain = prompt_template | chat_model | StrOutputParser()
    return example_chain.invoke({"text": sample_text})


In [14]:
# Smoke test with the third entry of MODEL_MLX; the returned text is rendered as raw Markdown.
display_markdown(mlx_make_example(MODEL_MLX[2]), raw=True)

Fetching 10 files:   0%|          | 0/10 [00:00<?, ?it/s]

@startuml

class Customer{}
class Broker{}
class Contract{}
class InsurancePolicy{}
class Claim{}
class Estimator{}
class CompensationDecision{}
class Document{}

Customer -- Broker
Customer -- Contract
Contract -- InsurancePolicy
Customer -- Claim
Claim -- Estimator
Claim -- CompensationDecision
CompensationDecision -- Document

@enduml

In [15]:
# Set the protobuf implementation switch to "python" for this process.
# NOTE(review): presumably a workaround for a protobuf version clash when
# loading the MLX models — confirm it is still needed.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

In [16]:
# Batch run over ROOT_FOLDER; the [2:3] slice restricts it to the third entry
# of MODEL_MLX only.
for model in MODEL_MLX[2:3]:
    print(f"Zero shot with {model}")
    mlx_zero(model)
    # NOTE(review): this import is re-executed on every iteration; it is
    # harmless but would normally sit outside the loop.
    import gc, torch
    # Free memory between models: collect Python garbage, then ask torch to
    # release its cached MPS memory.
    gc.collect()
    torch.mps.empty_cache()

Zero shot with mlx-community/Qwen2.5-14B-Instruct-4bit


Fetching 10 files:   0%|          | 0/10 [00:00<?, ?it/s]

100%|███████████████████████████████████████████| 48/48 [04:10<00:00,  5.21s/it]


## Clean Cache

In [17]:
import gc, torch
# Manual cleanup cell: collect Python garbage, then ask torch to release its cached MPS memory.
gc.collect()
torch.mps.empty_cache()

In [18]:
def check_relations(predicted, correct):
    """
    Compute precision, recall, F1 and a cardinality score for predicted relations.

    Each relation is a dict mapping the two class names to their cardinality
    string (for example `{"Book": '"1..1"', "Page": '"1..*"'}`). A predicted
    relation matches a correct one when both connect the same set of classes.
    Every correct relation is paired with at most one predicted relation (the
    first unused match) and every predicted relation is used at most once, so
    precision and recall never exceed 1.

    Score per matched relation, from 0 to 5:
        - 1 point because the relation exists
        - 1 point for each matching minimum and each matching maximum
          cardinality on every end (up to 4 points for two ends)

    Cardinalities are compared after removing double quotes; a single value
    `n` is treated as `n..n`. The input dicts are not modified.

    Args:
        predicted (list[dict]): Relations extracted from the model output.
        correct (list[dict]): Ground-truth relations.

    Returns:
        dict: `total_score`, `precision`, `recall`, `f1` (rounded to two
        decimals) and `len`, the maximum achievable score (5 per correct relation).
    """

    def _bounds(raw):
        # Split a cardinality such as '"1..*"' or '1' into (min, max) strings.
        bare = raw.replace('"', '').strip()
        if ".." not in bare:
            return bare, bare
        parts = bare.split("..")
        return parts[0].strip(), parts[1].strip()

    total_score = 0
    true_pos = 0
    tp_fn = len(correct)
    tp_fp = len(predicted)
    used = set()  # indices of predicted relations already paired

    for rel in correct:
        wanted = set(rel.keys())
        for idx, rel_ in enumerate(predicted):
            if idx in used or set(rel_.keys()) != wanted:
                continue

            used.add(idx)
            true_pos += 1
            total_score += 1  # the relation exists
            for r in rel:
                c_min, c_max = _bounds(rel[r])
                c_min_, c_max_ = _bounds(rel_[r])
                if c_min == c_min_:
                    total_score += 1
                if c_max == c_max_:
                    total_score += 1
            break  # one predicted relation per correct relation

    if true_pos > 0:
        precision = true_pos / tp_fp
        recall = true_pos / tp_fn
        f1 = 2 * ((precision * recall) / (precision + recall))
    else:
        precision = 0
        recall = 0
        f1 = 0

    return {
        "total_score": total_score,
        "precision": round(precision, 2),
        "recall": round(recall, 2),
        "f1": round(f1, 2),
        "len": len(correct) * 5
    }
            

In [19]:
def check_class(predicted, correct):
    """
    Computes precision, recall, and F1-score between the predicted and correct class names.

    Both inputs are treated as sets, so duplicated names count once. When a
    denominator would be zero (no predictions, no gold classes, or no overlap)
    the affected metric is reported as 0 instead of raising ZeroDivisionError.

    Args:
        predicted (iterable): The predicted class names.
        correct (iterable): The correct (ground truth) class names.

    Returns:
        dict: A dictionary with precision, recall, and F1-score, each rounded
        to two decimals.
    """
    predicted_set = set(predicted)
    correct_set = set(correct)
    hits = len(predicted_set & correct_set)

    # precision: share of predicted classes that are in the gold set.
    precision = hits / len(predicted_set) if predicted_set else 0.0

    # recall: share of gold classes that were predicted.
    recall = hits / len(correct_set) if correct_set else 0.0

    # f1: harmonic mean of precision and recall, 0 when both are 0.
    if precision + recall > 0:
        f1 = 2 * ((precision * recall) / (precision + recall))
    else:
        f1 = 0.0

    return {
        "precision": round(precision, 2),
        "recall": round(recall, 2),
        "f1": round(f1, 2)
    }

In [20]:
import re
def init_parser(grammar_path="/Users/marcocalamo/Downloads/grammar.ebnf"):
    """Build a Lark parser from a grammar file.

    Args:
        grammar_path (str): Path to the grammar file, read as UTF-8. The
            default is a machine-specific location kept for backward
            compatibility; pass an explicit path when running elsewhere.

    Returns:
        Lark: A parser constructed from the grammar text.

    Raises:
        OSError: If the grammar file cannot be opened.
    """
    with open(grammar_path, encoding="utf-8") as grammar_file:
        return Lark(grammar_file.read())
        
def parse_text(parser, ref):
    """Clean up a PlantUML text and parse it.

    The clean-up removes ``<think>...</think>`` blocks, backticks and the word
    ``plantuml``, then wraps the text in ``@startuml`` / ``@enduml`` when either
    marker is missing.

    Args:
        parser: Object exposing ``parse(text)``.
        ref (str): Raw PlantUML text.

    Returns:
        Whatever ``parser.parse`` returns for the cleaned text.
    """
    # Strip reasoning blocks first: markers mentioned only inside a <think>
    # block must not count as the diagram's own @startuml/@enduml.
    ref = re.sub(r"<think>.*?</think>", "", ref, flags=re.DOTALL)

    # Remove markdown code fences and the "plantuml" language tag.
    ref = ref.replace('`', '')
    ref = ref.replace('plantuml', '')

    if '@startuml' not in ref:
        ref = '@startuml\n' + ref
    if '@enduml' not in ref:
        ref = ref + '\n@enduml'

    return parser.parse(ref)

def _cardinality_at(nodes, index, default="0..*"):
    """Return the cardinality string at ``nodes[index]``, or ``default``.

    The default is used when the position does not exist, the node has no
    first child token, or that token is not an ``ESCAPED_STRING``.
    """
    try:
        token = nodes[index].children[0]
        value, token_type = token.value, token.type
    except (IndexError, AttributeError, TypeError):
        return default
    return value if token_type == 'ESCAPED_STRING' else default


def get_from_parsed(parsed_text, to_get="class"):
    """Collect class names or relationships from a parsed diagram.

    Args:
        parsed_text: Parse tree exposing ``find_pred``; nodes whose
            ``data.value`` equals ``to_get`` are visited.
        to_get (str): ``"class"`` or ``"relationship"``. Any other value
            yields an empty list.

    Returns:
        list: For ``"class"``, the class names. For ``"relationship"``, one
        dict per relationship mapping each endpoint class name to its
        cardinality string (``"0..*"`` when none is given). Because endpoints
        are dict keys, a relationship whose two endpoints share a name keeps
        only the second cardinality.

    Raises:
        AssertionError: If a relationship endpoint is not a ``CNAME`` token.
    """
    found = parsed_text.find_pred(lambda v: v.data.value == to_get)

    to_ret = []

    for f in found:
        if to_get == 'relationship':
            one_rel = f.children[0].children

            # Endpoints: first child and second-to-last child of the relationship.
            first_token = one_rel[0].children[0]
            first_rel = first_token.value
            assert first_token.type == 'CNAME'

            # Cardinalities are optional; positions 1 and 5 hold them when present.
            first_card = _cardinality_at(one_rel, 1)
            second_card = _cardinality_at(one_rel, 5)

            second_token = one_rel[-2].children[0]
            second_rel = second_token.value
            assert second_token.type == 'CNAME'

            to_ret.append({
                first_rel: first_card,
                second_rel: second_card
            })
        elif to_get == 'class':
            to_ret.append(f.children[0].children[0].value)

    return to_ret
    
def parse_path(path_to_uml, parser):
    """Read a PlantUML file and extract its classes and relationships.

    Args:
        path_to_uml (str): Path to the PlantUML text file, read as UTF-8
            (the same encoding init_parser uses for the grammar).
        parser: Parser passed through to ``parse_text``.

    Returns:
        tuple: ``(classes, relationships)`` as returned by ``get_from_parsed``.

    Raises:
        FileNotFoundError: If ``path_to_uml`` is not an existing regular file.
    """
    if not os.path.isfile(path_to_uml):
        raise FileNotFoundError(f"PlantUML file not found: {path_to_uml}")

    with open(path_to_uml, encoding="utf-8") as plant_ref:
        ref = plant_ref.read()

    parsed_ref = parse_text(parser, ref)
    found_class = get_from_parsed(parsed_ref, to_get="class")
    found_rel = get_from_parsed(parsed_ref, to_get="relationship")

    return found_class, found_rel
    

In [21]:
from lark import Lark

In [22]:
#parse_path("LouvreMuseum/uml.txt", init_parser())

In [23]:
def evaluate(path_uml, path_to_check, parser):
    """Score a candidate diagram file against the gold diagram file.

    Args:
        path_uml: Path to the gold PlantUML file.
        path_to_check: Path to the candidate PlantUML file.
        parser: Parser handed to ``parse_path`` for both files.

    Returns:
        tuple: ``(class_metrics, relation_metrics)`` — the results of
        ``check_class`` and ``check_relations`` respectively.
    """
    # Gold diagram first, then the candidate.
    gold_classes, gold_relations = parse_path(path_uml, parser)
    candidate_classes, candidate_relations = parse_path(path_to_check, parser)

    class_metrics = check_class(candidate_classes, gold_classes)
    relation_metrics = check_relations(candidate_relations, gold_relations)
    return class_metrics, relation_metrics

In [24]:
#evaluate("FactoriesAndProducts/uml.txt", "FactoriesAndProducts/result_gpt-4o-mini.txt",init_parser())

In [25]:
import os
import csv

def process_folders(root_folder):
    """Evaluate every ``result_*.txt`` in each sub-folder and write a CSV summary.

    For each directory under ``root_folder`` that contains ``uml.txt``, every
    file named ``result_<model>.txt`` is evaluated against it. One row per
    successful evaluation is written to ``<root_folder>/evaluation_results.csv``;
    failures are printed and skipped. The header row is always written, and
    folders and files are visited in sorted order so the output is
    reproducible.

    Args:
        root_folder (str): Folder containing one sub-folder per test case.
    """
    csv_file = os.path.join(root_folder, "evaluation_results.csv")
    header = ["sub_folder_name", "model_name",
              "precision_class", "recall_class", "f1_class",
              "precision_rel", "recall_rel", "f1_rel", "score_rel", "max_score"]
    parser = None  # built on first use and reused for every file

    with open(csv_file, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(header)

        for sub_folder in sorted(os.listdir(root_folder)):
            sub_folder_path = os.path.join(root_folder, sub_folder)
            if not os.path.isdir(sub_folder_path):
                continue

            uml_file = os.path.join(sub_folder_path, "uml.txt")
            if not os.path.exists(uml_file):
                continue

            for filename in sorted(os.listdir(sub_folder_path)):
                if not (filename.startswith("result_") and filename.endswith(".txt")):
                    continue

                result_file = os.path.join(sub_folder_path, filename)
                # "result_a_b.txt" -> "a-b"
                model_name = "-".join(filename.replace(".txt", "").split("_")[1:])

                # Outside the try block: a broken grammar should stop the run,
                # not be reported once per result file.
                if parser is None:
                    parser = init_parser()

                try:
                    class_metrics, rel_metrics = evaluate(uml_file, result_file, parser)
                    writer.writerow([sub_folder, model_name,
                                     class_metrics["precision"], class_metrics["recall"], class_metrics["f1"],
                                     rel_metrics["precision"], rel_metrics["recall"], rel_metrics["f1"],
                                     rel_metrics["total_score"], rel_metrics["len"]])
                except Exception as e:
                    # Best effort: report the failing case and continue with the rest.
                    print(e, sub_folder, model_name)

In [26]:
#process_folders(ROOT_FOLDER)

In [27]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

def _plot_model_average(csv_path, column, title, ylabel):
    """Bar-plot the per-model mean of ``column`` from an evaluation CSV.

    Each bar is annotated with ``N=<rows for that model>``.

    Args:
        csv_path: Path (or buffer) accepted by ``pd.read_csv``; must contain
            ``model_name`` and ``column``.
        column (str): Name of the numeric column to average.
        title (str): Figure title.
        ylabel (str): Y-axis label.
    """
    df = pd.read_csv(csv_path)

    # Mean of the metric and number of evaluated cases per model.
    avg_metrics = df.groupby('model_name').agg(
        average=(column, 'mean'),
        count=('model_name', 'count')
    ).reset_index()

    sns.set_theme(style='whitegrid')

    plt.figure(figsize=(10, 5))
    ax = sns.barplot(x='model_name', y='average', data=avg_metrics,
                     palette='husl', hue='model_name', legend=False)
    plt.title(title)
    plt.xlabel('Model Name')
    plt.ylabel(ylabel)
    plt.xticks(rotation=45)

    # Annotate each bar with its sample count; bar i sits at x position i.
    for position, (average, count) in enumerate(zip(avg_metrics['average'], avg_metrics['count'])):
        ax.text(position, average, f'N={count}', ha='center', va='bottom', fontsize=10)

    plt.show()


def plot_data_class(csv):
    """Plot the average ``f1_class`` per model from the evaluation CSV at ``csv``."""
    _plot_model_average(csv, 'f1_class', 'Average F1 Class per Model', 'Average F1 Class')


def plot_data_rel(csv):
    """Plot the average ``f1_rel`` per model from the evaluation CSV at ``csv``."""
    _plot_model_average(csv, 'f1_rel', 'Average F1 Rel per Model', 'Average F1 Rel')


def plot_data_score(csv):
    """Plot the average ``score_rel`` per model from the evaluation CSV at ``csv``."""
    _plot_model_average(csv, 'score_rel', 'Average Score rel per Model', 'Average Score')

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


In [28]:
#plot_data_class("evaluation_results.csv")

In [29]:
#plot_data_class("/evaluation_results_chat_zero.csv")

In [30]:
#plot_data_rel("evaluation_results.csv")

In [31]:
#plot_data_rel("evaluation_results_chat_zero.csv")

In [32]:
#plot_data_score("evaluation_results.csv")

In [33]:
#plot_data_score("evaluation_results_chat_zero.csv")