# Template Definition

In [1]:
# Root directory handed to the batch helpers below, which list its immediate subfolders
# with os.listdir(). It is empty here and must be set to a real path before a batch run.
ROOT_FOLDER = ""

In [2]:
# Step 1 prompt: asks the model to output only a bracketed list of class names taken
# from the specification text. Single placeholder: {text}.
prompt_template_class = """
Step 1:

You will be asked to extact, from text specification, the list of class to use in 
a uml conceptual model diagram. Include only classes relevant for conceptual modeling.

Choose the key words from text and output the results in a list form:

[ClassName1, ClassName2, ...]

i.e. for a Book text specification:

[Book, Page, Author, ...]

Output only the list and no other text.
Keep the list as short as possible and do not add irrelevant classes.

##############

The specification text is: 

{text}.

##############

The class list is:
"""

# Step 2 prompt: asks the model to output only a list of class pairs (associations),
# restricted to the classes it is given. Placeholders: {classes} and {text}.
prompt_template_assoc =  """
Step 2:

You will be asked to extact, from text specification and a list of classes, the associations amongst
the classes to use in a uml conceptual model diagram. 

Include only associations relevant for conceptual modeling. Use only classes from the list.


Choose the key words from class list according to text specifications and output the results in a list form:

[(ClassName1, ClassName2), (ClassName1,Classname3), ...]

i.e. for a Book text specification:

[(Book, Page), (Book, Author), ...]

All associations are meant to be symmetrical. If (ClassName1, ClassName2) is in the association list,
there is no need to add (Classname2, ClassName1).

Output only the list and no other text.
Keep the list as short as possible and do not add irrelevant association.

##############

The class list is: 

{classes}

#############

The specification text is: 

{text}

##############

The association list is:
    
"""


# Step 3 prompt: asks the model to attach a min..max cardinality to both ends of every
# association it is given, defaulting to 0..* when unsure. Placeholders: {association} and {text}.
prompt_template_card = """
Step 3:

You will be asked to extact, from text specification and a list of association, the cardinality for 
each association to use in a uml conceptual model diagram. 

Use only association from the list. The format of the cardinality is:

min_cardinality..max_cardinlaity

For example 1..100 has 1 as minimum cardinality and 100 as maximum cadinality.
Use the asterisk (*) for an undefined number of maximum cardinality,
and 0 for an undefined number of minumum cardinality.

If you are not sure assign default cardinality: 0..*

Decide the correct cardinality for each association from text and output the results in a list form:

[(ClassName1, Cadinality1 ,ClassName2, Cardinality2), ...]

i.e. for a Book text specification:

[(Book, 1..* ,Page, 1..1),  ...]

Output only the list and no other text.

##############

The association list is:

{association}

#############

The specification text is: 

{text}

##############

The cardinality list is:
"""
    

# Step 4 prompt: asks the model to render the class list and the cardinality list as
# PlantUML wrapped in @startuml / @enduml. Placeholders: {classes} and {card}.
# NOTE(review): the doubled braces in `class ClassName1{{}}` are presumably escapes so the
# formatted prompt shows a literal `{}` -- confirm against the template format in use.
prompt_template_plant = """
Step 4:

You will be asked by the user to create a plant UMl model from a class list and a cardinality list.
Do so in the most clear way possible, avoid class properties and assign molteplicity. 

The format of class list is:

[ClassName1, ClassName2, ...]

The format of association list is:

[(ClassName1, Cadinality1 ,ClassName2, Cardinality2), ...]

for every class in class list write:

class ClassName1{{}}
class ClassName2{{}}

for every tuple in association list write:

ClassName1 "Cardinality1" -- "Cardinality2" Classname2.

Output plantuml without futher text or explaination. Use tags 
@startuml
at the beginning and 
@enduml 
at the end of the output.

##############

The class list is: 

{classes}

#############

##############

The cardinality list is:

{card}

#############

The plant uml is:
                                    
"""

In [3]:
from langchain_core.prompts import ChatPromptTemplate, PromptTemplate

In [4]:
from langchain.chains import LLMChain
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser

from langchain_ollama import ChatOllama

In [5]:
from langchain_core.runnables import RunnableParallel, RunnablePassthrough, RunnableLambda

In [6]:
def build_chian_from_llms(llm):
    """Build the four-step chain that turns a specification text into PlantUML.

    Each step is a prompt piped into ``llm`` and parsed to a plain string; every
    step adds one key to the running dict so later prompts can read it:

        1. ``classes``      <- ``text``
        2. ``association``  <- ``text``, ``classes``
        3. ``card``         <- ``text``, ``association``
        4. PlantUML output  <- ``classes``, ``card``

    Args:
        llm: Model runnable placed after every prompt in the chain.

    Returns:
        A runnable that is invoked with ``{"text": <specification>}`` and returns
        the string produced by the final (PlantUML) step.
    """
    prompt_class = PromptTemplate(
        input_variables=["text"],
        template=prompt_template_class,
    )
    prompt_assoc = PromptTemplate(
        input_variables=["text", "classes"],
        template=prompt_template_assoc,
    )
    prompt_card = PromptTemplate(
        input_variables=["text", "association"],
        template=prompt_template_card,
    )
    # The step-4 template reads {classes} and {card}; the declared variables must
    # name those two placeholders (the previous list named "association" instead).
    prompt_uml = PromptTemplate(
        input_variables=["classes", "card"],
        template=prompt_template_plant,
    )

    class_chain = prompt_class | llm | StrOutputParser()
    assoc_chain = prompt_assoc | llm | StrOutputParser()
    card_chain = prompt_card | llm | StrOutputParser()
    uml_chain = prompt_uml | llm | StrOutputParser()

    # RunnablePassthrough.assign keeps the incoming dict and adds one computed key,
    # replacing the hand-written "copy every key forward" lambdas.
    full_chain = (
        RunnablePassthrough.assign(classes=class_chain)
        | RunnablePassthrough.assign(association=assoc_chain)
        | RunnablePassthrough.assign(card=card_chain)
        | uml_chain  # already ends in StrOutputParser, so the result is a str
    )
    return full_chain


In [7]:
import sys
import threading
from time import sleep
try:
    import thread
except ImportError:
    import _thread as thread

def quit_function(fn_name):
    """Report on stderr that `fn_name` ran too long, then interrupt the main thread."""
    message = f"{fn_name} took too long"
    print(message, file=sys.stderr)
    # Push the message out before interrupting, in case stderr is buffered.
    sys.stderr.flush()
    # Surfaces as KeyboardInterrupt in the main thread.
    thread.interrupt_main()
    
def exit_after(s):
    '''
    Decorator factory that puts a time limit of `s` seconds on a function call.

    A timer is started before each call; if it fires before the call returns,
    `quit_function` is run with the function's name (it reports the timeout and
    interrupts the main thread). The timer is always cancelled once the call
    finishes, whether it returned or raised.

    Args:
        s: time limit in seconds, passed to threading.Timer.

    Returns:
        A decorator; the wrapped function returns whatever the original returns.
    '''
    from functools import wraps

    def outer(fn):
        @wraps(fn)  # keep fn's __name__ / __doc__ on the wrapper
        def inner(*args, **kwargs):
            timer = threading.Timer(s, quit_function, args=[fn.__name__])
            timer.start()
            try:
                return fn(*args, **kwargs)
            finally:
                # Runs on both success and failure so the timer never fires late.
                timer.cancel()
        return inner
    return outer

In [8]:
import os
from tqdm import tqdm
from docx import Document

from langchain_core.tracers.context import tracing_v2_enabled

#@exit_after(360)
def run_chain(chain, text):
    """Invoke `chain` with the specification wrapped as ``{"text": text}`` and return its output."""
    payload = {"text": text}
    return chain.invoke(payload)
    
def process_subfolders_with_chain(root_folder_path, chain, type=''):
    """
    Run `chain` on the specification found in every immediate subfolder of the root folder.

    For each subfolder (depth 1) the input is read from `text.txt`. When that file is
    missing, the first `.docx` file listed in the subfolder is converted to `text.txt`
    (paragraph texts joined by newlines). The chain output is written to
    `result_cot_{type}.txt` in the same subfolder. Subfolders that end up without a
    `text.txt` are skipped.

    A failing chain call does not stop the batch: the error is reported through
    `tqdm.write` and an empty result file is written for that subfolder.

    Args:
        root_folder_path (str): Path to the root folder.
        chain: A LangChain chain instance to process text inputs.
        type (str): Label embedded in the result file name (callers pass a model identifier).
    """
    for subfolder_name in tqdm(os.listdir(root_folder_path)):
        subfolder_path = os.path.join(root_folder_path, subfolder_name)

        # Only subfolders are processed; plain files in the root are ignored.
        if not os.path.isdir(subfolder_path):
            continue

        text_file_path = os.path.join(subfolder_path, "text.txt")

        # No `text.txt` yet: try to create it from a .docx file in the subfolder.
        if not os.path.isfile(text_file_path):
            docx_files = [f for f in os.listdir(subfolder_path) if f.endswith(".docx")]
            if docx_files:
                docx_file_path = os.path.join(subfolder_path, docx_files[0])
                doc = Document(docx_file_path)
                text_content = "\n".join(paragraph.text for paragraph in doc.paragraphs)

                with open(text_file_path, "w", encoding="utf-8") as text_file:
                    text_file.write(text_content)

        # Still nothing to read: this subfolder has no usable specification.
        if not os.path.isfile(text_file_path):
            continue

        with open(text_file_path, "r", encoding="utf-8") as file:
            text = file.read()

        # Reset for every subfolder so a failure can never write the previous
        # subfolder's output (or hit an unbound name) below.
        result = ""
        with tracing_v2_enabled():
            try:
                result = run_chain(chain, text)
            except Exception as exc:
                # Best effort: report and keep going with an empty result.
                # Exception (not a bare except) lets Ctrl-C stop the batch.
                # NOTE: the KeyboardInterrupt raised by the commented-out
                # @exit_after timeout on run_chain would not be caught here.
                tqdm.write(f"Chain failed for {subfolder_path}: {exc!r}")

        # Save the result to a new file in the same subfolder
        result_file_path = os.path.join(subfolder_path, f"result_cot_{type}.txt")
        with open(result_file_path, "w", encoding="utf-8") as result_file:
            result_file.write(result)

In [9]:
def delete_result_txt_files(root_folder_path):
    """
    Remove generated result files from the immediate subfolders of the root folder.

    A file is removed when its name starts with 'result_' and ends with '.txt'.
    Files directly inside the root folder and anything nested deeper are left alone.

    Args:
        root_folder_path (str): Path to the root folder.
    """
    for entry in os.listdir(root_folder_path):
        candidate_dir = os.path.join(root_folder_path, entry)

        # Skip anything in the root that is not a directory.
        if not os.path.isdir(candidate_dir):
            continue

        stale_paths = [
            os.path.join(candidate_dir, name)
            for name in os.listdir(candidate_dir)
            if name.startswith("result_") and name.endswith(".txt")
        ]
        for stale_path in stale_paths:
            os.remove(stale_path)

In [10]:
#delete_result_txt_files(ROOT_FOLDER)

# COT Open-AI

In [10]:
from dotenv import load_dotenv
# Explicit check instead of `assert`: asserts are stripped under `python -O`, which
# would silently skip the load_dotenv() call itself.
if not load_dotenv():
    raise RuntimeError("load_dotenv() did not load any variable from a .env file")

In [10]:
# OpenAI model names; each one is passed to ChatOpenAI(model=...) and is also used as
# the label in the result file name.
MODEL_OPEN_AI = ["o3-mini","gpt-4o-mini", "gpt-4o", "gpt-4-turbo", "gpt-3.5-turbo"]

In [11]:
from langchain_openai import ChatOpenAI
from langchain_core.output_parsers import StrOutputParser
from langchain.agents.chat.output_parser import ChatOutputParser
from IPython.display import display_markdown

In [12]:
def open_ai_zero(model):
    """Batch-run the step-by-step pipeline over ROOT_FOLDER with one OpenAI model.

    Args:
        model: OpenAI model name; also used as the label in the result file names.
    """
    chat_model = ChatOpenAI(model=model)
    pipeline = build_chian_from_llms(chat_model)
    process_subfolders_with_chain(ROOT_FOLDER, pipeline, type=model)

In [13]:
def open_ai_make_example(model):
    """Run the pipeline once on the built-in Alpha Insurance sample with an OpenAI model.

    Args:
        model: OpenAI model name.

    Returns:
        The output of the final pipeline step for the sample specification.
    """
    sample_spec = """Alpha Insurance is an insurance company that provides its clients with various types of insurance policies. 
As soon as a customer addresses Alpha Insurance, a broker is assigned to follow the customer’s file. The broker is registered in the system, so that when a customer calls, based on the contract, the help desk can immediately trace who is the customer's first account manager. After the broker is assigned to the customer, the latter indicates which type(s) of insurance policy they would like to sign for, so the broker could, depending on the case, either assess the customer’s profile on the spot or send the customer’s file for analysis to the head office. After the customer’s profile has been assessed and the customer has been deemed trustworthy , a preliminary contract/offer on an insurance product is made to the customer either in person or by email. (Such offers can also be extended to already existing customers.) If the customer agrees to the offer, the contract is signed by both parties. After the signing of the contract, the client enjoys the coverage and is invoiced (monthly or yearly – depending on the choice made in the contract) according to the price of the insurance product they bought.

In case the insured event happens, a customer should send a claim for compensation. Then the company opens one or several claim cases (e.g. in case of an accident, often material damage & physical damage are handled separately). Once the case file is complete, it is sent for assessment by different estimators based on their area of expertise. According to the reports issued by the estimators, it is decided whether the claim case is approved. In case of approval the compensation decision is registered that stipulates which costs are eligible for (partial) refund.  For the supplied documents, the sum of compensation is calculated and the compensation is paid to the customer’s account.  The estimators’ reports must be stored in the database for at least one year after the payment of compensation for legal purposes. 

"""
    chat_model = ChatOpenAI(model=model)
    pipeline = build_chian_from_llms(chat_model)
    return pipeline.invoke({"text": sample_spec})

In [14]:
# Run the sample specification through the first OpenAI model and render the returned text.
display_markdown(open_ai_make_example(MODEL_OPEN_AI[0]), raw=True)

@startuml
class Customer {}
class Broker {}
class InsurancePolicy {}
class Offer {}
class Contract {}
class Invoice {}
class ClaimCase {}
class Estimator {}
class Report {}
class CompensationDecision {}
class Payment {}

Customer "1..1" -- "0..*" Broker
Customer "1..*" -- "0..*" InsurancePolicy
Customer "0..*" -- "1..1" Offer
Customer "0..*" -- "1..1" Contract
Customer "0..*" -- "1..1" Invoice
Customer "0..*" -- "1..1" ClaimCase
ClaimCase "1..*" -- "0..*" Estimator
Estimator "0..*" -- "1..1" Report
ClaimCase "1..*" -- "1..1" Report
ClaimCase "0..1" -- "1..1" CompensationDecision
Customer "0..*" -- "1..1" Payment
@enduml

In [15]:
# Batch run restricted to the first configured OpenAI model (slice [0:1]).
for model in MODEL_OPEN_AI[0:1]:
    print(f"COT with {model}")
    open_ai_zero(model)

COT with o3-mini


100%|███████████████████████████████████████████| 48/48 [29:02<00:00, 36.30s/it]


# COT Open LLM

In [24]:
import torch, gc
# Run Python garbage collection, then ask PyTorch to release cached memory on the MPS backend.
gc.collect()
torch.mps.empty_cache()

## Deepseek

In [15]:
from langchain_deepseek import ChatDeepSeek

In [16]:
# DeepSeek model names to run; only "deepseek-chat" is active, the second entry is commented out.
MODEL_DEEPSEEK = ["deepseek-chat"] #, "deepseek-reasoner"]

In [20]:
def deepseek_make_example(model):
    """Run the pipeline once on the built-in Alpha Insurance sample with a DeepSeek model.

    The chat model is created with temperature 0, no token limit, no timeout and
    up to two retries.

    Args:
        model: DeepSeek model name.

    Returns:
        The output of the final pipeline step for the sample specification.
    """
    # Single binding: the former `model = llm = ...` left an unused `llm` alias
    # and rebound the `model` parameter.
    chat_model = ChatDeepSeek(
        model=model,
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
    )
    chain = build_chian_from_llms(chat_model)
    res = chain.invoke({"text": """Alpha Insurance is an insurance company that provides its clients with various types of insurance policies. 
As soon as a customer addresses Alpha Insurance, a broker is assigned to follow the customer’s file. The broker is registered in the system, so that when a customer calls, based on the contract, the help desk can immediately trace who is the customer's first account manager. After the broker is assigned to the customer, the latter indicates which type(s) of insurance policy they would like to sign for, so the broker could, depending on the case, either assess the customer’s profile on the spot or send the customer’s file for analysis to the head office. After the customer’s profile has been assessed and the customer has been deemed trustworthy , a preliminary contract/offer on an insurance product is made to the customer either in person or by email. (Such offers can also be extended to already existing customers.) If the customer agrees to the offer, the contract is signed by both parties. After the signing of the contract, the client enjoys the coverage and is invoiced (monthly or yearly – depending on the choice made in the contract) according to the price of the insurance product they bought.

In case the insured event happens, a customer should send a claim for compensation. Then the company opens one or several claim cases (e.g. in case of an accident, often material damage & physical damage are handled separately). Once the case file is complete, it is sent for assessment by different estimators based on their area of expertise. According to the reports issued by the estimators, it is decided whether the claim case is approved. In case of approval the compensation decision is registered that stipulates which costs are eligible for (partial) refund.  For the supplied documents, the sum of compensation is calculated and the compensation is paid to the customer’s account.  The estimators’ reports must be stored in the database for at least one year after the payment of compensation for legal purposes. 

"""})
    return res

In [18]:
def deepseek_cot(model):
    """Batch-run the step-by-step pipeline over ROOT_FOLDER with one DeepSeek model.

    Args:
        model: DeepSeek model name; also used as the label in the result file names.
    """
    chat_model = ChatDeepSeek(model=model)
    pipeline = build_chian_from_llms(chat_model)
    process_subfolders_with_chain(ROOT_FOLDER, pipeline, type=model)

In [21]:
%%time
# Run the sample specification through the first DeepSeek model and render the returned text.
display_markdown(deepseek_make_example(MODEL_DEEPSEEK[0]), raw=True)

```plantuml
@startuml
class InsuranceCompany{}
class Customer{}
class Broker{}
class InsurancePolicy{}
class Contract{}
class Claim{}
class ClaimCase{}
class Estimator{}
class CompensationDecision{}
class Invoice{}

InsuranceCompany "1..1" -- "0..*" Customer
Customer "1..1" -- "1..1" Broker
Customer "1..*" -- "0..*" InsurancePolicy
Broker "1..1" -- "0..*" InsurancePolicy
InsurancePolicy "1..1" -- "1..1" Contract
Customer "1..*" -- "0..*" Claim
Claim "1..1" -- "1..*" ClaimCase
ClaimCase "1..1" -- "1..*" Estimator
ClaimCase "1..1" -- "1..1" CompensationDecision
Contract "1..1" -- "1..*" Invoice
@enduml
```

CPU times: user 254 ms, sys: 48 ms, total: 302 ms
Wall time: 54.6 s


In [22]:
# Batch run over every configured DeepSeek model.
for model in MODEL_DEEPSEEK:
    print(f"COT with {model}")
    deepseek_cot(model)

CoT shot with deepseek-chat


100%|███████████████████████████████████████████| 48/48 [31:46<00:00, 39.71s/it]


## Anthropic

In [24]:
from langchain_anthropic import ChatAnthropic

In [25]:
# Anthropic model names; each one is passed to ChatAnthropic(model=...) and used as the result-file label.
MODEL_ANTHROPIC = ["claude-3-7-sonnet-20250219"]

In [26]:
def anthropic_make_example(model):
    """Run the pipeline once on the built-in Alpha Insurance sample with an Anthropic model.

    The chat model is created with temperature 0, a 1024-token output cap, no
    timeout and up to two retries.

    Args:
        model: Anthropic model name.

    Returns:
        The output of the final pipeline step for the sample specification.
    """
    sample_spec = """Alpha Insurance is an insurance company that provides its clients with various types of insurance policies. 
As soon as a customer addresses Alpha Insurance, a broker is assigned to follow the customer’s file. The broker is registered in the system, so that when a customer calls, based on the contract, the help desk can immediately trace who is the customer's first account manager. After the broker is assigned to the customer, the latter indicates which type(s) of insurance policy they would like to sign for, so the broker could, depending on the case, either assess the customer’s profile on the spot or send the customer’s file for analysis to the head office. After the customer’s profile has been assessed and the customer has been deemed trustworthy , a preliminary contract/offer on an insurance product is made to the customer either in person or by email. (Such offers can also be extended to already existing customers.) If the customer agrees to the offer, the contract is signed by both parties. After the signing of the contract, the client enjoys the coverage and is invoiced (monthly or yearly – depending on the choice made in the contract) according to the price of the insurance product they bought.

In case the insured event happens, a customer should send a claim for compensation. Then the company opens one or several claim cases (e.g. in case of an accident, often material damage & physical damage are handled separately). Once the case file is complete, it is sent for assessment by different estimators based on their area of expertise. According to the reports issued by the estimators, it is decided whether the claim case is approved. In case of approval the compensation decision is registered that stipulates which costs are eligible for (partial) refund.  For the supplied documents, the sum of compensation is calculated and the compensation is paid to the customer’s account.  The estimators’ reports must be stored in the database for at least one year after the payment of compensation for legal purposes. 

"""
    chat_model = ChatAnthropic(
        model=model,
        temperature=0,
        max_tokens=1024,
        timeout=None,
        max_retries=2,
    )
    pipeline = build_chian_from_llms(chat_model)
    return pipeline.invoke({"text": sample_spec})

In [27]:
def anthropic_cot(model):
    """Batch-run the step-by-step pipeline over ROOT_FOLDER with one Anthropic model.

    Args:
        model: Anthropic model name; also used as the label in the result file names.
    """
    chat_model = ChatAnthropic(model=model)
    pipeline = build_chian_from_llms(chat_model)
    process_subfolders_with_chain(ROOT_FOLDER, pipeline, type=model)

In [28]:
# Run the sample specification through the first Anthropic model and render the returned text.
display_markdown(anthropic_make_example(MODEL_ANTHROPIC[0]), raw=True)

@startuml
class Customer{}
class Broker{}
class InsurancePolicy{}
class Contract{}
class Offer{}
class Invoice{}
class Claim{}
class ClaimCase{}
class Estimator{}
class Report{}
class CompensationDecision{}
class Payment{}

Customer "1..1" -- "1..*" Broker
Customer "1..*" -- "0..*" InsurancePolicy
Customer "1..*" -- "1..*" Contract
Customer "1..*" -- "0..*" Offer
Customer "1..*" -- "0..*" Invoice
Customer "1..*" -- "0..*" Claim
Customer "1..*" -- "0..*" Payment
Broker "1..*" -- "0..*" Contract
Broker "1..*" -- "0..*" Offer
InsurancePolicy "1..1" -- "1..1" Contract
InsurancePolicy "1..1" -- "0..*" Invoice
Contract "1..1" -- "0..1" Offer
Claim "1..1" -- "1..*" ClaimCase
ClaimCase "1..*" -- "1..*" Estimator
ClaimCase "1..1" -- "0..1" CompensationDecision
Estimator "1..*" -- "1..*" Report
CompensationDecision "1..1" -- "0..1" Payment
@enduml

In [30]:
# Batch run over every configured Anthropic model.
for model in MODEL_ANTHROPIC:
    print(f"COT with {model}")
    anthropic_cot(model)

COT shot with claude-3-7-sonnet-20250219


100%|███████████████████████████████████████████| 48/48 [04:52<00:00,  6.09s/it]


## Ollama

In [25]:
from langchain_ollama import ChatOllama

In [26]:
# Ollama model names to run; currently empty, with the previous entry kept in the trailing comment.
MODEL_OLLAMA = [] #["llama3.2:3b-text-fp16"]

In [27]:
def ollama_zero(model):
    """Batch-run the step-by-step pipeline over ROOT_FOLDER with one Ollama model at temperature 0.

    Args:
        model: Ollama model name; also used as the label in the result file names.
    """
    chat_model = ChatOllama(model=model, temperature=0)
    pipeline = build_chian_from_llms(chat_model)
    process_subfolders_with_chain(ROOT_FOLDER, pipeline, type=model)

In [28]:
def ollama_make_example(model):
    """Run the pipeline once on the built-in Alpha Insurance sample with an Ollama model.

    The chat model is created with temperature 0.

    Args:
        model: Ollama model name.

    Returns:
        The output of the final pipeline step for the sample specification.
    """
    sample_spec = """Alpha Insurance is an insurance company that provides its clients with various types of insurance policies. 
As soon as a customer addresses Alpha Insurance, a broker is assigned to follow the customer’s file. The broker is registered in the system, so that when a customer calls, based on the contract, the help desk can immediately trace who is the customer's first account manager. After the broker is assigned to the customer, the latter indicates which type(s) of insurance policy they would like to sign for, so the broker could, depending on the case, either assess the customer’s profile on the spot or send the customer’s file for analysis to the head office. After the customer’s profile has been assessed and the customer has been deemed trustworthy , a preliminary contract/offer on an insurance product is made to the customer either in person or by email. (Such offers can also be extended to already existing customers.) If the customer agrees to the offer, the contract is signed by both parties. After the signing of the contract, the client enjoys the coverage and is invoiced (monthly or yearly – depending on the choice made in the contract) according to the price of the insurance product they bought.

In case the insured event happens, a customer should send a claim for compensation. Then the company opens one or several claim cases (e.g. in case of an accident, often material damage & physical damage are handled separately). Once the case file is complete, it is sent for assessment by different estimators based on their area of expertise. According to the reports issued by the estimators, it is decided whether the claim case is approved. In case of approval the compensation decision is registered that stipulates which costs are eligible for (partial) refund.  For the supplied documents, the sum of compensation is calculated and the compensation is paid to the customer’s account.  The estimators’ reports must be stored in the database for at least one year after the payment of compensation for legal purposes. 

"""
    chat_model = ChatOllama(model=model, temperature=0)
    pipeline = build_chian_from_llms(chat_model)
    return pipeline.invoke({"text": sample_spec})
    

In [29]:
#display_markdown(ollama_make_example(MODEL_OLLAMA[0]), raw=True)

In [30]:
# Batch run over every configured Ollama model, collecting garbage and releasing cached
# MPS memory after each one.
for model in MODEL_OLLAMA:
    print(f"COT with {model}")
    ollama_zero(model)
    gc.collect()
    torch.mps.empty_cache()

## Huggingface

In [31]:
from langchain_huggingface import HuggingFacePipeline, ChatHuggingFace, HuggingFaceEndpoint
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

  Referenced from: <9DBE5D5C-AC87-30CA-96DA-F5BC116EDA2B> /Users/marcocalamo/anaconda3/lib/python3.11/site-packages/torchvision/image.so
  warn(


In [32]:
# Hugging Face repo ids to run; currently empty, with earlier candidates kept in the trailing comments.
MODEL_HUGGINGFACE = [] #["google/gemma-2-2b-it"] #["Qwen/Qwen2.5-3B-Instruct", "microsoft/Phi-3-mini-4k-instruct", "google/gemma-2-27b-it"]

In [33]:
def huggingface_cot(model):
    """Batch-run the step-by-step pipeline over ROOT_FOLDER through a Hugging Face endpoint.

    Args:
        model: Hugging Face repo id. Its "/" characters are replaced by "_" to
            form the label used in the result file names.
    """
    endpoint = HuggingFaceEndpoint(
        repo_id=model,
        task="text-generation",
        max_new_tokens=2048,
        do_sample=False,
        repetition_penalty=1.03,
        temperature=0.01,
    )
    chat_model = ChatHuggingFace(llm=endpoint, verbose=True)
    pipeline = build_chian_from_llms(chat_model)

    result_label = model.replace("/", "_")
    process_subfolders_with_chain(ROOT_FOLDER, pipeline, type=result_label)

In [34]:
def huggingface_make_example(model):
    """Run the pipeline once on the built-in Alpha Insurance sample with a local Hugging Face pipeline.

    The text-generation pipeline is created with temperature 0.1 and at most
    1024 new tokens.

    Args:
        model: Hugging Face model id.

    Returns:
        The output of the final pipeline step for the sample specification.
    """
    sample_spec = """Alpha Insurance is an insurance company that provides its clients with various types of insurance policies. 
        As soon as a customer addresses Alpha Insurance, a broker is assigned to follow the customer’s file. The broker is registered in the system, so that when a customer calls, based on the contract, the help desk can immediately trace who is the customer's first account manager. After the broker is assigned to the customer, the latter indicates which type(s) of insurance policy they would like to sign for, so the broker could, depending on the case, either assess the customer’s profile on the spot or send the customer’s file for analysis to the head office. After the customer’s profile has been assessed and the customer has been deemed trustworthy , a preliminary contract/offer on an insurance product is made to the customer either in person or by email. (Such offers can also be extended to already existing customers.) If the customer agrees to the offer, the contract is signed by both parties. After the signing of the contract, the client enjoys the coverage and is invoiced (monthly or yearly – depending on the choice made in the contract) according to the price of the insurance product they bought.
        
        In case the insured event happens, a customer should send a claim for compensation. Then the company opens one or several claim cases (e.g. in case of an accident, often material damage & physical damage are handled separately). Once the case file is complete, it is sent for assessment by different estimators based on their area of expertise. According to the reports issued by the estimators, it is decided whether the claim case is approved. In case of approval the compensation decision is registered that stipulates which costs are eligible for (partial) refund.  For the supplied documents, the sum of compensation is calculated and the compensation is paid to the customer’s account.  The estimators’ reports must be stored in the database for at least one year after the payment of compensation for legal purposes. 
        
        """
    generation_options = {"temperature": 0.1, "max_new_tokens": 1024}
    local_llm = HuggingFacePipeline.from_model_id(
        model_id=model,
        task="text-generation",
        pipeline_kwargs=generation_options,
    )
    chat_model = ChatHuggingFace(llm=local_llm, verbose=True)
    pipeline = build_chian_from_llms(chat_model)
    return pipeline.invoke({"text": sample_spec})


In [35]:
#display_markdown(huggingface_make_example(MODEL_HUGGINGFACE[0]), raw=True)

In [36]:
# Batch run over every configured Hugging Face model, collecting garbage and releasing
# cached MPS memory after each one.
for model in MODEL_HUGGINGFACE:
    print(f"COT with {model}")
    # The batch helper defined in this notebook is huggingface_cot; the previous call to
    # `huggingface_zero` would raise NameError as soon as the model list is non-empty.
    huggingface_cot(model)
    gc.collect()
    torch.mps.empty_cache()

## Mlx-LLM

In [37]:
from langchain_community.llms import MLXPipeline
from langchain_community.chat_models import ChatMLX

In [38]:
# MLX model ids to run in the batch loop; the commented-out entries are skipped.
# A "/" in an id is replaced by "_" when the id is used as the result-file label.
MODEL_MLX = ["mlx-community/phi-4-8bit",
             "mlx-community/Falcon3-10B-Instruct-8bit", 
             "mlx-community/Qwen2.5-14B-Instruct-4bit",
             "mlx-community/Mistral-7B-Instruct-v0.3-4bit",
             "mlx-community/DeepSeek-R1-Distill-Qwen-7B-8bit",
             "mlx-community/Llama-3.2-3B-Instruct",
             "mlx-community/gemma-2-9b-8bit",
             #"mlx-community/gemma-2-27b-it-4bit",
            # "mlx-community/Mamba-Codestral-7B-v0.1-8bit",
            #"mlx-community/CodeLlama-13b-Instruct-hf-4bit-MLX"
            ]

In [39]:
def mlx_zero(model):
    """Batch-run the step-by-step pipeline over ROOT_FOLDER with one MLX model.

    The MLX pipeline is created with max_tokens 30_000 and temp 0.1.

    Args:
        model: MLX model id. Its "/" characters are replaced by "_" to form the
            label used in the result file names.
    """
    generation_options = {"max_tokens": 30_000, "temp": 0.1}
    mlx_llm = MLXPipeline.from_model_id(
        model_id=model,
        pipeline_kwargs=generation_options,
    )
    pipeline = build_chian_from_llms(ChatMLX(llm=mlx_llm))
    result_label = model.replace("/", "_")
    process_subfolders_with_chain(ROOT_FOLDER, pipeline, type=result_label)

In [40]:
def mlx_make_example(model):
    """Run the pipeline once on the built-in Alpha Insurance sample with an MLX model.

    The MLX pipeline is created with max_tokens 3000 and temp 0.7.

    Args:
        model: MLX model id.

    Returns:
        The output of the final pipeline step for the sample specification.
    """
    sample_spec = """Alpha Insurance is an insurance company that provides its clients with various types of insurance policies. 
As soon as a customer addresses Alpha Insurance, a broker is assigned to follow the customer’s file. The broker is registered in the system, so that when a customer calls, based on the contract, the help desk can immediately trace who is the customer's first account manager. After the broker is assigned to the customer, the latter indicates which type(s) of insurance policy they would like to sign for, so the broker could, depending on the case, either assess the customer’s profile on the spot or send the customer’s file for analysis to the head office. After the customer’s profile has been assessed and the customer has been deemed trustworthy , a preliminary contract/offer on an insurance product is made to the customer either in person or by email. (Such offers can also be extended to already existing customers.) If the customer agrees to the offer, the contract is signed by both parties. After the signing of the contract, the client enjoys the coverage and is invoiced (monthly or yearly – depending on the choice made in the contract) according to the price of the insurance product they bought.

In case the insured event happens, a customer should send a claim for compensation. Then the company opens one or several claim cases (e.g. in case of an accident, often material damage & physical damage are handled separately). Once the case file is complete, it is sent for assessment by different estimators based on their area of expertise. According to the reports issued by the estimators, it is decided whether the claim case is approved. In case of approval the compensation decision is registered that stipulates which costs are eligible for (partial) refund.  For the supplied documents, the sum of compensation is calculated and the compensation is paid to the customer’s account.  The estimators’ reports must be stored in the database for at least one year after the payment of compensation for legal purposes. 

"""
    generation_options = {"max_tokens": 3000, "temp": 0.7}
    mlx_llm = MLXPipeline.from_model_id(
        model_id=model,
        pipeline_kwargs=generation_options,
    )
    pipeline = build_chian_from_llms(ChatMLX(llm=mlx_llm))
    return pipeline.invoke({"text": sample_spec})


In [41]:
# Run the sample specification through the third configured MLX model (index 2) and render the returned text.
display_markdown(mlx_make_example(MODEL_MLX[2]), raw=True)

Fetching 10 files:   0%|          | 0/10 [00:00<?, ?it/s]

@startuml
class InsurancePolicy{}
class Customer{}
class Broker{}
class Contract{}
class Claim{}
class Estimator{}
class Compensation{}

Customer "0..1" -- "1..*" Broker
Customer "0..*" -- "1..*" Contract
Customer "0..*" -- "1..*" Claim
Broker "0..*" -- "0..*" Contract
Estimator "0..*" -- "0..*" Claim
Claim "1..1" -- "0..*" Compensation
@enduml

In [42]:
# Select the pure-Python protobuf implementation through its environment switch.
# NOTE(review): presumably a workaround for a protobuf error raised while loading some of
# the models below -- confirm, and confirm it is set before protobuf is first imported.
os.environ["PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION"] = "python"

In [43]:
# Imported once for this cell rather than on every loop iteration.
import gc, torch

# Batch run over every configured MLX model, collecting garbage and releasing cached
# MPS memory after each one.
for model in MODEL_MLX:
    print(f"COT with {model}")
    mlx_zero(model)
    gc.collect()
    torch.mps.empty_cache()

COT with mlx-community/phi-4-8bit


Fetching 10 files:   0%|          | 0/10 [00:00<?, ?it/s]

100%|███████████████████████████████████████████| 44/44 [25:02<00:00, 34.16s/it]


COT with mlx-community/Falcon3-10B-Instruct-8bit


Fetching 8 files:   0%|          | 0/8 [00:00<?, ?it/s]

100%|███████████████████████████████████████████| 44/44 [16:38<00:00, 22.70s/it]


COT with mlx-community/Qwen2.5-14B-Instruct-4bit


Fetching 10 files:   0%|          | 0/10 [00:00<?, ?it/s]

100%|███████████████████████████████████████████| 44/44 [11:26<00:00, 15.61s/it]


COT with mlx-community/Mistral-7B-Instruct-v0.3-4bit


Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

100%|███████████████████████████████████████████| 44/44 [09:18<00:00, 12.70s/it]


COT with mlx-community/DeepSeek-R1-Distill-Qwen-7B-8bit


Fetching 7 files:   0%|          | 0/7 [00:00<?, ?it/s]

  2%|▉                                        | 1/44 [07:00<5:01:17, 420.40s/it]Failed to send compressed multipart ingest: Connection error caused failure to POST https://api.smith.langchain.com/runs/multipart in LangSmith API. Please confirm your internet connection. ConnectionError(MaxRetryError('HTTPSConnectionPool(host=\'api.smith.langchain.com\', port=443): Max retries exceeded with url: /runs/multipart (Caused by NameResolutionError("<urllib3.connection.HTTPSConnection object at 0x3c30c8ad0>: Failed to resolve \'api.smith.langchain.com\' ([Errno 8] nodename nor servname provided, or not known)"))'))
Content-Length: 5354
API Key: lsv2_********************************************4b
  5%|█▊                                      | 2/44 [28:46<10:59:02, 941.49s/it]Failed to send compressed multipart ingest: Connection error caused failure to POST https://api.smith.langchain.com/runs/multipart in LangSmith API. Please confirm your internet connection. ConnectionError(MaxRetryError('HTT

COT with mlx-community/Llama-3.2-3B-Instruct


100%|███████████████████████████████████████████| 44/44 [23:21<00:00, 31.84s/it]


### Clean Cache

In [44]:
import gc, torch
# Final cleanup: run Python garbage collection, then ask PyTorch to release cached memory
# on the MPS backend.
gc.collect()
torch.mps.empty_cache()