# An√°lisis de los diferentes modelos

## 1. Importaci√≥n de librer√≠as y modulos

In [1]:
#%pip install -r requirements.txt

In [2]:
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall
from langchain_community.document_loaders.pdf import PyPDFDirectoryLoader
from langchain_community.llms import Ollama
from langchain_chroma import Chroma
import nest_asyncio
from ragas.run_config import RunConfig
import tqdm
from module import *

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
embbedings = [
    "multilingual_large",
    "baai_large",
    "mxbai_large",
    "baai_small"
]

llm = [
    "llama3",
    "mistral",
    "phi3",
    "gemma"
]

emb_model = embbedings[0]
llm_model = llm[0]

## 2. Carga de datos 

In [4]:
documents_loader = PyPDFDirectoryLoader("./data")
documents = documents_loader.load()
print(f"Loaded {len(documents)} documents")

Loaded 132 documents


In [5]:
chunks = split_documents(documents, chunk_size=512)
print("Number of chunks: ", len(chunks))

Number of chunks:  616


## 3. Creaci√≥n de la base de datos vectorial

In [6]:
# Load the existing database.
db = Chroma(
    collection_name="test3",
    persist_directory="../database",
    embedding_function=get_embedding_function(emb_model)
)



In [7]:
# Calculate Page IDs.
chunks_with_ids = calculate_chunk_ids(chunks)

# Add or Update the documents.
existing_items = db.get(include=[])  # IDs are always included by default
existing_ids = set(existing_items["ids"])
print(f"Number of existing documents in DB: {len(existing_ids)}")

Number of existing documents in DB: 0


In [8]:
# Only add documents that don't exist in the DB.
new_chunks = []
for chunk in chunks_with_ids:
    if chunk.metadata["id"] not in existing_ids:
        new_chunks.append(chunk)

In [9]:
if len(new_chunks):
    print(f"üëâ Adding new documents: {len(new_chunks)}")
    with tqdm.tqdm(total=len(new_chunks)) as pbar:
        for chunk in new_chunks:
            db.add_documents([chunk], ids=[chunk.metadata["id"]])
            pbar.update(1)
    print("Documents added correctly ‚úÖ")
else:
    print("‚úÖ No new documents to add")

print(f"{len(documents)} documents added to the database correctly")

üëâ Adding new documents: 616


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 616/616 [01:34<00:00,  6.55it/s]

Documents added correctly ‚úÖ
132 documents added to the database correctly





## 4. Preguntas de inter√©s

In [10]:
questions_docs_cat = [
    "Quins s√≥n els principals medicaments que poden desencadenar o exacerbar la insufici√®ncia card√≠aca segons l'article?",
    "Quins mecanismes estan implicats en la toxicitat miocard√≠aca indu√Øda per certs f√†rmacs?",
    "Com es categoritzen els medicaments segons el risc de provocar insufici√®ncia card√≠aca en pacients amb aquesta patologia?",
    "Com actuen els inhibidors de l'enzim convertidor d'angiotensina (IECA) en el tractament de la insufici√®ncia card√≠aca?",
    "Per qu√® √©s important no interrompre el tractament farmacol√≤gic de la insufici√®ncia card√≠aca fins i tot si els s√≠mptomes milloren?",
    "Com es realitza el seguiment m√®dic d'un pacient amb insufici√®ncia card√≠aca en tractament farmacol√≤gic?"
]

questions_docs_en = [
    "What is the fundamental postulate of Einstein's theory of special relativity?",
    "Explain the mass-energy equivalence formula and its significance in special relativity.",
    "How does special relativity reconcile with the principle of the constancy of the speed of light in a vacuum?",
    "Explain the concept of the 'Imitation Game' (Turing Test) and its significance in evaluating machine intelligence.",
    "What role does the concept of discrete state machines play in Turing's argument about machine intelligence?",
    "Discuss the significance of the concept of 'universal machines' as introduced by Turing in the context of machine intelligence."
]

questions_docs_es = [
    "¬øQu√© papel juega la arquitectura von Neumann en la estructura funcional de un ordenador?",
    "Explica la funci√≥n de la Unidad de Control (UC) en un CPU y c√≥mo afecta el rendimiento del ordenador.",
    "¬øCu√°l es la f√≥rmula que define la ejecuci√≥n de un programa en un ordenador seg√∫n las instrucciones y datos de entrada?",
    "¬øQu√© es el algoritmo K-means y c√≥mo se utiliza en el contexto del an√°lisis de consumo energ√©tico?",
    "¬øC√≥mo se determina el n√∫mero √≥ptimo de cl√∫steres en un an√°lisis con K-means?",
    "¬øQu√© metodolog√≠a se utiliza para tratar los datos at√≠picos antes de aplicar el algoritmo K-means?"
]

questions = questions_docs_cat + questions_docs_en + questions_docs_es

In [11]:
contexts = []

for question in questions:
    # Get the top 5 most relevant documents
    results = db.similarity_search_with_score(question, k=3)

    # Make a list of the contexts
    question_contexts = []
    for doc, _score in results:
        question_contexts.append(doc.page_content)

    # Append the context sub-list to the list of contexts
    contexts.append(question_contexts)

Ahora creamos las respuestas esperadas para cada pregunta.

In [12]:
answers_docs_cat = [
    "Els principals medicaments que poden desencadenar o exacerbar la insufici√®ncia card√≠aca inclouen antiinflamatoris no estero√Ødals (AINE), certs anest√®sics, antiarr√≠tmics, i alguns antidiab√®tics, entre d'altres, segons els mecanismes de toxicitat miocard√≠aca o disfunci√≥ card√≠aca.",
    "Els mecanismes implicats en la toxicitat miocard√≠aca inclouen l'estr√®s oxidatiu, la inhibici√≥ de prostaglandines, la retenci√≥ de sodi i aigua, i la depressi√≥ de la funci√≥ mioc√†rdica, entre d'altres. Aquests mecanismes poden variar segons el f√†rmac i la seva acci√≥ espec√≠fica sobre el cor.",
    "Els f√†rmacs associats amb la prolongaci√≥ de l'interval QT i el risc de torsades de pointes inclouen amiodarona, citalopram, escitalopram, metadona, sotalol, entre d'altres. Aquests medicaments poden provocar ar√≠tmies greus, especialment en pacients amb insufici√®ncia card√≠aca.",
    "Els inhibidors de l'enzim convertidor d'angiotensina (IECA) actuen dilatant els vasos sanguinis, cosa que facilita el bombeig de sang per part del cor i redueix la pressi√≥ arterial, ajudant a millorar els s√≠mptomes de la insufici√®ncia card√≠aca.",
    "√âs important no interrompre el tractament perqu√® els medicaments no nom√©s milloren els s√≠mptomes, sin√≥ que tamb√© prevenen l'empitjorament de la insufici√®ncia card√≠aca. Encara que el pacient es trobi b√©, ha de continuar amb el tractament prescrit.",
    "El seguiment m√®dic inclou la monitoritzaci√≥ de la pressi√≥ arterial, el ritme card√≠ac, el pes del pacient, i la realitzaci√≥ d'anal√≠tiques de sang per controlar els nivells de potassi, sodi, i la funci√≥ renal. Tamb√© es poden fer electrocardiogrames per avaluar la funci√≥ card√≠aca."
]

answers_docs_en = [
    "The fundamental postulate of Einstein's theory of special relativity is that the laws of physics are the same in all inertial frames of reference, and that the speed of light in a vacuum is constant for all observers, regardless of the motion of the light source or the observer.",
    "The mass-energy equivalence formula is given by: E=mc^2, where E is the energy, m is the mass, and c is the speed of light. This formula signifies that mass and energy are interchangeable, and a small amount of mass can be converted into a large amount of energy.",
    "Special relativity reconciles with the principle of the constancy of the speed of light by postulating that the speed of light in a vacuum is the same for all observers, regardless of their motion relative to the light source. This leads to the need for a new understanding of space and time as being interwoven into a four-dimensional spacetime.",
    "The 'Imitation Game' involves a human interrogator communicating with both a human and a machine through written text, without knowing which is which. The interrogator's task is to determine which participant is the machine. If the machine can consistently convince the interrogator that it is human, it is considered to have passed the test, suggesting a form of intelligence.",
    "Discrete state machines are central to Turing's argument as they represent the basic structure of digital computers, which operate by transitioning between distinct states based on inputs. Turing argues that digital computers, as discrete state machines, can be programmed to simulate any other discrete state machine, including those that mimic human intelligence.",
    "The concept of 'universal machines' is significant because it implies that a single machine, with appropriate programming, can perform the tasks of any other machine. Turing suggests that this universality allows digital computers to potentially exhibit behaviors that we would classify as intelligent, supporting his thesis that machines can think."
]

answers_docs_es = [
    "La arquitectura von Neumann es clave en la estructura funcional de un ordenador porque establece que tanto los datos como las instrucciones del programa se almacenan en la memoria principal, permitiendo que el ordenador ejecute programas de manera secuencial y eficiente.",
    "La Unidad de Control (UC) en un CPU es responsable de descodificar y ejecutar las instrucciones almacenadas en la memoria principal, enviando se√±ales de control a las dem√°s unidades del ordenador. La frecuencia del reloj de la UC, medida en MHz o GHz, determina en parte la velocidad de funcionamiento del ordenador, afectando su rendimiento.",
    "La f√≥rmula que define la ejecuci√≥n de un programa en un ordenador es: Datos de salida=f(Datos de entrada,Instrucciones), esta f√≥rmula indica que los datos de salida de un programa dependen tanto de los datos de entrada como de las instrucciones del programa almacenado en el ordenador.",
    "El algoritmo K-means es un m√©todo de agrupamiento no jer√°rquico que particiona un conjunto de datos en un n√∫mero espec√≠fico de grupos (cl√∫steres) bas√°ndose en las caracter√≠sticas compartidas. En el contexto del an√°lisis de consumo energ√©tico, K-means se utiliza para identificar patrones de consumo similares entre diferentes clientes, permitiendo una mejor gesti√≥n y optimizaci√≥n de recursos energ√©ticos.",
    "El n√∫mero √≥ptimo de cl√∫steres en un an√°lisis con K-means se determina mediante la evaluaci√≥n de la precisi√≥n y la calidad de los cl√∫steres utilizando medidas proporcionadas por la teor√≠a de los conjuntos aproximados (RST), as√≠ como mediante la ejecuci√≥n repetida del algoritmo con diferentes particiones iniciales.",
    "La metodolog√≠a utilizada para tratar los datos at√≠picos antes de aplicar el algoritmo K-means incluye la detecci√≥n de at√≠picos utilizando un rango de tres desviaciones est√°ndar y la imputaci√≥n de valores ausentes o at√≠picos mediante el valor medio del resto de las lecturas v√°lidas, asegurando que los datos sean representativos y minimizando la distorsi√≥n en los cl√∫steres resultantes."
]

answers = answers_docs_cat + answers_docs_en + answers_docs_es

Finalmente creamos el DataFrame para evaluar los modelos.

In [13]:
data_samples = {
    'question': questions,
    'answer': answers,
    'contexts': contexts,
    'ground_truth': answers
}

dataset = Dataset.from_dict(data_samples)

## 5. Evaluaci√≥n de los modelos

In [14]:
embeddings = get_embedding_function(emb_model)
llm = Ollama(model=llm_model)

In [15]:
nest_asyncio.apply()

try:
    score = evaluate(
        dataset,
        metrics=[faithfulness, answer_relevancy, context_precision, context_recall],
        llm=llm,
        embeddings=embeddings,
        raise_exceptions=False,
        run_config=RunConfig(
            max_retries=30, # Default is 10
            max_wait=180, # Default is 60
            #max_workers=64 # Default is 16
        )
    )
except Exception as e:
    print(f"An error ocurred: {e}")

df_score = score.to_pandas()
print(df_score)

Evaluating: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 72/72 [07:17<00:00,  6.07s/it]


                                             question  \
0   Quins s√≥n els principals medicaments que poden...   
1   Quins mecanismes estan implicats en la toxicit...   
2   Com es categoritzen els medicaments segons el ...   
3   Com actuen els inhibidors de l'enzim convertid...   
4   Per qu√® √©s important no interrompre el tractam...   
5   Com es realitza el seguiment m√®dic d'un pacien...   
6   What is the fundamental postulate of Einstein'...   
7   Explain the mass-energy equivalence formula an...   
8   How does special relativity reconcile with the...   
9   Explain the concept of the 'Imitation Game' (T...   
10  What role does the concept of discrete state m...   
11  Discuss the significance of the concept of 'un...   
12  ¬øQu√© papel juega la arquitectura von Neumann e...   
13  Explica la funci√≥n de la Unidad de Control (UC...   
14  ¬øCu√°l es la f√≥rmula que define la ejecuci√≥n de...   
15  ¬øQu√© es el algoritmo K-means y c√≥mo se utiliza...   
16  ¬øC√≥mo se de

In [16]:
df_score.head(10)

Unnamed: 0,question,answer,contexts,ground_truth,faithfulness,answer_relevancy,context_precision,context_recall
0,Quins s√≥n els principals medicaments que poden...,Els principals medicaments que poden desencade...,[Aquest article explica les pautes que se segu...,Els principals medicaments que poden desencade...,1.0,0.638305,1.0,0.5
1,Quins mecanismes estan implicats en la toxicit...,Els mecanismes implicats en la toxicitat mioca...,[F√†rmacs que poden donar lloc o \nexacerbar la...,Els mecanismes implicats en la toxicitat mioca...,0.4,0.700025,1.0,1.0
2,Com es categoritzen els medicaments segons el ...,Els f√†rmacs associats amb la prolongaci√≥ de l'...,[card√≠aca o un factor d‚Äôempitjorament. En aque...,Els f√†rmacs associats amb la prolongaci√≥ de l'...,0.0,0.579291,1.0,1.0
3,Com actuen els inhibidors de l'enzim convertid...,Els inhibidors de l'enzim convertidor d'angiot...,[Canal Salut. Medicaments i Farm√†cia Tra...,Els inhibidors de l'enzim convertidor d'angiot...,0.5,0.880366,1.0,1.0
4,Per qu√® √©s important no interrompre el tractam...,√âs important no interrompre el tractament perq...,[Data d‚Äôactualitzaci√≥: 23.03.2017 1/6 \nht...,√âs important no interrompre el tractament perq...,1.0,0.573702,1.0,1.0
5,Com es realitza el seguiment m√®dic d'un pacien...,El seguiment m√®dic inclou la monitoritzaci√≥ de...,[card√≠aca o un factor d‚Äôempitjorament. En aque...,El seguiment m√®dic inclou la monitoritzaci√≥ de...,0.2,0.581315,1.0,0.5
6,What is the fundamental postulate of Einstein'...,The fundamental postulate of Einstein's theory...,[special theory of relativity to distinguish i...,The fundamental postulate of Einstein's theory...,1.0,1.0,0.833333,1.0
7,Explain the mass-energy equivalence formula an...,The mass-energy equivalence formula is given b...,"[always small in comparison with the second, w...",The mass-energy equivalence formula is given b...,0.5,0.861328,0.583333,1.0
8,How does special relativity reconcile with the...,Special relativity reconciles with the princip...,[propagation of light must necessarily hold wi...,Special relativity reconciles with the princip...,0.75,0.684563,1.0,1.0
9,Explain the concept of the 'Imitation Game' (T...,The 'Imitation Game' involves a human interrog...,"[433 VOL. LIX. NO. 236.] [October, 1950 \nM ...",The 'Imitation Game' involves a human interrog...,1.0,0.80038,1.0,1.0


In [17]:
df_score.to_csv(f"./test3_{llm_model}_{emb_model}.csv", index=False)