# RAGondin
Proto NB to make a RAG working out.

In [19]:
import torch

print("CUDA Available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("CUDA Version:", torch.version.cuda)
    print("Number of GPUs:", torch.cuda.device_count())
    print("GPU Name:", torch.cuda.get_device_name(0))


CUDA Available: True
CUDA Version: 12.1
Number of GPUs: 1
GPU Name: NVIDIA GeForce RTX 4090


In [None]:
import sys
!{sys.executable} -m pip install  numpy pymilvus accelerate bitsandbytes
!{sys.executable} -m pip install  langchain langchain_experimental unstructured pillow_heif unstructured_inference pytesseract unstructured_pytesseract pikepdf timm
!{sys.executable} -m pip install  pypdf pdf2image pdfminer pdfminer-six pypdfium2 pdfplumber
!{sys.executable} -m pip install  rapidocr-onnxruntime
!{sys.executable} -m pip install  torch transformers accelerate bitsandbytes transformers sentence-transformers qdrant-client
!{sys.executable} -m pip install  ragatouille 

In [None]:
import sys
!{sys.executable} -m pip install langchain_openai

## Data Processing

### Uniformising Data

In [1]:
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders.pdf import PDFMinerLoader
from langchain.document_loaders.xml import UnstructuredXMLLoader
from langchain.document_loaders.csv_loader import CSVLoader
from langchain_community.document_loaders.text import TextLoader
from langchain_community.document_loaders import UnstructuredHTMLLoader
from langchain_community.document_loaders.merge import MergedDataLoader



# Define a dictionary to map file extensions to their respective loaders
loaders = {
    '.pdf': PDFMinerLoader,
    '.xml': UnstructuredXMLLoader,
    '.csv': CSVLoader,
    '.txt': TextLoader,
    '.html': UnstructuredHTMLLoader,
}

# Define a function to create a DirectoryLoader for a specific file type
def create_directory_loader(file_type, directory_path):
    return DirectoryLoader(
        path=directory_path,
        glob=f"**/*{file_type}",
        loader_cls=loaders[file_type],
    )

directory_path = "test_data/"
# Create DirectoryLoader instances for each file type
pdf_loader = create_directory_loader('.pdf', directory_path)
xml_loader = create_directory_loader('.xml', directory_path)
csv_loader = create_directory_loader('.csv', directory_path)
txt_loader = create_directory_loader('.txt', directory_path)
html_loader = create_directory_loader('.html', directory_path)

#loader_all = MergedDataLoader(loaders=[pdf_loader, xml_loader,csv_loader,txt_loader,html_loader])

# Load the files
pdf_documents = pdf_loader.load()
xml_documents = xml_loader.load()
csv_documents = csv_loader.load()
txt_documents = txt_loader.load()
html_documents = html_loader.load()

docs = pdf_documents + xml_documents + csv_documents + txt_documents + html_documents
#docs2 = loader_all.load()

In [4]:
type(docs[0])

langchain_core.documents.base.Document

### Chunking

In [2]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=200, add_start_index=True,
    separators=["\n\n", "\n", ".", " ", ""],)

chunked_docs = splitter.split_documents(docs)

In [8]:
type(chunked_docs[0])

langchain_core.documents.base.Document

In [3]:
print(chunked_docs[10].page_content)

Valeur en Points

Chaque  figurine  possède  une  valeur  en  points, 
qui est citée dans son profil. Cette valeur permet de dé-
terminer l’impact de la figurine à la bataille. Un simple 
milicien Hobbit coûte ainsi 4pts, tandis que Sauron en 
vaut 400 ! Certaines figurines valent beaucoup de points, 
car  elles  sont  capables  d’éliminer  des  douzaines  d’ad-
versaires  en  quelques  tours,  d’autres  sont  plus  utiles 
pour renforcer leurs alliés, etc. 
En cumulant les coûts en points de vos figurines, vous obtenez la valeur totale de votre armée. Cela vous 
permet de disputer des parties équitables : il faut beaucoup de Hobbits pour espérer mettre à bas le Seigneur 
des Ténèbres !

Taille de la Partie


### Embedings

In [4]:
from langchain_community.vectorstores import Qdrant
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.docstore.document import Document as LangchainDocument


EMBEDDING_MODEL_NAME = "thenlper/gte-small"

embedding_model = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    multi_process=True,
    model_kwargs={"device": "cuda"},
    encode_kwargs={"normalize_embeddings": True},  # set True for cosine similarity
)

### Vector Data Base

In [13]:
from qdrant_client import QdrantClient

#host = "10.0.0.177"
#port = "6333"
#client = QdrantClient(url=f"http://{host}:{port}", prefer_grpc=False)
#KNOWLEDGE_VECTOR_DATABASE = Qdrant(client=client, collection_name="texts", embeddings=embedding_model)
#KNOWLEDGE_VECTOR_DATABASE = Qdrant.from_documents(client= client,chunked_docs,embedding_model)
#KNOWLEDGE_VECTOR_DATABASE = Qdrant(client=client, collection_name="texts", embeddings=embedding_model)
#KNOWLEDGE_VECTOR_DATABASE = Qdrant.from_documents(chunked_docs, embedding=embedding_model)
#KNOWLEDGE_VECTOR_DATABASE = Qdrant.from_documents(
    #chunked_docs,
    #embedding_model,
    #location=":memory:",  # Local mode with in-memory storage only
    #collection_name="my_documents",
#)
qdrant = Qdrant.from_documents(
    chunked_docs,
    embedding_model,
    url="http://10.3.0.40:6333",
    collection_name="my_documents",
)


In [12]:
from qdrant_client import QdrantClient

def check_qdrant_data(host, port, collection_name):
    client = QdrantClient(host=host, port=port)
    
    # Initialize the vector with 384 dimensions
    query_vector = [0.0] * 100  # Adjust the dimension from 512 to 384

# Assuming this is the correct place to modify and use the vector
    search_results = client.search(
    collection_name=collection_name,
    query_vector=query_vector,  # Make sure this vector is correctly formatted
    limit=10  # Retrieve top 10 similar items
)


    
   # Assuming search_results contains a 'hits' key with the relevant search data
    if 'hits' in search_results:
        for result in search_results['hits']:
            print(result)  # Adjust this print statement according to the structure of 'result'
    else:
        print("No hits found in the search results.")


# Usage
check_qdrant_data('10.3.0.40', 6333, 'my_documents')


UnexpectedResponse: Unexpected Response: 400 (Bad Request)
Raw response content:
b'{"status":{"error":"Wrong input: Vector dimension error: expected dim: 384, got 100"},"time":0.00029}'

AssertionError: Unknown arguments: ['top']

## Bloc LLM
Here we work on the LLM model

### LLM Model

In [14]:
from transformers import pipeline
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from langchain_community.llms import HuggingFacePipeline


READER_MODEL_NAME = "openchat/openchat-3.5-0106"

bnb_config = BitsAndBytesConfig(
   load_in_4bit=True,
   bnb_4bit_quant_type="nf4",
   bnb_4bit_use_double_quant=True,
   bnb_4bit_compute_dtype=torch.bfloat16
)

model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME, quantization_config=bnb_config,low_cpu_mem_usage=True)
tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)

pipe = pipeline(
    model=model,
    tokenizer=tokenizer,
    task="text-generation",
    do_sample=True,
    temperature=0.1,
    repetition_penalty=1.1,
    return_full_text=False,
    max_new_tokens=10000,
)

hf = HuggingFacePipeline(pipeline=pipe)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


### Pipeline for the RAG

#### Prompt config

In [15]:
from langchain.prompts import PromptTemplate
prompt_template = """
<|system|>
Answer the question in french only using the following french context to help:

{context}

</s>
<|user|>
{question}
</s>
<|assistant|>

 """

RAG_PROMPT_TEMPLATE = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)


### Reranking Option

In [16]:
RAG_PROMPT_TEMPLATE.format(question="How to create a pipeline object?", context='Lol')

'\n<|system|>\nAnswer the question in french only using the following french context to help:\n\nLol\n\n</s>\n<|user|>\nHow to create a pipeline object?\n</s>\n<|assistant|>\n\n '

### Assembling

In [17]:
from transformers import Pipeline
from typing import Optional, Tuple, List
from langchain.chains import SimpleSequentialChain, LLMChain, StuffDocumentsChain
from langchain.retrievers import ContextualCompressionRetriever
from langchain_core.output_parsers import StrOutputParser
from ragatouille import RAGPretrainedModel

RAG = RAGPretrainedModel.from_pretrained("colbert-ir/colbertv2.0")


def answer_with_rag(
    question: str,
    llm: Pipeline,
    knowledge_index: Qdrant,
    num_retrieved_docs: int = 10,
    num_returned_docs: int = 5,
    rerank: bool = True,
) -> Tuple[str, List[LangchainDocument]]:
    # Gather documents with retriever
    print("=> Retrieving documents...")
    
    retriever = knowledge_index.as_retriever(search_kwargs={"k": num_retrieved_docs})

    
    if rerank:
        compression_retriever = ContextualCompressionRetriever(base_compressor=RAG.as_langchain_document_compressor(),
                                                           base_retriever=retriever)
        relevant_docs = compression_retriever.invoke(question)
    else:
        relevant_docs = retriever.invoke(question)

    #Build the chain

    relevant_docs = relevant_docs[:num_returned_docs]

    relevant_docs_txt = [doc.page_content for doc in relevant_docs]

    #Stuff Chain
    document_variable_name = "context"
    
    
    document_prompt = PromptTemplate(
        input_variables=["page_content"],
        template="{page_content}"
    )
    
    template = PromptTemplate(
        input_variables=["context", "question"],
        template=prompt_template,
    )
    
    chain = LLMChain(llm=hf, prompt=RAG_PROMPT_TEMPLATE, output_parser=StrOutputParser(),verbose= True)
    chainfinal = StuffDocumentsChain(
        llm_chain=chain,
        document_prompt=document_prompt,
        document_variable_name=document_variable_name,
        verbose =True
    )

    #Stuff Chain
    



    # Redact an answer
    print("=> Generating answer...")
    
    
    answer = chainfinal.run(question = question, input_documents = relevant_docs)

    return answer, relevant_docs_txt

def answer_without_rag(
    question: str,
    llm: Pipeline,
) -> Tuple[str, List[LangchainDocument]]:
    answer = llm(question)[0]["generated_text"]

    return answer,

artifact.metadata:   0%|          | 0.00/1.63k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/405 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

### Testing

In [18]:
question = "Qu'elle sont les 5 phases du jeu MESBG?"
answer, relevant_docs = answer_with_rag(question, hf, qdrant,num_retrieved_docs = 10 )
print(answer)

=> Retrieving documents...


100%|██████████| 1/1 [00:00<00:00,  4.22it/s]


=> Generating answer...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<|system|>
Answer the question in french only using the following french context to help:

Le Jeu

Que ce soit en jouant des scénarios historiques ou bien compétitifs, la manière de jouer est la même.

La partie est séparée est en une succession de tours. Chaque tour est composée de 5 phases :

Initiative : c’est la phase qui détermine quel joueur va jouer en premier pour le tour.

Mouvement : c’est la phase la plus importante où quasiment tout ce décide. Les figurines auront le droit de se déplacer, faire de la magie, de charger, etc. On commencera toujours par le joueur qui a l’initiative. Une fois qu’il a terminé, l’autre joueur pourra bouger ses figurines.

Tir : c’est la phase où les figurines pourront utiliser leurs armes de tir (arc, arbalète, sarbacane ou engin de siège,…). On commence par le joueur qui a l’initiative. E

  warn_deprecated(



[1m> Finished chain.[0m

[1m> Finished chain.[0m

Les 5 phases du jeu MESBG sont :

1. Phase d'Initiative : les joueurs lancent chacun un dé pour voir qui a l'initiative.
2. Phase de Mouvement : chaque joueur bouge leurs figurines. Le joueur ayant l'initiative bouge ses figurines en premier.
3. Phase de Tir : chaque joueur peut faire tirer ses figurines qui ont une arme de tir en commençant par le joueur qui a l'initiative.
4. Phase de Combat : toutes les figurines qui sont engagées en duel doivent combattre. Le joueur ayant l'initiative choisit l'ordre de résolution des combats.
5. Phase de Fin : on résout tous les effets qui se terminent jusqu'à la fin du tour comme Paralysie, etc.) et on enlève tous les tokens et les dés pour commencer un nouveau tour.


In [13]:
relevant_docs

['Le Jeu\n\nQue ce soit en jouant des scénarios historiques ou bien compétitifs, la manière de jouer est la même.\n\nLa partie est séparée est en une succession de tours. Chaque tour est composée de 5 phases :\n\nInitiative : c’est la phase qui détermine quel joueur va jouer en premier pour le tour.\n\nMouvement : c’est la phase la plus importante où quasiment tout ce décide. Les figurines auront le droit de se déplacer, faire de la magie, de charger, etc. On commencera toujours par le joueur qui a l’initiative. Une fois qu’il a terminé, l’autre joueur pourra bouger ses figurines.\n\nTir : c’est la phase où les figurines pourront utiliser leurs armes de tir (arc, arbalète, sarbacane ou engin de siège,…). On commence par le joueur qui a l’initiative. Ensuite son adversaire pourra tirer.\n\nCombat : c’est la phase où les figurines engagées aux corps à corps peuvent se livrer un duel. L’ordre des combats est choisi par le joueur ayant l’initiative.',
 'Subtilités du jeu\n\nNotre SBG, bien

In [11]:
relevant_docs

['Le Jeu\n\nQue ce soit en jouant des scénarios historiques ou bien compétitifs, la manière de jouer est la même.\n\nLa partie est séparée est en une succession de tours. Chaque tour est composée de 5 phases :\n\nInitiative : c’est la phase qui détermine quel joueur va jouer en premier pour le tour.\n\nMouvement : c’est la phase la plus importante où quasiment tout ce décide. Les figurines auront le droit de se déplacer, faire de la magie, de charger, etc. On commencera toujours par le joueur qui a l’initiative. Une fois qu’il a terminé, l’autre joueur pourra bouger ses figurines.\n\nTir : c’est la phase où les figurines pourront utiliser leurs armes de tir (arc, arbalète, sarbacane ou engin de siège,…). On commence par le joueur qui a l’initiative. Ensuite son adversaire pourra tirer.\n\nCombat : c’est la phase où les figurines engagées aux corps à corps peuvent se livrer un duel. L’ordre des combats est choisi par le joueur ayant l’initiative.',
 '1- Phase d’Initiative : les joueurs 

In [10]:
question2 = "Comment jouer un mumakil?"
answer, relevant_docs = answer_with_rag(question2, hf, KNOWLEDGE_VECTOR_DATABASE,num_retrieved_docs = 15 )
print(answer)

=> Retrieving documents...


100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 44.38it/s]


=> Generating answer...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<|system|>
Answer the question in french only using the following french context to help:

R : A moins de 3’’ de l’un d’entre eux.

Q : Est-ce que Smaug est une cible de Champ de Bataille ou une 
Cible de Siège ?

Q : Est-ce que les Capitaines Orques, les Guerriers Orques et les 
Chevaucheurs  de  Warg  conservent  la Haine  des  Elfes  conférée 
par Razgûsh si celui-ci meurt ?

R : Une cible de Champ de Bataille.

R : Oui.

Q : Est-ce que Smaug peut être Immobilisé ?

R : Oui.

Q  :  Si  la  Légion  des  Assaillants  de  la  Lothlórien  joue  le  scénario 
Affrontement au Clair de Lune, reçoit-elle un bonus cumulé de 
+2 pour blesser au Tir ?

R : Non. Le bonus de +1 ne s’applique qu’une fois.

LÉGIONS LÉGENDAIRES

Q  :  Si  le  Chef  de  Guerre  des  Mûmakil  déclare  un  Défi  Héroïque 
alors  qu’il  est  monté  sur  son  Mûm

# Evaluation

In [9]:
import json
from huggingface_hub import InferenceClient


repo_id = "mistralai/Mixtral-8x7B-Instruct-v0.1"

llm_client = InferenceClient(
    model=repo_id,
    timeout=120,
)


def call_llm(inference_client: InferenceClient, prompt: str):
    response = inference_client.post(
        json={
            "inputs": prompt,
            "parameters": {"max_new_tokens": 1000},
            "task": "text-generation",
        },
    )
    return json.loads(response.decode())[0]["generated_text"]


call_llm(llm_client, "This is a test context")

'This is a test context for the `@mui/material` library.\n\n## Installation\n\n```sh\nnpm install @mui/material\n```\n\n## Usage\n\n```jsx\nimport React from \'react\';\nimport { Button } from \'@mui/material\';\n\nfunction App() {\n  return (\n    <div className="App">\n      <Button variant="contained" color="primary">\n        Hello World\n      </Button>\n    </div>\n  );\n}\n\nexport default App;\n```\n\n## Documentation\n\n- [Material-UI](https://material-ui.com/)\n- [Material Design](https://material.io/)'

In [38]:
QA_generation_prompt = """
Your task is to write a factoid question in french and an answer in french given a context in french.
Your factoid question should be answerable with a specific, concise piece of factual information from the context.
Your factoid question should be formulated in the same style as questions users could ask in a search engine.
This means that your factoid question MUST NOT mention something like "according to the passage" or "context".

Provide your answer as follows:

Output:::
Factoid question: (your factoid question)
Answer: (your answer to the factoid question)

Now here is the context.

Context: {context}\n
Output:::"""

In [39]:
chunked_docs[0].page_content

'Compendium des \nParties Compétitives\n\nRègles – Alliances\nScénarios – Références Rapides\n\n\x0c\x0cSommaire'

In [40]:
docs_processed = chunked_docs

In [41]:
import random
from tqdm import tqdm

N_GENERATIONS = 10  # We intentionally generate only 10 QA couples here for cost and time considerations

print(f"Generating {N_GENERATIONS} QA couples...")

outputs = []

sample_docs = random.sample(docs_processed, N_GENERATIONS)

for sampled_context in tqdm(sample_docs):
    # Generate QA couple
    output_QA_couple = call_llm(llm_client, QA_generation_prompt.format(context=sampled_context.page_content))
    try:
        question = output_QA_couple.split("Factoid question: ")[-1].split("Answer: ")[0]
        answer = output_QA_couple.split("Answer: ")[-1]
        assert len(answer) < 300, "Answer is too long"
        outputs.append(
            {
                "context": sampled_context.page_content,
                "question": question,
                "answer": answer,
                "source_doc": sampled_context.metadata["source"],
            }
        )
    except:
        continue

Generating 10 QA couples...


100%|███████████████████████████████████████████| 10/10 [00:30<00:00,  3.03s/it]


In [47]:
question_groundedness_critique_prompt = """
You will be given a context and a question both in french.
Your task is to provide a 'total rating' scoring how well one can answer the given question unambiguously with the given context.
Give your answer on a scale of 1 to 5, where 1 means that the question is not answerable at all given the context, and 5 means that the question is clearly and unambiguously answerable with the context.

Provide your answer as follows:

Answer:::
Evaluation: (your rationale for the rating, as a text)
Total rating: (your rating, as a number between 1 and 5)

You MUST provide values for 'Evaluation:' and 'Total rating:' in your answer.

Now here are the question and context.

Question: {question}\n
Context: {context}\n
Answer::: """

question_relevance_critique_prompt = """
You will be given a question both in french.
Your task is to provide a 'total rating' representing how useful this question can be to machine learning developers building NLP applications with the Hugging Face ecosystem.
Give your answer on a scale of 1 to 5, where 1 means that the question is not useful at all, and 5 means that the question is extremely useful.

Provide your answer as follows:

Answer:::
Evaluation: (your rationale for the rating, as a text)
Total rating: (your rating, as a number between 1 and 5)

You MUST provide values for 'Evaluation:' and 'Total rating:' in your answer.

Now here is the question.

Question: {question}\n
Answer::: """

question_standalone_critique_prompt = """
You will be given a question both in french
Your task is to provide a 'total rating' representing how context-independant this question is.
Give your answer on a scale of 1 to 5, where 1 means that the question depends on additional information to be understood, and 5 means that the question makes sense by itself.
For instance, if the question refers to a particular setting, like 'in the context' or 'in the document', the rating must be 1.
The questions can contain obscure technical nouns or acronyms like Gradio, Hub, Hugging Face or Space and still be a 5: it must simply be clear to an operator with access to documentation what the question is about.

For instance, "What is the name of the checkpoint from which the ViT model is imported?" should receive a 1, since there is an implicit mention of a context, thus the question is not independant from the context.

Provide your answer as follows:

Answer:::
Evaluation: (your rationale for the rating, as a text)
Total rating: (your rating, as a number between 1 and 5)

You MUST provide values for 'Evaluation:' and 'Total rating:' in your answer.

Now here is the question.

Question: {question}\n
Answer::: """

In [48]:
output_QA_couple

'\nYour task is to write a factoid question in french and an answer in french given a context in french.\nYour factoid question should be answerable with a specific, concise piece of factual information from the context.\nYour factoid question should be formulated in the same style as questions users could ask in a search engine.\nThis means that your factoid question MUST NOT mention something like "according to the passage" or "context".\n\nProvide your answer as follows:\n\nOutput:::\nFactoid question: (your factoid question)\nAnswer: (your answer to the factoid question)\n\nNow here is the context.\n\nContext: La variété des Héros dans ce jeu est très vaste. On va du simple Capitaine de Gobelinville à Aragorn, Roi Elessar en passant par Sauron. Vous pouvez donc vous faire plaisir et jouer le héros qui vous plaît le plus! Qui n’a pas rêver d’incarner Theoden lors de la charge du Pelennor ou encore Azog lors de la Bataille des 5 Armées.\n\nLes\n\nHéros apportent également à votre arm

In [49]:
print("Generating critique for each QA couple...")
for output in tqdm(outputs):
    evaluations = {
        "groundedness": call_llm(
            llm_client,
            question_groundedness_critique_prompt.format(context=output["context"], question=output["question"]),
        ),
        "relevance": call_llm(
            llm_client,
            question_relevance_critique_prompt.format(question=output["question"]),
        ),
        "standalone": call_llm(
            llm_client,
            question_standalone_critique_prompt.format(question=output["question"]),
        ),
    }
    try:
        for criterion, evaluation in evaluations.items():
            score, eval = (
                int(evaluation.split("Total rating: ")[-1].strip()),
                evaluation.split("Total rating: ")[-2].split("Evaluation: ")[1],
            )
            output.update(
                {
                    f"{criterion}_score": score,
                    f"{criterion}_eval": eval,
                }
            )
    except Exception as e:
        continue

Generating critique for each QA couple...


100%|███████████████████████████████████████████| 10/10 [01:38<00:00,  9.81s/it]


In [52]:
import pandas as pd
import datasets


pd.set_option("display.max_colwidth", None)

generated_questions = pd.DataFrame.from_dict(outputs)

print("Evaluation dataset before filtering:")
display(
    generated_questions[
        [
            "question",
            "answer",
            "groundedness_score",
            "relevance_score",
            "standalone_score",
        ]
    ]
)
generated_questions = generated_questions.loc[
    (generated_questions["groundedness_score"] >= 4)
    & (generated_questions["relevance_score"] >= 1)
    & (generated_questions["standalone_score"] >= 4)
]
print("============================================")
print("Final evaluation dataset:")
display(
    generated_questions[
        [
            "question",
            "answer",
            "groundedness_score",
            "relevance_score",
            "standalone_score",
        ]
    ]
)

eval_dataset = datasets.Dataset.from_pandas(generated_questions, split="train", preserve_index=False)

Evaluation dataset before filtering:


Unnamed: 0,question,answer,groundedness_score,relevance_score,standalone_score
0,Quand les joueurs notent-ils leurs objectifs secrets dans le scénario Brouillard de Guerre ?\n,Après que les deux camps se sont déployés.,3,1,2
1,Les figurines de Cavalerie reçoivent-elles un bonus en chargeant un ennemi qui défend une barrière ?\n,No.,5,1,5
2,Quel est le score de combat de Thorin?\n,Le score de combat de Thorin est 6.,1,1,3
3,Quels sont les bonus spéciaux qu'une figurine de Cavalerie reçoit lorsqu'elle charge une figurine d'Infanterie ?\n,Une figurine de Cavalerie reçoit deux bonus spéciaux lorsqu'elle charge une figurine d'Infanterie : une attaque supplémentaire et la possibilité de projeter au sol.,5,1,5
4,Quel est le rôle de la monture lors d'un combat de cavalerie?\n,La monture apporte son poids et sa vitesse d'impact lors de la charge lors d'un combat de cavalerie.,3,1,5
5,Combien d'unités équipées d'arcs et/ou d'arbalètes maximum une armée peut-elle contenir ?\n,Une armée ne peut contenir (hors règles spéciales) qu'un tiers (arrondi au supérieur) d'unités équipées d'arcs et/ou d'arbalètes.,5,1,5
6,Les Mercenaires Gobelins peuvent-ils apparaître dans une maison dont le toit n'est pas déboitable ?\n,"Non, la règle spéciale Embuscade des mercenaires ne permet pas aux Mercenaires Gobelins d'apparaître dans ou sur des décors pleins, tels qu'une maison dont le toit n'est pas déboitable.",1,1,5
7,Comment une figurine peut-elle se désengager d'un combat?\n,Une figurine peut se désengager d'un combat si la figurine qui l'a chargée est tuée par une arme de jet ou un pouvoir magique.,5,1,5
8,Combien d'actions héroïques un héros peut-il déclarer par tour selon les règles ?\n,"Un héros peut déclarer une action héroïque par phase, ce qui signifie qu'il peut en déclarer deux par tour.",5,1,5
9,Quelles sont les trois actions héroïques communes à tous les héros?\n,"Les trois actions héroïques communes à tous les héros sont l'élan héroïque, le tir héroïque et le combat héroïque.",5,1,5


Final evaluation dataset:


Unnamed: 0,question,answer,groundedness_score,relevance_score,standalone_score
1,Les figurines de Cavalerie reçoivent-elles un bonus en chargeant un ennemi qui défend une barrière ?\n,No.,5,1,5
3,Quels sont les bonus spéciaux qu'une figurine de Cavalerie reçoit lorsqu'elle charge une figurine d'Infanterie ?\n,Une figurine de Cavalerie reçoit deux bonus spéciaux lorsqu'elle charge une figurine d'Infanterie : une attaque supplémentaire et la possibilité de projeter au sol.,5,1,5
5,Combien d'unités équipées d'arcs et/ou d'arbalètes maximum une armée peut-elle contenir ?\n,Une armée ne peut contenir (hors règles spéciales) qu'un tiers (arrondi au supérieur) d'unités équipées d'arcs et/ou d'arbalètes.,5,1,5
7,Comment une figurine peut-elle se désengager d'un combat?\n,Une figurine peut se désengager d'un combat si la figurine qui l'a chargée est tuée par une arme de jet ou un pouvoir magique.,5,1,5
8,Combien d'actions héroïques un héros peut-il déclarer par tour selon les règles ?\n,"Un héros peut déclarer une action héroïque par phase, ce qui signifie qu'il peut en déclarer deux par tour.",5,1,5
9,Quelles sont les trois actions héroïques communes à tous les héros?\n,"Les trois actions héroïques communes à tous les héros sont l'élan héroïque, le tir héroïque et le combat héroïque.",5,1,5


In [53]:
eval_dataset

Dataset({
    features: ['context', 'question', 'answer', 'source_doc', 'groundedness_score', 'groundedness_eval', 'relevance_score', 'relevance_eval', 'standalone_score', 'standalone_eval'],
    num_rows: 6
})

In [83]:
from langchain.chat_models.base import BaseChatModel
from typing import Optional, List, Tuple


def run_rag_tests(
    eval_dataset: datasets.Dataset,
    llm: BaseChatModel,
    knowledge_index: Qdrant,
    output_file: str,
    rerank: bool = True,
    verbose: Optional[bool] = True,
    test_settings: Optional[str] = None,  # To document the test settings used
):
    """Runs RAG tests on the given dataset and saves the results to the given output file."""
    try:  # load previous generations if they exist
        with open(output_file, "r") as f:
            outputs = json.load(f)
    except:
        outputs = []

    for example in tqdm(eval_dataset):
        question = example["question"]
        if question in [output["question"] for output in outputs]:
            continue

        answer, relevant_docs = answer_with_rag(question, llm, knowledge_index, rerank=rerank)
        if verbose:
            print("=======================================================")
            print(f"Question: {question}")
            print(f"Answer: {answer}")
            print(f'True answer: {example["answer"]}')
        result = {
            "question": question,
            "true_answer": example["answer"],
            "source_doc": example["source_doc"],
            "generated_answer": answer,
            "retrieved_docs": [doc for doc in relevant_docs],
        }
        if test_settings:
            result["test_settings"] = test_settings
        outputs.append(result)

        with open(output_file, "w") as f:
            json.dump(outputs, f)

In [117]:
EVALUATION_PROMPT = """
###Context:
You are a fair evaluator language model for french documentation.
###Task Description:
An instruction (might include an Input inside it), a response to evaluate, a reference answer that gets a score of 5, and a score rubric representing a evaluation criteria are given.
1. Write a detailed feedback that assess the quality of the response strictly based on the given score rubric, not evaluating in general.
2. After writing a feedback, write a score that is an integer between 1 and 5. You should refer to the score rubric.
3. The output format should look as follows: \"Feedback: {{write a feedback for criteria}} [RESULT] {{an integer number between 1 and 5}}\"
4. Please do not generate any other opening, closing, and explanations. Be sure to include [RESULT] in your output.

###The instruction to evaluate:
{instruction}

###Response to evaluate:
{response}

###Reference Answer (Score 5):
{reference_answer}

###Score Rubrics:
[Is the response correct, accurate, and factual based on the reference answer?]
Score 1: The response is completely incorrect, inaccurate, and/or not factual.
Score 2: The response is mostly incorrect, inaccurate, and/or not factual.
Score 3: The response is somewhat correct, accurate, and/or factual.
Score 4: The response is mostly correct, accurate, and factual.
Score 5: The response is completely correct, accurate, and factual.

###Feedback:"""

evaluation_prompt_template = PromptTemplate(
    input_variables=["instruction", "response","reference_answer"],
    template=EVALUATION_PROMPT,
)


In [118]:
evaluation_prompt_template

PromptTemplate(input_variables=['instruction', 'reference_answer', 'response'], template='\n###Context:\nYou are a fair evaluator language model for french documentation.\n###Task Description:\nAn instruction (might include an Input inside it), a response to evaluate, a reference answer that gets a score of 5, and a score rubric representing a evaluation criteria are given.\n1. Write a detailed feedback that assess the quality of the response strictly based on the given score rubric, not evaluating in general.\n2. After writing a feedback, write a score that is an integer between 1 and 5. You should refer to the score rubric.\n3. The output format should look as follows: "Feedback: {{write a feedback for criteria}} [RESULT] {{an integer number between 1 and 5}}"\n4. Please do not generate any other opening, closing, and explanations. Be sure to include [RESULT] in your output.\n\n###The instruction to evaluate:\n{instruction}\n\n###Response to evaluate:\n{response}\n\n###Reference Answ

In [119]:

eval_chat_model = llm_client

def evaluate_answers(
    answer_path: str,
    eval_chat_model: InferenceClient,
    evaluator_name: str,
    evaluation_prompt_template: PromptTemplate,
) -> None:
    """Evaluates generated answers. Modifies the given answer file in place for better checkpointing."""
    answers = []
    if os.path.isfile(answer_path):  # load previous generations if they exist
        answers = json.load(open(answer_path, "r"))

    for experiment in tqdm(answers):
        if f"eval_score_{evaluator_name}" in experiment:
            continue

        eval_prompt = evaluation_prompt_template.format(
            instruction=experiment["question"],
            response=experiment["generated_answer"],
            reference_answer=experiment["true_answer"],
        )
        eval_result = call_llm(eval_chat_model, eval_prompt).split("###Feedback")[-1]
        
        feedback, score = [item.strip() for item in eval_result.split("[RESULT]")]
        experiment[f"eval_score_{evaluator_name}"] = score
        experiment[f"eval_feedback_{evaluator_name}"] = feedback

        with open(answer_path, "w") as f:
            json.dump(answers, f)

In [121]:
import os
if not os.path.exists("./output"):
    os.mkdir("./output")

evaluator_name = llm_client.model

for chunk_size in [200]:  # Add other chunk sizes (in tokens) as needed
    for embeddings in ["thenlper/gte-small"]:  # Add other embeddings as needed
        for rerank in [True, False]:
            settings_name = f"chunk:{chunk_size}_embeddings:{embeddings.replace('/', '~')}_rerank:{rerank}_reader-model:{'open-chat'}"
            output_file_name = f"./output/rag_{settings_name}.json"

            print(f"Running evaluation for {settings_name}:")

            print("Loading knowledge base embeddings...")

#            knowledge_index = load_embeddings(
#                RAW_KNOWLEDGE_BASE,
#                chunk_size=chunk_size,
#                embedding_model_name=embeddings,
#            )
            
            knowledge_index = KNOWLEDGE_VECTOR_DATABASE
            print("Running RAG...")
            run_rag_tests(
                eval_dataset=eval_dataset,
                llm=hf,
                knowledge_index=knowledge_index,
                output_file=output_file_name,
                rerank=rerank,
                verbose=False,
                test_settings=settings_name,
            )

            print("Running evaluation...")
            evaluate_answers(
                output_file_name,
                eval_chat_model,
                evaluator_name,
                evaluation_prompt_template,
            )

Running evaluation for chunk:200_embeddings:thenlper~gte-small_rerank:True_reader-model:open-chat:
Loading knowledge base embeddings...
Running RAG...


  0%|                                                     | 0/6 [00:00<?, ?it/s]

=> Retrieving documents...


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 79.45it/s][A


=> Generating answer...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<|system|>
Answer the question in french only using the following french context to help:

R : Non. Les figurines de Cavalerie ne reçoivent pas de bonus en 
chargeant un ennemi qui défend une barrière. 

Q  :  Si  une  figurine  avec  la  Règle  Spéciale Empalement  charge 
une figurine de Cavalerie et tue la monture pendant la charge, 
bénéficie-t-elle de ses bonus de charge étant donné qu’elle est 
désormais en combat avec une figurine d’Infanterie ?

R : Non car elle a chargé une figurine de Cavalerie à l’origine.

Q : Si une figurine montée sur un Chameau de guerre entre en 
contact  avec  plusieurs  figurines  en  même  temps,  est-ce  que 
chacune d’entre elle reçoit une touche d’Empalement ?

R : Non, une seule figurine recevra une touche. Pour enchaîner 
les Empalements, le Chameau doit empaler et tuer les figurines 
une

 17%|███████▌                                     | 1/6 [00:06<00:30,  6.01s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m
=> Retrieving documents...


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 80.59it/s][A


=> Generating answer...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<|system|>
Answer the question in french only using the following french context to help:

La charge de cavalerie
Le  plus  grand  des  avantages  à  monter  une  monture  dans  une 
bataille  est  qu’un  combattant  sur  une  puissante  monture  est 
très difficile à arrêter. Il existe deux règles pour représenter ça. 
Si  une  figurine  de  Cavalerie  charge  une  figurine  d’Infanterie, 
elle  reçoit  deux  bonus  spéciaux  pour  le  combat  suivant  :  une 
attaque supplémentaire et la possibilité de projeter au sol. 

Elles  reçoivent  ces  bonus  quelque  soit  le  nombre  de  figurines 
chargées,  tant  qu’elles  sont  toutes  des  figurines  d’Infanterie. 
Ces bonus continuent de s’appliquer même si d’autres figurines 
d’Infanterie ennemies contre-chargent la figurine de Cavalerie.

Les armes de maître
Une figurine qui u

 33%|███████████████                              | 2/6 [00:13<00:26,  6.64s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m
=> Retrieving documents...


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 55.96it/s][A


=> Generating answer...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<|system|>
Answer the question in french only using the following french context to help:

Les armes de tir

Une  arme  de  tir  est  utilisée  pour  faire 
des attaques à distance. Comme pour les armes 
de combat, elles peuvent être de toutes formes 
et de toutes tailles. Si une figurine possède une 
arme  de  tir,  cela  sera  listé  dans  son  profil.  Si 
une  figurine  est  équipée  avec  plusieurs  armes 
de tir, le joueur contrôlant la figurine doit décla-
rer quelle arme est utilisée avant de faire son jet 
pour Toucher. 

Arcs
Le terme Arc couvre une grande gamme d’armes 
incluant les arcs humains, les arcs elfiques, les 
arcs  nains  et  les  grands  arcs,  mais  comme  ils 
fonctionnent  tous  de  la  même  manière,  nous 
avons  un  corps  de  règles  simples  pour  les  re-
présenter.  Les  différences  majeures  en

 50%|██████████████████████▌                      | 3/6 [00:23<00:25,  8.34s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m
=> Retrieving documents...


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 54.54it/s][A


=> Generating answer...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<|system|>
Answer the question in french only using the following french context to help:

combat et ne peuvent plus bouger pour le reste du tour. 

Il est possible qu’une figurine qui était déjà engagée en combat 

se  retrouve  soudainement  désengagée  (car  la  figurine  qui  a 

chargé a été tué par une arme de jet ou un pouvoir magique). Si 

cela  arrive  et  que  la  figurine  fraîchement  désengagée  n’a  pas 

encore  eu  l’opportunité  de  bouger  (et  qu’elle  peut  encore  le 

faire), elle peut le faire. 

Il est tout à fait possible que selon l’ordre dans lequel vous bou-

gez  vos  figurines,  une  figurine  qui  ne  pouvait  pas  charger  au 

début de la phase puisse finalement le faire. Cela rend l’ordre 

des mouvements extrêmement important. Les figurines peuvent 

se bloquer entre elles, ou bien si l’ordre 

 67%|██████████████████████████████               | 4/6 [00:34<00:18,  9.47s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m
=> Retrieving documents...


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 54.52it/s][A


=> Generating answer...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<|system|>
Answer the question in french only using the following french context to help:

- Refouler de Smaug, 

- Réduis’y en Bouillie de Tom le Troll

Q  :  Combien  d’Actions  Héroïques  un  Héros  peut-il  déclarer  par 
tour : une par tour ou une par phase ?

R : Une par phase.

Q  :  Lorsque  les  deux  joueurs  annoncent  à  tour  de  rôle  ne  pas 
(ou  ne  plus)  avoir  d’Action  Héroïque  à  déclarer,  le  premier 
joueur  peut-il  changer  d’avis  et  décider  de  déclarer  une  Action 
Héroïque ?

R : Non. Si les deux joueurs passent, la phase a lieu sans qu’aucun 
joueur ne puisse plus déclarer l’utilisation d’une Action Héroïque.

Q  :  Si  une  figurine  avec  la  Règle  Spéciale Maître Stratège  (X+) 
essaye  de  copier  une  Action  Héroïque  mais  rate  son  jet,  peut-
elle  dépenser  un  point  de  Puissance

 83%|█████████████████████████████████████▌       | 5/6 [00:40<00:08,  8.19s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m
=> Retrieving documents...


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.

100%|█████████████████████████████████████████████| 1/1 [00:00<00:00, 57.13it/s][A


=> Generating answer...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<|system|>
Answer the question in french only using the following french context to help:

La variété des Héros dans ce jeu est très vaste. On va du simple Capitaine de Gobelinville à Aragorn, Roi Elessar en passant par Sauron. Vous pouvez donc vous faire plaisir et jouer le héros qui vous plaît le plus! Qui n’a pas rêver d’incarner Theoden lors de la charge du Pelennor ou encore Azog lors de la Bataille des 5 Armées.

Les

Héros apportent également à votre armée des caractéristiques héroïques qui leur permettent de faire des actions dont nul autre guerrier ne serait capable. Cela se traduit par les points de puissance dans son profil que vous pouvez dépenser pendant une partie.

Tous les héros ont accès à trois

actions héroïques communes :

L’élan héroïque qui vous fait déplacer avant votre adversaire;

Le tir héroïque qui vou

100%|█████████████████████████████████████████████| 6/6 [00:49<00:00,  8.21s/it]



[1m> Finished chain.[0m

[1m> Finished chain.[0m
Running evaluation...


100%|█████████████████████████████████████████████| 6/6 [00:11<00:00,  2.00s/it]


Running evaluation for chunk:200_embeddings:thenlper~gte-small_rerank:False_reader-model:open-chat:
Loading knowledge base embeddings...
Running RAG...


  0%|                                                     | 0/6 [00:00<?, ?it/s]

=> Retrieving documents...


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


=> Generating answer...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<|system|>
Answer the question in french only using the following french context to help:

pas les bonus de charge de cavalerie (en incluant les bonus de la lance de cavalerie) lorsqu’elles chargent 

le défenseur d’une barrière. Souvenez-vous que la barrière doit faire au moins la moitié de la taille de 

l’attaquant (monture incluse, donc) pour qu’elle compte comme barrière. 

Cavalerie et Pouvoirs Magiques

Si une figurine utilise un pouvoir magique contre une figurine 

de  Cavalerie,  l’ensemble  de  la  figurine  est  considérée  comme 

étant la cible du pouvoir. Cela signifie que soit le cavalier soit 

la monture peut utiliser de la Volonté pour résister au Pouvoir 

Magique, et si le pouvoir est réussi, l’ensemble de la figurine en 

subit les effets, à moins que le contraire soit spécifié. 

Cavalerie et seuil de démo

 17%|███████▌                                     | 1/6 [00:06<00:30,  6.10s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m
=> Retrieving documents...


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


=> Generating answer...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<|system|>
Answer the question in french only using the following french context to help:

La charge de cavalerie
Le  plus  grand  des  avantages  à  monter  une  monture  dans  une 
bataille  est  qu’un  combattant  sur  une  puissante  monture  est 
très difficile à arrêter. Il existe deux règles pour représenter ça. 
Si  une  figurine  de  Cavalerie  charge  une  figurine  d’Infanterie, 
elle  reçoit  deux  bonus  spéciaux  pour  le  combat  suivant  :  une 
attaque supplémentaire et la possibilité de projeter au sol. 

Elles  reçoivent  ces  bonus  quelque  soit  le  nombre  de  figurines 
chargées,  tant  qu’elles  sont  toutes  des  figurines  d’Infanterie. 
Ces bonus continuent de s’appliquer même si d’autres figurines 
d’Infanterie ennemies contre-chargent la figurine de Cavalerie.

Ces  bonus  ne  s’appliquent  pas  lor

 33%|███████████████                              | 2/6 [00:13<00:27,  6.76s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m
=> Retrieving documents...


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


=> Generating answer...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<|system|>
Answer the question in french only using the following french context to help:

Tableau des Armes de Tir

Nom

Arbalète

Arc

Arc Court

Arc d’Esgaroth

Arc Elfique

Arc Long

Arc Long Nain

Arc Nain

Arc Orque

Arc Uruk-haï

Arme de Jet

Fronde

Grand Arc

Javelot

Sarbacane

Portée
24’’

Force
4

24’’

18’’

24’’

24’’

24’’

24’’

18’’

18’’

18’’

6’’

12’’

24’’

8’’

12’’

2

2

3

3

3

2

3

2

3

3

1

4

3

2

70

 

Arbalètes
Une  figurine  avec  une  Arbalète  ne  peut  pas  tirer  du 
tout dans un tour où elle s’est déplacée.

Armes de Jet
Une figurine avec une Arme de Jet peut la lancer une 
fois  par  tour  pendant  la  phase  de  Tir  même  si  elle 
s’est déplacée de plus de la moitié de son Mouvement. 
Une figurine ne peut pas combattre avec ses Armes de 
Jet pendant la phase de Combat.

Les Armes d

 50%|██████████████████████▌                      | 3/6 [00:19<00:19,  6.57s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m
=> Retrieving documents...


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


=> Generating answer...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<|system|>
Answer the question in french only using the following french context to help:

combat et ne peuvent plus bouger pour le reste du tour. 

Il est possible qu’une figurine qui était déjà engagée en combat 

se  retrouve  soudainement  désengagée  (car  la  figurine  qui  a 

chargé a été tué par une arme de jet ou un pouvoir magique). Si 

cela  arrive  et  que  la  figurine  fraîchement  désengagée  n’a  pas 

encore  eu  l’opportunité  de  bouger  (et  qu’elle  peut  encore  le 

faire), elle peut le faire. 

Il est tout à fait possible que selon l’ordre dans lequel vous bou-

gez  vos  figurines,  une  figurine  qui  ne  pouvait  pas  charger  au 

début de la phase puisse finalement le faire. Cela rend l’ordre 

des mouvements extrêmement important. Les figurines peuvent 

se bloquer entre elles, ou bien si l’ordre 

 67%|██████████████████████████████               | 4/6 [00:29<00:15,  7.89s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m
=> Retrieving documents...


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


=> Generating answer...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<|system|>
Answer the question in french only using the following french context to help:

Les joueurs peuvent ainsi continuer à déclarer des Actions Héroïques de cette manière, jusqu’à ce qu’ils 
soient satisfaits.

Veuillez noter que parfois, le choix d’un joueur de faire ou non une Action Héroïque poussera l’autre 
joueur à répondre avec une Action Héroïque de son choix. Vous pouvez toujours déclarer une Action 
Héroïque si vous avez un Héros disponible avec de la Puissance – ce que vous ne pouvez pas faire par 
contre, c’est revenir en arrière pour changer ou annuler une Action Héroïque déjà déclarée.

Si les deux joueurs souhaitent réaliser des Actions Héroïques avec leurs Héros pendant la même phase, 
ils doivent nommer un Héros à tour de rôle, en commençant par le joueur qui n’a pas l’initiative. Déclarez 
l’Action Héroïq

 83%|█████████████████████████████████████▌       | 5/6 [00:36<00:07,  7.47s/it]


[1m> Finished chain.[0m

[1m> Finished chain.[0m
=> Retrieving documents...


0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.


=> Generating answer...


[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3m
<|system|>
Answer the question in french only using the following french context to help:

Quelques Actions Héroïques interfèrent avec l’ordre d’initiative qui a été déterminé. Par exemple, si un 
Héros déclare un Combat Héroïque, ce combat est réalisé en premier. Si plusieurs Héros du même camp 
souhaitent faire une même Action Héroïque de ce genre, le joueur qui les contrôle choisit l’ordre des ac-
tions. Si plusieurs Héros des deux camps sont dans ce cas, suivez le processus suivant :
1- Chaque joueur nomme un Héros pour faire l’Action Héroïque, qui perd un point de Puissance comme 
décrit ci-dessus.
2- Le joueur avec l’initiative jette 1D6. Sur un 1-3, le joueur du Mal réalise la première Action Héroïque. 
Sur un 4-6, c’est le joueur du Bien qui commence.
3- Les joueurs alternent ensuite les Actions Héroïques jusqu’à ce qu’i

100%|█████████████████████████████████████████████| 6/6 [00:44<00:00,  7.39s/it]



[1m> Finished chain.[0m

[1m> Finished chain.[0m
Running evaluation...


100%|█████████████████████████████████████████████| 6/6 [00:12<00:00,  2.06s/it]


In [122]:
import glob

outputs = []
for file in glob.glob("./output/*.json"):
    output = pd.DataFrame(json.load(open(file, "r")))
    output["settings"] = file
    outputs.append(output)
result = pd.concat(outputs)

In [123]:
outputs

[                                                                                                             question  \
 0              Les figurines de Cavalerie reçoivent-elles un bonus en chargeant un ennemi qui défend une barrière ?\n   
 1  Quels sont les bonus spéciaux qu'une figurine de Cavalerie reçoit lorsqu'elle charge une figurine d'Infanterie ?\n   
 2                         Combien d'unités équipées d'arcs et/ou d'arbalètes maximum une armée peut-elle contenir ?\n   
 3                                                         Comment une figurine peut-elle se désengager d'un combat?\n   
 4                                 Combien d'actions héroïques un héros peut-il déclarer par tour selon les règles ?\n   
 5                                               Quelles sont les trois actions héroïques communes à tous les héros?\n   
 
                                                                                                                                                