## Maestría en Inteligencia Artificial Aplicada (MNA)
### Proyecto Integrador
### Dra. Grettel Barceló Alonso / Dr. Carlos Alberto Villaseñor Padilla
### Avance 5. Implementación de GraphRAG
### Integrantes
- A01794457 - Iossif Moises Palli Laura
- A01793984 - Brenda Zurazy Rodríguez Pérez
- A01794630 - Jesús Ramseths Echeverría Rivera

In [19]:
# !pip install langchain_community
# !pip install sentence_transformers
# !pip install transformers
# !pip install datasets peft bitsandbytes
# !pip install -U bitsandbytes
# !pip install bert-score
# !pip install ragas

In [48]:
from IPython.display import display, Markdown
from langchain_community.vectorstores import SKLearnVectorStore
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from transformers import pipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import HuggingFacePipeline
from transformers import TrainingArguments, AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, pipeline
import re
from peft import LoraConfig, get_peft_model, PeftModel
from transformers import BitsAndBytesConfig
from datasets import Dataset
import torch
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

### 0. Init Conf

In [4]:
# Inicio de sesión en el Hub de Hugging Face
from huggingface_hub import login

# Token de huggingface
login('')

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


### 1. Carga de Embeddings

In [6]:
# Se usa este modelo por ser ligero en cuestión de recursos computacionales
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-MiniLM-L6-cos-v1")
persist_path = "./embeddings_db"
# Especificación de la VectorDB
vector_store = SKLearnVectorStore(embedding=embeddings, persist_path=persist_path, serializer='parquet')

### 2. Carga del Modelo

In [7]:
# Cargar un modelo de lenguaje preentrenado (Llama) para generación de texto
model_name = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = LlamaForCausalLM.from_pretrained(
    model_name,
    device_map='auto'
)

tokenizer_config.json:   0%|          | 0.00/54.5k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/878 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/20.9k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.46G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/189 [00:00<?, ?B/s]

In [9]:
# Cargar los adaptadores LoRa del Fine Tuning
lora_weights_path = "./llama-3.2-3B-FN/"

model = PeftModel.from_pretrained(
    model,
    lora_weights_path,
    device_map="auto",
)

In [10]:
# Definir pipeline para la generación de texto
generate_text = pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15,
)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausal

In [11]:
# Configurar el modelo de lenguaje dentro del pipeline de Hugging Face
llm = HuggingFacePipeline(pipeline=generate_text)

In [12]:
retriever = vector_store.as_retriever(search_kwargs={"k": 5})

In [13]:
prompt_template = """
Utiliza la siguiente información para responder la pregunta al final de manera muy corta en un solo reglón.

Información:
{context}

Pregunta: {question}

Respuesta:
"""

In [14]:
prompt = PromptTemplate(
    input_variables=["context","question"],
    template=prompt_template
)

In [34]:
# Crear la cadena de preguntas y respuestas (QA) basada en el sistema RAG
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={
        "prompt": prompt,
    }
)

### Evaluación

In [80]:
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_precision, context_recall, context_entity_recall, answer_similarity, answer_correctness
import openai
import os
key = ""
os.environ["OPENAI_API_KEY"] = key


In [75]:
test_df = pd.read_csv('./test_q_a.csv')

In [60]:
def extract_answer(result):
    output = result['result']

    if "Respuesta:" in output:
        answer = output.split("Respuesta:")[-1].strip()
    else:
        answer = output.strip()

    return answer

In [76]:
results = []
contexts = []
for question in test_df['question']:
    result = qa_chain({"query": question})
    answer = extract_answer(result)
    results.append(answer)
    sources = result["source_documents"]
    contents = []
    for i in range(len(sources)):
        contents.append(sources[i].page_content)
    contexts.append(contents)

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_to

In [77]:
test_set = {
    "question": test_df['question'].tolist(),
    "answer": results,
    "contexts": contexts,
    "ground_truth": test_df['answer'].tolist()
}

In [78]:
dataset = Dataset.from_dict(test_set)
score = evaluate(dataset,metrics=[faithfulness, answer_relevancy, context_precision, context_recall, context_entity_recall, answer_similarity, answer_correctness])
score_df = score.to_pandas()

Evaluating:   0%|          | 0/420 [00:00<?, ?it/s]

ERROR:ragas.executor:Exception raised in Job[228]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-BEvW3JZX1IsZQGvlDeqhmyUz on tokens per min (TPM): Limit 200000, Used 199160, Requested 5892. Please try again in 1.515s. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:ragas.executor:Exception raised in Job[217]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[252]: RateLimitError(Error code: 429 - {'error': {'message': 'Rate limit reached for gpt-4o-mini in organization org-BEvW3JZX1IsZQGvlDeqhmyUz on tokens per min (TPM): Limit 200000, Used 195189, Requested 6509. Please try again in 509ms. Visit https://platform.openai.com/account/rate-limits to learn more.', 'type': 'tokens', 'param': None, 'code': 'rate_limit_exceeded'}})
ERROR:ragas.executor:Exception raised in Job[298]: RateLimitError(Error code: 429 - {'error': {

In [79]:
score_df[['faithfulness', 'answer_relevancy', 'context_precision', 'context_recall',
          'context_entity_recall', 'semantic_similarity', 'answer_correctness']].mean(axis=0)

Unnamed: 0,0
faithfulness,0.419935
answer_relevancy,0.83545
context_precision,0.507431
context_recall,0.454678
context_entity_recall,0.184259
semantic_similarity,0.862158
answer_correctness,0.409171
