## Maestría en Inteligencia Artificial Aplicada (MNA)
### Proyecto Integrador
### Dra. Grettel Barceló Alonso / Dr. Carlos Alberto Villaseñor Padilla
### Avance 5. Implementación de GraphRAG
### Integrantes
- A01794457 - Iossif Moises Palli Laura
- A01793984 - Brenda Zurazy Rodríguez Pérez
- A01794630 - Jesús Ramseths Echeverría Rivera

In [37]:
# !pip install langchain_community
# !pip install sentence_transformers
# !pip install transformers
# !pip install datasets peft bitsandbytes
# !pip install -U bitsandbytes
# !pip install bert-score
# !pip install ragas
# !pip install langchain_experimental
# !pip install neo4j

In [76]:
from IPython.display import display, Markdown
from langchain_community.vectorstores import SKLearnVectorStore, Neo4jVector
from langchain_community.graphs import Neo4jGraph
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.chains import RetrievalQA
from langchain.llms import LlamaCpp
from langchain.prompts import PromptTemplate
from langchain_experimental.graph_transformers import LLMGraphTransformer
from transformers import pipeline
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import HuggingFacePipeline
from transformers import TrainingArguments, AutoTokenizer, AutoModelForCausalLM, LlamaForCausalLM, pipeline
import re
from bert_score import score
from peft import LoraConfig, get_peft_model, PeftModel
from transformers import BitsAndBytesConfig
from datasets import Dataset
import torch
import pandas as pd
import warnings
import os
from dotenv import load_dotenv
warnings.filterwarnings('ignore')
load_dotenv()

True

### 0. Configuración inicial

In [32]:
# Log in to the Hugging Face Hub.
from huggingface_hub import login

# The access token is read from the HUGGING_KEY environment variable instead of
# being hardcoded in the notebook; a missing variable raises KeyError here.
login(os.environ["HUGGING_KEY"])

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /root/.cache/huggingface/token
Login successful


### 1. Carga de la Base de Grafos



In [33]:
# This embedding model is used because it is light on computational resources.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/multi-qa-MiniLM-L6-cos-v1")

In [38]:
# Attach to the Neo4j vector index named "document_index" using the embedding
# model defined above. `from_existing_index` is called, so the index is expected
# to have been created beforehand. Connection settings come from environment
# variables; a missing variable raises KeyError.
graph_db = Neo4jVector.from_existing_index(
    embeddings,
    url=os.environ["NEO4J_URI"],
    username=os.environ["NEO4J_USERNAME"],
    password=os.environ["NEO4J_PASSWORD"],
    index_name="document_index",
)

### 2. Carga del Modelo

In [39]:
# Load a pretrained language model (Llama) and its tokenizer for text generation.
# NOTE(review): no dtype argument is passed, so the library's default precision
# is used — confirm this fits the available memory.
model_name = "meta-llama/Llama-3.2-3B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = LlamaForCausalLM.from_pretrained(
    model_name,
    device_map='auto'
)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [40]:
# Load the LoRA adapters produced by fine-tuning from a local directory.
lora_weights_path = "./llama-3.2-3B-Instruct-FN/"

# NOTE: `model` is rebound to the adapter-wrapped model, so this cell is not
# idempotent — re-running it alone would wrap the already-wrapped model.
# Re-run the base-model loading cell first.
model = PeftModel.from_pretrained(
    model,
    lora_weights_path,
    device_map="auto",
)

In [42]:
# Build the text-generation pipeline around the fine-tuned model.
generate_text = pipeline(
    task='text-generation',
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=200,
    # temperature/top_p only take effect when sampling is enabled; state it
    # explicitly instead of relying on the checkpoint's generation config.
    do_sample=True,
    temperature=0.7,
    top_p=0.95,
    repetition_penalty=1.15,
    # An explicit pad token avoids the "Setting `pad_token_id` to `eos_token_id`"
    # warning that is otherwise emitted on every generation call.
    pad_token_id=tokenizer.eos_token_id,
)

The model 'PeftModelForCausalLM' is not supported for text-generation. Supported models are ['BartForCausalLM', 'BertLMHeadModel', 'BertGenerationDecoder', 'BigBirdForCausalLM', 'BigBirdPegasusForCausalLM', 'BioGptForCausalLM', 'BlenderbotForCausalLM', 'BlenderbotSmallForCausalLM', 'BloomForCausalLM', 'CamembertForCausalLM', 'LlamaForCausalLM', 'CodeGenForCausalLM', 'CohereForCausalLM', 'CpmAntForCausalLM', 'CTRLLMHeadModel', 'Data2VecTextForCausalLM', 'DbrxForCausalLM', 'ElectraForCausalLM', 'ErnieForCausalLM', 'FalconForCausalLM', 'FuyuForCausalLM', 'GemmaForCausalLM', 'Gemma2ForCausalLM', 'GitForCausalLM', 'GPT2LMHeadModel', 'GPT2LMHeadModel', 'GPTBigCodeForCausalLM', 'GPTNeoForCausalLM', 'GPTNeoXForCausalLM', 'GPTNeoXJapaneseForCausalLM', 'GPTJForCausalLM', 'JambaForCausalLM', 'JetMoeForCausalLM', 'LlamaForCausalLM', 'MambaForCausalLM', 'Mamba2ForCausalLM', 'MarianForCausalLM', 'MBartForCausalLM', 'MegaForCausalLM', 'MegatronBertForCausalLM', 'MistralForCausalLM', 'MixtralForCausal

In [43]:
# Wrap the Hugging Face pipeline so it can be used as the LLM of a LangChain chain.
llm = HuggingFacePipeline(pipeline=generate_text)

In [44]:
# k=5 is forwarded as a search kwarg (presumably the number of documents
# fetched per query — TODO confirm against the vector store documentation).
retriever = graph_db.as_retriever(search_kwargs={"k": 5}) # The graph store is used as the retriever

In [46]:
# Spanish prompt asking the model to answer the final question from the given
# context, very briefly and on a single line. `{context}` and `{question}` are
# the template placeholders; the text ends with the "Respuesta:" marker.
# NOTE(review): "reglón" looks like a typo for "renglón". It is left untouched
# because editing the prompt would change the model's output and the metrics.
prompt_template = """
Utiliza el siguiente contexto para responder la pregunta al final de manera muy corta en un solo reglón.

Contexto:
{context}

Pregunta: {question}

Respuesta:
"""

In [47]:
# Wrap the raw template string, declaring `context` and `question` as its
# input variables.
prompt = PromptTemplate(
    input_variables=["context","question"],
    template=prompt_template
)

In [48]:
# Build the question-answering (QA) chain on top of the GraphRAG system.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    # Source documents are requested as well: the evaluation loop reads
    # result["source_documents"] to build the contexts.
    return_source_documents=True,
    chain_type_kwargs={
        "prompt": prompt,
    }
)

### 3. Evaluación

#### 3.1 RAGAS

In [50]:
from ragas import evaluate
from ragas.metrics import answer_relevancy, answer_similarity

In [51]:
# Evaluation set; the cells below read its 'question' and 'answer' columns.
test_df = pd.read_csv('./test_q_a.csv')

In [52]:
def extract_answer(result):
    """Pull the answer text out of the content generated by the LLM.

    Args:
        result: Mapping whose 'result' entry holds the generated text.

    Returns:
        The text after the last "Respuesta:" marker with surrounding
        whitespace removed, or the whole stripped text when the marker
        does not occur.
    """
    generated = result['result']
    # rpartition leaves the full string in the last slot when the marker is absent.
    _, _, tail = generated.rpartition("Respuesta:")
    return tail.strip()

In [53]:
# Collect, for every test question, the predicted answer and the retrieved documents.
results = []   # extracted answer text, one entry per question
contexts = []  # page contents of the retrieved documents, one list per question
for question in test_df['question']:
    # `.invoke` replaces the deprecated direct call on the chain object.
    result = qa_chain.invoke({"query": question})
    results.append(extract_answer(result))
    contexts.append([doc.page_content for doc in result["source_documents"]])

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
Setting `pad_token_id` to `eos_token_id`:128001 for

In [67]:
# Assemble the fields needed for the evaluation: questions, predicted answers,
# retrieved contexts and the reference answers.
test_set = dict(
    question=test_df['question'].tolist(),
    answer=results,
    contexts=contexts,
    ground_truth=test_df['answer'].tolist(),
)

In [68]:
# Build a Dataset from the dictionary and score it with RAGAS on the
# answer_relevancy and answer_similarity metrics.
dataset = Dataset.from_dict(test_set)
get_score = evaluate(dataset, metrics=[answer_relevancy, answer_similarity])
# Convert the scores to a pandas DataFrame for aggregation.
score_df = get_score.to_pandas()

Evaluating:   0%|          | 0/40 [00:00<?, ?it/s]

In [69]:
# Column-wise mean of the two RAGAS metrics.
score_df.loc[:, ['answer_relevancy', 'semantic_similarity']].mean()

Unnamed: 0,0
answer_relevancy,0.771799
semantic_similarity,0.984231


#### 3.2 BERT-Score

In [81]:
def calculate_bert_score_avg(generated_responses, real_responses, model_name):
    """Average BERTScore of generated answers against reference answers.

    Args:
        generated_responses: Candidate texts produced by the system.
        real_responses: Reference texts the candidates are compared with.
        model_name: Label supplied by the caller; not read by this function.

    Returns:
        Tuple ``(f1, precision, recall)`` — F1 comes first — where each
        entry is the mean of that metric converted to a Python scalar.
    """
    # The scorer yields precision, recall and F1 from the comparison.
    bert_precision, bert_recall, bert_f1 = score(
        generated_responses, real_responses, lang='es', verbose=True
    )
    return tuple(
        metric.mean().item() for metric in (bert_f1, bert_precision, bert_recall)
    )

In [82]:
# Score the generated answers against the reference answers. The helper returns
# the averages in (F1, precision, recall) order, hence the unpacking order here.
f1_score, precision, recall = calculate_bert_score_avg(results, test_df['answer'].tolist(), "GraphRAG")
print(f"Puntaje F1 promedio: {f1_score:.4f}")
print(f"Puntaje de Precisión promedio: {precision:.4f}")
print(f"Puntaje de Recall promedio: {recall:.4f}")

calculating scores...
computing bert embedding.


  0%|          | 0/1 [00:00<?, ?it/s]

computing greedy matching.


  0%|          | 0/1 [00:00<?, ?it/s]

done in 0.08 seconds, 251.65 sentences/sec
Puntaje F1 promedio: 0.9148
Puntaje de Precisión promedio: 0.9121
Puntaje de Recall promedio: 0.9183
