In [None]:
%%bash

# Print the NVIDIA GPU status report for this runtime.
nvidia-smi
..

In [None]:
import logging

# Root logger is configured at WARNING; only the "haystack" logger is raised
# to INFO so its progress messages are shown.
logging.basicConfig(
    level=logging.WARNING,
    format="%(levelname)s - %(name)s -  %(message)s",
)
haystack_logger = logging.getLogger("haystack")
haystack_logger.setLevel(logging.INFO)

In [None]:
from google.colab import drive
# Mount the user's Google Drive at /content/drive.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
cd drive/MyDrive/hacktoberfest_ploomber

/content/drive/MyDrive/hacktoberfest_ploomber


In [None]:
%%bash

pip install --upgrade pip
# Quote the requirement so the shell cannot interpret the [extras] brackets
# as a pathname-expansion pattern.
pip install "farm-haystack[colab,inference,faiss]"


# Creación del FAISS document store

In [None]:
import os
from haystack import Pipeline
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import PreProcessor,EmbeddingRetriever
from haystack.utils import convert_files_to_docs

# Remove artifacts left by a previous run so the store is rebuilt from scratch.
# (faiss_document_store.db is presumably the database file the document store
# creates by default - confirm against the Haystack version in use.)
for stale_artifact in ("faiss_document_store.db", "my_index.faiss", "my_config.json"):
    if os.path.exists(stale_artifact):
        os.remove(stale_artifact)

# Convert every file under ./Sentencias_test into Haystack documents.
all_docs = convert_files_to_docs(dir_path="./Sentencias_test")

# Cleaning and splitting configuration: 100-word chunks, whitespace and empty
# lines cleaned, header/footer removal disabled.
preprocessor = PreProcessor(
    split_by="word",
    split_length=100,
    clean_empty_lines=True,
    clean_whitespace=True,
    clean_header_footer=False,
)

# Flat FAISS index plus the embedding retriever used to fill it.
document_store = FAISSDocumentStore(faiss_index_factory_str="Flat")
retriever = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
)

# Split the documents, store them, compute their embeddings and persist the
# index and its config so later cells can reload them.
docs = preprocessor.process(all_docs)
document_store.write_documents(docs)
document_store.update_embeddings(retriever)
document_store.save(index_path="my_index.faiss", config_path="my_config.json")


INFO:haystack.utils.preprocessing:Converting Sentencias_test/T-001-22.txt
INFO:haystack.utils.preprocessing:Converting Sentencias_test/T-002-22.txt
INFO:haystack.utils.preprocessing:Converting Sentencias_test/T-003-22.txt
INFO:haystack.utils.preprocessing:Converting Sentencias_test/T-004-22.txt
INFO:haystack.utils.preprocessing:Converting Sentencias_test/T-005-22.txt
INFO:haystack.utils.preprocessing:Converting Sentencias_test/T-006-22.txt
INFO:haystack.utils.preprocessing:Converting Sentencias_test/T-007-22.txt
INFO:haystack.utils.preprocessing:Converting Sentencias_test/T-008-22.txt
INFO:haystack.utils.preprocessing:Converting Sentencias_test/T-009-22.txt
INFO:haystack.utils.preprocessing:Converting Sentencias_test/T-010-22.txt
INFO:haystack.utils.preprocessing:Converting Sentencias_test/T-011-22.txt
INFO:haystack.utils.preprocessing:Converting Sentencias_test/T-012-22.txt
INFO:haystack.utils.preprocessing:Converting Sentencias_test/T-013-22.txt
INFO:haystack.utils.preprocessing:Conv

Batches:   0%|          | 0/313 [00:00<?, ?it/s]

Documents Processed:  95%|█████████▌| 10000/10475 [02:17<00:06, 72.49 docs/s]

Batches:   0%|          | 0/15 [00:00<?, ?it/s]

Documents Processed: 20000 docs [02:24, 138.26 docs/s]


Verificación rápida de la carga del document store

In [None]:
import os
from haystack import Pipeline
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import EmbeddingRetriever, PromptNode, PromptTemplate, AnswerParser

# Quick check that the persisted FAISS index can be loaded back from disk.
if os.path.exists("my_index.faiss"):
    document_store = FAISSDocumentStore.load(index_path="my_index.faiss", config_path="my_config.json")
    print("encontrado faiss ds")
else:
    # Report the negative outcome as well; a silent cell is indistinguishable
    # from one that was never executed.
    print("no encontrado faiss ds")

# Ejecución de pregunta
- Carga del document store creado en la etapa anterior

In [None]:
import os
from haystack import Pipeline
from haystack.document_stores import FAISSDocumentStore
from haystack.nodes import EmbeddingRetriever, PromptNode, PromptTemplate, AnswerParser

from getpass import getpass

# Load the FAISS document store persisted by the index-building cell.
# Fail fast when a required file is missing: otherwise `document_store` would
# be undefined (or a stale object left over from an earlier cell) when the
# retriever is created below.
for required_path in ("my_index.faiss", "my_config.json"):
    if not os.path.exists(required_path):
        raise FileNotFoundError(
            f"{required_path} not found; run the document store creation cell first"
        )
document_store = FAISSDocumentStore.load(index_path="my_index.faiss", config_path="my_config.json")

# Prompt sent to the LLM: the retrieved documents are joined into the
# "Related text" section and the user question fills {query}.
rag_prompt = PromptTemplate(
    prompt="""Synthesize a comprehensive answer from the following text for the given question.
                             Provide a clear and concise response that summarizes the key points and information presented in the text.
                             Your answer should be in your own words and be no longer than 50 words.
                             \n\n Related text: {join(documents)} \n\n Question: {query} \n\n Answer:""",
    output_parser=AnswerParser(),
)

# Never hardcode credentials in the notebook: read the key from the
# environment, falling back to an interactive prompt that is not echoed.
openai_api_key = os.environ.get("OPENAI_API_KEY") or getpass("OpenAI API key: ")

model = "gpt-4"

prompt_node = PromptNode(
    model_name_or_path=model, api_key=openai_api_key, default_prompt_template=rag_prompt
)

# Same embedding model that was used when the index was built.
retriever = EmbeddingRetriever(
    document_store=document_store,
    embedding_model="sentence-transformers/multi-qa-mpnet-base-dot-v1",
)

# Query -> Retriever -> PromptNode
query_pipeline = Pipeline()
query_pipeline.add_node(component=retriever, name="Retriever", inputs=["Query"])
query_pipeline.add_node(component=prompt_node, name="PromptNode", inputs=["Retriever"])

respuesta = query_pipeline.run(query="¿cuando se viola el derecho al trabajo?")

print(respuesta)

INFO:haystack.modeling.utils:Using devices: CUDA:0 - Number of GPUs: 1
INFO:haystack.nodes.retriever.dense:Init retriever using embeddings of model sentence-transformers/multi-qa-mpnet-base-dot-v1


Batches:   0%|          | 0/1 [00:00<?, ?it/s]

{'answers': [<Answer {'answer': 'El derecho al trabajo se viola cuando el empleado es despedido sin una justa causa, o es obligado a realizar un trabajo contra su voluntad. También se viola este derecho cuando se suspenden los contratos laborales sin la debida autorización del Inspector de Trabajo, lo cual pone al trabajador en desventaja.', 'type': 'generative', 'score': None, 'context': None, 'offsets_in_document': None, 'offsets_in_context': None, 'document_ids': ['1ad2774a408682784d5588eb111dbf63', '8513ef2c5e26671414b3debbcc847619', 'ebed880703baf25c117e980a03b68e63', '9369092f28758e01f7ad177818053246', '17238bf7423fad30e6b924750219ebb4', 'c8aea16a034ed3a9eb4585849c9ffb82', 'a06a098d70f704938a90b4454a6c08', '9690d6c978bcb8cb9277e1624fefcac6', 'bf8901965b671c22cd7e037a05472e43', '253da48cddf5075f51830b78fc96d8fb'], 'meta': {'prompt': 'Synthesize a comprehensive answer from the following text for the given question.\n                             Provide a clear and concise response 