# 1 SET UP

In [2]:
import os
from dotenv import load_dotenv, find_dotenv
# Load variables from whichever .env file find_dotenv() locates.
dotenv_path = find_dotenv()
_ = load_dotenv(dotenv_path)

# Indexing os.environ (rather than .get) raises KeyError right here if the
# key is not set.
openai_api_key = os.environ["OPENAI_API_KEY"]

In [3]:
from langchain_openai import ChatOpenAI

# Chat model shared by the chains built later in this notebook.
CHAT_MODEL_NAME = "gpt-4o-mini"
llm = ChatOpenAI(model=CHAT_MODEL_NAME)

# 2 App RAG Básica

In [4]:
import bs4
#from langchain import hub
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
# from langchain_chroma import Chroma
from langchain.vectorstores import FAISS
from langchain_community.document_loaders import TextLoader
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter


from langchain_community.document_loaders import PyPDFLoader

# Indexing pipeline: load the PDF, split it into chunks, embed the chunks
# and expose the resulting FAISS store as a retriever.
file_path = "docs/CV_Arktic_D.F.S..pdf"
loader = PyPDFLoader(file_path)
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

embeddings = OpenAIEmbeddings()
vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)
retriever = vectorstore.as_retriever()

# Instructions for the answering step. The trailing "{context}" is kept as a
# literal placeholder inside the string; it is not substituted here.
_answer_guidelines = "".join(
    [
        "You are an assistant for question-answering tasks. ",
        "Use the following pieces of retrieved context to answer ",
        "the question. If you don't know the answer, say that you ",
        "don't know. Use three sentences maximum and keep the ",
        "answer concise.",
    ]
)
system_prompt = _answer_guidelines + "\n\n" + "{context}"

# Prompt for the basic (history-less) chain: the system instructions
# followed by the user's "{input}".
system_message = ("system", system_prompt)
human_message = ("human", "{input}")
prompt = ChatPromptTemplate.from_messages([system_message, human_message])

In [5]:

# Combine-documents chain built from the LLM and the basic prompt.
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)

# Retrieval chain: pairs the retriever with the chain above.
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=question_answer_chain,
)

In [12]:
# Ask one question through the basic chain and print the "answer" entry of
# the result. (No trailing ';' is needed: an assignment displays nothing.)
output = rag_chain.invoke({"input": "What is this pdf about?"})
print(output["answer"])

The PDF outlines the academic background and professional experience of an individual, highlighting their qualifications in civil engineering and multiple certifications in Red Hat technologies related to container orchestration and cloud management. It details their work experience in telecommunications, focusing on system maintenance, quality optimization, and cloud solutions implementation. Additionally, it lists their technical skills and language proficiency.


In [13]:
# Follow-up question sent through the same basic chain. Only "input" is
# passed, so no earlier turn is supplied to the chain.
output = rag_chain.invoke({"input": "Cual era mi pregunta anterior?"})
print(output["answer"])

No sé cuál era tu pregunta anterior.


# 3 App RAG con historial de chat

In [None]:
from langchain_core.prompts import MessagesPlaceholder

# System instructions for the question-rewriting step: turn a follow-up
# question into a standalone one, without answering it.
_contextualize_parts = [
    "Given a chat history and the latest user question ",
    "which might reference context in the chat history, ",
    "formulate a standalone question which can be understood ",
    "without the chat history. Do NOT answer the question, ",
    "just reformulate it if needed and otherwise return it as is.",
]
contextualize_q_system_prompt = "".join(_contextualize_parts)

# Prompt for the rewriting step: system instructions, then the messages
# supplied under "chat_history", then the latest user "{input}".
contextualize_messages = [
    ("system", contextualize_q_system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
contextualize_q_prompt = ChatPromptTemplate.from_messages(contextualize_messages)

In [15]:

from langchain.chains import create_history_aware_retriever

# Retriever built from the LLM, the base retriever and the
# contextualization prompt.
history_aware_retriever = create_history_aware_retriever(
    llm=llm,
    retriever=retriever,
    prompt=contextualize_q_prompt,
)

In [16]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain

# Answering prompt: same system instructions as the basic chain, plus the
# messages supplied under "chat_history" before the user's "{input}".
qa_messages = [
    ("system", system_prompt),
    MessagesPlaceholder("chat_history"),
    ("human", "{input}"),
]
qa_prompt = ChatPromptTemplate.from_messages(qa_messages)

# NOTE: the two assignments below rebind `question_answer_chain` and
# `rag_chain`, which were first defined for the basic chain. From here on
# `rag_chain` is invoked with a "chat_history" entry alongside "input".
question_answer_chain = create_stuff_documents_chain(llm=llm, prompt=qa_prompt)
rag_chain = create_retrieval_chain(
    retriever=history_aware_retriever,
    combine_docs_chain=question_answer_chain,
)

In [17]:
from langchain_core.messages import AIMessage, HumanMessage

# First user question of the conversation.
question = "What is this article about?"

# Conversation so far; empty before the first turn.
chat_history = list()



In [18]:
# First turn: send the question together with the (still empty) history.
first_turn_inputs = {"input": question, "chat_history": chat_history}
ai_msg_1 = rag_chain.invoke(first_turn_inputs)

In [21]:
# Show the answer text of the first turn as the cell's output.
first_answer = ai_msg_1["answer"]
first_answer

"The article outlines the academic background, certifications, work experience, and technical skills of a professional in the field of system administration and container orchestration, particularly with OpenShift and various Red Hat certifications. It highlights the individual's expertise in managing systems, cloud solutions, and implementing automated processes in container environments. Additionally, it mentions their capabilities in communication, both in Spanish and English."

In [None]:
# Record the first question/answer pair so that later turns can refer to it.
first_exchange = [
    HumanMessage(content=question),
    AIMessage(content=ai_msg_1["answer"]),
]

# Re-running this cell must not append the same turn twice, so extend the
# list (in place, as before) only when it does not already end with this
# exchange.
if chat_history[-len(first_exchange):] != first_exchange:
    chat_history.extend(first_exchange)

In [23]:
# Second turn: a question that refers to the previous one, sent together
# with the recorded chat history.
second_question = "What was my previous question about?"
second_turn_inputs = {"input": second_question, "chat_history": chat_history}
ai_msg_2 = rag_chain.invoke(second_turn_inputs)

print(ai_msg_2["answer"])

Your previous question was asking for a summary or explanation of what the article is about.
