# RetrievalQA

## Imports

In [46]:
import os
from dotenv import load_dotenv
from langchain.globals import set_debug
from langchain.chains import RetrievalQA
from langchain_openai import ChatOpenAI
from langchain_openai import OpenAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import CharacterTextSplitter

# Load variables from a local .env file into the process environment
# (presumably the source of OPENAI_API_KEY, which is read from the
# environment when the LLM is created -- confirm).
load_dotenv()
# Enable LangChain's global debug mode so every chain step logs its inputs;
# this is what produces the [chain/start] traces when the QA chain is invoked.
set_debug(True)

## Build and query the retrieval QA chain

In [47]:
# Chat model used to answer questions from the retrieved context.
# The API key is taken from the environment rather than written in the notebook.
llm = ChatOpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
    model="gpt-3.5-turbo",
    temperature=0.5,
)

In [48]:
# Split on newlines into small chunks; consecutive chunks overlap so that text
# straddling a boundary (e.g. the e-mail line) appears in both neighbours.
splitter = CharacterTextSplitter(separator='\n', chunk_size=200, chunk_overlap=50)

# One loader per source document; add more PDFs here to index them as well.
loaders = [
    PyPDFLoader('assets/edy-resume.pdf')
]

# Flatten the per-loader chunk lists into one list of Documents in a single
# pass. A nested comprehension avoids the repeated list copying that
# `sum(list_of_lists, [])` performs on every addition.
texts = [chunk for loader in loaders for chunk in loader.load_and_split(splitter)]

[Document(metadata={'source': 'assets/edy-resume.pdf', 'page': 0}, page_content='Edyvalberty Alenquer  \nData Scientist | Machine Learning Engineer  \n \n \nContact  \nAddress: 4 Street, No 120 , Fortaleza – Ceará - BR \nPhone: +55 (85) 99977 -5684  \nE-mail: edyalenquer@gmail.com'),
 Document(metadata={'source': 'assets/edy-resume.pdf', 'page': 0}, page_content="E-mail: edyalenquer@gmail.com  \n \n \nSummary  \nI'm a Data Scientist with over 9 years of experience in predictive modeling, machine learning, and data analytics,"),
 Document(metadata={'source': 'assets/edy-resume.pdf', 'page': 0}, page_content='holding a master’s degree in Modeling and Quantitative Methods with a focus on Machine Learning. I specialize in'),
 Document(metadata={'source': 'assets/edy-resume.pdf', 'page': 0}, page_content='building high -performance ma chine learning models, including a default risk classification model optimized for'),
 Document(metadata={'source': 'assets/edy-resume.pdf', 'page': 0}, page_

In [49]:
# Embed every chunk with OpenAI's embedding model and index the resulting
# vectors in a FAISS store so they can be searched by similarity.
embeddings = OpenAIEmbeddings()
db = FAISS.from_documents(documents=texts, embedding=embeddings)

<langchain_community.vectorstores.faiss.FAISS at 0x7ff080bec790>

In [50]:
# Connect the LLM to the vector store: the retriever fetches the chunks most
# similar to the query and the chain hands them to the LLM as context.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever())

question = "What course(s) did Edyvalberty complete in 2022?"

# The chain reads the user question from the "query" key of its input dict.
response = qa_chain.invoke({"query": question})

[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA] Entering Chain run with input:
[0m{
  "query": "What course(s) did Edyvalberty complete in 2022?"
}
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain] Entering Chain run with input:
[0m[inputs]
[32;1m[1;3m[chain/start][0m [1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain] Entering Chain run with input:
[0m{
  "question": "What course(s) did Edyvalberty complete in 2022?",
  "context": "- Reinforcement Learning (Alberta University) - 2022  \n- Machine Learning (Stanford University) – 2022  \n- Statistical Formation with Python (Alura) – 2021\n\nEdyvalberty Alenquer  \nData Scientist | Machine Learning Engineer  \n \n \nContact  \nAddress: 4 Street, No 120 , Fortaleza – Ceará - BR \nPhone: +55 (85) 99977 -5684  \nE-mail: edyalenquer@gmail.com\n\n- Artificial Intelligence Certification (Huawei) – 2021  \n- Data Science Formation (Alura) – 2021  \n- Python for Data Science (IBM) –

In [51]:
# The chain's response is a dict; the generated answer text is under 'result'.
# print() is used so the answer's line breaks render as separate lines.
print(response['result'])

Edyvalberty completed the following course(s) in 2022:
- Reinforcement Learning (Alberta University)
- Machine Learning (Stanford University)
