## Load API Keys

In [1]:
# Load variables from a local .env file so the API keys can be read from
# os.environ by the cells further down.
from dotenv import load_dotenv
load_dotenv()

True

## Loading the document

In [3]:
from langchain_community.document_loaders import PyPDFLoader

# Path of the PDF that serves as the knowledge source for this notebook.
PDF_PATH = "Sample.pdf"

# Read the PDF and keep the loaded LangChain documents in `data`.
loader = PyPDFLoader(PDF_PATH)
data = loader.load()

## Splitting/Chunking the document

In [4]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking parameters. Sizes are measured with `len`, i.e. in characters.
CHUNK_SIZE = 1000
CHUNK_OVERLAP = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
    length_function=len,
    add_start_index=True,
)

# Break the loaded documents into overlapping chunks ready for embedding.
texts = text_splitter.split_documents(data)

## Initialize Pinecone Client and the OpenAI embedding model

In [5]:
import os

from langchain_openai.embeddings import OpenAIEmbeddings
from pinecone import Pinecone

# Name of the Pinecone index used by this notebook (assumed to already exist).
INDEX_NAME = "ragchain-db"

# Both clients take their credentials from environment variables; a missing
# variable raises KeyError here rather than failing later.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ["OPENAI_API_KEY"])
pc = Pinecone(api_key=os.environ["PINECONE_API_KEY"])

# Open a handle to the index.
index = pc.Index(INDEX_NAME)

# Optional reset: uncomment to delete all vectors currently in the index.
# index.delete(delete_all=True)

  from tqdm.autonotebook import tqdm


## Insert embeddings into Pinecone and set up a retriever

In [6]:
# Import the Pinecone vector store from langchain_community, the package the
# rest of this notebook already uses; the `langchain.vectorstores` path is a
# deprecated re-export of the same class.
from langchain_community.vectorstores import Pinecone as langpc

# Embed every chunk in `texts` with `embeddings` and store the vectors in the
# "ragchain-db" index.
# NOTE(review): re-running this cell presumably uploads the chunks again —
# confirm whether duplicate vectors matter before re-executing.
vectordb = langpc.from_documents(texts, embeddings, index_name="ragchain-db")

In [7]:
# Expose the vector store as a retriever; no arguments are passed, so the
# library defaults apply. The conversational chain below consumes it.
retriever = vectordb.as_retriever()

## Initialize LLM

In [8]:
from langchain_openai import ChatOpenAI

# Chat model that generates the answers for the retrieval chain.
LLM_MODEL = "gpt-3.5-turbo"

llm = ChatOpenAI(model=LLM_MODEL)

AIMessage(content="As an AI language model, I don't have access to personal information about individuals unless it has been shared with me during our conversation. Therefore, I don't have any information about a specific individual named Lorna Alvarado and the skills she may possess. Skills can vary greatly depending on a person's background, education, and experience.")

## Initialize Memory and Chain

In [9]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Conversation history is kept under the "chat_history" key and returned as
# message objects rather than a single string.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

# Combine the LLM, the retriever and the memory into one conversational chain.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

## Run inference with the LLM

In [10]:
# Ask a question about the document. The chain call is the last expression,
# so the cell displays the response dict (question, chat_history, answer).
query = "Can you tell me the invoice number?"
chain_inputs = {"question": query}
chain.invoke(chain_inputs)

{'question': 'Can you tell me the invoice number?',
 'chat_history': [HumanMessage(content='Can you tell me the invoice number?'),
  AIMessage(content='Yes, the invoice number is INV-2024-001.')],
 'answer': 'Yes, the invoice number is INV-2024-001.'}