## Load API Keys

In [49]:
# Populate os.environ from a local .env file so API keys stay out of the
# notebook; later cells read OPENAI_API_KEY and PINECONE_API_KEY from os.environ.
from dotenv import load_dotenv
load_dotenv()

True

## Loading the document

In [51]:
from langchain_community.document_loaders import PyPDFLoader

# Source PDF; the relative path is resolved against the current working directory.
pdf_path = "Sample.pdf"

# Build the loader first, then read the PDF into `data` for the splitting step.
loader = PyPDFLoader(pdf_path)
data = loader.load()

## Splitting/Chunking the document

In [52]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking settings. Sizes are measured with `len`, i.e. in characters:
# chunks of at most 1000 characters, with 100 characters shared between
# neighbouring chunks. `add_start_index` asks the splitter to record where
# each chunk starts in its source document.
splitter_options = dict(
    chunk_size=1000,
    chunk_overlap=100,
    length_function=len,
    add_start_index=True,
)
text_splitter = RecursiveCharacterTextSplitter(**splitter_options)

# Split the loaded documents into the chunks that will be embedded.
texts = text_splitter.split_documents(data)

## Initialize Pinecone Client and the OpenAI embedding model

In [53]:
import os

from langchain_openai.embeddings import OpenAIEmbeddings
from pinecone import Pinecone

# Name of the Pinecone index this notebook reads from and writes to.
INDEX_NAME = "ragchain-db"
# Set to True to delete every vector in the index before ingesting again.
# NOTE(review): re-running the ingestion cell appears to add the same chunks
# a second time, so a reset may be wanted before a full re-run - confirm.
RESET_INDEX = False

# Both keys come from the environment; a missing key raises KeyError here
# rather than failing later inside a client call.
embeddings = OpenAIEmbeddings(openai_api_key=os.environ['OPENAI_API_KEY'])
pc = Pinecone(api_key=os.environ['PINECONE_API_KEY'])

# Check that the index exists before using it, and fail with a message that
# names the missing index and lists what is available.
existing_indexes = pc.list_indexes().names()
if INDEX_NAME not in existing_indexes:
    raise ValueError(
        f"Pinecone index {INDEX_NAME!r} not found; "
        f"available indexes: {existing_indexes}"
    )
index = pc.Index(INDEX_NAME)

# Optional cleanup, disabled by default so a normal run keeps existing vectors.
if RESET_INDEX:
    index.delete(delete_all=True)

## Insert embeddings into Pinecone and set up a retriever

In [54]:
# Import from langchain_community (already used for the PDF loader) instead of
# the deprecated `langchain.vectorstores` re-export. Aliased as `langpc` so it
# does not shadow the `Pinecone` client class imported from `pinecone`.
# NOTE(review): newer LangChain releases recommend `langchain_pinecone`
# instead; not adopted here because it would add a new dependency.
from langchain_community.vectorstores import Pinecone as langpc

# Embed every chunk in `texts` and store the vectors in the "ragchain-db" index.
# NOTE(review): running this cell again appears to insert the chunks a second
# time; clear the index first when re-ingesting - confirm.
vectordb = langpc.from_documents(texts, embeddings, index_name="ragchain-db")

In [55]:
# Wrap the vector store in a retriever using the library's default search
# settings (no search_type / search_kwargs are passed here).
retriever = vectordb.as_retriever()

## Initialize LLM

In [56]:
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo")

# Baseline: ask the bare model directly, with no retriever attached, to show
# what it answers without access to the indexed document.
baseline_question = "What skills do Lorna Alvarado have?"
llm.invoke(baseline_question)

AIMessage(content='As an AI language model, I do not have access to personal information about individuals unless it has been shared with me in the course of our conversation. I can provide general information about skills commonly associated with the name Lorna Alvarado. If you are referring to a specific individual, please provide more information.')

## Initialize Memory and Chain

In [59]:
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

# Conversation memory exposed to the chain under the "chat_history" key,
# returned as message objects rather than a single string.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
)

# Combine the chat model, the retriever and the memory into one
# conversational retrieval chain.
chain = ConversationalRetrievalChain.from_llm(
    llm=llm,
    retriever=retriever,
    memory=memory,
)

## Run inference with the chain

In [63]:
# The chain takes the user's question under the "question" key; the chat
# history is supplied by the attached memory.
query = "What is the authors expectation to this paper?"
chain_inputs = {"question": query}
chain.invoke(chain_inputs)

{'question': 'What is the authors expectation to this paper?',
 'chat_history': [HumanMessage(content='What is the paper all about?'),
  AIMessage(content='The paper discusses the quantization of a large language model called Mistral-7B-v0.1. The quantization process makes the language model accessible for smaller devices that rely on CPU-based processing. The paper explores the potential of using the quantized model for processing text information within PDFs. It also mentions the need for improving the PDF parsing and chunking process and fine-tuning the model for specific use cases before quantization. The paper concludes by highlighting the promising beginning of a locally hosted model operating exclusively on CPU and RAM.'),
  HumanMessage(content='What is Llama.cpp?'),
  AIMessage(content='Llama.cpp refers to an open-source library that is designed to run large language models like Llama and Alpaca on commodity hardware. It provides options for optimizations to make the models sm