# Import Libraries

In [1]:
import os
import PyPDF2
import openai
import uuid

import warnings
warnings.filterwarnings('ignore')

import pinecone
import tiktoken

from itertools import chain
from tqdm.auto import tqdm
from pinecone import Pinecone
from pinecone import ServerlessSpec
from Credentials import OPENAI_API_KEY, PINECONE_API_KEY

from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Pinecone as lang_pinecone
from langchain.embeddings import OpenAIEmbeddings
from langchain.chat_models import ChatOpenAI
from langchain.chains import RetrievalQA
from langchain.callbacks import get_openai_callback
from langchain.prompts import PromptTemplate

# Load Data

In [2]:
os.chdir('..')
curr_dir = (os.getcwd()).replace('\\', '/')
file_path = curr_dir + '/Dataset/Corrective RAG.pdf'

In [3]:
tokenizer = tiktoken.get_encoding('p50k_base')

# create the length function
def tiktoken_len(text):
    tokens = tokenizer.encode(
        text,
        disallowed_special=()
    )
    return len(tokens)

In [4]:
def read_pdf(document):
       
    loader = PyPDFLoader(file_path)
    pdf_text = loader.load()
    pdf_text[0].page_content = pdf_text[0].page_content.replace('\n', '')

    return pdf_text    
        
def pdf_to_chunks(text, 
                  chunk_size, 
                  overlap):
    
    split_text = RecursiveCharacterTextSplitter(chunk_size=chunk_size, 
                                                chunk_overlap=overlap, 
                                                length_function=tiktoken_len)
    docs = split_text.split_text(text[0].page_content)
    
    chunks = [str(doc) for doc in docs]
        
    return chunks

In [5]:
text = read_pdf(file_path)

In [18]:
text

[Document(page_content='Introduction to RAG  Retrieval -augmented generation (RAG) is an AI framework for improving the quality of  LLM -generated  responses by grounding the model on external sources of knowledge to supplement the LLM’s internal representation of information. Implementing RAG in an LLM -based question answering system has two main benefits: It ensures that the model has access to the  most current, reliable facts, and that users have access to the model’s sources, ensuring that its claims can be checked for accuracy and ultimately trusted.  Prior research has introduced retrieval techniques to incorporate relevant knowledge and augment  generation, as  exemplified  by retrieval  augmented  generation ( RAG). In this framework, the input to  models is augmented  by prepending  relevant  documents  that are retrieved  from  an external  knowledge  corpus .  While  RAG  serves  as a practicable complement to  LLMs, its effectiveness is contingent upon the relevance  and 

In [6]:
pdf_chunks = pdf_to_chunks(text, 220, 10)

In [7]:
token_counts = tiktoken_len(text[0].page_content)
token_counts

791

In [41]:
uid = str(uuid.uuid4())
uid

data = [
    {
        'id' : f'{uid}-{chunk_idx}',
        'text' : chunk
    } for chunk_idx, chunk in enumerate(pdf_chunks)
]
data

[{'id': '07ff60c0-87ee-49d6-aabe-681ed759cd7f-0',
  'text': 'Introduction to RAG  Retrieval -augmented generation (RAG) is an AI framework for improving the quality of  LLM -generated  responses by grounding the model on external sources of knowledge to supplement the LLM’s internal representation of information. Implementing RAG in an LLM -based question answering system has two main benefits: It ensures that the model has access to the  most current, reliable facts, and that users have access to the model’s sources, ensuring that its claims can be checked for accuracy and ultimately trusted.  Prior research has introduced retrieval techniques to incorporate relevant knowledge and augment  generation, as  exemplified  by retrieval  augmented  generation ( RAG). In this framework, the input to  models is augmented  by prepending  relevant  documents  that are retrieved  from  an external  knowledge  corpus .  While  RAG  serves  as a practicable complement to  LLMs, its effectiveness i

# Text Embedding and Indexing in Pinecone

In [2]:
model_name = 'text-embedding-ada-002'

embed = OpenAIEmbeddings(
    model=model_name,
    api_key=OPENAI_API_KEY
)

  warn_deprecated(


In [3]:
pc = Pinecone(api_key=PINECONE_API_KEY)


    
index_name = 'chatbot'
if index_name not in pc.list_indexes().names():
    pc.create_index(name=index_name,
                    dimension=1536,
                    metric='cosine',
                    spec=ServerlessSpec(
                        cloud='aws', 
                        region='us-west-2'
                    )
    )

index = pc.Index(index_name)

index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 4}},
 'total_vector_count': 4}

In [20]:
batch_limit = 100

texts = []

for text in enumerate(data[0].):
    print(text)

(0, 'id')
(1, 'text')


In [54]:
uid = str(uuid.uuid4())
chunk_id = []
embeddings = []
chunk_id = [str(f'{uid}-{idx}') for idx in range(len(pdf_chunks))]
embeddings = embed.embed_documents(pdf_chunks)
metadata = [{'chunk': chunk} for chunk in pdf_chunks]
index.upsert(vectors=zip(chunk_id, 
                         embeddings, 
                         metadata))

{'upserted_count': 4}

In [55]:
index.describe_index_stats()

{'dimension': 1536,
 'index_fullness': 0.0,
 'namespaces': {'': {'vector_count': 4}},
 'total_vector_count': 4}

# Querying

In [4]:
text_field = 'chunk'

vectorstore = lang_pinecone(index, 
                            embed.embed_query, 
                            text_field)

  warn_deprecated(


In [5]:
query = 'What is RAG?'

# Retrieveing the top 3 chunks with possible answer using similarity search
vectorstore.similarity_search(query, 
                              k=3)

[Document(page_content='Introduction to RAG  Retrieval -augmented generation (RAG) is an AI framework for improving the quality of  LLM -generated  responses by grounding the model on external sources of knowledge to supplement the LLM’s internal representation of information. Implementing RAG in an LLM -based question answering system has two main benefits: It ensures that the model has access to the  most current, reliable facts, and that users have access to the model’s sources, ensuring that its claims can be checked for accuracy and ultimately trusted.  Prior research has introduced retrieval techniques to incorporate relevant knowledge and augment  generation, as  exemplified  by retrieval  augmented  generation ( RAG). In this framework, the input to  models is augmented  by prepending  relevant  documents  that are retrieved  from  an external  knowledge  corpus .  While  RAG  serves  as a practicable complement to  LLMs, its effectiveness is contingent upon the relevance  and 

# Chatbot

In [6]:
llm = ChatOpenAI(api_key=OPENAI_API_KEY, 
                 model='gpt-3.5-turbo', 
                 temperature=0)

prompt_template = """If you don't know the answer, just say you don't know the answer. 
Don't try to make up an answer.
{context}

Question: {query}
Answer:
"""
prompt = PromptTemplate(template=prompt_template, 
                        input_variables=['context', 
                                         'query'])

  warn_deprecated(


In [13]:
retriever = vectorstore.as_retriever(k=3)
qa = RetrievalQA.from_chain_type(llm=llm, 
                                 chain_type='stuff', 
                                 retriever=vectorstore.as_retriever())

In [8]:
print(prompt.format(query=query, context=retriever))

If you don't know the answer, just say you don't know the answer. 
Don't try to make up an answer.
tags=['Pinecone'] vectorstore=<langchain_community.vectorstores.pinecone.Pinecone object at 0x0000020DBD71DAC0>

Question: What is RAG?
Answer:



In [9]:
qa.run(query)

  warn_deprecated(


"RAG stands for Retrieval-augmented generation. It is an AI framework that enhances the quality of responses generated by language models (LLMs) by incorporating external sources of knowledge to supplement the model's internal information representation. RAG ensures access to current, reliable facts and allows users to verify the model's sources for accuracy and trustworthiness."

In [14]:
def count_tokens(chain, query):
    with get_openai_callback() as cb:
        result = chain.run(query)
        print(f'Spent a total of {cb.total_tokens} tokens')

    return result

In [25]:
count_tokens(qa, 
             query)

Spent a total of 932 tokens


"RAG stands for Retrieval-augmented generation. It is an AI framework that enhances the quality of responses generated by language models by incorporating external sources of knowledge to supplement the model's internal information representation. RAG ensures access to current, reliable facts and allows users to verify the model's claims for accuracy."

In [10]:
query1 = 'What is a tire?'
qa.run(query1)

"I don't know."

In [15]:
count_tokens(qa, query1)

Spent a total of 875 tokens


"I don't know."