In [1]:
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
from langchain.document_loaders import TextLoader, PyPDFLoader, DirectoryLoader

In [5]:
#Extract data from the PDF
def load_pdf(data):
    loader = DirectoryLoader(data,
                    glob="*.txt",
                    loader_cls=TextLoader)
    
    documents = loader.load()

    return documents

In [6]:
extracted_data = load_pdf("data/")

In [7]:
extracted_data

[Document(page_content='Recurrent neural networks, long short-term memory [13] and gated recurrent [7] neural networks\nin particular, have been firmly established as state of the art approaches in sequence modeling and\ntransduction problems such as language modeling and machine translation [ 35 , 2 , 5]. Numerous\nefforts have since continued to push the boundaries of recurrent language models and encoder-decoder\narchitectures [38, 24, 15].\nRecurrent models typically factor computation along the symbol positions of the input and output\nsequences. Aligning the positions to steps in computation time, they generate a sequence of hidden\nstates ht, as a function of the previous hidden state ht−1 and the input for position t. This inherently\nsequential nature precludes parallelization within training examples, which becomes critical at longer\nsequence lengths, as memory constraints limit batching across examples. Recent work has achieved\nsignificant improvements in computational eff

In [8]:
# split documents
def text_split(extracted_data):
    text_splitter = RecursiveCharacterTextSplitter(chunk_size = 1000, chunk_overlap = 150)
    text_chunks = text_splitter.split_documents(extracted_data)
    return text_chunks

In [10]:
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter

In [11]:
text_chunks = text_split(extracted_data)
print("length of my chunk:", len(text_chunks))

length of my chunk: 3


In [12]:
from langchain_openai import OpenAIEmbeddings

In [13]:
# define embedding
def embedding_in_use():
    embeddings = OpenAIEmbeddings()
    return embeddings

In [15]:
embeddings = embedding_in_use()

In [16]:
embeddings

OpenAIEmbeddings(client=<openai.resources.embeddings.Embeddings object at 0x00000124FDB1BBE0>, async_client=<openai.resources.embeddings.AsyncEmbeddings object at 0x00000124FDB1FC70>, model='text-embedding-ada-002', dimensions=None, deployment='text-embedding-ada-002', openai_api_version='', openai_api_base=None, openai_api_type='', openai_proxy='', embedding_ctx_length=8191, openai_api_key=SecretStr('**********'), openai_organization=None, allowed_special=None, disallowed_special=None, chunk_size=1000, max_retries=2, request_timeout=None, headers=None, tiktoken_enabled=True, tiktoken_model_name=None, show_progress_bar=False, model_kwargs={}, skip_empty=False, default_headers=None, default_query=None, retry_min_seconds=4, retry_max_seconds=20, http_client=None, http_async_client=None, check_embedding_ctx_length=True)

In [17]:
from langchain.vectorstores import Chroma

In [18]:
vectordb = Chroma.from_documents(documents = text_chunks, embedding = embeddings, persist_directory = "./chroma")

In [19]:
query = "What is attention"

docs=vectordb.similarity_search(query, k=3)

print("Result", docs)

Result [Document(page_content='sequence lengths, as memory constraints limit batching across examples. Recent work has achieved\nsignificant improvements in computational efficiency through factorization tricks [ 21 ] and conditional\ncomputation [ 32 ], while also improving model performance in case of the latter. The fundamental\nconstraint of sequential computation, however, remains.\nAttention mechanisms have become an integral part of compelling sequence modeling and transduc-\ntion models in various tasks, allowing modeling of dependencies without regard to their distance in\nthe input or output sequences [ 2, 19 ]. In all but a few cases [ 27 ], however, such attention mechanisms\nare used in conjunction with a recurrent network.\nIn this work we propose the Transformer, a model architecture eschewing recurrence and instead\nrelying entirely on an attention mechanism to draw global dependencies between input and output.', metadata={'source': 'data\\attention.txt'}), Document(pag

In [20]:
prompt_template="""
Use the following pieces of information to answer the user's question.
If you don't know the answer, just say that you don't know, don't try to make up an answer.

Context: {context}
Question: {question}

Only return the helpful answer below and nothing else.
Helpful answer:
"""

In [22]:
from langchain.prompts import PromptTemplate
PROMPT=PromptTemplate(template=prompt_template, input_variables=["context", "question"])

In [23]:
from langchain_openai import ChatOpenAI

In [24]:
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)

In [26]:
from langchain.chains import RetrievalQA
qa=RetrievalQA.from_chain_type(
    llm = llm,
    chain_type="stuff",
    retriever=vectordb.as_retriever(),
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT})

In [27]:
user_input = 'what is attention?'

In [29]:
result=qa({"query": user_input})

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


In [30]:
result

{'query': 'what is attention?',
 'result': 'Attention is a mechanism that allows models to focus on different parts of the input sequence when making predictions, without being limited by the distance between the parts. It helps the model draw global dependencies between input and output sequences.',
 'source_documents': [Document(page_content='sequence lengths, as memory constraints limit batching across examples. Recent work has achieved\nsignificant improvements in computational efficiency through factorization tricks [ 21 ] and conditional\ncomputation [ 32 ], while also improving model performance in case of the latter. The fundamental\nconstraint of sequential computation, however, remains.\nAttention mechanisms have become an integral part of compelling sequence modeling and transduc-\ntion models in various tasks, allowing modeling of dependencies without regard to their distance in\nthe input or output sequences [ 2, 19 ]. In all but a few cases [ 27 ], however, such attention

In [35]:
import shutil

In [36]:
print("count before", vectordb._collection.count())

count before 3


In [38]:
vectordb.delete_collection()

In [39]:
result=qa({"query": user_input})

InvalidCollectionException: Collection 18b9e0ea-cc45-4c17-9a46-ee54fa9cd79e does not exist.