In [1]:
from langchain.document_loaders import TextLoader

# Sample article that serves as the corpus for the rest of the notebook.
text = """Google opens up its AI language model PaLM to challenge OpenAI and GPT-3
Google is offering developers access to one of its most advanced AI language models: PaLM.
The search giant is launching an API for PaLM alongside a number of AI enterprise tools
it says will help businesses “generate text, images, code, videos, audio, and more from
simple natural language prompts.”

PaLM is a large language model, or LLM, similar to the GPT series created by OpenAI or
Meta’s LLaMA family of models. Google first announced PaLM in April 2022. Like other LLMs,
PaLM is a flexible system that can potentially carry out all sorts of text generation and
editing tasks. You could train PaLM to be a conversational chatbot like ChatGPT, for
example, or you could use it for tasks like summarizing text or even writing code.
(It’s similar to features Google also announced today for its Workspace apps like Google
Docs and Gmail.)
"""

# The text contains non-ASCII punctuation (curly quotes and apostrophes), so the
# file is written and read back with an explicit UTF-8 encoding instead of the
# platform default, which is not guaranteed to be able to represent it.
with open("my_file.txt", "w", encoding="utf-8") as file:
    file.write(text)

# Load the file back through LangChain, using the same encoding it was written with.
loader = TextLoader("my_file.txt", encoding="utf-8")
docs_from_file = loader.load()

# Number of documents produced by the loader.
print(len(docs_from_file))



1


In [2]:
from langchain.text_splitter import CharacterTextSplitter

# Configure a character-based splitter with a target chunk size of 200 and an
# overlap setting of 20.
text_splitter = CharacterTextSplitter(
    chunk_size=200,
    chunk_overlap=20,
)

# Break the loaded document(s) into chunks; `docs` is what gets indexed later.
docs = text_splitter.split_documents(docs_from_file)

# NOTE(review): the recorded run warns that a chunk of size 373 was created, so
# chunk_size is not a hard limit here — presumably because of the splitter's
# separator; confirm before relying on the chunk length.
print(len(docs))

Created a chunk of size 373, which is longer than the specified 200


2


In [4]:
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import DeepLake

# Embedding model used to vectorise every chunk stored in the dataset.
embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

# Assemble the Deep Lake dataset path from the organisation id and dataset name.
my_activeloop_org_id = "veaceslavcalestru"
my_activeloop_dataset_name = "langchain_course_indexers_retrievers1"
dataset_path = "hub://" + my_activeloop_org_id + "/" + my_activeloop_dataset_name

# Open the vector store at that path, wired to the embedding model above.
db = DeepLake(
    embedding_function=embeddings,
    dataset_path=dataset_path,
)

# Add the chunks to the store; as the cell's last expression, the value returned
# by add_documents is displayed as the cell output.
db.add_documents(docs)

Your Deep Lake dataset has been successfully created!
The dataset is private so make sure you are logged in!


/

Dataset(path='hub://veaceslavcalestru/langchain_course_indexers_retrievers1', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype      shape     dtype  compression
  -------    -------    -------   -------  ------- 
 embedding  embedding  (2, 1536)  float32   None   
    id        text      (2, 1)      str     None   
 metadata     json      (2, 1)      str     None   
   text       text      (2, 1)      str     None   


 

['797bb006-6dd7-11ee-b8f9-cc4740c98b6b',
 '797bb007-6dd7-11ee-9aea-cc4740c98b6b']

In [5]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

# Obtain a retriever from the vector store so chains can query it.
retriever = db.as_retriever()

# Question-answering chain that combines the LLM with the retriever.
# NOTE(review): confirm 'text-davinci-003' is still served by the OpenAI API
# before re-running this cell.
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(model="text-davinci-003"),
    chain_type="stuff",
    retriever=retriever,
)

# Run a single question through the chain and print the answer text.
query = "How Google plans to challenge OpenAI?"
response = qa_chain.run(query)

print(response)

 Google is offering developers access to one of its most advanced AI language models, PaLM, with an API and AI enterprise tools. PaLM is a large language model, similar to the GPT series created by OpenAI. Google hopes that PaLM will be able to perform tasks like text generation, editing, summarizing text, and even writing code, which would challenge OpenAI's GPT series.


DocumentCompressor and LLMChainExtractor

In [7]:
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import LLMChainExtractor

# LLM with temperature=0, used by the extractor below.
llm = OpenAI(model="text-davinci-003", temperature=0)

# Build the document compressor from that LLM.
compressor = LLMChainExtractor.from_llm(llm)

# Wrap the existing retriever so that retrieved documents pass through the compressor.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=retriever,
    base_compressor=compressor,
)

# Fetch the documents for the same question that was asked of the QA chain.
retrieved_docs = compression_retriever.get_relevant_documents(
    "How Google plans to challenge OpenAI?"
)

print(retrieved_docs)

# Disabled alternative: run the QA chain on top of the compression retriever.
# qa_chain = RetrievalQA.from_chain_type(llm=OpenAI(model='text-davinci-003'),
#                                        chain_type="stuff",
#                                        retriever=compression_retriever)

# query = "How Google plans to challenge OpenAI?"
# response = qa_chain.run(query)

# print(response)

[Document(page_content='Google is offering developers access to one of its most advanced AI language models: PaLM. The search giant is launching an API for PaLM alongside a number of AI enterprise tools it says will help businesses “generate text, images, code, videos, audio, and more from simple natural language prompts.”', metadata={'source': 'my_file.txt'}), Document(page_content='Google first announced PaLM in April 2022. Like other LLMs, PaLM is a flexible system that can potentially carry out all sorts of text generation and editing tasks.', metadata={'source': 'my_file.txt'})]
