In [10]:
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
from langchain.document_loaders import TextLoader

In [4]:
text = """Google opens up its AI language model PaLM to challenge OpenAI and GPT-3
Google is offering developers access to one of its most advanced AI 
language models: PaLM. The search giant is launching an API for PaLM alongside
a number of AI enterprise tools it says will help businesses “generate text, 
mages, code, videos, audio, and more from simple natural language prompts.”

PaLM is a large language model, or LLM, similar to the GPT series created by 
OpenAI or Meta’s LLaMA family of models. Google first announced PaLM in April
2022. Like other LLMs, PaLM is a flexible system that can potentially carry 
out all sorts of text generation and editing tasks. You could train PaLM to 
be a conversational chatbot like ChatGPT, for example, or you could use it 
for tasks like summarizing text or even writing code. (It’s similar to features 
Google also announced today for its Workspace apps like Google Docs and Gmail.)"""

with open("my.txt","w") as f:
    f.write(text)

In [5]:
loader = TextLoader("my.txt")
docs_from_file = loader.load()

print(len(docs_from_file))

1


In [8]:
from langchain.text_splitter import CharacterTextSplitter

text_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=20)

docs = text_splitter.split_documents(docs_from_file)

print((docs))

Created a chunk of size 374, which is longer than the specified 200


[Document(page_content='Google opens up its AI language model PaLM to challenge OpenAI and GPT-3\nGoogle is offering developers access to one of its most advanced AI \nlanguage models: PaLM. The search giant is launching an API for PaLM alongside\na number of AI enterprise tools it says will help businesses “generate text, \nmages, code, videos, audio, and more from simple natural language prompts.”', metadata={'source': 'my.txt'}), Document(page_content='PaLM is a large language model, or LLM, similar to the GPT series created by \nOpenAI or Meta’s LLaMA family of models. Google first announced PaLM in April\n2022. Like other LLMs, PaLM is a flexible system that can potentially carry \nout all sorts of text generation and editing tasks. You could train PaLM to \nbe a conversational chatbot like ChatGPT, for example, or you could use it \nfor tasks like summarizing text or even writing code. (It’s similar to features \nGoogle also announced today for its Workspace apps like Google Docs

In [15]:
# create vector embedding of each chunk
from langchain.embeddings import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")

In [16]:
# need to store this in vector data stores.
from langchain.vectorstores import DeepLake

my_activeloop_ord_id = "charanvardhan"
my_activeloop_dataset_name = "indexesAndRetreivers"
dataset_path = f"hub://{my_activeloop_ord_id}/{my_activeloop_dataset_name}"
db = DeepLake(dataset_path=dataset_path, embedding_function=embeddings)

db.add_documents(docs)

Deep Lake Dataset in hub://charanvardhan/indexesAndRetreivers already exists, loading from the storage


Creating 2 embeddings in 1 batches of size 2:: 100%|██████████| 1/1 [00:06<00:00,  6.63s/it]

Dataset(path='hub://charanvardhan/indexesAndRetreivers', tensors=['embedding', 'id', 'metadata', 'text'])

  tensor      htype      shape     dtype  compression
  -------    -------    -------   -------  ------- 
 embedding  embedding  (2, 1536)  float32   None   
    id        text      (2, 1)      str     None   
 metadata     json      (2, 1)      str     None   
   text       text      (2, 1)      str     None   





['1395a684-13fe-11f0-a8e7-0e8d4396c6f5',
 '1395a742-13fe-11f0-a8e7-0e8d4396c6f5']

In [17]:
retriever = db.as_retriever()

In [18]:
from langchain.chains import RetrievalQA
from langchain.llms import OpenAI

#create a retrieval chain
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(model_name='gpt-3.5-turbo'),
    chain_type='stuff',
    retriever= retriever
)




In [19]:
query = "How Google plans to challenge OpenAI?"
response = qa_chain.run(query)
print(response)


Google plans to challenge OpenAI by offering developers access to its advanced AI language model PaLM, launching an API for PaLM, and providing AI enterprise tools for businesses to generate text, images, code, videos, audio, and more from natural language prompts.
