In [9]:
from langchain.document_loaders import TextLoader

# Read the raw text file into LangChain documents for the later splitting step.
loader = TextLoader(file_path="rag.txt")
data = loader.load()

from langchain.text_splitter import RecursiveCharacterTextSplitter

# Chunking parameters, named so they are easy to find and tune.
CHUNK_SIZE = 500
CHUNK_OVERLAP = 0

# Break the loaded documents into non-overlapping chunks for embedding.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
all_splits = text_splitter.split_documents(data)

In [10]:
from langchain.embeddings import GPT4AllEmbeddings
from langchain.vectorstores import Chroma

# Embed every chunk with the GPT4All embedding model and index the result in Chroma.
embedding_model = GPT4AllEmbeddings()
vectorstore = Chroma.from_documents(
    documents=all_splits,
    embedding=embedding_model,
)

In [11]:
# Quick retrieval check: search the vector store for a sample query and
# display how many chunks come back.
question = "sum of all entries"
docs = vectorstore.similarity_search(query=question)
len(docs)

4

In [12]:
# Display the first chunk returned by the similarity search above to inspect
# what the retrieved text and metadata look like.
docs[0]

Document(page_content='The date of metrics is 2020-11-01. The partner region is EMEA. The partner country is UK. The partner is EE. The brand name is Apple. The total visits is 279409. The phone model is None\nThe date of metrics is 2020-11-01. The partner region is EMEA. The partner country is UK. The partner is EE. The brand name is Oppo. The total visits is 25512. The phone model is None', metadata={'source': 'rag.txt'})

In [13]:
from langchain.llms import GPT4All

import os
from pathlib import Path

# Location of the local GGUF model file. Set the GPT4ALL_MODEL_PATH environment
# variable to run this notebook on another machine; the fallback is the path
# the notebook was originally authored with, so existing setups keep working.
DEFAULT_MODEL_PATH = r"C:\Users\Priyansh\PycharmProjects\ai-data-analytics\gpt4all-falcon-q4_0.gguf"
model_path = Path(os.environ.get("GPT4ALL_MODEL_PATH", DEFAULT_MODEL_PATH))

# Load the local GPT4All model used by the chains in the following cells.
llm = GPT4All(
    model=str(model_path),
    max_tokens=2048,
)

In [14]:
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate

# Prompt: the question and the retrieved context are both template variables,
# so the text sent to the model always matches the query used for retrieval
# (previously the question was hard-coded in the template and repeated below).
prompt = PromptTemplate.from_template("{question}\n\n{docs}")

# Chain
llm_chain = LLMChain(llm=llm, prompt=prompt)

# Run: retrieve chunks for the question, then pass only their text to the
# model. Passing the Document objects directly would put their Python repr
# (including the metadata dict) into the prompt.
question = "What is the total Google visits in the last week visit for US?"
docs = vectorstore.similarity_search(question)
context = "\n\n".join(doc.page_content for doc in docs)
result = llm_chain({"question": question, "docs": context})

# Output
# NOTE(review): the model only sees the handful of retrieved chunks and is
# asked to aggregate numbers itself — treat the answer as unverified.
result["text"]

'\nThe total Google visits in the last week for US is 35,176.'

In [15]:
from langchain.chains import RetrievalQA

# Wire the vector store and the local LLM into a retrieval QA chain:
# the retriever supplies chunks for each query and the LLM answers from them.
retriever = vectorstore.as_retriever()
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)


In [16]:
# Run one query through the retrieval QA chain; the cell displays the
# dict the chain returns.
qa_inputs = {"query": "Total traffic"}
qa_chain(qa_inputs)

{'query': 'Total traffic',
 'result': ' The total traffic for the given date of metrics is 93763.'}