In [11]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader
from langchain.vectorstores import Chroma,FAISS

In [3]:
# Collect the .txt files matched by the glob below, reading each with TextLoader.
loader = DirectoryLoader(
    path='.',
    glob="./*.txt",
    loader_cls=TextLoader,
)
documents = loader.load()

In [5]:
# Break the loaded documents into chunks of up to 1000 characters,
# with 200 characters of overlap between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=200,
    chunk_size=1000,
)
texts = text_splitter.split_documents(documents)

In [6]:
# Embed the chunks and index them in Chroma.
# Passing a persist_directory makes Chroma keep the embeddings on disk.
persist_directory = 'db'

# OpenAI embeddings for now; the plan is to swap in local embeddings later.
embedding = OpenAIEmbeddings()

vectordb = Chroma.from_documents(
    documents=texts,
    persist_directory=persist_directory,
    embedding=embedding,
)

In [7]:
# Persist the db to disk, then rebind `vectordb` to None so this notebook no
# longer holds the in-memory store; it must be re-created before further use.
vectordb.persist()
vectordb = None

In [8]:
# Re-create the store from the persisted directory, reusing the same
# embedding object that was used to build it.
vectordb = Chroma(
    embedding_function=embedding,
    persist_directory=persist_directory,
)

In [9]:
# Expose the vector store as a retriever; no arguments are passed, so the
# library's default search settings apply.
retriever = vectordb.as_retriever()

In [30]:
def process_llm_response(llm_response):
    """Print a chain's answer followed by the source of each retrieved document.

    Args:
        llm_response: Mapping with a 'result' entry (the answer text) and,
            optionally, a 'source_documents' entry: an iterable of objects
            exposing a ``metadata`` dict.

    Returns:
        None. Everything is written to stdout.

    Sources are printed one per line, in the order given, duplicates included.
    A document whose metadata lacks a 'source' key is reported as
    'unknown source' instead of raising ``KeyError``, and a response without
    'source_documents' simply prints an empty source list.
    """
    print(llm_response['result'])
    print('\n\nSources:')
    # `or []` also covers an explicit None stored under the key.
    for source in llm_response.get("source_documents") or []:
        metadata = getattr(source, "metadata", None) or {}
        print(metadata.get('source', 'unknown source'))

In [46]:
# Build the question-answering chain: the "stuff" chain type over the
# retriever, configured to hand back the source documents with each answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    retriever=retriever,
    chain_type="stuff",
    return_source_documents=True,
)

In [26]:
# import os
# if os.path.exists("exl_jd.pkl"):
#     with open("exl_jd.pkl", 'rb') as f:
#         vectordb = pickle.load(f) 
#         print("Reading from disk")
        
# else:
#     #vectordb = Chroma.from_documents(texts,embedding=embedding)    
#     vectordb=FAISS.from_documents(texts,embedding=embedding)    
#     with open("exl_jd.pkl",'wb') as f:
#         pickle.dump(vectordb,f)
#         print("computing embedinegs")

In [34]:
# Price comparison question, passed to the chain under its "query" input key.
query = "verizon or att offering the low price?"
llm_response = qa_chain({"query": query})
process_llm_response(llm_response)

 AT&T is offering a low price with eligible trade-in.


Sources:
att.txt
att.txt
att.txt
verizon.txt


In [47]:
# Trade-in policy question, passed to the chain under its "query" input key.
query = "verizon or att offering the best trade in policy?"
llm_response = qa_chain({"query": query})
process_llm_response(llm_response)

 AT&T is offering a trade in and save $1000 policy for new lines, and a trade in and save $830 policy for upgrades. Verizon does not appear to be offering any trade in policies.


Sources:
att.txt
att.txt
verizon.txt
verizon.txt


In [53]:
# Delivery availability / seller question, passed under the "query" input key.
query = "what is the product availabelity to deliver the product and which seller"
llm_response = qa_chain({"query": query})
process_llm_response(llm_response)

 You can get the product with free 2 day shipping with a new line, free Express Pickup, or select a store. The product is sold by AT&T.


Sources:
att.txt
att.txt
verizon.txt
att.txt


In [None]:
# Verizon-specific delivery availability question, passed under the "query" input key.
query = "what is the product availabelity to deliver the product from the verizon"
llm_response = qa_chain({"query": query})
process_llm_response(llm_response)