In [23]:
import os
from langchain.indexes import VectorstoreIndexCreator
from langchain.indexes import VectorstoreIndexCreator
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.chains.question_answering import load_qa_chain
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
import time

In [48]:
### your open_ai_api key

In [2]:
OPENAI_API_KEY='Your Valid OpenAI API Key'
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

## Load_QA_Chain

In [5]:
%%time
loader = TextLoader('chatbot_for_survey.txt', encoding='utf8')
data = loader.load()
llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
question = "What are the requirements for this program?"
chain = load_qa_chain(llm, chain_type='stuff')
result = chain.run(input_documents=data, question=question)
# alternatively we can run this chain with the following command
#answer=chain({"input_documents": data, "question": question},return_only_outputs=True)
#print(answer['output_text'])
print(result)

The requirements for this program are as follows:

1. The participant must have conducted similar trials in the past.
2. The participant must have used cover crops in their field.
3. The participant must be willing to share some of their personal information.
4. The participant should not have been involved in exactly similar surveys with other companies in 2024.
CPU times: total: 15.6 ms
Wall time: 3.17 s


In [None]:
## document chain: https://python.langchain.com/docs/modules/chains/document/

In [24]:
#[print(x) for x in dir(chain) if x[0] != '_']

### VectorstoreIndexCreator

In [6]:
%%time
loader = TextLoader('chatbot_for_survey.txt', encoding='utf8')
index = VectorstoreIndexCreator().from_loaders([loader])
answer=index.query(question)
print(answer)

 To participate in this program, you must have conducted similar trials in the past, used cover crops in your field, be willing to share some of your personal information, and not have been involved in a similar survey with other companies in 2024.
CPU times: total: 1.5 s
Wall time: 3.1 s


In [11]:
### providing parameters explicitly, we can change them if we want to use different parameters

In [10]:
index = VectorstoreIndexCreator(
    # split the documents into chunks
    text_splitter=CharacterTextSplitter(chunk_size=1000, chunk_overlap=0),
    # select which embeddings we want to use
    embedding=OpenAIEmbeddings(),
    # use Chroma as the vectorestore to index and search embeddings
    vectorstore_cls=Chroma
).from_loaders([loader])
index.query(llm=llm, question=question, chain_type="stuff")
print(index.query(question))

Created a chunk of size 1260, which is longer than the specified 1000
Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3
Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


 To participate in this program, farmers must have conducted similar trials in the past, used cover crops in their field, be willing to share some of their personal information, and not have been involved in a similar survey with other companies in 2024.


## RetrivalQA

In [12]:
%%time
loader = TextLoader('chatbot_for_survey.txt', encoding='utf8')
data = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 100, chunk_overlap = 0, length_function = len)
data = text_splitter.split_documents(data)
llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
vectorstore = Chroma.from_documents(documents=data, embedding=OpenAIEmbeddings())
# embedding_function = SentenceTransformerEmbeddings(model_name="all-MiniLM-L6-v2")
# retriever = Chroma.from_documents(docs, embedding_function)
retriever = vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': 3})
qa_chain = RetrievalQA.from_chain_type(llm,retriever=retriever, chain_type='stuff')
response=qa_chain({"query": question})
result = response['result']
print(result)

The requirements for this program are:
1. You must be willing to participate.
2. You must have a farm from specific locations.
3. You must fill up the form immediately upon enrolling in the program.
CPU times: total: 172 ms
Wall time: 3.82 s


In [14]:
embeddings = OpenAIEmbeddings()
text = "This is a test document."
# this is default one
print(embeddings.model)
# change the default model, we can choose different embedding models
embeddings = OpenAIEmbeddings(model='text-embedding-davinci-001')
print(embeddings.model)



text-embedding-ada-002
text-embedding-davinci-001


In [15]:
## Checking the similarity search and finding relevent documents based on the provided question, here the default value of k  is 4. 
retriever = vectorstore.as_retriever(search_type='similarity', search_kwargs={'k': 6})
relevant_docs = retriever.get_relevant_documents(question)
print(relevant_docs)

[Document(page_content='to participate in this program, you will have to fullfill the following commitments:', metadata={'source': 'chatbot_for_survey.txt'}), Document(page_content='are willing to participate in this program have to have farm from these locations. By participating', metadata={'source': 'chatbot_for_survey.txt'}), Document(page_content='marketing compain, you will be have fill up the form immeidatly you enrolled in this program.', metadata={'source': 'chatbot_for_survey.txt'}), Document(page_content='If your response is later than 09/30/2023, you will not be elegible to participate in this program.', metadata={'source': 'chatbot_for_survey.txt'}), Document(page_content='About this program: XYZ Survey Program for Lunching a New Corn Hybrid - X1276uyz-P', metadata={'source': 'chatbot_for_survey.txt'}), Document(page_content='3. they must be willing to share their some of their personal infomation', metadata={'source': 'chatbot_for_survey.txt'})]


In [16]:
# also we can use one liner 
vectorstore.similarity_search(question, k=6)

[Document(page_content='to participate in this program, you will have to fullfill the following commitments:', metadata={'source': 'chatbot_for_survey.txt'}),
 Document(page_content='are willing to participate in this program have to have farm from these locations. By participating', metadata={'source': 'chatbot_for_survey.txt'}),
 Document(page_content='marketing compain, you will be have fill up the form immeidatly you enrolled in this program.', metadata={'source': 'chatbot_for_survey.txt'}),
 Document(page_content='If your response is later than 09/30/2023, you will not be elegible to participate in this program.', metadata={'source': 'chatbot_for_survey.txt'}),
 Document(page_content='About this program: XYZ Survey Program for Lunching a New Corn Hybrid - X1276uyz-P', metadata={'source': 'chatbot_for_survey.txt'}),
 Document(page_content='3. they must be willing to share their some of their personal infomation', metadata={'source': 'chatbot_for_survey.txt'})]

In [26]:
#[print(x) for x in dir(qa_chain) if x[0] != '_']

## Memory Based Conversation

In [18]:
loader = TextLoader('chatbot_for_survey.txt', encoding='utf8')
data = loader.load()
# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
# documents = text_splitter.split_documents(data)
# you encounter the following message: Created a chunk of size 1260, which is longer than the specified 1000

text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=0, length_function = len,)
documents = text_splitter.split_documents(data)
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents, embeddings)

In [123]:
#[x for x in dir(vectorstore) if x[0] != '_']

In [24]:
%%time
loader = TextLoader('chatbot_for_survey.txt', encoding='utf8')
data = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size=250, chunk_overlap=0, length_function = len,)
documents = text_splitter.split_documents(data)
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents, embeddings)
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
#memory = ConversationBufferMemory(memory_key="chat_history")
qa = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), memory=memory)
result=qa({"question":question})
print(result)
print(result['answer'])

{'question': 'What are the requirements for this program?', 'chat_history': [HumanMessage(content='What are the requirements for this program?', additional_kwargs={}, example=False), AIMessage(content='The requirements for this program are as follows:\n1. The participant must have conducted similar trials in the past.\n2. The participant must have used cover crops in their field.\n3. The participant must have a farm located in specific locations.', additional_kwargs={}, example=False)], 'answer': 'The requirements for this program are as follows:\n1. The participant must have conducted similar trials in the past.\n2. The participant must have used cover crops in their field.\n3. The participant must have a farm located in specific locations.'}
The requirements for this program are as follows:
1. The participant must have conducted similar trials in the past.
2. The participant must have used cover crops in their field.
3. The participant must have a farm located in specific locations.


In [25]:
result['chat_history']

[HumanMessage(content='What are the requirements for this program?', additional_kwargs={}, example=False),
 AIMessage(content='The requirements for this program are as follows:\n1. The participant must have conducted similar trials in the past.\n2. The participant must have used cover crops in their field.\n3. The participant must have a farm located in specific locations.', additional_kwargs={}, example=False)]

## Pass in chat history


In [26]:
# Above we use memory object to track chat history. We can track the chat history without using memory object as follow

In [27]:
def ask_with_memory(question, chat_history=[], k=3):
    # initiating chain with out memory object here
    qa = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever()) # make sure vectorstore and llm are defined aboved 
    result = qa({'question': question, 'chat_history': chat_history})
    chat_history.append((question, result['answer']))
    chat_history=chat_history[-k:]
    return result, chat_history

In [28]:
questions=["What is this program all about?",
"How can farmers benefit from this?",
"When will they will make the payment?",
"What do I need to do to join in?",
"Can I be part of this if I use organic farming?"]

chat_history = []
for question in questions:
    result, chat_history = ask_with_memory(question, chat_history)
    print(result['answer'])
    print(len(chat_history))
    print('\n')


This program is about launching a new corn hybrid, specifically the X1276uyz-P. It is a survey program that involves marketing companies and all associated costs will be included. Participants in this program will need to fill out a form immediately upon enrollment. The program requires participants to fulfill certain commitments, such as having conducted similar trials in the past and using cover crops in their fields. Participants will receive compensation based on the area (acres) they use for the trial, with a maximum limit of 1000 acres.
1


The benefits for farmers participating in this program include receiving a premium offering for the first two years of using the product, as well as compensation based on the number of acres they use for the trial. The compensation rate is $10 per acre, with a maximum compensation of $10,000.
2


The payment will be made in two installments. The first installment of $50 will be provided upon agreement, and the remaining amount will be provided

In [29]:
# qa = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever()) # initialize the chain without memory object
# chat_history=[]
# result = qa({'question': question, 'chat_history': chat_history})
# chat_history.append((question, result['answer']))
    

In [30]:
## Truncate the chat history

In [31]:
question="where to contact?"
result = qa({'question': question, 'chat_history': chat_history})
if len(chat_history) >= 3:
    chat_history.append((question, result['answer']))
    chat_history=chat_history[-3:]

In [32]:
len(chat_history)

3

In [33]:
chat_history

[('What do I need to do to join in?',
  'To join this program, you will need to follow these steps:\n\n1. Fill up the enrollment form immediately after you decide to participate.\n2. Make sure you have a farm located in one of the specified locations.\n3. Meet the commitments required for participation, which include:\n   a. Having conducted similar trials in the past.\n   b. Having used cover crops in your field.\n\nOnce you have completed these steps, you will be eligible to join the XYZ Survey Program for Launching a New Corn Hybrid - X1276uyz-P.'),
 ('Can I be part of this if I use organic farming?',
  'Based on the given context, there is no specific mention of organic farming as a requirement or restriction for participation in the program. Therefore, it is unclear whether you can participate in the program if you practice organic farming.'),
 ('where to contact?',
  'You can reach out to us at help@xyz.com for any additional questions or inquiries. You can also visit our website

#### Using ConversationBufferWindowMemory

In [42]:
from langchain.memory import ConversationBufferWindowMemory

llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
memory=ConversationBufferWindowMemory( k=3, memory_key="chat_history", return_messages=True)
#memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
#memory = ConversationBufferMemory(memory_key="chat_history")

qa = ConversationalRetrievalChain.from_llm(llm, vectorstore.as_retriever(), memory=memory)
result=qa({"question":question})
print(result)
#print(result['answer'])

{'question': 'where to contact?', 'chat_history': [], 'answer': 'You can reach out to us at help@xyz.com for any additional questions or inquiries. Additionally, you can visit our website at https://xyz.com for more information about our program.'}


In [43]:
result=qa({"question":'how the payment happens to this program?'})
result['chat_history']


[HumanMessage(content='where to contact?', additional_kwargs={}, example=False),
 AIMessage(content='You can reach out to us at help@xyz.com for any additional questions or inquiries. Additionally, you can visit our website at https://xyz.com for more information about our program.', additional_kwargs={}, example=False)]

In [44]:
result=qa({"question":'who can participate in this program'})
result['chat_history']

[HumanMessage(content='where to contact?', additional_kwargs={}, example=False),
 AIMessage(content='You can reach out to us at help@xyz.com for any additional questions or inquiries. Additionally, you can visit our website at https://xyz.com for more information about our program.', additional_kwargs={}, example=False),
 HumanMessage(content='how the payment happens to this program?', additional_kwargs={}, example=False),
 AIMessage(content='The payment for this program will be provided in installments. The first installment of $50 will be provided upon agreement, and the remaining amount will be provided after the harvest is completed.', additional_kwargs={}, example=False)]

In [45]:
result=qa({"question":'my farm is in IA, can I participate on this program?'})
result['chat_history']

[HumanMessage(content='where to contact?', additional_kwargs={}, example=False),
 AIMessage(content='You can reach out to us at help@xyz.com for any additional questions or inquiries. Additionally, you can visit our website at https://xyz.com for more information about our program.', additional_kwargs={}, example=False),
 HumanMessage(content='how the payment happens to this program?', additional_kwargs={}, example=False),
 AIMessage(content='The payment for this program will be provided in installments. The first installment of $50 will be provided upon agreement, and the remaining amount will be provided after the harvest is completed.', additional_kwargs={}, example=False),
 HumanMessage(content='who can participate in this program', additional_kwargs={}, example=False),
 AIMessage(content='To be eligible to participate in this program, individuals must have a farm located in specific locations. Additionally, they must have conducted similar trials in the past and have used cover cr

In [46]:
result=qa({"question":'where to start?'})
result['chat_history']


[HumanMessage(content='how the payment happens to this program?', additional_kwargs={}, example=False),
 AIMessage(content='The payment for this program will be provided in installments. The first installment of $50 will be provided upon agreement, and the remaining amount will be provided after the harvest is completed.', additional_kwargs={}, example=False),
 HumanMessage(content='who can participate in this program', additional_kwargs={}, example=False),
 AIMessage(content='To be eligible to participate in this program, individuals must have a farm located in specific locations. Additionally, they must have conducted similar trials in the past and have used cover crops in their fields.', additional_kwargs={}, example=False),
 HumanMessage(content='my farm is in IA, can I participate on this program?', additional_kwargs={}, example=False),
 AIMessage(content='Yes, you can participate in this program if your farm is located in Iowa.', additional_kwargs={}, example=False)]

In [47]:
## we see above only three conversations are stored in memory and the old one is eliminated

### Useful links

In [None]:
# splitter: https://python.langchain.com/docs/modules/data_connection/document_transformers/
# article: https://dev.to/eteimz/understanding-langchains-recursivecharactertextsplitter-2846
# chroma embedding: https://python.langchain.com/docs/integrations/vectorstores/chroma
# open ai embedding: https://platform.openai.com/docs/guides/embeddings/what-are-embeddings
# example embedding: https://python.langchain.com/docs/integrations/text_embedding/openai
# Langchain tools: https://integrations.langchain.com/