In [1]:
import os
import re

import requests
import tiktoken
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import UnstructuredURLLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

In [2]:
## Read text data from GitHub

In [3]:
# Download the raw survey document that every later cell works on.
doc_url = "https://raw.githubusercontent.com/DhunganaKB/customchat/main/survey-document.txt"
# Bound the wait so a stalled connection cannot hang the notebook forever.
req = requests.get(doc_url, timeout=30)
# Fail loudly on HTTP errors; otherwise the body of an error page would be
# stored in doc1 and silently embedded into the vector store.
req.raise_for_status()
doc1 = req.text

In [4]:
# Read the OpenAI API key from the environment instead of hardcoding it in the
# notebook. Export OPENAI_API_KEY before starting Jupyter; every later cell
# that talks to OpenAI reads this name.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")

In [5]:
# Recursive splitter whose sizes are measured with len(), i.e. the length of
# the text itself.
custom_text_splitter = RecursiveCharacterTextSplitter(
    length_function=len,
    chunk_size=300,    # custom chunk size
    chunk_overlap=20,  # overlap between chunks
)

# Turn the downloaded document into chunk documents.
chunks = custom_text_splitter.create_documents([doc1])

In [6]:
# Create the Chroma vector database locally

In [7]:
# Embed the chunks with OpenAI embeddings and build a Chroma store from them,
# using 'db' as the persist directory.
persist_directory = 'db'
embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
vectordb = Chroma.from_documents(
    documents=chunks,
    embedding=embedding,
    persist_directory=persist_directory,
)


In [8]:
def ask_with_memory(vectordb, question, chat_history=None):
    """Ask one question against a vector store, carrying conversation history.

    Args:
        vectordb: Vector store; only its ``as_retriever`` method is used.
        question: The user question to send to the chain.
        chat_history: List of ``(question, answer)`` tuples from earlier turns.
            When omitted, a fresh empty history is started for this call. A
            list passed in by the caller is extended in place.

    Returns:
        A ``(result, chat_history)`` tuple: the raw chain output (its
        ``'answer'`` entry holds the reply) and the history including this turn.
    """
    from langchain.chains import ConversationalRetrievalChain
    from langchain.chat_models import ChatOpenAI

    # A literal [] default would be created once and shared by every call that
    # omits chat_history, leaking turns between unrelated conversations.
    if chat_history is None:
        chat_history = []

    llm = ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY)
    retriever = vectordb.as_retriever(search_type='similarity', search_kwargs={'k': 3})

    crc = ConversationalRetrievalChain.from_llm(llm, retriever)
    result = crc({'question': question, 'chat_history': chat_history})
    chat_history.append((question, result['answer']))

    return result, chat_history

In [9]:
# Benchmark questions asked against every retrieval setup in this notebook.
questions = [
    "What is this program all about?",
    "How can farmers benefit from this?",
    "When will they will make the payment?",
    "What do I need to do to join in?",
    "Can I be part of this if I use organic farming?",
    "I live in a different state, but my farm is in IA, can I still join?",
    "Do I have to pay for shipping the product?",
    "Do we need advance tools for this test?",
    "Who will visit our fields to check the test?",
    "Will this trial go beyond 2024?",
    "How much mony I can make from this program?",
    "Why this program?",
    "how much a grower will get?",
]


In [10]:
# Run every question through the retrieval chain built on the original chunks,
# threading one shared conversation history through all turns.
chat_history = []
for question in questions:
    result, chat_history = ask_with_memory(vectordb, question, chat_history)
    # One call, newline-separated: same text as three consecutive prints.
    print(f"Question: {question}", f"Answer: {result['answer']}", "\n", sep="\n")


Question: What is this program all about?
Answer: This program is about introducing a new type of crop product to farmers in the upcoming year, 2024. The product is designed to yield higher crops, be environmentally friendly, have better survival rates during tough weather conditions, and come with a user-friendly protocol. Farmers who are interested in participating in the program need to meet certain requirements.


Question: How can farmers benefit from this?
Answer: The benefits for farmers in participating in this program include the ability to yield higher crops, better survival rates during tough weather conditions, and a user-friendly protocol. Additionally, participating farmers will have the opportunity to collaborate with Company XYZ and receive exclusive offers for significant discounts on various other products from the company.


Question: When will they will make the payment?
Answer: The payment will be divided into two parts. The initial 50% will be made upon agreement,

In [11]:
from langchain.schema.document import Document
from langchain.chains.question_answering import load_qa_chain


In [12]:
# Wrap the whole text in a single Document, stored in a one-element list.
docnew = [Document(page_content=doc1)]


In [13]:
def loading_all_document(question):
    """Answer `question` by handing the whole document to the chat model.

    Builds a "stuff" question-answering chain around a temperature-0
    ChatOpenAI model and runs it over the module-level `docnew` list.

    Returns whatever the chain's `run` call returns.
    """
    qa_chain = load_qa_chain(
        ChatOpenAI(temperature=0, openai_api_key=OPENAI_API_KEY),
        chain_type="stuff",
    )
    return qa_chain.run(input_documents=docnew, question=question)

In [14]:
# Same questions, answered from the full document instead of retrieved chunks.
for question in questions:
    result = loading_all_document(question)
    # One call, newline-separated: same text as three consecutive prints.
    print(f"Question: {question}", f"Answer: {result}", "\n", sep="\n")


Question: What is this program all about?
Answer: This program is about introducing a new type of crop product to farmers. The program involves a research trial that has shown a significant increase in crop yield. Participating farmers will receive compensation based on the amount of land they commit to the trial. The program is currently limited to a few states in the USA, but may be extended to additional states in the future. Farmers who participate will also have the opportunity to collaborate with the company and receive exclusive offers for discounts on other products. The program has certain requirements for participation, including prior trial experience and the use of cover crops.


Question: How can farmers benefit from this?
Answer: Farmers can benefit from this program in several ways:

1. Increased crop yield: The new crop product has shown a significant increase in crop yield during the research trial. By participating in the program, farmers have the opportunity to impro

## Text augmentation: rephrase the document with GPT-4 and rebuild the vector store

In [11]:
# Look up the tokenizer tiktoken associates with gpt-3.5-turbo.
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")
# Token count of the original document (shown as the cell's output).
len(encoding.encode(doc1))

498

In [12]:
# Prompt asking the model to rephrase {text} {number} times.
# NOTE(review): "agromonist" below is probably a typo for "agronomist"; it is
# left untouched because it is part of the prompt sent to the model.
template = '''
Disregard all prior instructions. As a seasoned agromonist, your command of English is exceptional.

Your task involves rephrasing the subsequent passage {text} in clear, straightforward English, doing so {number} times. 
Furthermore, enrich the paragraph using your profound agronomic knowledge, all while preserving its essence for farmers' enhanced program comprehension.

Employ a diverse array of grammatical structures to infuse variety into your revisions.
Incorporate a spectrum of vocabulary, ranging from the elementary to the intricate.
'''
prompt = PromptTemplate(
    template=template,
    input_variables=['text', 'number'],
)

# GPT-4 at temperature 1 for the rephrasing step.
llm = ChatOpenAI(
    model='gpt-4',
    temperature=1,
    openai_api_key=OPENAI_API_KEY,
)
chain = LLMChain(prompt=prompt, llm=llm)

#output = chain.run({'text':'We are going to party tonight, but we are not sure about other atendees', 'number':'5'})
# Leading list marker such as "1. ", "10. " or "2) ". The trailing whitespace
# is required so that text starting with a decimal like "3.5" is left alone.
_ENUMERATION_PREFIX = re.compile(r'^\s*\d+[.)]\s+')


def combining_outputs(output):
    """Split model output into cleaned lines.

    Blank lines and lines containing 'Rewrite' or 'Review' are dropped. A
    leading list marker ("1. ", "10. ", "2) ") is removed from the lines that
    remain; lines without a marker are returned unchanged instead of losing
    their first three characters.

    Args:
        output: The raw text returned by the model.

    Returns:
        List of cleaned lines in their original order; empty for empty input.
    """
    cleaned = []
    for line in output.split('\n'):
        # Whitespace-only lines carry no content.
        if not line.strip():
            continue
        # Header lines the model adds around its rewrites.
        if 'Rewrite' in line or 'Review' in line:
            continue
        cleaned.append(_ENUMERATION_PREFIX.sub('', line))
    return cleaned

In [13]:
# Run the rephrasing chain on the full document, requesting 5 rephrasings.
doc=chain.run({'text':doc1, 'number':'5'})

In [14]:
# Non-empty lines of the original document, in order.
orgdoc = [line for line in doc1.split('\n') if line]

In [15]:
# Show the raw chain output before it is cleaned up.
print(doc)

1. Synopsis of the Project: The esteemed company, XYZ, is on the precipice of showcasing to agrarian communities a novel and revolutionary agricultural product in the forthcoming year, 2024. Six years of dedicated research from 2016 to 2022 into this product has manifested stark improvements in crop production. Initially, the testing ground for this venture involves a select group of states, principally IA, IL, MO, IN, NE, geographically confined within the American borders. It's envisaged that post-2024, the company may spread its wings and incorporate more states into the ambit of this experiment. In order to ensure the smooth conduct of the trial phase, XYZ will be dispatching their in-house agronomists for constant monitoring and data collection from the fields throughout the agrarian calendar. Those farmers who throw their hat in the ring stand to profit substantially as per the area they pledge to the program, with the upper limit capped at 1000 acres. Compensation is bifurcated 

In [16]:
# Pool the cleaned rephrasings with the original non-empty lines, then
# flatten everything into one space-separated string.
doc2 = combining_outputs(doc) + orgdoc
doc_final = " ".join(doc2)

In [17]:
# Token count of the augmented text.
print(len(encoding.encode(doc_final)))

1504


In [18]:
# Split the augmented text into chunk documents with custom_text_splitter.
chunks_final = custom_text_splitter.create_documents([doc_final])

In [19]:
# Second Chroma store, built from the augmented chunks and kept in its own
# persist directory ('db_new').
persist_directory1 = 'db_new'
embedding = OpenAIEmbeddings(openai_api_key=OPENAI_API_KEY)
vectordb1 = Chroma.from_documents(
    documents=chunks_final,
    embedding=embedding,
    persist_directory=persist_directory1,
)

In [20]:
# Ask the same questions against the augmented store, starting from a fresh
# conversation history.
chat_history = []
for question in questions:
    result, chat_history = ask_with_memory(vectordb1, question, chat_history)
    # One call, newline-separated: same text as three consecutive prints.
    print(f"Question: {question}", f"Answer: {result['answer']}", "\n", sep="\n")

Question: What is this program all about?
Answer: This program is about introducing a new type of crop product to farmers. The program aims to increase crop yield and has shown promising results in research trials conducted from 2016 to 2022. It is initially limited to the states of IA, IL, MO, IN, and NE in the USA, but there are plans for expansion after 2024. The product is designed to have better survival rates during tough weather conditions and comes with a user-friendly protocol. Farmers who want to participate in the program need to meet certain requirements, such as having prior trial experience and using cover crops in their fields.


Question: How can farmers benefit from this?
Answer: The benefits for farmers in participating in this program include:

1. Collaboration with Company XYZ: Participating farmers will have the opportunity to collaborate with Company XYZ, which can provide valuable insights and expertise in the agricultural industry.

2. Exclusive offers for signi