In [None]:
!pip install langchain
!pip install chromadb

In [10]:
from langchain import PromptTemplate, LLMChain
from langchain.chains.qa_with_sources import load_qa_with_sources_chain
from langchain.vectorstores import Chroma
from langchain.text_splitter import CharacterTextSplitter
from langchain.llms import OpenAI
from langchain.embeddings.openai import OpenAIEmbeddings
from config import openaiapikey as key

In [11]:
import os

# Publish the key imported from the local `config` module as the
# OPENAI_API_KEY variable of this process's environment.
os.environ.update({"OPENAI_API_KEY": key})

In [12]:
# Prompt text for a general-purpose assistant; `{input}` is its only
# placeholder and is declared as the single input variable below.
template = """ You are going to be my assistant.
Please try to give me the most beneficial answers to my
question with reasoning for why they are correct.

 Question: {input} Answer: """
prompt = PromptTemplate(
    input_variables=["input"],
    template=template,
)

In [13]:
# LLM chain pairing the assistant prompt with an OpenAI LLM (temperature 0.8).
# Not referenced by the other cells visible in this notebook.
chain = LLMChain(
    llm=OpenAI(temperature=0.8),
    prompt=prompt,
)

In [14]:
# Embedding model, built with default settings; it is handed to
# `Chroma.from_texts` further down together with the text chunks.
embeddings = OpenAIEmbeddings()

In [15]:
# Read the whole source document. The context manager closes the file handle
# as soon as the text is in memory, so no descriptor stays open in the kernel.
# NOTE(review): no explicit encoding is given, so the platform default is used.
with open("./sample_blog.txt", "r") as file:
    text = file.read()

# Split the document into chunks (chunk_size=1000, no overlap between
# consecutive chunks); `texts` is what gets indexed in the vector store.
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
texts = text_splitter.split_text(text)


In [16]:
# Pass the chunks and the embedding model to Chroma. Each chunk's metadata
# stores its position in `texts` (as a string) under the "source" key.
db = Chroma.from_texts(
    texts,
    embeddings,
    metadatas=[{"source": str(idx)} for idx, _ in enumerate(texts)],
)

Using embedded DuckDB without persistence: data will be transient


In [17]:
# `query` drives the similarity search against the vector store; `que` is the
# question the chain is asked to answer over the retrieved documents.
query = "All places in Kochi"
que = "Make an itinerary with all the places mentioned along with their descriptions in one or two sentences."

# QA-with-sources chain of type "refine"; verbose=True prints the chain trace.
# NOTE(review): the trace saved under this cell reports a StuffDocumentsChain,
# which does not match chain_type="refine" -- the stored output looks stale;
# restart the kernel and re-run to refresh it.
my_chain = load_qa_with_sources_chain(
    OpenAI(temperature=0.8),
    chain_type="refine",
    verbose=True,
)
documents = db.similarity_search(query)
result = my_chain(
    {
        "input_documents": documents,
        "question": que,
    }
)
result



[1m> Entering new StuffDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mGiven the following extracted parts of a long document and a question, create a final answer with references ("SOURCES"). 
If you don't know the answer, just say that you don't know. Don't try to make up an answer.
ALWAYS return a "SOURCES" part in your answer.

QUESTION: Which state/country's law governs the interpretation of the contract?
Content: This Agreement is governed by English law and the parties submit to the exclusive jurisdiction of the English courts in  relation to any dispute (contractual or non-contractual) concerning this Agreement save that either party may apply to any court for an  injunction or other relief to protect its Intellectual Property Rights.
Source: 28-pl
Content: No Waiver. Failure or delay in exercising any right or remedy under this Agreement shall not constitute a waiver of such (or any other)  right or remedy.

11.7 

{'input_documents': [Document(page_content='By the end of the day, enjoy watching or participate in kolam making at the Brahmin Hindu Agraharam. Kolam is a colorful South Indian drawing made using rice flour, rock powder and color powder often drawn on the floor of Hindu homes by female members. Ride back to the ship with memories and beautiful photographs of this rich day. Book now to savor the rich culture of Kochi.\n\nThe tourist places in Kochi are absolutely captivating and are sure to leave you with a bag full of beautiful memories and aspirations. For those who wish to explore the real treasures of Kerala, a visit to Kochi tourist places is a must.', metadata={'source': '11'}),
  Document(page_content='Bolghatty Island is a short boat ride away from mainland Kochi and has Bolghatty Palace which is one of the oldest existing Dutch palaces outside Holland. Bolghatty Palace is now converted into a hotel and there is even a small golf course, a swimming pool, 9-hole golf course, ayu

In [18]:
# Show only the generated answer stored under 'output_text'; `print` renders
# its line breaks instead of the escaped string repr.
print(result['output_text'])

 Itinerary: 
1. Fort Kochi: Chinese fishing nets, St Francis Church, and Vasco House;
2. Mattancherry: Dutch Palace and Jewish Synagogue;
3. Brahmin Hindu Agraharam: Kolam making;
4. Malayattoor: Malayatoor Church;
5. Lulu Shopping Mall: 300+ food outlets, stores and restaurants;
6. Bolghatty Island: Bolghatty Palace, 9-hole golf course, swimming pool, ayurvedic center and Kathakali performances;
7. Cherai & Vypeen Beach: Pallipuram Fort.
SOURCES: 0-pl, 2-pl, 3-pl, 11-pl
