In [1]:
!pip -q install langchain openai tiktoken chromadb

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m11.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m221.4/221.4 kB[0m [31m13.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m22.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m502.4/502.4 kB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m177.8/177.8 kB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.2/48.2 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.0/75.0 kB[0m [31m10.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m32.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━

In [2]:
# Record which langchain version is installed, for reproducibility.
!pip show langchain

Name: langchain
Version: 0.0.345
Summary: Building applications with LLMs through composability
Home-page: https://github.com/langchain-ai/langchain
Author: 
Author-email: 
License: MIT
Location: /usr/local/lib/python3.10/dist-packages
Requires: aiohttp, anyio, async-timeout, dataclasses-json, jsonpatch, langchain-core, langsmith, numpy, pydantic, PyYAML, requests, SQLAlchemy, tenacity
Required-by: 


In [10]:
import os
from getpass import getpass

# Never store a key in the notebook itself: reuse OPENAI_API_KEY when it is
# already set in the environment, otherwise prompt for it without echoing.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

In [4]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import TextLoader
from langchain.document_loaders import DirectoryLoader


## Load documents

In [5]:
# Load the text file into LangChain documents. TextLoader is already imported
# in the imports cell, so it is not imported again here.
# The encoding is passed explicitly so loading does not depend on the
# platform's default encoding (the text contains non-ASCII quote characters).
# NOTE(review): assumes docmt.txt is UTF-8 encoded - confirm.
loader = TextLoader('./docmt.txt', encoding='utf-8')
documents = loader.load()

In [6]:
# Break the loaded documents into overlapping chunks
# (chunk_size=1000, chunk_overlap=200) so each chunk can be embedded on its own.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(documents)

In [7]:
# How many chunks did the splitter produce?
len(texts)

6

In [8]:
# Peek at one chunk to check its content and metadata.
texts[3]

Document(page_content='A person that gets education will be more open to the avenues for life of his choice. An educated person will be a better citizen and an able decision-maker. This is the reason why people always prefer an educated or more educated person over an uneducated or less educated person for employment purposes even to do a job which does not require much of education, like a office attendant or a domestic help. Literacy rate of India is 61% as contrast to 82% literacy rate of world. Female literacy rate is 54.16% as per the 2001 population census. These figures are not only embarrassing but also alarming.\nSome people cannot attain education because of their socio-economic problems and some other are devoid of education due to lack of resources, but some other simply avoid taking education due to lack of awareness about importance of education.', metadata={'source': './docmt.txt'})

## Create the DB

In [11]:
# Directory handed to Chroma; supplying it stores the embeddings on disk.
persist_directory = 'db'

# OpenAI embeddings for now; the plan is to swap in local embeddings later.
embedding = OpenAIEmbeddings()

# Embed the chunks and store them in a Chroma vector store.
vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embedding,
    persist_directory=persist_directory,
)

In [12]:
# Persist the DB to disk, then drop the in-memory reference so the next cell
# has to reload it from the persisted directory.
vectordb.persist()
vectordb = None

In [13]:
# Re-open the database persisted on disk; from here on it is used as normal.
vectordb = Chroma(
    embedding_function=embedding,
    persist_directory=persist_directory,
)

## Make a retriever

In [14]:
# Expose the vector store through the retriever interface, with default settings.
retriever = vectordb.as_retriever()

In [15]:
# Sanity-check retrieval with a sample question before building the chain.
docs = retriever.get_relevant_documents("What is need of education?")

In [16]:
# Number of documents returned by the retriever created without search_kwargs.
len(docs)

4

In [17]:
# Recreate the retriever with k=2 in its search kwargs, replacing the
# default-configured one created earlier.
retriever = vectordb.as_retriever(search_kwargs={"k": 2})

In [18]:
# Inspect the retriever's configured search type.
retriever.search_type

'similarity'

In [19]:
# Confirm the search kwargs passed when the retriever was recreated.
retriever.search_kwargs

{'k': 2}

## Make a chain

In [20]:
# Build the question-answering chain: "stuff" chain type over the retriever,
# configured to return the source documents alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [21]:
## Cite sources
def process_llm_response(llm_response):
    """Print the chain's answer followed by the source of each cited document.

    Args:
        llm_response: Dict returned by the QA chain. Must contain ``'result'``.
            ``'source_documents'`` is optional; when present it is an iterable
            of objects exposing a ``metadata`` mapping.

    Returns:
        None. The answer and the sources are written to stdout.

    Raises:
        KeyError: If ``'result'`` is missing from ``llm_response``.
    """
    print(llm_response['result'])
    print('\n\nSources:')
    # 'source_documents' may be absent (e.g. a chain built without
    # return_source_documents=True) and a document's metadata need not carry
    # a 'source' entry; neither case should abort the printout.
    for source in llm_response.get("source_documents") or []:
        print(source.metadata.get('source', '<unknown source>'))

In [22]:
# Full example: run one question through the chain, then print the answer
# together with the sources it was drawn from.
query = "What is need of education?"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 Education is necessary to help people acquire knowledge, form character, strengthen the mind, increase understanding, make informed decisions, and utilize potential to its fullest. Education dispels ignorance and makes a nation strong.


Sources:
./docmt.txt
./docmt.txt


In [None]:
query = "What are the literacy rates in India and how do they compare to the global literacy rate?"
llm_response = qa_chain(query)
# Show the raw response dict (query, result, source_documents) rather than the
# formatted summary, so the retrieved passages can be inspected.
llm_response

{'query': 'What are the literacy rates in India and how do they compare to the global literacy rate?',
 'result': ' The literacy rate of India is 61% as contrast to 82% literacy rate of the world. Female literacy rate is 54.16% as per the 2001 population census.',
 'source_documents': [Document(page_content='A person that gets education will be more open to the avenues for life of his choice. An educated person will be a better citizen and an able decision-maker. This is the reason why people always prefer an educated or more educated person over an uneducated or less educated person for employment purposes even to do a job which does not require much of education, like a office attendant or a domestic help. Literacy rate of India is 61% as contrast to 82% literacy rate of world. Female literacy rate is 54.16% as per the 2001 population census. These figures are not only embarrassing but also alarming.\nSome people cannot attain education because of their socio-economic problems and so

In [25]:
query = "why education is important"
llm_response = qa_chain(query)
# Show the raw response dict (query, result, source_documents) rather than the
# formatted summary, so the retrieved passages can be inspected.
llm_response

{'query': 'why education is important',
 'result': " Education is important because it encompasses both the teaching and learning of knowledge, proper conduct and technical competency. It increases the strength of mind and helps people become more independent and empowered. Education also sharpens and enhances people's understanding and learning abilities, and it is key to a healthy mind and a successful life.",
 'source_documents': [Document(page_content='The Importance Of Education \n“Education makes people easy to lead, but difficult to drive; easy to govern but impossible to enslave.”\nThe above quote aptly testifies to the importance of education. Education encompasses both the teaching and learning of knowledge, proper conduct and technical competency. Learning includes the moral values and improvement of character and methods to increase the strength of mind.', metadata={'source': './docmt.txt'}),
  Document(page_content='A well educated nation makes a great nation. We can adopt

In [27]:
query = "how education makes a great nation"
llm_response = qa_chain(query)
# Show the raw response dict (query, result, source_documents) rather than the
# formatted summary, so the retrieved passages can be inspected.
llm_response

{'query': 'how education makes a great nation',
 'result': ' Education helps to form character, strengthens the mind, increases knowledge and helps to make people independent. It dispels ignorance, helps people to use their potentials to their fullest, and helps people to become better thinkers and decision makers. Education also helps people to become familiar with the world around them and with history, which helps them to make better judgments about the present. All of these things combine to help make a great nation.',
 'source_documents': [Document(page_content='A well educated nation makes a great nation. We can adopt the motto of ‘Each one teach one” in our lives. We can teach uneducated people around us, because even informal education is any day better than no education. Let us take the uneducated towards the light of education and glow the glory of our nation.\nLet’s make our country more powerful by power of knowledge, i.e. education, to quote Francis Bacon: “Knowledge is po