**STEP 1 - INSTALLING THE REQUIRED PACKAGES AND LIBRARIES**

In [None]:
!pip install langchain-classic
!pip install chromadb openai tiktoken

Collecting langchain-classic
  Downloading langchain_classic-1.0.0-py3-none-any.whl.metadata (3.9 kB)
Collecting langchain-core<2.0.0,>=1.0.0 (from langchain-classic)
  Downloading langchain_core-1.0.1-py3-none-any.whl.metadata (3.5 kB)
Collecting langchain-text-splitters<2.0.0,>=1.0.0 (from langchain-classic)
  Downloading langchain_text_splitters-1.0.0-py3-none-any.whl.metadata (2.6 kB)
Downloading langchain_classic-1.0.0-py3-none-any.whl (1.0 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_core-1.0.1-py3-none-any.whl (467 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m467.1/467.1 kB[0m [31m38.7 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_text_splitters-1.0.0-py3-none-any.whl (33 kB)
Installing collected packages: langchain-core, langchain-text-splitters, langchain-classic
  Attempting uninstall: langchain-core
    Found existing installation: langch

In [None]:
# the req lib
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.llms import OpenAI
from langchain.document_loaders import DirectoryLoader
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains import RetrievalQA

In [None]:
# use if u want to use a dropbox doc for the input data else upload mannualy
!wget -q = "link of dropbox"
!unzip -q "path of the folder" -d "path of the folder but wihout the zip command"

**STEP 2 - SETTING UP THE OPENAI API KEY**

In [None]:
import os
from getpass import getpass

# Make the OpenAI API key available through the OPENAI_API_KEY environment
# variable. The key is requested interactively (input hidden) instead of being
# typed into the notebook, and the prompt only appears when the variable is
# missing or empty, so a key that is already configured is never overwritten.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass("Enter your OpenAI API key: ")

**STEP 3 - LOADING THE DATA**

In [None]:
# Build a loader over the article folder: every file matching "./*.txt" is
# read with TextLoader.
loader = DirectoryLoader(
    "/content/new_articles/",
    glob="./*.txt",
    loader_cls=TextLoader,
)

# Load all matched files; the result is what the chunking step splits.
document = loader.load()

**STEP 4 - CHUNKING**

In [None]:
# Split the loaded documents into chunks of size 1000 with an overlap of 200
# between consecutive chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

# `text` holds the resulting chunks and feeds the vector store below.
text = text_splitter.split_documents(document)

**STEP 5 - CREATING THE DB**

In [None]:
# NOTE(review): `embeddings` does not appear to be used in the visible cells;
# kept in case something outside this view relies on it — confirm and remove.
from langchain import embeddings

# Directory the Chroma store is pointed at for its on-disk data.
persist_directory = 'db'

# Embedding model shared by index creation here and the reload further down.
embedding = OpenAIEmbeddings()

# Embed the chunks and build the Chroma vector store from them.
vectordb = Chroma.from_documents(
    documents=text,
    embedding=embedding,
    persist_directory=persist_directory,
)

In [None]:
# Write the vector store to disk, drop the in-memory handle, then reopen the
# store from `persist_directory` so later cells work against the saved copy.
# NOTE(review): newer Chroma releases reportedly persist automatically, which
# would make the explicit persist() call redundant — confirm for the installed version.
vectordb.persist()
vectordb = None
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding,
)

**STEP 6 - MAKING THE RETRIEVER**

In [None]:
# Expose the vector store as a retriever. "k" sets the number of results
# returned per query; a search type (e.g. similarity search) can also be
# specified here.
retriever = vectordb.as_retriever(
    search_kwargs={"k": 2},
)

**STEP 7 - MAKING THE QA CHAIN WITH LANGCHAIN CLASSIC**

In [None]:
# Single LLM instance, reused by the chain below (previously a second,
# identical OpenAI() was constructed inline and `llm` was left unused).
llm = OpenAI()

# Retrieval QA chain of type "stuff", wired to the retriever built earlier.
# return_source_documents=True makes the response carry the retrieved
# documents alongside the answer, so the sources can be printed later.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

In [None]:
# Helper that prints an answer followed by the sources it was drawn from.
def process_llm_response(llm_response):
    """Print the answer and the source of every supporting document.

    Args:
        llm_response: Mapping with a 'result' entry (the answer) and a
            'source_documents' entry (an iterable of objects whose
            ``metadata`` mapping has a 'source' key).

    Returns:
        None. Everything is written to standard output.
    """
    answer = llm_response['result']
    print(answer)

    # Two blank lines separate the answer from the list of sources.
    print('\n\nSources:')
    for doc in llm_response["source_documents"]:
        origin = doc.metadata['source']
        print(origin)

**STEP 8 - GENERATING THE ANSWER**

In [None]:
# Question to ask against the indexed articles.
query = "How much money did Microsoft raise?"

# Run the chain through `invoke` (calling the chain object directly is the
# deprecated path). The question is passed under the chain's "query" input key.
llm_response = qa_chain.invoke({"query": query})

# Print the answer together with the files it was sourced from.
process_llm_response(llm_response)