In [1]:
from langchain.vectorstores import Chroma
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.llms import OpenAI
from langchain.chains import RetrievalQA
from langchain.document_loaders import WikipediaLoader

import os
from getpass import getpass

# Never hard-code the key in the notebook. Reuse OPENAI_API_KEY when the
# environment already provides a non-empty value; otherwise prompt for it
# (getpass does not echo the input). Assigning an empty string
# unconditionally would clobber a key exported in the shell.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")

# wikipedia loader

In [7]:

# Load Wikipedia pages for the game's title; load_max_docs=100 presumably
# caps how many pages are fetched (confirm against the loader's docs).
loader = WikipediaLoader(
    query="Metal_Gear_Solid_3:_Snake_Eater",
    load_max_docs=100,
)
documents = loader.load()

# url loader

In [3]:
from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader

# Starting page for the load: the memory examples section of the JS docs.
# Note: this rebinds `loader` and `documents`, replacing whatever a
# previously executed loader cell stored under those names.
url = 'https://js.langchain.com/docs/modules/memory/examples/'
loader = RecursiveUrlLoader(url=url)
documents = loader.load()

In [4]:
# `docs` is only bound much later (by the retriever cell), so on a fresh
# kernel the count has to come from `documents`, the list the loader returned.
len(documents)

# Peek at the first 50 characters of the first loaded page.
documents[0].page_content[:50]

'\n\n\n\n\nEntity Memory | 🦜️🔗 Langchain\n\n\n\n\n\nSkip to ma'

Now let's try a more extensive example: crawling from the root of the docs directory.

We will skip everything under the `api/` directory.

In [7]:
# Start from the docs root this time and pass the API reference subtree
# as a directory to exclude.
url = 'https://js.langchain.com/docs/'
exclude_dirs = ['https://js.langchain.com/docs/api/']
loader = RecursiveUrlLoader(url=url, exclude_dirs=exclude_dirs)
documents = loader.load()


In [8]:
# Number of documents returned by the most recently executed loader cell.
len(documents)

179

# Inspect the first loaded page. `docs` is not bound until the retriever
# cell further down, so read from `documents` (the loader result) instead.
documents[0].page_content[:50]

documents[0].metadata

In [24]:

# Split the loaded documents into overlapping chunks before embedding them
# (chunk_size=1000, chunk_overlap=200).
# `documents` is whatever the most recently executed loader cell produced.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
texts = text_splitter.split_documents(documents)


In [25]:

"""## create the DB"""

# Directory handed to Chroma as persist_directory, so the embeddings are
# stored on disk.
persist_directory = 'db'

# OpenAI embeddings for now; the plan is to swap in local embeddings later.
embedding = OpenAIEmbeddings()

# Embed the chunks and build the store in a single call.
vectordb = Chroma.from_documents(
    documents=texts,
    embedding=embedding,
    persist_directory=persist_directory,
)

# Persist the store to disk, then drop the in-memory handle.
vectordb.persist()
vectordb = None

# Reopen the persisted database from disk; from here on it is used as normal.
vectordb = Chroma(
    persist_directory=persist_directory,
    embedding_function=embedding,
)

In [33]:
"""## Make a retriever"""

# Retriever with default settings, used here for a one-off sanity query.
retriever = vectordb.as_retriever()

docs = retriever.get_relevant_documents("The plot of metal gear solid 3")

# NOTE(review): a bare expression in the middle of a cell is not displayed;
# only the cell's final expression is echoed. Move this to its own cell
# (or display it explicitly) to actually see the count.
len(docs)

# Rebind `retriever` with search_kwargs k=5 (presumably the number of
# documents returned per query). This is the retriever the QA chain uses.
retriever = vectordb.as_retriever(search_kwargs={"k": 5})

# NOTE(review): also not displayed, for the same reason as len(docs) above.
retriever.search_type

# Final expression of the cell: this is the value that gets echoed.
retriever.search_kwargs

{'k': 5}

In [34]:
"""## Make a chain"""

# Question-answering chain: the "stuff" chain type on top of `retriever`,
# configured to return the source documents alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=OpenAI(),
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)

## Cite sources
def process_llm_response(llm_response):
    """Print a chain answer followed by the sources of its documents.

    Args:
        llm_response: Mapping returned by the QA chain. Must contain
            ``'result'`` (the answer text) and ``'source_documents'`` (an
            iterable of objects exposing a ``metadata`` dict).

    Returns:
        None. Everything is written to stdout.

    Raises:
        KeyError: If ``'result'`` or ``'source_documents'`` is missing.
    """
    print(llm_response['result'])
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        # A document whose metadata lacks 'source' would otherwise abort the
        # listing halfway with a KeyError; print a placeholder instead.
        print(source.metadata.get('source', '<unknown source>'))

In [35]:
# full example
# Ask one question end to end: run the chain on the query, then print the
# answer together with the sources of the retrieved documents.
query = "What is the Shagohod??"
llm_response = qa_chain(query)
process_llm_response(llm_response)

 The Shagohod is a nuclear-powered tank designed by the Soviet Union in the game Metal Gear Solid 3.


Sources:
https://en.wikipedia.org/wiki/List_of_Metal_Gear_characters
https://en.wikipedia.org/wiki/Metal_Gear_Solid:_The_Twin_Snakes
https://en.wikipedia.org/wiki/The_Boss_(Metal_Gear)
https://en.wikipedia.org/wiki/Metal_Gear_Solid_Delta:_Snake_Eater
https://en.wikipedia.org/wiki/Metal_Gear_Solid:_The_Twin_Snakes


In [36]:
# break it down
# Same chain call, but echo the raw response mapping instead of the formatted
# printout, to show its 'query', 'result' and 'source_documents' entries.
query = "Who is The Boss?"
llm_response = qa_chain(query)
# process_llm_response(llm_response)
llm_response

{'query': 'Who is The Boss?',
 'result': " The Boss is a fictional character from Konami's Metal Gear series who made her first appearance in Metal Gear Solid 3: Snake Eater. She is a legendary American soldier, founder and leader of the Cobra Unit, the biological mother of Ocelot, mentor and mother figure to Naked Snake, and is known as the mother of the U.S. special forces.",
 'source_documents': [Document(lc_kwargs={'page_content': "The Boss (ザ・ボス, Za Bosu), also known as The Joy (ザ・ジョイ, Za Joi), is a fictional character from Konami's Metal Gear series who made her first appearance in Metal Gear Solid 3: Snake Eater.", 'metadata': {'title': 'The Boss (Metal Gear)', 'summary': "The Boss (ザ・ボス, Za Bosu), also known as The Joy (ザ・ジョイ, Za Joi), is a fictional character from Konami's Metal Gear series who made her first appearance in Metal Gear Solid 3: Snake Eater.\n\n", 'source': 'https://en.wikipedia.org/wiki/The_Boss_(Metal_Gear)'}}, page_content="The Boss (ザ・ボス, Za Bosu), also known

In [5]:
# To clean up, you can delete the collection and then call persist().
# Requires `vectordb` from the "create the DB" cell to be live in this kernel;
# on a fresh kernel the first call fails with NameError.
vectordb.delete_collection()
vectordb.persist()

NameError: name 'vectordb' is not defined