In [6]:
import os
from langchain.document_loaders import ReadTheDocsLoader, GitbookLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from config import OPENAI_API_KEY
import subprocess

os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [7]:
loader = GitbookLoader('https://docs.uniswap.org/', load_all_paths=True)


In [8]:
raw_documents = loader.load()
text_splitter = RecursiveCharacterTextSplitter(chunk_size = 750,chunk_overlap  = 125,length_function = len)
documents = text_splitter.split_documents(raw_documents)

Fetching text from https://docs.uniswap.org/blog
Fetching text from https://docs.uniswap.org/blog/archive
Fetching text from https://docs.uniswap.org/blog/intro-to-gas-optimization
Fetching text from https://docs.uniswap.org/search
Fetching text from https://docs.uniswap.org/
Fetching text from https://docs.uniswap.org/api/subgraph/guides/examples
Fetching text from https://docs.uniswap.org/api/subgraph/overview
Fetching text from https://docs.uniswap.org/concepts/glossary
Fetching text from https://docs.uniswap.org/concepts/governance/adversarial-circumstances
Fetching text from https://docs.uniswap.org/concepts/governance/changelog
Fetching text from https://docs.uniswap.org/concepts/governance/glossary
Fetching text from https://docs.uniswap.org/concepts/governance/guide-to-voting
Fetching text from https://docs.uniswap.org/concepts/governance/overview
Fetching text from https://docs.uniswap.org/concepts/governance/process
Fetching text from https://docs.uniswap.org/concepts/overvie

In [9]:
##remove V1 and V2 docs
len1 = len(documents)
source = []
for i in documents:
    if '/v2' in i.metadata['source']:
        documents.remove(i)
    elif '/v1' in i.metadata['source']:
        documents.remove(i)
    else:
        source.append(i.metadata['source'])


len2 = len(documents)

##how many docs were removed
print(len1-len2)



310


In [12]:
##print unique elements in source
import numpy as np 
print(np.unique(source))

['https://docs.uniswap.org/api/subgraph/guides/examples'
 'https://docs.uniswap.org/api/subgraph/overview'
 'https://docs.uniswap.org/blog' 'https://docs.uniswap.org/blog/archive'
 'https://docs.uniswap.org/blog/intro-to-gas-optimization'
 'https://docs.uniswap.org/concepts/glossary'
 'https://docs.uniswap.org/concepts/governance/adversarial-circumstances'
 'https://docs.uniswap.org/concepts/governance/changelog'
 'https://docs.uniswap.org/concepts/governance/glossary'
 'https://docs.uniswap.org/concepts/governance/guide-to-voting'
 'https://docs.uniswap.org/concepts/governance/overview'
 'https://docs.uniswap.org/concepts/governance/process'
 'https://docs.uniswap.org/concepts/overview'
 'https://docs.uniswap.org/concepts/protocol/concentrated-liquidity'
 'https://docs.uniswap.org/concepts/protocol/fees'
 'https://docs.uniswap.org/concepts/protocol/integration-issues'
 'https://docs.uniswap.org/concepts/protocol/oracle'
 'https://docs.uniswap.org/concepts/protocol/range-orders'
 'http

In [13]:
##embed remaining docs to vectorstore
embeddings = OpenAIEmbeddings()

vectorstore = FAISS.from_documents(documents, embeddings)

In [16]:
query = "What is UNIswap?"
docs = vectorstore.similarity_search(query)
print(docs[0].page_content)

Funded through an 
Ethereum Foundation grant
Resources
Website
GitHub
Twitter
Reddit
Email
Whitepaper
How it works
Uniswap is made up of a series of ETH-ERC20 exchange contracts. There is exactly one exchange contract per ERC20 token. If a token does not yet have an exchange it can be created by anyone using the Uniswap factory contract. The factory serves as a public registry and is used to look up all token and exchange addresses added to the system.


In [17]:
#save to local file
vectorstore.save_local(f"uniswap_v3_faiss_index")