In [None]:
from llama_index import VectorStoreIndex, SimpleDirectoryReader, StorageContext, load_index_from_storage, download_loader, GPTVectorStoreIndex
from llama_hub.github_repo import GithubRepositoryReader, GithubClient
import os
import openai
import pickle
import nest_asyncio
nest_asyncio.apply()


## Raw .txt Files

In [None]:
# Build the vector index over the files in ./data once, persist it, and then
# always load the index back from local storage.
#
# Building the index is the expensive step, so it is skipped when a persisted
# copy already exists. Delete PERSIST_DIR to force a rebuild after the
# contents of ./data change. Note that `documents` is only defined on the
# run that actually builds the index.
PERSIST_DIR = "./storage"

if not os.path.isdir(PERSIST_DIR):
    # Loading from a directory
    documents = SimpleDirectoryReader('data').load_data()

    # Construct a simple vector index
    index = VectorStoreIndex.from_documents(documents)

    # Saving Index for future use; the directory is passed explicitly so the
    # write path and the read path below cannot drift apart.
    index.storage_context.persist(persist_dir=PERSIST_DIR)

# Loading Index from local storage
storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
index = load_index_from_storage(storage_context)


In [None]:
# Ask the index built from the raw .txt files a question and print the response.
question = "What blockchains does Dune support?"
query_engine = index.as_query_engine()
response = query_engine.query(question)
print(response)


## GitHub Repo

In [None]:
# Fetch the GithubRepositoryReader loader through download_loader. The return
# value is discarded here; the GithubRepositoryReader class used in the cells
# below is the one imported from llama_hub.github_repo at the top of the notebook.
download_loader("GithubRepositoryReader")

# The GitHub token is read from the environment so it never appears in the
# notebook. Fail early with an actionable message (same exception type as a
# plain os.environ lookup) instead of a bare KeyError: 'GH_TOKEN'.
if "GH_TOKEN" not in os.environ:
    raise KeyError(
        "GH_TOKEN is not set; export a GitHub API token in the environment "
        "before running this cell"
    )
github_client = GithubClient(os.environ['GH_TOKEN'])


In [None]:
# Load the Markdown files under docs/ from the duneanalytics/docs repository
# (branch "master") and cache the result as a pickle so that re-running the
# notebook does not re-crawl the repository.
# Delete DOCS_CACHE to force a fresh download.
DOCS_CACHE = "dune_docs.pkl"

# Reader restricted to the "docs" directory and ".md" files.
loader = GithubRepositoryReader(
    github_client,
    owner="duneanalytics",
    repo="docs",
    filter_directories=(["docs"], GithubRepositoryReader.FilterType.INCLUDE),
    filter_file_extensions=(
        [".md"], GithubRepositoryReader.FilterType.INCLUDE),
    verbose=True,
    concurrent_requests=10,
)

if os.path.exists(DOCS_CACHE):
    # NOTE: pickle.load can execute arbitrary code. This is acceptable only
    # because the file is written by this notebook; never point DOCS_CACHE at
    # a file obtained from an untrusted source.
    with open(DOCS_CACHE, "rb") as f:
        docs = pickle.load(f)
else:
    docs = loader.load_data(branch="master")

    with open(DOCS_CACHE, "wb") as f:
        pickle.dump(docs, f)


In [None]:
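# If the pickled docs already exist (the loader cell above writes "dune_docs.pkl"),
# uncomment to load them instead of fetching from GitHub again:
# if os.path.exists("dune_docs.pkl"):
#    with open("dune_docs.pkl", "rb") as f:
#        docs = pickle.load(f)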


In [None]:
# Build a vector index over the GitHub docs. This rebinds `index`, so queries
# from here on run against the GitHub docs rather than the raw .txt files.
# VectorStoreIndex is used for consistency with the raw-files section;
# GPTVectorStoreIndex is assumed to be a legacy alias of the same class in the
# installed llama_index version — TODO confirm.
index = VectorStoreIndex.from_documents(docs)


In [None]:
# Ask the index built from the GitHub docs a question and print the response.
question = "what is a Dune wizard?"
query_engine = index.as_query_engine()
response = query_engine.query(question)
print(response)
