In [None]:
from dotenv import load_dotenv

load_dotenv()

### Generate nodes and push to vector store
**Note:** If a VectorStore already exists on disk, skip ahead to the "Load VectorStore indexes from disk" step.

In [None]:
from ingest_emails import emails_to_documents

docs = emails_to_documents(row_limit=5)

In [None]:
from llama_index.schema import MetadataMode

# Show how the same document is rendered for the LLM and for the embedding model
for banner, mode in (
    ("The LLM sees this: \n", MetadataMode.LLM),
    ("The Embedding model sees this: \n", MetadataMode.EMBED),
):
    print(banner, docs[1].get_content(metadata_mode=mode))

In [None]:
from create_vector_store import parse_nodes_from_docs

nodes = parse_nodes_from_docs(docs)

In [None]:
from create_vector_store import create_index, save_index

# Index nodes and persist index to disk
index = create_index(nodes)
save_index(index)

### Load VectorStore indexes from disk

In [None]:
from create_vector_store import load_index

index = load_index()

### Retrieve similar docs from store

In [None]:
from llama_index.indices.vector_store.retrievers.retriever import VectorIndexRetriever

def search_docstore(index, term, similarity_top_k=3):
    """Retrieve the entries in ``index`` that best match ``term``.

    Args:
        index: Index handed to ``VectorIndexRetriever`` (presumably the vector
            index built or loaded earlier in this notebook).
        term: Query text passed to the retriever.
        similarity_top_k: Number of top matches requested from the retriever.
            Defaults to 3, the value that was previously hard-coded here.

    Returns:
        Whatever ``VectorIndexRetriever.retrieve`` returns for ``term``.
    """
    retriever = VectorIndexRetriever(
        index=index,
        similarity_top_k=similarity_top_k,
    )
    return retriever.retrieve(term)

In [None]:
search_docstore(index=index, term="Which docs mention meetings?")

### Knowledge Graph Test

In [None]:
from ingest_emails import emails_to_documents

# Rebinds `docs` (first assigned above with row_limit=5) using row_limit=100
# for the knowledge-graph cells that follow.
docs = emails_to_documents(row_limit=100)

In [None]:
from create_vector_store import parse_nodes_from_docs

nodes = parse_nodes_from_docs(docs)

In [None]:
# from llama_index import KnowledgeGraphIndex, ServiceContext
# from llama_index.llms import OpenAI, Anthropic
# from llama_index.storage.storage_context import StorageContext
# from llama_index.graph_stores import SimpleGraphStore

# from IPython.display import Markdown, display

# # llm = OpenAI(temperature=0, model="text-davinci-002")
# llm = Anthropic(model="claude-2", temperature=0, max_tokens=512)
# service_context = ServiceContext.from_defaults(llm=llm, chunk_size=512)

# graph_store = SimpleGraphStore()
# storage_context = StorageContext.from_defaults(graph_store=graph_store)

# # NOTE: can take a while!
# index = KnowledgeGraphIndex(
#     nodes,
#     max_triplets_per_chunk=2,
#     storage_context=storage_context,
#     service_context=service_context,
# )

In [None]:
# index.storage_context.persist(persist_dir="../storage")

In [None]:
# query_engine = index.as_query_engine(include_text=False, response_mode="tree_summarize")
# response = query_engine.query(
#     "Tell me more about Phillip",
# )

In [None]:
# display(Markdown(f"<b>{response}</b>"))

### Visualise graph

In [None]:
# ## create graph
# from pyvis.network import Network

# g = index.get_networkx_graph()
# net = Network(notebook=True, cdn_resources="in_line", directed=True)
# net.from_nx(g)
# net.show("example.html")

### Query with embeddings

In [None]:
from llama_index import KnowledgeGraphIndex, ServiceContext
from llama_index.llms import OpenAI, Anthropic
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import SimpleGraphStore

from IPython.display import Markdown, display

# llm = OpenAI(temperature=0, model="text-davinci-002")
llm = Anthropic(model="claude-2", temperature=0, max_tokens=512)
service_context = ServiceContext.from_defaults(llm=llm, chunk_size=512)

graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)

# NOTE: can take a while!
# storage_context is passed explicitly so the graph store created above is
# actually attached to the index (it was previously built but never used);
# it is the context persisted via index.storage_context in the next cell.
index = KnowledgeGraphIndex(
    nodes,
    max_triplets_per_chunk=2,
    storage_context=storage_context,
    service_context=service_context,
    include_embeddings=True,
)

In [None]:
index.storage_context.persist(persist_dir="../storage")

In [None]:
from create_vector_store import create_index, save_index

# Index nodes and persist index to disk
vec_index = create_index(nodes)
save_index(vec_index)

In [None]:
# query using the top 5 triplets (similarity_top_k) plus keywords (duplicate triplets are removed)
query_engine = index.as_query_engine(
    include_text=True,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=5,
)

In [None]:
# query = "What are the latest tasks that Phillip Allen has requested and who was responsible for fulfulling those tasks?"
query = "What actions have been assigned by Phillip Allen, which individuals were these actions assigned to and if a date or timeframe was specified, what was it?"
response = query_engine.query(query)
display(Markdown(f"<b>{response}</b>"))

In [None]:
query = "What are Phillip Allen's roles and responsibilities?"
response = query_engine.query(query)
display(Markdown(f"<b>{response}</b>"))

In [None]:
## create graph
from pyvis.network import Network

g = index.get_networkx_graph()
net = Network(notebook=True, cdn_resources="in_line", directed=True)
net.from_nx(g)
net.show("example.html")