In [None]:
from llama_index.readers.obsidian import ObsidianReader
from llama_index.core.memory.chat_memory_buffer import MessageRole
from llama_index.core import SimpleDirectoryReader, KnowledgeGraphIndex, VectorStoreIndex, StorageContext
from llama_index.core.graph_stores import SimpleGraphStore
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.storage.index_store import SimpleIndexStore
from llama_index.core.vector_stores import SimpleVectorStore
from llama_index.core import Settings
from IPython.display import Markdown, display
from llama_index.llms.ollama import Ollama
from tqdm.notebook import tqdm
import time
import os
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
import logging
import sys
import ipywidgets as widgets
import json
from llama_index.core.callbacks import CallbackManager
from llama_index.core.callbacks import LlamaDebugHandler
from llama_index.core import ServiceContext
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import KnowledgeGraphRAGRetriever

In [None]:
from llama_index.core import (
    load_index_from_storage,
    load_indices_from_storage,
    load_graph_from_storage,
)

In [None]:
# Send INFO-level log records to stdout through exactly one root handler.
# `force=True` replaces whatever handlers are already attached to the root
# logger, so the configuration takes effect even if one was pre-installed and
# re-running this cell never stacks up additional handlers.
# The former extra `addHandler(StreamHandler(stdout))` call was removed: together
# with `basicConfig` it attached two stdout handlers and printed each record twice.
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

# Load documents with SimpleDirectoryReader

In [None]:
# Load the vault folder's files as documents; they feed the vector index built
# further down.
simple_documents = SimpleDirectoryReader(
    input_dir=r"path_to_your_obsisian_vault",
).load_data()

# Build the knowledge graph

In [None]:
# Keep a key that is already exported in the environment; only fall back to the
# placeholder when none is set. A plain assignment here would overwrite a real
# key with the placeholder string.
# Prefer exporting OPENAI_API_KEY before starting Jupyter, and never commit a
# real key in this cell.
os.environ.setdefault("OPENAI_API_KEY", "your_openai_api_key")

In [None]:
# Folder containing the Obsidian notes that will make up the graph.
filepath = r"path_to_your_obsisian_vault"

# Load the notes as documents for the knowledge graph.
graph_files = ObsidianReader(filepath).load_data()

# Print a short summary instead of the whole list: dumping every loaded
# document floods the cell output and can store the notes' content in the
# saved notebook.
print(f"Loaded {len(graph_files)} documents from {filepath}")
for doc in graph_files[:5]:
    print(f"  - {doc.doc_id}")

# Set LLM to chat with your graph (OpenAI)

In [None]:
# OpenAI chat model used for answering; temperature 0 and a 3000-token cap.
llm = OpenAI(
    model="gpt-4o",
    temperature=0,
    max_tokens=3000,
)

# Register the model and the chunk size on the global llama_index settings.
Settings.llm = llm
Settings.chunk_size = 512

# Set local embedding model

In [None]:
# Use the bge-base Hugging Face model for embeddings.
Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-base-en-v1.5",
)
# Alternative kept for reference: OpenAI embeddings.
# Settings.embed_model = OpenAIEmbedding(model="text-embedding-3-small")


# Instantiate graph store 

In [None]:
# Fresh graph store, wrapped in the storage context that the knowledge-graph
# index is built on.
graph_store = SimpleGraphStore()
graph_storage_context = StorageContext.from_defaults(
    graph_store=graph_store,
)

# Construct Index

## Construct Vector Store Index

In [None]:
# Build the vector index from the plain documents, with a progress bar.
simple_doc_index = VectorStoreIndex.from_documents(
    documents=simple_documents,
    show_progress=True,
)

In [None]:
# Give the index a fixed id, then write its storage context to ./vector.
simple_doc_index.set_index_id("vector_index")
simple_doc_index.storage_context.persist(persist_dir="./vector")

## Construct knowledge graph index 

In [None]:
def process_document(doc):
    """Print the id of ``doc`` and return the same object unchanged.

    Args:
        doc: Any object exposing a ``doc_id`` attribute.

    Returns:
        The ``doc`` argument itself (not a copy).
    """
    message = f"Processing document: {doc.doc_id}"
    print(message)
    return doc

In [None]:
# Requires `graph_files` from the ObsidianReader cell; report how many
# documents are about to be indexed.
n_graph_docs = len(graph_files)
print(f"Starting to process {n_graph_docs} documents...")

In [None]:
# Build the knowledge-graph index on the graph storage context.
# Every document is passed through `process_document` first (which prints its
# id); the resulting list is complete before `from_documents` is called.
graph_index = KnowledgeGraphIndex.from_documents(
    documents=list(map(process_document, graph_files)),
    max_triplets_per_chunk=2,
    storage_context=graph_storage_context,
    include_embeddings=True,
    show_progress=True,
)


In [None]:
# Write the graph storage context to the "knowledge-graph" folder.
graph_storage_context.persist("knowledge-graph")

# Load storage contexts

## Load vector storage context

In [None]:
# Rebuild the vector storage context from the three stores saved under "vector".
vector_storage_context = StorageContext.from_defaults(
    docstore=SimpleDocumentStore.from_persist_dir(persist_dir="vector"),
    vector_store=SimpleVectorStore.from_persist_dir(persist_dir="vector"),
    index_store=SimpleIndexStore.from_persist_dir(persist_dir="vector"),
)

## Load graph storage context

In [None]:
# Rebuild the graph storage context from the "knowledge-graph" folder.
# The graph store must be reloaded explicitly: without `graph_store=...` the
# context is created with a new, empty graph store, so the index loaded from it
# would have none of the persisted triplets.
graph_storage_context = StorageContext.from_defaults(
    docstore=SimpleDocumentStore.from_persist_dir(persist_dir="knowledge-graph"),
    vector_store=SimpleVectorStore.from_persist_dir(
        persist_dir="knowledge-graph"
    ),
    index_store=SimpleIndexStore.from_persist_dir(persist_dir="knowledge-graph"),
    graph_store=SimpleGraphStore.from_persist_dir(persist_dir="knowledge-graph"),
)

# Load index

## Load vector index

In [None]:
# Load the vector index by the id it was given before persisting
# ("vector_index"), so the lookup stays unambiguous even if the index store
# ever contains more than one index.
simple_index = load_index_from_storage(
    vector_storage_context,
    index_id="vector_index",
)

### Retrieve inside vector index

In [None]:
# Query engine over the vector index: retrieve the 8 most similar chunks and
# combine them with the "tree_summarize" response mode.
# NOTE(review): `include_text` and `embedding_mode` were dropped here. They were
# copied from the knowledge-graph query engine and look like graph-retriever
# options with no effect on a vector index - confirm against the
# VectorIndexRetriever signature.
simple_query_engine = simple_index.as_query_engine(
    response_mode="tree_summarize",
    similarity_top_k=8,
)

In [None]:
# Retriever over the vector index. It is created here but the query below goes
# through `simple_query_engine`, not through this retriever.
# NOTE(review): `retriever_mode` looks like a knowledge-graph option; confirm it
# has any effect on a vector index.
simple_rag_retriever = simple_index.as_retriever(
    verbose=True,
    retriever_mode="hybrid",
)

# Ask the vector query engine the (French) question and keep the answer.
response = simple_query_engine.query("Quel est le sujet principal des documents ?")

In [None]:
# Render the latest `response` in bold.
vector_answer_md = Markdown(f"<b>{response}</b>")
display(vector_answer_md)

## Load graph index

In [None]:
# Reload the knowledge-graph index from the graph storage context.
graph_index = load_index_from_storage(
    storage_context=graph_storage_context,
)

### Retrieve inside graph index

In [None]:
# Query engine over the knowledge-graph index: include the source text of
# matched chunks, keep the top 8 similarity hits, and combine them with the
# "tree_summarize" response mode.
# The mode keyword is spelled `retriever_mode`, matching the `as_retriever`
# call on this same index in the next cell; the previous `embedding_mode`
# spelling did not match it.
# NOTE(review): confirm `retriever_mode` against the KGTableRetriever signature.
graph_query_engine = graph_index.as_query_engine(
    include_text=True,
    response_mode="tree_summarize",
    retriever_mode="hybrid",
    similarity_top_k=8,
)

In [None]:
# Retriever over the knowledge-graph index in "hybrid" mode. It is created here
# but the query below goes through `graph_query_engine`, not this retriever.
graph_rag_retriever = graph_index.as_retriever(
    verbose=True,
    retriever_mode="hybrid",
)

# Ask the graph query engine the same (French) question and keep the answer.
response = graph_query_engine.query("Quel est le sujet principal des documents ?")

In [None]:
# Render the latest `response` in bold.
graph_answer_md = Markdown(f"<b>{response}</b>")
display(graph_answer_md)