In [None]:
!pip install llama-index transformers accelerate bitsandbytes sentence-transformers --quiet
!pip install llama-index-llms-huggingface --quiet
!pip install llama-index-embeddings-huggingface --quiet
!pip install llama-index-graph-stores-neo4j --quiet

# **Traditional RAG**

In [None]:
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# Hugging Face model id shared by the tokenizer, the causal LM and the
# LlamaIndex LLM wrapper created in the following cells.
# Alternative checkpoint kept for reference (swap in by uncommenting):
# model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"
model_name = "deepseek-ai/deepseek-llm-7b-chat"

In [None]:
# Load the tokenizer and the causal LM for `model_name`, quantized to 8-bit.
# Passing `load_in_8bit=True` directly to `from_pretrained` is deprecated in
# transformers; the supported form is a `BitsAndBytesConfig` supplied through
# `quantization_config`.
from transformers import BitsAndBytesConfig

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    torch_dtype="auto",
)

In [None]:
import torch
from llama_index.core.prompts.prompts import SimpleInputPrompt

# Wrap the causal LM in a LlamaIndex `HuggingFaceLLM`.
system_prompt = "You are a helpful assistant."
# The wrapper template forwards the query text unchanged.
query_wrapper_prompt = SimpleInputPrompt("{query_str}")

llm = HuggingFaceLLM(
    # Reuse the `model` / `tokenizer` objects loaded in the previous cell.
    # Passing only the names here would make the wrapper load a second copy
    # of the checkpoint while the first one sits unused in memory.
    model=model,
    tokenizer=tokenizer,
    model_name=model_name,
    tokenizer_name=model_name,
    context_window=2048,
    max_new_tokens=256,
    # Greedy decoding. `temperature` is omitted because it only applies when
    # sampling is enabled; combining it with `do_sample=False` just triggers a
    # transformers warning.
    generate_kwargs={"do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
)

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# Embedding model handed to the indexes built further down.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)

In [None]:
from llama_index.core import Settings
# Register the locally created models as the LlamaIndex global defaults.
Settings.embed_model = embed_model
Settings.llm = llm

In [None]:
from llama_index.core import VectorStoreIndex
from llama_index.core.schema import Document
from llama_index.core.vector_stores import SimpleVectorStore
from llama_index.core import StorageContext

# Toy corpus of three kinship facts. Answering the query below requires
# combining all of them (A -> B -> C -> D).
documents = [
    Document(text="A is daughter of B."),
    Document(text="B is sister of C."),
    Document(text="D is daughter of C."),
]

# In-memory vector store backing the index.
vector_store = SimpleVectorStore()
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Embed the documents and build the vector index. The LLM is not passed here
# because index construction only needs the embedding model.
index = VectorStoreIndex.from_documents(
    documents,
    embed_model=embed_model,
    storage_context=storage_context,
)

# `include_text` and `embedding_mode` are knowledge-graph retriever options and
# have no effect on a vector index, so they are not passed. The LLM is supplied
# explicitly to the query engine, which is where it is actually used.
query_engine = index.as_query_engine(
    llm=llm,
    response_mode="tree_summarize",
    similarity_top_k=5,
)
response = query_engine.query("Who is cousin of A?")

In [None]:
from IPython.display import display, Markdown
# Show the query response in bold in the cell output.
answer_html = f"<b>{response}</b>"
display(Markdown(answer_html))

# **GraphRAG**

In [None]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# Embedding model handed to the knowledge-graph index built further down.
embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5",
)

In [None]:
from llama_index.core.schema import Document

# Three kinship facts, wrapped as one Document per sentence.
fact_sentences = (
    "A is daughter of B.",
    "B is sister of C.",
    "D is daughter of C.",
)
documents = [Document(text=sentence) for sentence in fact_sentences]

In [None]:
from llama_index.llms.huggingface import HuggingFaceLLM
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
# Hugging Face model id shared by the tokenizer, the causal LM and the
# LlamaIndex LLM wrapper created in the following cells.
model_name = "deepseek-ai/deepseek-llm-7b-chat"

In [None]:
# Load the tokenizer and the causal LM for `model_name`, quantized to 8-bit.
# Passing `load_in_8bit=True` directly to `from_pretrained` is deprecated in
# transformers; the supported form is a `BitsAndBytesConfig` supplied through
# `quantization_config`.
# NOTE(review): if the Traditional RAG section already ran in this kernel, this
# repeats that load for the same checkpoint. Consider skipping this cell then.
from transformers import BitsAndBytesConfig

tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    torch_dtype="auto",
)

In [None]:
import torch
from llama_index.core.prompts.prompts import SimpleInputPrompt

# Wrap the causal LM in a LlamaIndex `HuggingFaceLLM`.
system_prompt = "You are a helpful assistant."
# The wrapper template forwards the query text unchanged.
query_wrapper_prompt = SimpleInputPrompt("{query_str}")

llm = HuggingFaceLLM(
    # Reuse the `model` / `tokenizer` objects loaded in the previous cell.
    # Passing only the names here would make the wrapper load a second copy
    # of the checkpoint while the first one sits unused in memory.
    model=model,
    tokenizer=tokenizer,
    model_name=model_name,
    tokenizer_name=model_name,
    context_window=2048,
    max_new_tokens=256,
    # Greedy decoding. `temperature` is omitted because it only applies when
    # sampling is enabled; combining it with `do_sample=False` just triggers a
    # transformers warning.
    generate_kwargs={"do_sample": False},
    system_prompt=system_prompt,
    query_wrapper_prompt=query_wrapper_prompt,
)

In [None]:
from llama_index.core import Settings
# Register the locally created models as the LlamaIndex global defaults.
# The embedding model is set as well (matching the Traditional RAG section) so
# this section does not depend on the library's default embedder when it is
# run on its own.
Settings.llm = llm
Settings.embed_model = embed_model

In [None]:
from llama_index.core import StorageContext
# from llama_index.core.graph_stores import SimpleGraphStore
from llama_index.core import KnowledgeGraphIndex

# graph_store = SimpleGraphStore()
from llama_index.graph_stores.neo4j import Neo4jGraphStore

import os
from getpass import getpass

# Neo4j connection settings are read from the environment so that no
# credentials are stored in the notebook. The password falls back to an
# interactive prompt when NEO4J_PASSWORD is not set.
username = os.environ.get("NEO4J_USERNAME", "neo4j")
password = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")
url = os.environ.get("NEO4J_URI", "bolt://localhost:7687")
database = os.environ.get("NEO4J_DATABASE", "neo4j")

# Graph store that the knowledge-graph index writes to, and the storage
# context that carries it.
graph_store = Neo4jGraphStore(
    username=username,
    password=password,
    url=url,
    database=database,
)
storage_context = StorageContext.from_defaults(graph_store=graph_store)

In [None]:
# Build the knowledge-graph index from the documents, using the graph-backed
# storage context and the local LLM and embedding model.
kg_build_options = dict(
    max_triplets_per_chunk=3,
    include_embeddings=True,
    storage_context=storage_context,
    embed_model=embed_model,
    llm=llm,
)
index = KnowledgeGraphIndex.from_documents(documents, **kg_build_options)

In [None]:
# Query the knowledge graph.
# The retrieval mode is selected with `retriever_mode`; the previously used
# `embedding_mode` keyword is not a retriever parameter and was silently
# ignored.
query_engine = index.as_query_engine(
    include_text=True,
    response_mode="tree_summarize",
    retriever_mode="hybrid",
    similarity_top_k=5,
)
response = query_engine.query(
    "Who is cousin of A?",
)

In [None]:
from IPython.display import display, Markdown
# Show the query response in bold in the cell output.
answer_html = f"<b>{response}</b>"
display(Markdown(answer_html))