# LightRAG usage example

## Imports

In [1]:

import os
import json
import logging
import base64
from getpass import getpass

import nest_asyncio
import networkx as nx
from pyvis.network import Network
from langchain_openai import ChatOpenAI
from huggingface_hub import notebook_login, login
from transformers import AutoModel, AutoTokenizer

from lightrag import LightRAG, QueryParam
from lightrag.llm import (
    hf_embedding,
    openai_complete_if_cache,

)
from lightrag.utils import EmbeddingFunc

from utils.config import Configuration

## Setting up LightRAG

### Config loading & workdir creation

In [2]:
# First-run bootstrap: prompt for the credentials and persist them so later
# runs are non-interactive.  secrets.json stores the values in plain text, so
# keep it out of version control.
if not os.path.exists("secrets.json"):
    secrets = {
        # NOTE(review): each secret is wrapped in a one-element list; presumably
        # that is the shape utils.config.Configuration expects -- confirm.
        "samba_nova_api_key": [
            getpass(prompt="Your SambaNova API key (input hidden): ")
        ],
        "huggingface_token": [
            getpass(prompt="Your Hugging Face token (input hidden): ")
        ],
    }
    with open("secrets.json", "w", encoding="utf-8") as f:
        json.dump(secrets, f, ensure_ascii=False, indent=4)
# Make sure an (empty) config file exists before Configuration reads it.
if not os.path.exists("config.json"):
    with open("config.json", "w", encoding="utf-8") as f:
        json.dump({}, f, ensure_ascii=False, indent=4)

config = Configuration("config.json")

# Log in to Hugging Face with the stored token; fall back to the interactive
# widget when the configuration has no such key.
try:
    token = config["huggingface_token"]
    assert isinstance(token, str), "Invalid token type encountered somehow"
    login(token)
    print("Login successful!")
except KeyError:
    notebook_login()

# Allow nested event loops so async code can be driven from notebook cells.
nest_asyncio.apply()
WORKING_DIR = config["processed/lightrag"]
DATA_PATH = config["raw/"]
assert isinstance(
    WORKING_DIR, str
), f"WORKING_DIR should be a string but is {type(WORKING_DIR)}"
assert isinstance(
    DATA_PATH, str
), f"DATA_PATH should be a string but is {type(DATA_PATH)}"
# True when the LightRAG working directory is missing or has no files yet.
# The check targets WORKING_DIR itself (not a hard-coded relative "lightrag"
# path) and is taken before makedirs() creates the directory.
empty = not os.path.exists(WORKING_DIR) or not os.listdir(WORKING_DIR)
os.makedirs(WORKING_DIR, exist_ok=True)

Login successful!


### LightRAG init

In [3]:
async def llm_model_func(
    prompt, system_prompt=None, history_messages=None, sleep_time=0, **kwargs
) -> str:
    """Request a completion from the SambaNova-hosted Llama 3.1 405B model.

    Args:
        prompt: User prompt forwarded to ``openai_complete_if_cache``.
        system_prompt: Optional system prompt, forwarded unchanged.
        history_messages: Earlier conversation messages. ``None`` (the
            default) is replaced by a fresh empty list on every call, so no
            list object is shared between invocations.
        sleep_time: Forwarded unchanged to ``openai_complete_if_cache``.
        **kwargs: Accepted so callers may pass extra keywords, but NOT
            forwarded to the backend call.
            NOTE(review): confirm that dropping these is intentional.

    Returns:
        The value produced by ``openai_complete_if_cache``.
    """
    if history_messages is None:
        history_messages = []
    return await openai_complete_if_cache(
        "Meta-Llama-3.1-405B-Instruct",
        prompt,
        system_prompt=system_prompt,
        history_messages=history_messages,
        api_key=config['samba_nova_api_key'],
        base_url="https://api.sambanova.ai/v1",
        sleep_time=sleep_time,
        # Extra backend options come from the project configuration.
        **config['openai_complete_if_cache_kwargs']
    )


# Load the embedding tokenizer and model once, up front.  Building them inside
# the embedding callback would repeat both from_pretrained() calls every time
# LightRAG asks for embeddings.
EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2"
embedding_tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME)
embedding_model = AutoModel.from_pretrained(EMBEDDING_MODEL_NAME)

# LightRAG instance backed by the SambaNova LLM wrapper and the local
# Hugging Face embedding model; further options come from the configuration.
rag = LightRAG(
    working_dir=WORKING_DIR,
    llm_model_func=llm_model_func,
    embedding_func=EmbeddingFunc(
        # assumed to equal the output width of the embedding model above -- TODO confirm
        embedding_dim=384,
        max_token_size=5000,
        func=lambda texts: hf_embedding(
            texts,
            tokenizer=embedding_tokenizer,
            embed_model=embedding_model,
        ),
    ),
    **config['rag_kwargs']
)

INFO:lightrag:Logger initialized for working directory: data/processed/lightrag
INFO:lightrag:Load KV llm_response_cache with 0 data
INFO:lightrag:Load KV full_docs with 0 data
INFO:lightrag:Load KV text_chunks with 0 data
INFO:nano-vectordb:Init {'embedding_dim': 384, 'metric': 'cosine', 'storage_file': 'data/processed/lightrag/vdb_entities.json'} 0 data
INFO:nano-vectordb:Init {'embedding_dim': 384, 'metric': 'cosine', 'storage_file': 'data/processed/lightrag/vdb_relationships.json'} 0 data
INFO:nano-vectordb:Init {'embedding_dim': 384, 'metric': 'cosine', 'storage_file': 'data/processed/lightrag/vdb_chunks.json'} 0 data


### Inserting data

In [None]:
pdf_path = config['raw/Росконгресс_Рынок_промышленных_роботов_в_мире_и_России_2024_16_стр.pdf']
docx_path = config['raw/СП_496_1325800_2020_Основания_и_фундаменты_зданий_и_сооружений.docx']
# Silence every log record (CRITICAL and below) while inserting, then lift the
# override again so warnings and errors raised by later cells are not
# swallowed for the rest of the kernel session.
logging.disable(logging.CRITICAL)
try:
    rag.insert([pdf_path, docx_path], config=config)
finally:
    logging.disable(logging.NOTSET)

Chunking documents:   0%|          | 0/2 [00:00<?, ?doc/s]

Extracting content from file: data/raw/Росконгресс_Рынок_промышленных_роботов_в_мире_и_России_2024_16_стр.pdf...done!
Extracting content from file: data/raw/СП_496_1325800_2020_Основания_и_фундаменты_зданий_и_сооружений.docx...done!


Generating embeddings:   0%|          | 0/7 [00:00<?, ?batch/s]

Extracting entities from chunks:   0%|          | 0/217 [00:00<?, ?chunk/s]

⠹ Processed 12 chunks, 148 entities(duplicated), 68 relations(duplicated)

## QA test

In [None]:
# Plain chat client (no retrieval) for the SambaNova-hosted Llama 3.1 405B
# model, with streaming enabled.
single = ChatOpenAI(
    model="Meta-Llama-3.1-405B-Instruct",
    base_url="https://api.sambanova.ai/v1",
    api_key=config['samba_nova_api_key'],
    streaming=True,
)

In [None]:
query = "В каких случаях применяют повышение кровли ММГ до уровня подошвы насыпи для производства земляных работ при устройстве оснований и фундаментов на ММГ"

# Answer produced by LightRAG in "local" mode with explicit token budgets.
print('\n## RAG-Answer\n')
local_query_param = QueryParam(
    mode="local",
    max_token_for_global_context=1650,
    max_token_for_local_context=1650,
    max_token_for_text_unit=1450,
)
rag_answer = rag.query(query, param=local_query_param)
print(rag_answer)

# Answer produced by the plain chat client for the same question.
print('\n## -Llama-405B-Answer\n')

baseline_reply = single.invoke(query)
print(baseline_reply.content)

## Miscellaneous

### Vision LLM test

In [None]:
# Chat client for the SambaNova-hosted Llama 3.2 11B vision model, with
# streaming enabled.
vision = ChatOpenAI(
    model="Llama-3.2-11B-Vision-Instruct",
    base_url="https://api.sambanova.ai/v1",
    api_key=config['samba_nova_api_key'],
    streaming=True,
)

In [None]:
image_path = config['raw/image.jpg']

# Read the image bytes and embed them in a base64 data URL.
with open(image_path, "rb") as image_file:
    encoded = base64.b64encode(image_file.read())
url = 'data:image/jpeg;base64,' + encoded.decode("utf-8")

# One user message made of a text part and an image part.
vision_request = [
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "What do you see in this image"},
            {"type": "image_url", "image_url": {"url": url}},
        ],
    }
]
vision_reply = vision.invoke(vision_request)
print(vision_reply.content)

### Saving the knowledge graph

In [None]:

# Input and output locations inside the LightRAG working directory.
graphml_path = f'{WORKING_DIR}/graph_chunk_entity_relation.graphml'
html_path = f'{WORKING_DIR}/knowledge_graph.html'

# Read the stored graph into a NetworkX object.
G = nx.read_graphml(graphml_path)

# Build a notebook-mode Pyvis network and copy the NetworkX graph into it.
net = Network(notebook=True)
net.from_nx(G)

# Write the visualisation to an HTML file (this cell only saves it).
net.save_graph(html_path)

In [None]:
# Bare expression: the cell output is the repr of the graph loaded by nx.read_graphml.
G