# KG generation with customized LM

In [4]:
import os
from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext
from llama_index.llms import HuggingFaceLLM
from llama_index.prompts import PromptTemplate
from transformers import BitsAndBytesConfig
from IPython.display import Markdown, display
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    pipeline,
    BitsAndBytesConfig
)
from typing import Optional, List, Mapping, Any, Tuple
from langchain.embeddings.huggingface import HuggingFaceBgeEmbeddings
from llama_index import (
    ServiceContext, 
    SimpleDirectoryReader, 
#     LangchainEmbedding, 
#     ListIndex,
    KnowledgeGraphIndex
)
from llama_index.callbacks import CallbackManager
from llama_index.llms import (
    CustomLLM, 
    CompletionResponse, 
    CompletionResponseGen,
    LLMMetadata,
)
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore
from llama_index.llms.base import llm_completion_callback

  from .autonotebook import tqdm as notebook_tqdm


### Preparation

In [3]:
# For OpenAI

import os

# os.environ["OPENAI_API_KEY"], handled in openrc reading

import logging
import sys

# Route log records at INFO level and above to stdout.
logging.basicConfig(level=logging.INFO, stream=sys.stdout)

from llama_index import (
    VectorStoreIndex,
    SimpleDirectoryReader,
    KnowledgeGraphIndex,
    ServiceContext,
    set_global_service_context
)

from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore

import logging
import sys

from IPython.display import Markdown, display


from llama_index.llms import OpenAI


# LLM configuration: OpenAI model "text-davinci-002" with temperature 0.
llm = OpenAI(model="text-davinci-002", temperature=0)

# Bundle the LLM with a chunk size of 512 and register the bundle as the
# global service context.
service_context = ServiceContext.from_defaults(chunk_size=512, llm=llm)
set_global_service_context(service_context)

### Create nebula space

In [2]:
from nebula3.gclient.net import Connection
from nebula3.gclient.net.SessionPool import SessionPool
from nebula3.Config import SessionPoolConfig
from nebula3.common.ttypes import ErrorCode
import time
import os

In [6]:
# Connection settings for the local NebulaGraph instance.
# setdefault() keeps any value that is already exported in the environment, so
# real credentials are not overwritten by these local-development defaults.
os.environ.setdefault("NEBULA_USER", "root")
os.environ.setdefault("NEBULA_PASSWORD", "nebula")
os.environ.setdefault("GRAPHD_HOST", "127.0.0.1")
os.environ.setdefault("GRAPHD_PORT", "9669")
# Derive the combined address from host and port so the values stay consistent.
os.environ.setdefault(
    "NEBULA_ADDRESS",
    f'{os.environ["GRAPHD_HOST"]}:{os.environ["GRAPHD_PORT"]}',
)


In [7]:
config = SessionPoolConfig()

# Read the graphd endpoint once. The port is stored as text in the environment
# and is converted to an integer before it is handed to the client.
graphd_host = os.environ["GRAPHD_HOST"]
graphd_port = int(os.environ["GRAPHD_PORT"])

# prepare space: a one-off connection is used only to issue CREATE SPACE
conn = Connection()
conn.open(graphd_host, graphd_port, 1000)
try:
    auth_result = conn.authenticate(os.environ["NEBULA_USER"], os.environ["NEBULA_PASSWORD"])
    session_id = auth_result.get_session_id()
    assert session_id != 0
    resp = conn.execute(
        session_id,
        'CREATE SPACE IF NOT EXISTS SoulForge_test(vid_type=FIXED_STRING(256), partition_num=1, replica_factor=1);',
    )
    assert resp.error_code == ErrorCode.SUCCEEDED, resp.error_msg
finally:
    # Close the connection even when authentication or the statement fails.
    conn.close()

# insert data need to sleep after create schema
time.sleep(10)

session_pool = SessionPool(
    os.environ["NEBULA_USER"],
    os.environ["NEBULA_PASSWORD"],
    'SoulForge_test',
    [(graphd_host, graphd_port)],
)
assert session_pool.init(config)

# add schema: the adjacent string literals are concatenated into one statement batch
resp = session_pool.execute(
    'CREATE TAG IF NOT EXISTS entity(name string);'
    'CREATE EDGE IF NOT EXISTS relationship(relationship string);'
    'CREATE TAG INDEX IF NOT EXISTS entity_index ON entity(name(256));'
)
# Fail loudly instead of silently continuing with a missing schema.
assert resp.is_succeeded(), resp.error_msg()

In [8]:
# Target space and the schema names (edge type, edge property, tag) handed to
# the graph store; they match the schema created for SoulForge_test.
space_name = "SoulForge_test"
edge_types = ["relationship"]
rel_prop_names = ["relationship"]
tags = ["entity"]

graph_store = NebulaGraphStore(
    tags=tags,
    rel_prop_names=rel_prop_names,
    edge_types=edge_types,
    space_name=space_name,
)
storage_context = StorageContext.from_defaults(graph_store=graph_store)

In [7]:
%load_ext ngql
connection_string = f"--address 127.0.0.1 --port 9669 --user root --password nebula"
%ngql {connection_string}

Connection Pool Created


Unnamed: 0,Name
0,SoulForge
1,SoulForge_test


SoulForge

In [6]:
%%ngql
# Register the three storage hosts; in the recorded run this fails with "Existed!" because they were already registered.
ADD HOSTS "storaged0":9779,"storaged1":9779,"storaged2":9779

[ERROR]:
 Query Failed:
 Existed!


In [9]:
%%ngql
# Create the SoulForge space (FIXED_STRING(256) vertex IDs, 1 partition, 1 replica) if it is missing, then switch to it.
CREATE SPACE IF NOT EXISTS SoulForge(vid_type=FIXED_STRING(256), partition_num=1, replica_factor=1);
USE SoulForge;
# Schema: an entity tag with a name property, a relationship edge with a relationship property, and an index on entity names.
CREATE TAG IF NOT EXISTS entity(name string);
CREATE EDGE IF NOT EXISTS relationship(relationship string);
CREATE TAG INDEX IF NOT EXISTS entity_index ON entity(name(256));

INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)


### Create storage_context with graph_store

In [11]:
# Connection settings; presumably picked up by NebulaGraphStore, which is given
# no credentials explicitly below (confirm against the library).
# setdefault() only fills in the local-development values when nothing is
# exported already, so real credentials in the environment are not overwritten.
os.environ.setdefault('NEBULA_USER', "root")
os.environ.setdefault('NEBULA_PASSWORD', "nebula")
os.environ.setdefault('NEBULA_ADDRESS', "127.0.0.1:9669")

# Space and schema names used by the store.
space_name = "SoulForge"
edge_types, rel_prop_names = ["relationship"], ["relationship"]
tags = ["entity"]

graph_store = NebulaGraphStore(
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
)
# Storage context that routes graph data to the NebulaGraph store above.
storage_context = StorageContext.from_defaults(graph_store=graph_store)

In [None]:
# The connection settings must already be present in the environment (for
# example set by an earlier cell). Check them all at once and raise a single
# descriptive KeyError instead of re-assigning each variable to itself.
_required_env = ("NEBULA_USER", "NEBULA_PASSWORD", "NEBULA_ADDRESS")
_missing_env = [key for key in _required_env if key not in os.environ]
if _missing_env:
    raise KeyError(f"Missing NebulaGraph environment variable(s): {', '.join(_missing_env)}")

# Space and schema names used by the store.
space_name = "rag_workshop"
edge_types, rel_prop_names = ["relationship"], ["relationship"]
tags = ["entity"]

graph_store = NebulaGraphStore(
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
)
# Storage context that routes graph data to the NebulaGraph store above.
storage_context = StorageContext.from_defaults(graph_store=graph_store)

## Generate KG

In [6]:
# Load every document from the sibling "scenes" folder. The path is assembled
# with os.path so it resolves on any OS; a hard-coded backslash separator only
# resolves on Windows.
documents = SimpleDirectoryReader(os.path.join(os.pardir, "scenes")).load_data()

In [12]:
# Build the knowledge-graph index from the loaded documents, asking for at
# most 10 triplets per chunk and storing the result via the storage context.
kg_index = KnowledgeGraphIndex.from_documents(
    documents,
    service_context=service_context,
    storage_context=storage_context,
    max_triplets_per_chunk=10,
    tags=tags,
    rel_prop_names=rel_prop_names,
    edge_types=edge_types,
    space_name=space_name,
)

(Ambassador K'thrax, is on, diplomatic mission)
(Ambassador K'thrax, is on, diplomatic mission to negotiate)
(Ambassador K'thrax, is on, diplomatic mission to negotiate a trade agreement)
(Ambassador K'thrax, is on, diplomatic mission to negotiate a trade agreement with the notorious space pirate)
(Ambassador K'thrax, is on, diplomatic mission to negotiate a trade agreement with the notorious space pirate, Captain Seraphina Blackthorn)
(Captain Seraphina Blackthorn, is, space pirate)
(Captain Seraphina Blackthorn, is, notorious space pirate)
(Tensions, run high, as they discuss the terms)
(Tensions, run high, as they discuss the terms, with veiled threats)
(Tensions, run high, as they discuss the terms, with veiled threats and a hint of mutual respect)
(Dr. Alex Ryder, vanished during, deep space mission)
(Dr. Alex Ryder, was presumed dead)
(Dr. Alex Ryder, is alive)
(Dr. Alex Ryder, joined forces with, SARA)
(Dr. Alex Ryder, joined forces with, clandestine underground movement)
(Dr. A

In [13]:
# Switch the ngql session to the SoulForge space, then return every edge in it.
%ngql USE SoulForge;
%ngql MATCH ()-[e]->() RETURN e

INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)
INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)


Unnamed: 0,e
0,"(""The Galactic Council Meeting"")-[:relationshi..."
1,"(""The Galactic Council Meeting"")-[:relationshi..."
2,"(""The Galactic Council Meeting"")-[:relationshi..."
3,"(""The Galactic Council Meeting"")-[:relationshi..."
4,"(""space station"")-[:relationship@-572631301180..."
5,"(""space station"")-[:relationship@-572631301180..."
6,"(""android"")-[:relationship@-549205934196681021..."
7,"(""Tensions"")-[:relationship@902163024503347604..."
8,"(""SARA"")-[:relationship@-5492059341966810217{r..."
9,"(""Dr. Alex Ryder"")-[:relationship@-74844762488..."


In [14]:
# Draw the graph; the recorded output is a pyvis Network with 30 nodes and 30 edges.
%ng_draw

<class 'pyvis.network.Network'> |N|=30 |E|=30

## Load previously generated KG

In [20]:
from llama_index import load_index_from_storage

# Directory that holds the previously persisted index files.
persist_dir = '/mnt/c/Users/xli.ASSYSTEM/Documents/Digital safety/data/fr_embed_storage_graph'

# Combine the persisted files with the current graph store, then load the
# index from that storage context.
storage_context = StorageContext.from_defaults(graph_store=graph_store, persist_dir=persist_dir)
kg_index = load_index_from_storage(
    storage_context=storage_context,
    service_context=service_context,
    verbose=True,
    max_triplets_per_chunk=10,
    tags=tags,
    rel_prop_names=rel_prop_names,
    edge_types=edge_types,
    space_name=space_name,
)

In [24]:
# Switch the ngql session to the rag_workshop space, then return every edge in it.
%ngql USE rag_workshop;
%ngql MATCH ()-[e]->() RETURN e

Unnamed: 0,e
0,"(""évents"")-[:relationship@-3431488967660501439..."
1,"(""zone_de_surpression"")-[:relationship@-738609..."
2,"(""zone_de_surpression"")-[:relationship@-738609..."
3,"(""zone_de_surpression"")-[:relationship@-738609..."
4,"(""zone_de_surpression"")-[:relationship@-738609..."
...,...
972,"(""fumées"")-[:relationship@-3640410747914980111..."
973,"(""fonctionnement normal"")-[:relationship@-7396..."
974,"(""Implantation"")-[:relationship@65928469527764..."
975,"(""Implantation"")-[:relationship@65928469527764..."


In [25]:
# Draw the graph; the recorded output is a pyvis Network with 898 nodes and 977 edges.
%ng_draw

<class 'pyvis.network.Network'> |N|=898 |E|=977

In [35]:
# Show which graph store backs the loaded index (a NebulaGraphStore in the recorded run).
kg_index.graph_store

<llama_index.graph_stores.nebulagraph.NebulaGraphStore at 0x7f983175d7e0>

In [42]:
import matplotlib.pyplot as plt
import networkx as nx

# Export the knowledge graph as a networkx graph and print its nodes, one per line.
g = kg_index.get_networkx_graph()
for node in g.nodes:
    print(node)

UP1
usine
objet
champ d’application
Usine
Marcoule
Usine UP1
Usine Marcoule
Philz
1982
Berkeley
coffee shop
PT
générale
risque
opération
atelier
section
libellé
Plutonium
limit
criticité
sûreté
H
PT spécifique
I
J
K
L
M
N
O
P
L.1
L.3
L.4
L.6
L.8
L.10
L.12
L.14
L.16
L.18
L.20
L.22
L.24
L.26
L.28
L.30
L.32
L.34
L.36
P.1
P.3
effluents
solutions actives
assainissement
masse
soluble
cumul
inférieure
bat 117
traitement
text: Philz
RDS
R0
Page
10
/ 10
013413
9
32
1
boîte à gants
procédé
démantelée
MAR 09 013413
site
bâtiment 100
bâtiment 117
température
température minimale
température maximale
température moyenne
température moyenne des mois d’hiver
température moyenne des mois d’été
température de 30°C
nombre de jours de gelée sous abri
humidité de l’air
précipitation
vent dominant
vitesse moyenne des vents
mistral de l’ordre de 70 à 80 km/h
barrière dynamique
vitesse de passage
barrière statique
taux de renouvellement
page_label
2
ventilation
e
t
l
o
,
<
[
s
-
 
n
a
i
m
]
g
f
>
û
é
r
u
q
d

In [None]:
# Query engine over the knowledge-graph index: keyword retriever mode,
# tree_summarize response mode, verbose output.
kg_index_query_engine = kg_index.as_query_engine(
    response_mode="tree_summarize",
    verbose=True,
    retriever_mode="keyword",
)

# The question is French for "Summarise for me"; the answer is rendered in bold.
response_graph_rag = kg_index_query_engine.query("Résume moi")
display(Markdown(f"<b>{response_graph_rag}</b>"))

In [None]:
# Ask the same question again with the existing query engine.
response_graph_rag = kg_index_query_engine.query("Résume moi")

# Render the answer as bold Markdown.
display(Markdown(f"<b>{response_graph_rag}</b>"))

In [None]:

# ListIndex is only present as a commented-out entry in the notebook's import
# cell, so import it here to keep this cell runnable on a fresh kernel.
from llama_index import ListIndex

# Build a list index over the loaded documents.
index = ListIndex.from_documents(documents, service_context=service_context)

# Query and print response (the question is French for "What is the title of chapter 5?")
query_engine = index.as_query_engine()
response = query_engine.query("Quel est le titre du chapitre 5 ?")
print(response)

In [None]:
# Persist the index through its storage context. save_to_disk() belongs to the
# older llama_index API from before StorageContext was introduced.
index.storage_context.persist(persist_dir="/mnt/c/Users/xli.ASSYSTEM/Documents/Digital safety/data")

In [None]:
# Display the index's index_id.
index.index_id

In [None]:
# Write the index's storage context to the "index" sub-folder of the data directory.
index.storage_context.persist(persist_dir="/mnt/c/Users/xli.ASSYSTEM/Documents/Digital safety/data/index")

In [None]:
# ListIndex is only present as a commented-out entry in the notebook's import
# cell, so it is imported here together with the loading helpers.
from llama_index import ListIndex, StorageContext, load_index_from_storage

# Storage context pointing at the directory the index was persisted to.
storage_context = StorageContext.from_defaults(persist_dir="/mnt/c/Users/xli.ASSYSTEM/Documents/Digital safety/data/index")
# NOTE(review): this rebuilds `index` from the documents; the load below does not use it.
index = ListIndex.from_documents(documents, service_context=service_context)

# Load the persisted index and run the same question against it
# (French for "What is the title of chapter 5?").
new_index = load_index_from_storage(storage_context, service_context=service_context)
new_query_engine = new_index.as_query_engine()
response = new_query_engine.query("Quel est le titre du chapitre 5 ?")
print(response)