In [52]:
!pip3 install llama_index nebula3-python futures glob2 pypdf sentence_transformers

Collecting sentence_transformers
  Downloading sentence-transformers-2.2.2.tar.gz (85 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25ldone
[?25h  Getting requirements to build wheel ... [?25ldone
[?25h  Preparing metadata (pyproject.toml) ... [?25ldone
Collecting transformers<5.0.0,>=4.6.0 (from sentence_transformers)
  Downloading transformers-4.34.1-py3-none-any.whl.metadata (121 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m121.5/121.5 kB[0m [31m12.9 MB/s[0m eta [36m0:00:00[0m
Collecting torch>=1.6.0 (from sentence_transformers)
  Downloading torch-2.1.0-cp311-none-macosx_11_0_arm64.whl.metadata (24 kB)
Collecting torchvision (from sentence_transformers)
  Downloading torchvision-0.16.0-cp311-cp311-macosx_11_0_arm64.whl.metadata (6.6 kB)
Collecting scikit-learn (from sentence_transformers)
  Downloading scikit_learn-1.3.2-cp311-cp31

In [None]:
import logging
import sys

# Send INFO-and-above records to stdout so library progress messages show up in the
# cell output. `force=True` replaces any handlers already attached to the root logger;
# without it basicConfig() silently does nothing once the root logger has been
# configured (for example when this cell is re-run in a kernel that already logs).
logging.basicConfig(stream=sys.stdout, level=logging.INFO, force=True)

In [None]:
import os
from getpass import getpass

# Keep real keys out of the notebook file: use a key that is already exported in the
# environment, otherwise prompt for it without echoing the value to the cell output.
for _key_name in ("EDENAI_API_KEY", "OPENAI_API_KEY"):
    if not os.environ.get(_key_name):
        os.environ[_key_name] = getpass(f"{_key_name}: ")

In [53]:
from llama_index import KnowledgeGraphIndex, LLMPredictor, ServiceContext
from llama_index.storage.storage_context import StorageContext
from llama_index.graph_stores import NebulaGraphStore

from langchain import OpenAI
from langchain.llms import EdenAI
from IPython.display import Markdown, display
from sentence_transformers import SentenceTransformer, util

  from .autonotebook import tqdm as notebook_tqdm


In [7]:
connection = "--address 127.0.0.1 --port 9669 --user root --password nebula;"
space_name = "kap"
%load_ext ngql

The ngql extension is already loaded. To reload it, use:
  %reload_ext ngql


In [10]:
# Connect to NebulaGraph: IPython substitutes the `connection` flag string into the
# magic's argument line before `%ngql` parses it.
%ngql {connection}

Connection Pool Created
INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)


Unnamed: 0,Name


In [11]:
%ngql CREATE SPACE IF NOT EXISTS {space_name}(vid_type=FIXED_STRING(256), partition_num=1, replica_factor=1);

INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)


In [14]:
%%ngql
# Select the `kap` space, then declare the schema (skipped when already present):
# an `entity` tag with a string `name` property, and a `relationship` edge type
# with a string `relationship` property.
USE kap;
CREATE TAG IF NOT EXISTS entity(name string);
CREATE EDGE IF NOT EXISTS relationship(relationship string);

INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)


In [15]:
# Create the index `entity_index` on the `name` property of the `entity` tag
# (index length 256 for the string property); skipped when the index already exists.
%ngql CREATE TAG INDEX IF NOT EXISTS entity_index ON entity(name(256));

INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)


In [56]:
# Completion model reached through EdenAI's "openai" provider, with temperature 0
# and a cap of 1024 tokens per completion.
llm = EdenAI(
    provider="openai",
    model="text-davinci-003",
    temperature=0,
    max_tokens=1024,
)

# Wrap the LLM for llama_index and bundle it, together with a chunk_size of 512,
# into the service context shared by the index built later.
llm_predictor = LLMPredictor(llm=llm)
service_context = ServiceContext.from_defaults(chunk_size=512, llm_predictor=llm_predictor)

In [17]:
import os

# Account and endpoint of the NebulaGraph instance, published through NEBULA_*
# environment variables. "nebula" is the default password and the address assumes
# NebulaGraph is installed locally.
os.environ.update(
    {
        "NEBULA_USER": "root",
        "NEBULA_PASSWORD": "nebula",
        "NEBULA_ADDRESS": "127.0.0.1:9669",
    }
)

# Space and schema names the store works against. The edge type, its property name and
# the tag are the defaults and could be omitted when starting from an empty graph.
space_name = "kap"
tags = ["entity"]
edge_types = ["relationship"]
rel_prop_names = ["relationship"]

graph_store = NebulaGraphStore(
    space_name=space_name, edge_types=edge_types, rel_prop_names=rel_prop_names, tags=tags
)
storage_context = StorageContext.from_defaults(graph_store=graph_store)

In [22]:
# from llama_index import download_loader

# WikipediaReader = download_loader("WikipediaReader")

# loader = WikipediaReader()

# documents = loader.load_data(pages=['Guardians of the Galaxy Vol. 3'], auto_suggest=False)

In [30]:
from llama_index import SimpleDirectoryReader

# Load the contents of the local ./docs directory into llama_index documents.
docs_reader = SimpleDirectoryReader("./docs")
documents = docs_reader.load_data()

In [34]:
# Number of Document objects produced by the reader.
len(documents)

17

In [35]:
# Inspect the first Document (id, metadata and raw text) as a sanity check on the load.
documents[0]

Document(id_='b707b9a8-e54f-4ebd-a6ff-5a654f4876db', embedding=None, metadata={'page_label': '1', 'file_name': 'P02-1058.pdf'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='900803dc9e1b40eb30f903a63058fa2dcb46e1f9b2e20d75f46245da695c2dfe', text='From Single to Multi -document Summarization:  \nA Prototype System and its Evaluation  \nChin-Yew Lin and Eduard Hovy  \nUniversity of Southern California / Information Sciences Institute  \n4676 Admiralty Way  \nMarina del Rey, CA 90292  \n{cyl,hovy}@isi.edu  \n \nAbstract  \nNeATS is a multi -document \nsummarization system that attempts \nto extract relevant or interesting \nportions from a set of documents \nabout some topic and present them \nin coherent order. NeATS is among \nthe best performers in the large scale \nsummarization evaluat ion DUC \n2001.  \n1 Introduction  \nIn recent years, text summarization has been \nenjoying a period of revival.  Two workshops \non Automatic Summarization w

In [71]:
# Build the knowledge-graph index from the loaded documents, keeping at most five
# triplets per chunk. The space, tag and edge names are the same values that were
# given to the NebulaGraph store.
kg_index = KnowledgeGraphIndex.from_documents(
    documents,
    service_context=service_context,
    storage_context=storage_context,
    max_triplets_per_chunk=5,
    space_name=space_name,
    tags=tags,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    # include_embeddings=True,
)


(NeATS, is, multi-document summarization system)
(NeATS, attempts to extract, relevant or interesting portions)
(NeATS, is among, best performers in large scale summarization evaluation DUC 2001)
(DUC, sponsors, Text Summarization Challenge)
(NTCIR, started in, 2000 in Japan)

(NeATS, is, multi-document summarization system)
(NeATS, leverages, techniques)
(NeATS, uses, term clustering)
(NeATS, generates summaries in, three stages)
(Content selection, goal is to, identify important concepts)

(AA flight 11, is key concept for, September 11 terrorist attacks)
(AA flight 77, is key concept for, September 11 terrorist attacks)
(UA flight 173, is key concept for, September 11 terrorist attacks)
(UA flight 93, is key concept for, September 11 terrorist attacks)
(New York, is key concept for, September 11 terrorist attacks)

(NeATS, computes, likelihood ratio)
(Dunning, 1993, introduced)
(Lin and Hovy, 2000, introduced)
(Clusters, formed through, lexical connection)
(Ranking algorithm, rewar

In [37]:
# Show the kernel's working directory; relative paths such as "./docs" resolve against it.
!pwd

/Users/abhinavvengala/Desktop/KG


In [58]:
# Query engine over the knowledge-graph index: keyword retriever mode, tree_summarize
# response mode, and verbose output so the retrieval steps are printed.
kg_index_query_engine = kg_index.as_query_engine(
    response_mode="tree_summarize", retriever_mode="keyword", verbose=True
)

In [70]:
# Ask the knowledge-graph query engine a question.
question = "requirements of multi document summarization"
response_graph_rag = kg_index_query_engine.query(question)

# Render the answer in bold through Markdown.
answer_markdown = f"<b>{response_graph_rag}</b>"
display(Markdown(answer_markdown))

[1;3;32mExtracted keywords: ['Requirements', 'Summarization', 'Multi', 'Multi-document', 'document']
INFO:llama_index.indices.knowledge_graph.retrievers:> No relationships found, returning nodes found by keywords.
INFO:llama_index.indices.knowledge_graph.retrievers:> No nodes found by keywords, returning empty response.


<b> Multi document summarization requires the ability to analyze and synthesize information from multiple documents in order to create a concise summary. It also requires the ability to identify key concepts and relationships between documents.</b>

In [68]:
%%ngql
# In the `kap` space, fetch paths of one or two hops (either direction) that start at
# any of the listed vertex ids; at most 100 paths are returned.
USE kap;
MATCH p=(n)-[*1..2]-()
  WHERE id(n) IN ['multi', 'document', 'summarization', 'multi-document']
RETURN p LIMIT 100;

INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)


Unnamed: 0,p
0,"(""summarization"" :entity{name: ""summarization""..."
1,"(""summarization"" :entity{name: ""summarization""..."
2,"(""summarization"" :entity{name: ""summarization""..."


In [67]:
# Draw a graph view; presumably of the most recent %ngql query result (confirm against
# the ipython-ngql documentation).
%ng_draw

<class 'pyvis.network.Network'> |N|=4 |E|=3

In [58]:
%ngql DROP SPACE kap;

INFO:nebula3.logger:Get connection to ('127.0.0.1', 9669)
