In [1]:
import os
import streamlit as st
import logging
import sys
import webbrowser
from llama_index import (
    SimpleDirectoryReader,
    ServiceContext,
    GPTKnowledgeGraphIndex,
)
from llama_index.graph_stores import SimpleGraphStore
from llama_index.llms import OpenAI
from IPython.display import Markdown, display
from llama_index.storage.storage_context import StorageContext
from IPython.display import display
from pyvis.network import Network

logging.basicConfig(stream=sys.stdout, level=logging.INFO)

OPENAI_API_KEY = st.secrets["openai_pass"]
os.environ["OPENAI_API_KEY"] = OPENAI_API_KEY

In [11]:
path = r"""C:\Users\fangning.zheng\Documents\weekly summary\week05\nlp-experiments-gists\fangzhen\langchain_streamlit_chatbot\src\langchain_streamlit_chatbot\data_file_parsed"""

documents = SimpleDirectoryReader(path, encoding="utf-8-sig").load_data()
print(documents)

[Document(id_='c73e4ed4-7334-41bb-b398-6a71f79a32b0', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='14e6b74476b35b4de4d53298c98a09fbd7bd46e6021671b7b59386de419c2ec7', text="Status and Outlook: CTVs & SOVs| NOT FOR DISTRIBUTION Welcome to the Q1 2023 edition of 4C Offshore’s Status and Outlook: CTV and SOVs, our review of service vessel activity during Q4 2022 in relation to Q4 2021.\nUnfortunately, demand for CTVs was not good during 2022.\nIn Q4, there was a significant increase in work but not enough to make up the shortfall in the market during the previous months of the year.\nDespite this, investment in vessels has remained high, with 28 new CTVs delivered globally last year, making a pipeline of over 50 for 2023 and beyond.\nIn a new development, our report now includes analysis of walk to work (W2W) vessel activity in offshore wind.\nPreviously, our focus was on SOVs and CSOVs, which provide a niche area of v

In [2]:
# define LLM
# NOTE: at the time of demo, text-davinci-002 did not have rate-limit errors

llm = OpenAI(temperature=0, model="text-davinci-002")
service_context = ServiceContext.from_defaults(llm=llm, chunk_size=512)

graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)


In [12]:
# NOTE: can take a while!
index = GPTKnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=10,
    storage_context=storage_context,
    service_context=service_context,
)

In [5]:
#print(index)
path_index = r"""C:\Users\fangning.zheng\Documents\weekly summary\week05\nlp-experiments-gists\fangzhen\langchain_streamlit_chatbot\src\langchain_streamlit_chatbot\knowledge_graph_index"""
#index.storage_context.persist(persist_dir=path_index)

In [8]:
#index2 = StorageContext
storage_context2 = StorageContext.from_defaults(persist_dir = path_index)

index2 = GPTKnowledgeGraphIndex(
    [],
    storage_context=storage_context2,
)
print(index2)


<llama_index.indices.knowledge_graph.base.KnowledgeGraphIndex object at 0x000002032DA08B90>


In [19]:
query_engine = index.as_query_engine(include_text=True, response_mode="tree_summarize")
response = query_engine.query(
    "What's the expected total installed capacity of offshore wind power in China to reach by the end of 2022?",
)

INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: What's the expected total installed capacity of offshore wind power in China to reach by the end of 2022?
INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['China', 'capacity', '2022', 'power', 'offshore', 'wind', 'installed']
ERROR:llama_index.indices.knowledge_graph.retriever:Index was not constructed with embeddings, skipping embedding usage...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 4bedfe57-0911-4a0e-9c66-a4a200bf28f9: m/MW), an approximately $ 5 .5 bn (€ 4 .
5 bn or ₱ .
trn) investment is requi...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: fef0c07e-5e0c-45ab-8489-3f02458c968e: on offshore wind projects, all of which is expected to be >=145 kV (bottom le...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 76be2b79-d2ee-475e-bdb1-0a8c3021f3ad: Hengtong).
(top right) APAC is an emerging offshore wind market, wher

In [20]:
display(Markdown(f"<b>{response}</b>"))

<b>
The expected total installed capacity of offshore wind power in China to reach by the end of 2022 is 27.4 GW.</b>

# Query with embeddings

In [12]:
# NOTE: can take a while!
new_index = GPTKnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=10,
    service_context=service_context,
    include_embeddings=True,
)

In [38]:
# query using top 3 triplets plus keywords (duplicate triplets are removed)
query_engine = index.as_query_engine(
    include_text=True,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=5,
)
response = query_engine.query(
    "What's the capacity of offshore wind power in China?",
)

INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: What's the capacity of offshore wind power in China?
INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['offshore', 'power', 'China', 'wind']
ERROR:llama_index.indices.knowledge_graph.retriever:Index was not constructed with embeddings, skipping embedding usage...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 94a92475-0085-498d-87f8-1ab27b39adb8: fully commissioned.
China leads with 27.4 GW, followed by the UK (13.7 GW) an...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 461614db-8879-442a-b70b-31f160b739bf: regional comparative to its neighbours lies in its experience in fixed-bottom...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 938ff42f-0799-49c4-ab9d-31017f293733: m/MW), an approximately $ 5 .5 bn (€ 4 .
5 bn or ₱ .
trn) investment is requi...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 1b

In [44]:
a=(Markdown(f"<b>{response}</b>"))
print(str(response))


The capacity of offshore wind power in China is 30 GW by the end of 2022.


In [13]:
## create graph
g = index.get_networkx_graph()
net = Network(notebook=True, cdn_resources="in_line", directed=True)
net.from_nx(g)
html = net.generate_html()
with open("example.html", mode='w', encoding='utf-8') as fp:
        fp.write(html)
webbrowser.open_new_tab('example.html')

True