# Clean data

In [2]:
from llama_index import(
    ServiceContext,
    StorageContext,
    SimpleDirectoryReader,
    LangchainEmbedding,
    VectorStoreIndex,
    load_index_from_storage,
    load_graph_from_storage,
    LLMPredictor,
    PromptHelper
    )

# upload model
from llama_index.llms import LangChainLLM
from llama_index.graph_stores import SimpleGraphStore
from llama_index import (KnowledgeGraphIndex)
from llama_index.storage.storage_context import StorageContext
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

In [3]:
from typing import Callable, Dict, Generator, List, Optional, Type
from pathlib import Path
import logging


logger = logging.getLogger(__name__)
class DirectorySearchSource():
    def __init__(
    self,
    num_files_limit: Optional[int] = None,
    exclude_hidden: bool = True,
    required_exts: Optional[List[str]]  = None,
    recursive : bool = True,):
    
        super().__init__()

        self.recursive = recursive
        self.exclude_hidden = exclude_hidden
        self.required_exts = required_exts
        self.num_files_limit = num_files_limit

    def add_files(self, input_dir):
        all_files = set()
        rejected_files = set()
        list_files = []

        file_refs: Generator[Path, None, None]
        if self.recursive:
            file_refs = Path(input_dir).rglob("*")
        else:
            file_refs = Path(input_dir).glob("*")
        for ref in file_refs:
            # Manually check if file is hidden or directory instead of
            # in glob for backwards compatibility.
            is_dir = ref.is_dir()
            skip_because_hidden = self.exclude_hidden and ref.name.startswith(".")
            skip_because_bad_ext = (
                self.required_exts is not None and ref.suffix not in self.required_exts
            )
            skip_because_excluded = ref in rejected_files

            if (
                is_dir
                or skip_because_hidden
                or skip_because_bad_ext
                or skip_because_excluded
            ):
                continue
            else:
                all_files.add(ref)
        new_input_files = sorted(list(all_files))

        if len(new_input_files) == 0:
            raise ValueError(f"No files found in {input_dir}.")

        if self.num_files_limit is not None and self.num_files_limit > 0:
            new_input_files = new_input_files[0 : num_files_limit]

        # print total number of files added
        logger.debug(
            f"> [SimpleDirectoryReader] Total files added: {len(new_input_files)}")

        for f in new_input_files:
            list_files.append(str(f))
        return list_files

In [4]:
from llama_index.readers.base import BaseReader
from llama_index.schema import Document

class HtmlFilesReader(BaseReader):
    """Simple web page reader.

    Reads pages from the web.

    Args:
        html_to_text (bool): Whether to convert HTML to text.
            Requires `html2text` package.

    """

    def __init__(self, html_to_text: bool = False):
        """Initialize with parameters."""
        try:
            import html2text  # noqa: F401
        except ImportError:
            raise ImportError(
                "`html2text` package not found, please run `pip install html2text`"
            )
        self._html_to_text = html_to_text

    def load_data(self, input_files, file_metadata):
        """Load data from the input directory.

        Args:
            urls (List[str]): List of URLs to scrape.

        Returns:
            List[Document]: List of documents.

        """
        if not isinstance(input_files, list):
            raise ValueError("input_files must be a list of strings.")
        documents = []
        for input_file in input_files:
            if file_metadata is not None:
                metadata = file_metadata(str(input_file))
            #response = requests.get(url, headers=None).text
            with open(input_file, "r", errors = "ignore", encoding='utf-8') as f:
                response = f.read()
            if self._html_to_text:
                import html2text

                response = html2text.html2text(response)

            doc = Document(text=response, file_metadata=metadata or {})
            doc.metadata = {'file_name': input_file}


            documents.append(doc)

        return documents

In [5]:
input_dir = "./omniscien.com"
filename_fn = lambda filename: {'file_name': filename}
lists_files = DirectorySearchSource().add_files(input_dir)
documents = HtmlFilesReader(html_to_text=True).load_data(input_files = lists_files,file_metadata=filename_fn)

In [109]:
len(documents)

169

In [70]:
documents[0]

Document(id_='omniscien.com/about-us/careers/index.html', embedding=None, metadata={'file_name': 'omniscien.com/about-us/careers/index.html'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='aebdba98e18c3c2347825cd9515f8e016ade77bc0bec3a86a35e2e78a4a960a0', text='\n\n[![](https://omniscien.com/wp-content/uploads/2020/10/AW_Omniscien-\nLogo_RGB_Hi-01-croppedmin.png)![](https://omniscien.com/wp-\ncontent/uploads/2020/10/AW_Omniscien-\nLogo_RGB_Hi-01-croppedmin.png)![](https://omniscien.com/wp-\ncontent/uploads/2020/07/Logo_Web.jpg)](https://omniscien.com/)\n\n__\n\n  * [ Home](https://omniscien.com/)\n  * [Products __](/products/)\n\n    * [![Language Studio Logo](https://omniscien.com/wp-content/uploads/2020/10/LanguageStudio240min.png)](/lsev6/)\n\nPrivate and Secure Artificial Intelligence Tools for Enterprise\n\n[Overview](/lsev6/) | [Features](/lsev6/features/)\n\nEditions\n\n![](https://omniscien.com/wp-content/uploads/2020/08/SecureCloud45.p

In [44]:
documents[0].metadata

{}

In [57]:
documents.metadata = {'filename': '<doc_file_name>'}

AttributeError: 'list' object has no attribute 'metadata'

# Create vector FAISS

In [71]:
parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(documents)

In [74]:
nodes[0].text

'[![](https://omniscien.com/wp-content/uploads/2020/10/AW_Omniscien-\nLogo_RGB_Hi-01-croppedmin.png)![](https://omniscien.com/wp-\ncontent/uploads/2020/10/AW_Omniscien-\nLogo_RGB_Hi-01-croppedmin.png)![](https://omniscien.com/wp-\ncontent/uploads/2020/07/Logo_Web.jpg)](https://omniscien.com/)\n\n__\n\n  * [ Home](https://omniscien.com/)\n  * [Products __](/products/)\n\n    * [![Language Studio Logo](https://omniscien.com/wp-content/uploads/2020/10/LanguageStudio240min.png)](/lsev6/)\n\nPrivate and Secure Artificial Intelligence Tools for Enterprise\n\n[Overview](/lsev6/) | [Features](/lsev6/features/)\n\nEditions\n\n![](https://omniscien.com/wp-content/uploads/2020/08/SecureCloud45.png)\n\nSecure Cloud\n\n![](https://omniscien.com/wp-content/uploads/2020/08/Servers45.png)\n\nEnterprise\n\n[![Media Studio Logo](https://omniscien.com/wp-\ncontent/uploads/2020/10/MediaStudio240min.png)](/products/media-studio/)\n\nProject Management,  \nEditing & Subtitle Data Processing\n\n[Overview](/p

In [73]:
nodes[1]

TextNode(id_='5b985152-466a-4c31-b92e-237ad96fc5f5', embedding=None, metadata={'file_name': 'omniscien.com/about-us/careers/index.html'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='omniscien.com/about-us/careers/index.html', node_type=None, metadata={'file_name': 'omniscien.com/about-us/careers/index.html'}, hash='aebdba98e18c3c2347825cd9515f8e016ade77bc0bec3a86a35e2e78a4a960a0'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='479509a9-1d02-492e-93c1-30a979828cb5', node_type=None, metadata={'file_name': 'omniscien.com/about-us/careers/index.html'}, hash='8d5e182714fe3452b498149da75b125b67ff3bebdafb42c33a22f8def0cc2170'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='dcb1eaa5-2477-48ba-96a8-d7c042842ac5', node_type=None, metadata={'file_name': 'omniscien.com/about-us/careers/index.html'}, hash='e6ce43f2a1395fbd714c2acd05a487a68844d25d485614ff1fc8b134b769c7ad')}, hash='3c291a

In [75]:
nodes[3]

TextNode(id_='c4afe419-40a2-49bb-9c90-0160549000d8', embedding=None, metadata={'file_name': 'omniscien.com/about-us/careers/index.html'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='omniscien.com/about-us/careers/index.html', node_type=None, metadata={'file_name': 'omniscien.com/about-us/careers/index.html'}, hash='aebdba98e18c3c2347825cd9515f8e016ade77bc0bec3a86a35e2e78a4a960a0'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='dcb1eaa5-2477-48ba-96a8-d7c042842ac5', node_type=None, metadata={'file_name': 'omniscien.com/about-us/careers/index.html'}, hash='e6ce43f2a1395fbd714c2acd05a487a68844d25d485614ff1fc8b134b769c7ad'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='d4aabfbe-7484-4ef8-8a2c-5d42208a7c06', node_type=None, metadata={'file_name': 'omniscien.com/about-us/careers/index.html'}, hash='9d1a88c767dafec12e3c35b8d5e7b69b879258d86596b152b54a97c1eb68592f')}, hash='b48c04

In [96]:
nodes[-1]

TextNode(id_='5936bc0a-ee06-42d4-9ff0-ed3dbc46e471', embedding=None, metadata={'file_name': 'omniscien.com/whitepapers/index.html'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='omniscien.com/whitepapers/index.html', node_type=None, metadata={'file_name': 'omniscien.com/whitepapers/index.html'}, hash='858e2772906f8eeebae4e77bff65caec03c42eade0be254ca3a94fca61524107'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='267d2de9-7a44-4595-adc4-1be681737e3e', node_type=None, metadata={'file_name': 'omniscien.com/whitepapers/index.html'}, hash='554cf3ea987bf4f20acb7d0691a734c1801d5d3b4b97bebfc751c5212c2e169f')}, hash='878f1b59fd48125a367ad9a2c9a533e630801b5c7efbccb714019fb6c6414aa7', text=' * [Support](/support/)\n  * [Frequently Asked Questions](/faq/)\n  * [Blog](/resources/blog/)\n  * [Webinars](/resources/webinars/)\n  * [Integrated Solution Partners](/resources/integrated-solution-partne

In [107]:
len(documents)

2

In [39]:
len(nodes)

1599

In [110]:
def load_llm():
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    llm = LlamaCpp(
        model_path="/home/sira/sira_project/DQA_demo/orca-mini-3b.ggmlv3.q4_0.bin",
        n_ctx=2048,
        callback_manager=callback_manager,
        verbose=True,
    )
    return llm

In [31]:
from llama_index.node_parser import SimpleNodeParser

embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2",model_kwargs = {'device': 'cuda'}))
parser = SimpleNodeParser()
nodes = parser.get_nodes_from_documents(documents)
prompt_helper = PromptHelper(
  context_window=4096, 
  num_output=256, 
  chunk_overlap_ratio=0.1, 
  chunk_size_limit=None)

service_context = ServiceContext.from_defaults(
  llm=llm,
  embed_model=embed_model,
  node_parser=node,
  prompt_helper=prompt_helper,
  chunk_size=1000, 
  chunk_overlap=200)


KeyboardInterrupt: 

In [16]:
import faiss
from llama_index.vector_stores.faiss import FaissVectorStore

# dimensions of text-ada-embedding-002
d = 384
faiss_index = faiss.IndexFlatL2(d)

In [None]:
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex(nodes, storage_context=storage_context)

In [None]:
index.storage_context.persist("index_vector_FAISS")

# VectorStoreIndex

In [8]:


def load_llm():
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    llm = LlamaCpp(
        model_path="/home/sira/sira_project/DQA_demo/orca-mini-3b.ggmlv3.q4_0.bin",
        n_ctx=2048,
        callback_manager=callback_manager,
        verbose=True,
    )
    return llm

In [13]:
from llama_index.llms import LangChainLLM

prompt_helper = PromptHelper(
  context_window=2048, 
  num_output=256, 
  chunk_overlap_ratio=0.1, 
  chunk_size_limit=None)
llm = LangChainLLM(load_llm())
llm_predictor = LLMPredictor(llm = llm)
embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2",model_kwargs = {'device': 'cpu'}))
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor,  embed_model=embed_model, prompt_helper=prompt_helper)
index = VectorStoreIndex.from_documents(documents, service_context =service_context, show_progress = True)

llama.cpp: loading model from /home/sira/sira_project/DQA_demo/orca-mini-3b.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 3200
llama_model_load_internal: n_mult     = 240
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 26
llama_model_load_internal: n_rot      = 100
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 8640
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 3B
llama_model_load_internal: ggml ctx size =    0.07 MB
llama_model_load_internal: mem required  = 2194.73 MB (+  650.00 MB per state)
llama_new_context_with_model: kv self siz

In [14]:
query_engine = index.as_query_engine(streaming=False, similarity_top_k=3, service_context=service_context)
response = query_engine.query("Who is Dion?")
print(response)


file_name: omniscien.com/resources/webinars/index.html

Wiggins has been with Omniscien for a while and can be found working on the company's products and projects.


llama_print_timings:        load time =   907.40 ms
llama_print_timings:      sample time =    28.25 ms /    49 runs   (    0.58 ms per token,  1734.51 tokens per second)
llama_print_timings: prompt eval time = 192528.46 ms /  1687 tokens (  114.12 ms per token,     8.76 tokens per second)
llama_print_timings:        eval time =  7480.21 ms /    48 runs   (  155.84 ms per token,     6.42 tokens per second)
llama_print_timings:       total time = 200689.35 ms
Llama.generate: prefix-match hit


Empty Response



llama_print_timings:        load time =   907.40 ms
llama_print_timings:      sample time =     0.62 ms /     1 runs   (    0.62 ms per token,  1620.75 tokens per second)
llama_print_timings: prompt eval time = 177309.72 ms /  1571 tokens (  112.86 ms per token,     8.86 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time = 177777.46 ms


In [15]:
print(response)

Empty Response


## current index (not romove html tag)

In [17]:
storage_context = StorageContext.from_defaults(persist_dir="/home/sira/sira_project/DQA_demo/llama_vector_index")
index = load_index_from_storage(storage_context, service_context=service_context)

In [24]:
from llama_index.llms import LangChainLLM

prompt_helper = PromptHelper(
  context_window=2048, 
  num_output=256, 
  chunk_overlap_ratio=0.1, 
  chunk_size_limit=None)
llm = load_llm()
llm_predictor = LLMPredictor(llm = llm)
embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name = "sentence-transformers/all-mpnet-base-v2",model_kwargs = {'device': 'cpu'}))
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor,  embed_model=embed_model, prompt_helper=prompt_helper)

index = load_index_from_storage(storage_context, service_context=service_context)
query_engine = index.as_query_engine(streaming=False, similarity_top_k=3, service_context=service_context)
response = query_engine.query("Who is Dion?")
print(response)

llama.cpp: loading model from /home/sira/sira_project/DQA_demo/orca-mini-3b.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 3200
llama_model_load_internal: n_mult     = 240
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 26
llama_model_load_internal: n_rot      = 100
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 8640
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 3B
llama_model_load_internal: ggml ctx size =    0.07 MB
llama_model_load_internal: mem required  = 2194.73 MB (+  650.00 MB per state)
llama_new_context_with_model: kv self siz

The person whose image is shown in the video is Dion Wiggins.


llama_print_timings:        load time =   933.47 ms
llama_print_timings:      sample time =     8.73 ms /    16 runs   (    0.55 ms per token,  1832.76 tokens per second)
llama_print_timings: prompt eval time = 208083.83 ms /  1835 tokens (  113.40 ms per token,     8.82 tokens per second)
llama_print_timings:        eval time =  2439.62 ms /    15 runs   (  162.64 ms per token,     6.15 tokens per second)
llama_print_timings:       total time = 211211.56 ms
Llama.generate: prefix-match hit


Empty Response



llama_print_timings:        load time =   933.47 ms
llama_print_timings:      sample time =     0.71 ms /     1 runs   (    0.71 ms per token,  1404.49 tokens per second)
llama_print_timings: prompt eval time = 151297.01 ms /  1464 tokens (  103.34 ms per token,     9.68 tokens per second)
llama_print_timings:        eval time =     0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time = 151707.93 ms


In [22]:
response.response

'Empty Response'

# Not clean doc

In [8]:
url = "/home/sira/sira_project/llama_index/omniscien.com"
documents = SimpleDirectoryReader(url, recursive = True).load_data()

In [13]:
documents[0]

Document(id_='06637541-21dd-4b87-89ca-7fe46071e173', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='42b9a3147c3a835ded927f81653a3b14106a86e421dee56242b44c5ed1294384', text='<!DOCTYPE html><html lang="en-US"><head><meta charset="UTF-8" /><meta http-equiv="X-UA-Compatible" content="IE=edge"><link rel="pingback" href="https://omniscien.com/xmlrpc.php" /> <script type="text/javascript"> document.documentElement.className = \'js\'; </script><link rel="preconnect" href="https://fonts.gstatic.com" crossorigin /><script id="diviarea-loader">window.DiviPopupData=window.DiviAreaConfig={"zIndex":1000000,"animateSpeed":400,"triggerClassPrefix":"show-popup-","idAttrib":"data-popup","modalIndicatorClass":"is-modal","blockingIndicatorClass":"is-blocking","defaultShowCloseButton":true,"withCloseClass":"with-close","noCloseClass":"no-close","triggerCloseClass":"close","singletonClass":"single","darkModeClass":"dark","noShadowClass":"

# Test knowledge graph from OpenAI

In [24]:
import os
import openai


os.environ["OPENAI_API_KEY"] = "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"
openai.api_key = os.environ["OPENAI_API_KEY"]

In [15]:
import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)

In [25]:
from llama_index import (
    SimpleDirectoryReader,
    LLMPredictor,
    ServiceContext,
    KnowledgeGraphIndex,
)
from llama_index.graph_stores import SimpleGraphStore

from llama_index.llms import OpenAI
from IPython.display import Markdown, display

In [26]:
documents = SimpleDirectoryReader(
    "data"
).load_data()


In [27]:
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="text-davinci-002", streaming=True))
#embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2",model_kwargs = {'device': 'cpu'}))
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, chunk_size=512)

In [30]:
from llama_index.storage.storage_context import StorageContext

graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)

# NOTE: can take a while!
index = KnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=3,
    storage_context=storage_context,
    service_context=service_context,
)

In [35]:
query_engine = index.as_query_engine(include_text=False, response_mode="tree_summarize")
response = query_engine.query(
    "Tell me more about Interleaf",
)

INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: Tell me more about Interleaf
INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['Software', 'Solutions', 'Information', 'Technology', 'Interleaf']
ERROR:llama_index.indices.knowledge_graph.retriever:Index was not constructed with embeddings, skipping embedding usage...
INFO:llama_index.indices.knowledge_graph.retriever:> Extracted relationships: The following are knowledge triplets in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`
Software ['become obsolete', 'couple decades']
Software ['enable', 'people']
Software ['launched', 'publicly']
Interleaf ['made', 'software']
Interleaf ['added', 'scripting language']
Interleaf ['made', 'dialect of Lisp']
Interleaf ['taught', 'what not to do']


In [36]:
display(Markdown(f"<b>{response}</b>"))

<b>
Interleaf was a software company that made software and added a scripting language. They also made a dialect of Lisp and taught people what not to do. Over time, software can become obsolete in a couple of decades, but Interleaf enabled people to do more with their software when it was launched publicly.</b>

In [33]:
query_engine = index.as_query_engine(include_text=True, response_mode="tree_summarize")
response = query_engine.query(
    "Tell me more about what the author worked on at Interleaf",
)

INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: Tell me more about what the author worked on at Interleaf
INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['author', 'work', 'Interleaf']
ERROR:llama_index.indices.knowledge_graph.retriever:Index was not constructed with embeddings, skipping embedding usage...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: e7434aa4-fedb-4c2e-a5a9-215fec07e861: each student had. But the Accademia wasn't teaching me anything except Italia...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 5a71c8eb-d4b4-4b82-9c7b-ae082ee229a4: answer turned out to be. If this surprised me, who'd lived it, then I thought...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 54da315c-1081-4565-9a43-0a93f7537b5d: learned some useful things at Interleaf, though they were mostly about what n...
INFO:llama_index.indices.knowledge_graph.retriever:> Extracted relationships

In [34]:
display(Markdown(f"<b>{response}</b>"))

<b>
At Interleaf, the author worked on software for creating documents, similar to Microsoft Word. He also worked on a scripting language, which was a dialect of Lisp. He was paid a large amount of money, but he was a bad employee since he didn't understand most of the software and was irresponsible. He also spent much of his time working on On Lisp, which he had gotten a contract to publish. He learned some useful things at Interleaf, mostly about what not to do, such as how cheap office space is not a bargain if it's depressing, that big, bureaucratic customers are a dangerous source of money, and that there's not much overlap between conventional office hours and the optimal time for hacking. He also learned that it's better for technology companies to be run by product people than sales people, and that it leads to bugs when code is edited by too many people.</b>

## Query with embeddings OpenAI

In [37]:
new_index = KnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=2,
    service_context=service_context,
    include_embeddings=True,
)

In [38]:
query_engine = new_index.as_query_engine(
    include_text=True,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=5,
)
response = query_engine.query(
    "Tell me more about what the author worked on at Interleaf",
)

INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: Tell me more about what the author worked on at Interleaf
INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['author', 'work', 'Interleaf']
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 8ca14d48-cf10-4081-8aeb-f5d7426d37eb: answer turned out to be. If this surprised me, who'd lived it, then I thought...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: e7a3304a-9261-404c-bd5e-35797987a805: learned some useful things at Interleaf, though they were mostly about what n...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 333b24de-e375-46c4-a7c4-583e26343217: each student had. But the Accademia wasn't teaching me anything except Italia...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 2fceb37d-7dc4-4431-931d-1d46a3b20016: It had been so long since I'd painted anything that I'd half forgotten why I ...
INFO:lla

In [39]:
display(Markdown(f"<b>{response}</b>"))

<b>
At Interleaf, the author worked on software for creating documents using a Lisp dialect scripting language. He also did freelance Lisp hacking work and wrote a book on Lisp. Additionally, he learned about the dangers of big, bureaucratic customers, the importance of product people over sales people, the need for cheap office space, and the lack of overlap between conventional office hours and the optimal time for hacking. He also learned the important lesson that low end software tends to eat high end software.</b>

In [None]:
## create graph
from pyvis.network import Network

g = index.get_networkx_graph()
net = Network(notebook=True, cdn_resources="in_line", directed=True)
net.from_nx(g)
net.show("example.html")

## Test save and load OpenAI

save

In [41]:
new_index.set_index_id("vector_index_graph")
new_index.storage_context.persist("./openai_test_vector_graph")

load

In [46]:
from llama_index import(
    load_index_from_storage,
    )
storage_context = StorageContext.from_defaults(persist_dir="./openai_test_vector_graph")
new_index_load = load_index_from_storage(storage_context, service_context=service_context)

INFO:llama_index.indices.loading:Loading all indices.


In [47]:
query_engine = new_index_load.as_query_engine(
    include_text=True,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=5,
)
response = query_engine.query(
    "Tell me more about what the author worked on at Interleaf",
)

INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: Tell me more about what the author worked on at Interleaf
INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['author', 'work', 'Interleaf']
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 8ca14d48-cf10-4081-8aeb-f5d7426d37eb: answer turned out to be. If this surprised me, who'd lived it, then I thought...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: e7a3304a-9261-404c-bd5e-35797987a805: learned some useful things at Interleaf, though they were mostly about what n...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 333b24de-e375-46c4-a7c4-583e26343217: each student had. But the Accademia wasn't teaching me anything except Italia...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 3ba19a19-e0bd-4f9b-b604-0cdd3398c37f: essays. [11]

In the print era, the channel for publishing essays had been va...
INFO:lla

In [48]:
display(Markdown(f"<b>{response}</b>"))

<b>
At Interleaf, the author worked on software for creating documents using a Lisp dialect scripting language. He was involved in multiple projects, and gained valuable experience in software development. He learned that low end software tends to eat high end software, and that it's better for technology companies to be run by product people than sales people. He also learned that it leads to bugs when code is edited by too many people, that cheap office space is no bargain if it's depressing, that planned meetings are inferior to corridor conversations, and that big, bureaucratic customers are a dangerous source of money. He also learned that there's not much overlap between conventional office hours and the optimal time for hacking, or conventional offices and the optimal place for it.</b>

In [49]:
len(documents)

1

In [50]:
documents

[Document(id_='3feb2c48-fca5-453e-bbd2-be5a61cdab42', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='4c702b4df575421e1d1af4b1fd50511b226e0c9863dbfffeccb8b689b8448f35', text='\t\t\n\nWhat I Worked On\n\nFebruary 2021\n\nBefore college the two main things I worked on, outside of school, were writing and programming. I didn\'t write essays. I wrote what beginning writers were supposed to write then, and probably still are: short stories. My stories were awful. They had hardly any plot, just characters with strong feelings, which I imagined made them deep.\n\nThe first programs I tried writing were on the IBM 1401 that our school district used for what was then called "data processing." This was in 9th grade, so I was 13 or 14. The school district\'s 1401 happened to be in the basement of our junior high school, and my friend Rich Draves and I got permission to use it. It was like a mini Bond villain\'s lair down there, 

## Create Knowledge with Llama-cpp

In [68]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.llms import LangChainLLM

In [69]:
def load_llm():
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    llm = LlamaCpp(
        model_path="/home/sira/sira_project/DQA_demo/orca-mini-3b.ggmlv3.q4_0.bin",
        n_ctx=2048,
        callback_manager=callback_manager,
        verbose=True,
    )
    return llm

In [71]:
llm_predictor = LLMPredictor(llm=LangChainLLM(llm = load_llm()))
#embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2",model_kwargs = {'device': 'cpu'}))
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, chunk_size=512)

llama.cpp: loading model from /home/sira/sira_project/DQA_demo/orca-mini-3b.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 3200
llama_model_load_internal: n_mult     = 240
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 26
llama_model_load_internal: n_rot      = 100
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 8640
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 3B
llama_model_load_internal: ggml ctx size =    0.07 MB
llama_model_load_internal: mem required  = 2194.73 MB (+  650.00 MB per state)
llama_new_context_with_model: kv self siz

In [75]:
from llama_index.storage.storage_context import StorageContext

graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)

# NOTE: can take a while!
index = KnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=2,
    storage_context=storage_context,
    service_context=service_context,
    show_progress = True
)

Parsing documents into nodes: 100%|██████████| 1/1 [00:00<00:00,  5.13it/s]
Processing nodes:   0%|          | 0/40 [00:00<?, ?it/s]Llama.generate: prefix-match hit


(you)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     2.23 ms /     4 runs   (    0.56 ms per token,  1794.53 tokens per second)
llama_print_timings: prompt eval time = 51637.87 ms /   496 tokens (  104.11 ms per token,     9.61 tokens per second)
llama_print_timings:        eval time =   379.19 ms /     3 runs   (  126.40 ms per token,     7.91 tokens per second)
llama_print_timings:       total time = 52180.50 ms
Processing nodes:   2%|▎         | 1/40 [00:52<33:55, 52.19s/it]Llama.generate: prefix-match hit


(Alice, is, mother of, Bob)
(Philz, founded in, Berkeley)
(Philz, founded in, 1982)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    20.41 ms /    36 runs   (    0.57 ms per token,  1763.84 tokens per second)
llama_print_timings: prompt eval time = 49876.45 ms /   488 tokens (  102.21 ms per token,     9.78 tokens per second)
llama_print_timings:        eval time =  4369.33 ms /    36 runs   (  121.37 ms per token,     8.24 tokens per second)
llama_print_timings:       total time = 54532.61 ms
Processing nodes:   5%|▌         | 2/40 [01:46<33:55, 53.57s/it]Llama.generate: prefix-match hit


(Alice, is, computer)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     4.74 ms /     9 runs   (    0.53 ms per token,  1897.13 tokens per second)
llama_print_timings: prompt eval time = 49646.91 ms /   470 tokens (  105.63 ms per token,     9.47 tokens per second)
llama_print_timings:        eval time =   996.81 ms /     8 runs   (  124.60 ms per token,     8.03 tokens per second)
llama_print_timings:       total time = 50784.86 ms
Processing nodes:   8%|▊         | 3/40 [02:37<32:15, 52.30s/it]Llama.generate: prefix-match hit


(Lisp, hacking, book)

(Lisp, programming language, in its own way as interesting as anything in the computer science literature at the time)
(Lisp, in the Lisp community, a source of constant fascination, as well as frustration and disillusionment)
(Lisp, as I understood it, a highly non-standard dialect with its own idiosyncratic features, which meant that it often took me longer to type than to read what I wrote)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    58.50 ms /   104 runs   (    0.56 ms per token,  1777.66 tokens per second)
llama_print_timings: prompt eval time = 51509.65 ms /   459 tokens (  112.22 ms per token,     8.91 tokens per second)
llama_print_timings:        eval time = 12892.30 ms /   103 runs   (  125.17 ms per token,     7.99 tokens per second)
llama_print_timings:       total time = 64932.16 ms
Processing nodes:  10%|█         | 4/40 [03:42<34:22, 57.29s/it]Llama.generate: prefix-match hit


(live on your own terms)
(beauteous)
(enchanting)
(exciting)
(I)
(In fact, when I went back to my grad school visits at Cal, I found that a lot of people were interested in the question of how to make art as well as code. How to combine their two loves.)

So I returned to Xerox and started doing what I thought could be a good life.

---


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    55.43 ms /   101 runs   (    0.55 ms per token,  1822.02 tokens per second)
llama_print_timings: prompt eval time = 49189.30 ms /   472 tokens (  104.21 ms per token,     9.60 tokens per second)
llama_print_timings:        eval time = 12649.24 ms /   101 runs   (  125.24 ms per token,     7.98 tokens per second)
llama_print_timings:       total time = 62367.60 ms
Processing nodes:  12%|█▎        | 5/40 [04:44<34:29, 59.12s/it]Llama.generate: prefix-match hit



(Alice, to, mother of)

(Philz, founded in, Berkeley)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    12.38 ms /    22 runs   (    0.56 ms per token,  1776.77 tokens per second)
llama_print_timings: prompt eval time = 49594.18 ms /   486 tokens (  102.05 ms per token,     9.80 tokens per second)
llama_print_timings:        eval time =  2603.12 ms /    21 runs   (  123.96 ms per token,     8.07 tokens per second)
llama_print_timings:       total time = 52434.71 ms
Processing nodes:  15%|█▌        | 6/40 [05:37<32:12, 56.85s/it]Llama.generate: prefix-match hit


(Alice, is mother of, Bob)
(Philz, founded in, Berkeley)
(Philz, founded in, 1982)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    20.21 ms /    35 runs   (    0.58 ms per token,  1732.07 tokens per second)
llama_print_timings: prompt eval time = 48203.07 ms /   464 tokens (  103.89 ms per token,     9.63 tokens per second)
llama_print_timings:        eval time =  4356.40 ms /    35 runs   (  124.47 ms per token,     8.03 tokens per second)
llama_print_timings:       total time = 52843.43 ms
Processing nodes:  18%|█▊        | 7/40 [06:30<30:32, 55.54s/it]Llama.generate: prefix-match hit


(move)

move. They just stand there looking at you. And when they smile you have to keep that in mind, because the paint will move to follow their contours. It's like being a child again, except you have to be more self-possessed than ever before. I remember one night when I was finishing up a set of still lifes and was feeling very tired. The first one had taken me about 45 minutes, and the last one was looking quite different from all the rest. I turned out the lights and started working on it. The painting looked as though it were taking on its own life for a moment, and I thought it was going to come alive. Instead, it just crumpled into itself, and I knew I had painted myself into the wall.


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    91.89 ms /   166 runs   (    0.55 ms per token,  1806.55 tokens per second)
llama_print_timings: prompt eval time = 48306.42 ms /   467 tokens (  103.44 ms per token,     9.67 tokens per second)
llama_print_timings:        eval time = 20343.32 ms /   165 runs   (  123.29 ms per token,     8.11 tokens per second)
llama_print_timings:       total time = 69512.16 ms
Processing nodes:  20%|██        | 8/40 [07:39<31:59, 59.99s/it]Llama.generate: prefix-match hit


(I, wanted, learn)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     4.86 ms /     8 runs   (    0.61 ms per token,  1645.08 tokens per second)
llama_print_timings: prompt eval time = 50270.59 ms /   487 tokens (  103.23 ms per token,     9.69 tokens per second)
llama_print_timings:        eval time =   866.66 ms /     7 runs   (  123.81 ms per token,     8.08 tokens per second)
llama_print_timings:       total time = 51320.15 ms
Processing nodes:  22%|██▎       | 9/40 [08:30<29:35, 57.28s/it]Llama.generate: prefix-match hit


(Interleaf, was, low)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     4.80 ms /     9 runs   (    0.53 ms per token,  1873.05 tokens per second)
llama_print_timings: prompt eval time = 51297.98 ms /   486 tokens (  105.55 ms per token,     9.47 tokens per second)
llama_print_timings:        eval time =   962.49 ms /     8 runs   (  120.31 ms per token,     8.31 tokens per second)
llama_print_timings:       total time = 52436.11 ms
Processing nodes:  25%|██▌       | 10/40 [09:23<27:53, 55.79s/it]Llama.generate: prefix-match hit


Alice/mother of Philz, Philz/coffee shop, Berkeley/1982, entry level/prestige


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    16.56 ms /    29 runs   (    0.57 ms per token,  1751.31 tokens per second)
llama_print_timings: prompt eval time = 46968.44 ms /   461 tokens (  101.88 ms per token,     9.82 tokens per second)
llama_print_timings:        eval time =  3401.71 ms /    28 runs   (  121.49 ms per token,     8.23 tokens per second)
llama_print_timings:       total time = 50616.09 ms
Processing nodes:  28%|██▊       | 11/40 [10:14<26:11, 54.21s/it]Llama.generate: prefix-match hit



a village where all the residents have a special ability, such as being able to communicate with animals or having super strength. The series follows the adventures of a warrior named Asterix and his sidekick Obelix as they try to stop the Roman Empire from taking over their village. 

There are many other characters in the series, but the two main ones are Asterix and Obelix.


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    46.96 ms /    85 runs   (    0.55 ms per token,  1810.09 tokens per second)
llama_print_timings: prompt eval time = 46853.91 ms /   452 tokens (  103.66 ms per token,     9.65 tokens per second)
llama_print_timings:        eval time = 10295.11 ms /    84 runs   (  122.56 ms per token,     8.16 tokens per second)
llama_print_timings:       total time = 57567.04 ms
Processing nodes:  30%|███       | 12/40 [11:11<25:46, 55.23s/it]Llama.generate: prefix-match hit


(had a studio. He was working with a computer scientist, Tim Berners-Lee, on a thing called the World Wide Web.)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    16.89 ms /    30 runs   (    0.56 ms per token,  1775.99 tokens per second)
llama_print_timings: prompt eval time = 49328.01 ms /   480 tokens (  102.77 ms per token,     9.73 tokens per second)
llama_print_timings:        eval time =  3655.95 ms /    30 runs   (  121.86 ms per token,     8.21 tokens per second)
llama_print_timings:       total time = 53228.62 ms
Processing nodes:  32%|███▎      | 13/40 [12:04<24:34, 54.63s/it]Llama.generate: prefix-match hit



- Alice is mother of
- Bob
- the web


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     7.32 ms /    14 runs   (    0.52 ms per token,  1913.35 tokens per second)
llama_print_timings: prompt eval time = 51416.41 ms /   477 tokens (  107.79 ms per token,     9.28 tokens per second)
llama_print_timings:        eval time =  1581.66 ms /    13 runs   (  121.67 ms per token,     8.22 tokens per second)
llama_print_timings:       total time = 53211.99 ms
Processing nodes:  35%|███▌      | 14/40 [12:58<23:29, 54.20s/it]Llama.generate: prefix-match hit


(Alice, is, mother of)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     5.50 ms /    10 runs   (    0.55 ms per token,  1818.51 tokens per second)
llama_print_timings: prompt eval time = 51889.20 ms /   491 tokens (  105.68 ms per token,     9.46 tokens per second)
llama_print_timings:        eval time =  1142.20 ms /     9 runs   (  126.91 ms per token,     7.88 tokens per second)
llama_print_timings:       total time = 53211.54 ms
Processing nodes:  38%|███▊      | 15/40 [13:51<22:27, 53.90s/it]Llama.generate: prefix-match hit


(1) in September 1998,
(2) with a basic set of features, and
(3) without any ads or popups.


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    18.77 ms /    35 runs   (    0.54 ms per token,  1864.48 tokens per second)
llama_print_timings: prompt eval time = 51185.86 ms /   482 tokens (  106.19 ms per token,     9.42 tokens per second)
llama_print_timings:        eval time =  4278.04 ms /    34 runs   (  125.82 ms per token,     7.95 tokens per second)
llama_print_timings:       total time = 55748.09 ms
Processing nodes:  40%|████      | 16/40 [14:47<21:47, 54.46s/it]Llama.generate: prefix-match hit


(d'etat,
tout)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     6.07 ms /    11 runs   (    0.55 ms per token,  1811.89 tokens per second)
llama_print_timings: prompt eval time = 54021.00 ms /   492 tokens (  109.80 ms per token,     9.11 tokens per second)
llama_print_timings:        eval time =  1223.70 ms /    10 runs   (  122.37 ms per token,     8.17 tokens per second)
llama_print_timings:       total time = 55439.76 ms
Processing nodes:  42%|████▎     | 17/40 [15:42<20:59, 54.76s/it]Llama.generate: prefix-match hit


(user, was, want)
(Alice, was, want)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     9.77 ms /    17 runs   (    0.57 ms per token,  1740.55 tokens per second)
llama_print_timings: prompt eval time = 54518.59 ms /   512 tokens (  106.48 ms per token,     9.39 tokens per second)
llama_print_timings:        eval time =  1978.41 ms /    16 runs   (  123.65 ms per token,     8.09 tokens per second)
llama_print_timings:       total time = 56715.60 ms
Processing nodes:  45%|████▌     | 18/40 [16:39<20:17, 55.35s/it]Llama.generate: prefix-match hit


(Alice, is, mother of, Bob)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     6.88 ms /    12 runs   (    0.57 ms per token,  1745.45 tokens per second)
llama_print_timings: prompt eval time = 52870.88 ms /   487 tokens (  108.56 ms per token,     9.21 tokens per second)
llama_print_timings:        eval time =  1366.41 ms /    11 runs   (  124.22 ms per token,     8.05 tokens per second)
llama_print_timings:       total time = 54431.54 ms
Processing nodes:  48%|████▊     | 19/40 [17:33<19:16, 55.07s/it]Llama.generate: prefix-match hit


- (I'd, traveled with)
- (I'd, traveled with)
- (I'd, been in, New York)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    18.61 ms /    33 runs   (    0.56 ms per token,  1773.43 tokens per second)
llama_print_timings: prompt eval time = 50054.33 ms /   463 tokens (  108.11 ms per token,     9.25 tokens per second)
llama_print_timings:        eval time =  4220.28 ms /    32 runs   (  131.88 ms per token,     7.58 tokens per second)
llama_print_timings:       total time = 54568.97 ms
Processing nodes:  50%|█████     | 20/40 [18:28<18:18, 54.92s/it]Llama.generate: prefix-match hit


(Alice, joining me in, New York)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     7.17 ms /    12 runs   (    0.60 ms per token,  1672.71 tokens per second)
llama_print_timings: prompt eval time = 49672.25 ms /   468 tokens (  106.14 ms per token,     9.42 tokens per second)
llama_print_timings:        eval time =  1458.10 ms /    11 runs   (  132.55 ms per token,     7.54 tokens per second)
llama_print_timings:       total time = 51317.57 ms
Processing nodes:  52%|█████▎    | 21/40 [19:19<17:03, 53.84s/it]Llama.generate: prefix-match hit


(Alice, is mother of, Bob)
(Philz, is, coffee shop)
(Viaweb, has been, a software development company)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    19.52 ms /    35 runs   (    0.56 ms per token,  1793.03 tokens per second)
llama_print_timings: prompt eval time = 49814.72 ms /   480 tokens (  103.78 ms per token,     9.64 tokens per second)
llama_print_timings:        eval time =  4212.08 ms /    34 runs   (  123.88 ms per token,     8.07 tokens per second)
llama_print_timings:       total time = 54297.04 ms
Processing nodes:  55%|█████▌    | 22/40 [20:13<16:11, 53.98s/it]Llama.generate: prefix-match hit


(Essay)

had been narrow, and the monsters
Triplets:
(Editor)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    14.59 ms /    25 runs   (    0.58 ms per token,  1713.74 tokens per second)
llama_print_timings: prompt eval time = 50331.12 ms /   472 tokens (  106.63 ms per token,     9.38 tokens per second)
llama_print_timings:        eval time =  2999.72 ms /    24 runs   (  124.99 ms per token,     8.00 tokens per second)
llama_print_timings:       total time = 53587.38 ms
Processing nodes:  57%|█████▊    | 23/40 [21:07<15:15, 53.86s/it]Llama.generate: prefix-match hit


(isn't)
(leaves)
(a)

Text: essays. [11] 

(Alice, is, mother of, Bob)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    22.47 ms /    40 runs   (    0.56 ms per token,  1780.47 tokens per second)
llama_print_timings: prompt eval time = 49367.43 ms /   471 tokens (  104.81 ms per token,     9.54 tokens per second)
llama_print_timings:        eval time =  5354.68 ms /    39 runs   (  137.30 ms per token,     7.28 tokens per second)
llama_print_timings:       total time = 55019.79 ms
Processing nodes:  60%|██████    | 24/40 [22:02<14:27, 54.21s/it]Llama.generate: prefix-match hit


been to start with a clear subject sentence that grabs the reader's attention, then build up from there, using evidence from lots of sources to support whatever idea or argument I was making. That technique had worked well in the essay about why the Internet was changing everything, and it seemed like a good way to approach the book about startups. So I started writing my own book on the subject.


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    44.02 ms /    81 runs   (    0.54 ms per token,  1840.24 tokens per second)
llama_print_timings: prompt eval time = 50824.83 ms /   470 tokens (  108.14 ms per token,     9.25 tokens per second)
llama_print_timings:        eval time =  9968.22 ms /    80 runs   (  124.60 ms per token,     8.03 tokens per second)
llama_print_timings:       total time = 61257.66 ms
Processing nodes:  62%|██████▎   | 25/40 [23:03<14:04, 56.33s/it]Llama.generate: prefix-match hit


(out. Some of these will have more than one element.)

And now that I had a business plan and a team, I felt like I was ready for the next step. We started reaching out to investors the week after we announced our plans. It was slow going at first, but eventually we got some interest from high net worth individuals who wanted to invest in Y Combinator.

As it turned out, one of the biggest advantages of starting a startup with people you knew well is that you can brainstorm and bounce ideas off of each other. Even if you don't always agree, you can usually come up with something better together than you would alone. I'd been worried about the workload when we decided to start Y Combinator, but it turned out to be a blast.

One of the things that made starting Y Combinator so much fun was that we knew each other well. We could trust each other to take risks and be supportive. For example, when I said I wanted to quit my job to start a company with Jessica, Robert, and Trevor, they all b


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =   133.65 ms /   240 runs   (    0.56 ms per token,  1795.68 tokens per second)
llama_print_timings: prompt eval time = 50287.11 ms /   476 tokens (  105.65 ms per token,     9.47 tokens per second)
llama_print_timings:        eval time = 30019.16 ms /   239 runs   (  125.60 ms per token,     7.96 tokens per second)
llama_print_timings:       total time = 81406.27 ms
Processing nodes:  65%|██████▌   | 26/40 [24:25<14:53, 63.85s/it]Llama.generate: prefix-match hit


(y Combinator, batch model)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     5.34 ms /     9 runs   (    0.59 ms per token,  1685.08 tokens per second)
llama_print_timings: prompt eval time = 47119.13 ms /   459 tokens (  102.66 ms per token,     9.74 tokens per second)
llama_print_timings:        eval time =   935.68 ms /     8 runs   (  116.96 ms per token,     8.55 tokens per second)
llama_print_timings:       total time = 48230.70 ms
Processing nodes:  68%|██████▊   | 27/40 [25:13<12:49, 59.17s/it]Llama.generate: prefix-match hit


(Alice, is, startup)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     5.16 ms /     9 runs   (    0.57 ms per token,  1744.52 tokens per second)
llama_print_timings: prompt eval time = 50151.67 ms /   486 tokens (  103.19 ms per token,     9.69 tokens per second)
llama_print_timings:        eval time =   985.55 ms /     8 runs   (  123.19 ms per token,     8.12 tokens per second)
llama_print_timings:       total time = 51349.17 ms
Processing nodes:  70%|███████   | 28/40 [26:04<11:21, 56.82s/it]Llama.generate: prefix-match hit


Startup News and decided to change the name to Hacker News. It gained popularity as a place for people to discuss technology and startups, and it still is today.


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    19.87 ms /    35 runs   (    0.57 ms per token,  1761.45 tokens per second)
llama_print_timings: prompt eval time = 47851.60 ms /   464 tokens (  103.13 ms per token,     9.70 tokens per second)
llama_print_timings:        eval time =  4201.83 ms /    34 runs   (  123.58 ms per token,     8.09 tokens per second)
llama_print_timings:       total time = 52336.99 ms
Processing nodes:  72%|███████▎  | 29/40 [26:57<10:10, 55.48s/it]Llama.generate: prefix-match hit


(Alice, is, mother of) Bob
(Philz, founded in, Berkeley)
(Alice, was, called) Startup News
(HN, was, a mistake)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    23.15 ms /    41 runs   (    0.56 ms per token,  1771.29 tokens per second)
llama_print_timings: prompt eval time = 50993.22 ms /   483 tokens (  105.58 ms per token,     9.47 tokens per second)
llama_print_timings:        eval time =  4979.67 ms /    40 runs   (  124.49 ms per token,     8.03 tokens per second)
llama_print_timings:       total time = 56293.09 ms
Processing nodes:  75%|███████▌  | 30/40 [27:53<09:17, 55.72s/it]Llama.generate: prefix-match hit


(planned, to, leave)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     4.76 ms /     9 runs   (    0.53 ms per token,  1888.77 tokens per second)
llama_print_timings: prompt eval time = 49677.81 ms /   485 tokens (  102.43 ms per token,     9.76 tokens per second)
llama_print_timings:        eval time =   993.03 ms /     8 runs   (  124.13 ms per token,     8.06 tokens per second)
llama_print_timings:       total time = 50858.09 ms
Processing nodes:  78%|███████▊  | 31/40 [28:44<08:08, 54.27s/it]Llama.generate: prefix-match hit


(Alice, is mother of, Bob)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     6.17 ms /    11 runs   (    0.56 ms per token,  1783.98 tokens per second)
llama_print_timings: prompt eval time = 51893.89 ms /   504 tokens (  102.96 ms per token,     9.71 tokens per second)
llama_print_timings:        eval time =  1352.95 ms /    11 runs   (  123.00 ms per token,     8.13 tokens per second)
llama_print_timings:       total time = 53438.48 ms
Processing nodes:  80%|████████  | 32/40 [29:37<07:12, 54.02s/it]Llama.generate: prefix-match hit


(Lisp, interpreted in, Lisp)
(Lisp, interpreted in, Lisp)
(Lisp, interpreted in, Lisp)
---------------------

The same day I found my old Lisp books and the Lisp machine. And I realized that the Lisp I'd been studying for was what we now call functional programming. Functional programming is an approach to programming that emphasizes using functions to manipulate data instead of relying on objects or classes. It's much more powerful than Lisp, which is mostly about manipulating symbols.

But I still wanted to use Lisp to do stuff that wasn't just mathematical exercises. And I wanted to learn enough to write some computer programs too. So I started reading up on that too. I'm not sure how far I'll get, but it's worth a try.


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    98.30 ms /   179 runs   (    0.55 ms per token,  1820.96 tokens per second)
llama_print_timings: prompt eval time = 49363.48 ms /   475 tokens (  103.92 ms per token,     9.62 tokens per second)
llama_print_timings:        eval time = 21962.10 ms /   178 runs   (  123.38 ms per token,     8.10 tokens per second)
llama_print_timings:       total time = 72112.66 ms
Processing nodes:  82%|████████▎ | 33/40 [30:49<06:56, 59.45s/it]Llama.generate: prefix-match hit


(Bel, written)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =     3.05 ms /     6 runs   (    0.51 ms per token,  1964.64 tokens per second)
llama_print_timings: prompt eval time = 50205.02 ms /   488 tokens (  102.88 ms per token,     9.72 tokens per second)
llama_print_timings:        eval time =   733.17 ms /     6 runs   (  122.19 ms per token,     8.18 tokens per second)
llama_print_timings:       total time = 51097.29 ms
Processing nodes:  85%|████████▌ | 34/40 [31:40<05:41, 56.95s/it]Llama.generate: prefix-match hit


(Alice, is, mother of, Bob)
Text: The book is about a man named William and his journey to find his lost friend, whose name is also William.


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    20.18 ms /    38 runs   (    0.53 ms per token,  1883.15 tokens per second)
llama_print_timings: prompt eval time = 50874.34 ms /   492 tokens (  103.40 ms per token,     9.67 tokens per second)
llama_print_timings:        eval time =  4820.54 ms /    37 runs   (  130.28 ms per token,     7.68 tokens per second)
llama_print_timings:       total time = 55970.58 ms
Processing nodes:  88%|████████▊ | 35/40 [32:36<04:43, 56.65s/it]Llama.generate: prefix-match hit


(means, in a given context) to have the right price. But the best artists are always worth something, whether they sell out or not.


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    17.34 ms /    32 runs   (    0.54 ms per token,  1845.12 tokens per second)
llama_print_timings: prompt eval time = 34335.38 ms /   462 tokens (   74.32 ms per token,    13.46 tokens per second)
llama_print_timings:        eval time =  3149.86 ms /    31 runs   (  101.61 ms per token,     9.84 tokens per second)
llama_print_timings:       total time = 37697.78 ms
Processing nodes:  90%|█████████ | 36/40 [33:14<03:23, 50.97s/it]Llama.generate: prefix-match hit


(Customary, venture capital, practice)
(Customary, venture capital, practice)
(Customary, venture capital, practice)
[13] "But you have to ask yourself," she said, "what kind of person am I, and how can I be a positive force in the world? These are the questions that shape our choices." In other words, we are all responsible for what we do and say. The world is full of choices.


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    53.08 ms /    97 runs   (    0.55 ms per token,  1827.53 tokens per second)
llama_print_timings: prompt eval time = 34121.98 ms /   476 tokens (   71.68 ms per token,    13.95 tokens per second)
llama_print_timings:        eval time =  9850.83 ms /    96 runs   (  102.61 ms per token,     9.75 tokens per second)
llama_print_timings:       total time = 44397.54 ms
Processing nodes:  92%|█████████▎| 37/40 [33:58<02:26, 49.00s/it]Llama.generate: prefix-match hit


(been) started.

[16] So in summary, YC has been a failure, but I believe that in the future more founders will succeed than fail, since the rate of change in the startup market is increasing and we'll be able to predict which fields are affected better than others.


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    34.58 ms /    65 runs   (    0.53 ms per token,  1879.59 tokens per second)
llama_print_timings: prompt eval time = 33315.27 ms /   467 tokens (   71.34 ms per token,    14.02 tokens per second)
llama_print_timings:        eval time =  6647.85 ms /    64 runs   (  103.87 ms per token,     9.63 tokens per second)
llama_print_timings:       total time = 40285.88 ms
Processing nodes:  95%|█████████▌| 38/40 [34:39<01:32, 46.39s/it]Llama.generate: prefix-match hit


(Alice, is, mother of, Bob)
(Philz, is, coffee shop)
(Philz, founded in, Berkeley)
(Philz, founded in, 1982)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    24.72 ms /    46 runs   (    0.54 ms per token,  1861.14 tokens per second)
llama_print_timings: prompt eval time = 33218.29 ms /   463 tokens (   71.75 ms per token,    13.94 tokens per second)
llama_print_timings:        eval time =  4672.83 ms /    45 runs   (  103.84 ms per token,     9.63 tokens per second)
llama_print_timings:       total time = 38149.58 ms
Processing nodes:  98%|█████████▊| 39/40 [35:17<00:43, 43.92s/it]Llama.generate: prefix-match hit


(Trevor Blackwell, John Collison, Patrick Collison), (Daniel Gackle, Ralph Hazell), (Jessica Livingston, Robert Morris), (Harj Taggar, for reading drafts of this)


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    26.11 ms /    50 runs   (    0.52 ms per token,  1914.98 tokens per second)
llama_print_timings: prompt eval time =  3505.14 ms /    47 tokens (   74.58 ms per token,    13.41 tokens per second)
llama_print_timings:        eval time =  4715.75 ms /    49 runs   (   96.24 ms per token,    10.39 tokens per second)
llama_print_timings:       total time =  8393.61 ms
Processing nodes: 100%|██████████| 40/40 [35:25<00:00, 53.14s/it]


In [76]:
query_engine = index.as_query_engine(
    include_text=True,
    response_mode="tree_summarize",
    embedding_mode="hybrid",
    similarity_top_k=5,
)
response = query_engine.query(
    "Tell me more about what the author worked on at Interleaf",
)

INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: Tell me more about what the author worked on at Interleaf


Llama.generate: prefix-match hit



Here are some example keywords from the text that we can use to best lookup answers to the question:

'KEYWORDS: Interleaf, author, work, at, more'.


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    24.61 ms /    42 runs   (    0.59 ms per token,  1706.55 tokens per second)
llama_print_timings: prompt eval time =  9349.69 ms /    92 tokens (  101.63 ms per token,     9.84 tokens per second)
llama_print_timings:        eval time =  4985.63 ms /    41 runs   (  121.60 ms per token,     8.22 tokens per second)
llama_print_timings:       total time = 14536.07 ms


INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ["Here are some example keywords from the text that we can use to best lookup answers to the question:\n\n'KEYWORDS: Interleaf", 'work', 'keywords', "more'.", 'example', 'KEYWORDS', 'at', 'answers', 'author', 'use', 'lookup', 'question', 'text', 'Here', 'Interleaf', 'best']
ERROR:llama_index.indices.knowledge_graph.retriever:Index was not constructed with embeddings, skipping embedding usage...
INFO:llama_index.indices.knowledge_graph.retriever:> Querying with idx: 468fbc0a-9775-467e-add2-0272aa15f8c9: each student had. But the Accademia wasn't teaching me anything except Italia...
INFO:llama_index.indices.knowledge_graph.retriever:> Extracted relationships: The following are knowledge triplets in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`
Interleaf ['was', 'low']


Llama.generate: prefix-match hit



Interleaf was a company that created software for creating documents. The author went there and learned about their software, which had a scripting language, and even made it a dialect of Lisp. The author then spent much of the year working on On Lisp, which they were publishing by the end of the year.


llama_print_timings:        load time =   887.21 ms
llama_print_timings:      sample time =    36.66 ms /    66 runs   (    0.56 ms per token,  1800.23 tokens per second)
llama_print_timings: prompt eval time = 57790.39 ms /   570 tokens (  101.39 ms per token,     9.86 tokens per second)
llama_print_timings:        eval time =  7917.96 ms /    65 runs   (  121.81 ms per token,     8.21 tokens per second)
llama_print_timings:       total time = 66161.43 ms


In [77]:
display(Markdown(f"<b>{response}</b>"))

<b>
Interleaf was a company that created software for creating documents. The author went there and learned about their software, which had a scripting language, and even made it a dialect of Lisp. The author then spent much of the year working on On Lisp, which they were publishing by the end of the year.</b>

In [78]:
response

Response(response='\nInterleaf was a company that created software for creating documents. The author went there and learned about their software, which had a scripting language, and even made it a dialect of Lisp. The author then spent much of the year working on On Lisp, which they were publishing by the end of the year.', source_nodes=[NodeWithScore(node=TextNode(id_='468fbc0a-9775-467e-add2-0272aa15f8c9', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='3feb2c48-fca5-453e-bbd2-be5a61cdab42', node_type=None, metadata={}, hash='4c702b4df575421e1d1af4b1fd50511b226e0c9863dbfffeccb8b689b8448f35'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='7595c2ec-05f9-42e3-86da-95261c9bf888', node_type=None, metadata={}, hash='3f18d914bc8f1defa1cd575534b30678444c28cc5bdbb83f2225b6124c5de8dc'), <NodeRelationship.NEXT: '3'>: RelatedNodeInfo(node_id='f9765949-fd93-45ed-8e51-

# Create knowledge graph with Omniscien remove HTML tag 

In [None]:
from llama_index import(
    ServiceContext,
    StorageContext,
    SimpleDirectoryReader,
    LangchainEmbedding,
    VectorStoreIndex,
    load_index_from_storage,
    load_graph_from_storage,
    LLMPredictor,
    PromptHelper
    ) 
    
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.llms import LangChainLLM
from llama_index.storage.storage_context import StorageContext

from typing import Callable, Dict, Generator, List, Optional, Type
from pathlib import Path
from llama_index.readers.base import BaseReader
from llama_index.schema import Document
import logging


logger = logging.getLogger(__name__)
class DirectorySearchSource():
    def __init__(
    self,
    num_files_limit: Optional[int] = None,
    exclude_hidden: bool = True,
    required_exts: Optional[List[str]]  = None,
    recursive : bool = True,):
    
        super().__init__()

        self.recursive = recursive
        self.exclude_hidden = exclude_hidden
        self.required_exts = required_exts
        self.num_files_limit = num_files_limit

    def add_files(self, input_dir):
        all_files = set()
        rejected_files = set()
        list_files = []

        file_refs: Generator[Path, None, None]
        if self.recursive:
            file_refs = Path(input_dir).rglob("*")
        else:
            file_refs = Path(input_dir).glob("*")
        for ref in file_refs:
            # Manually check if file is hidden or directory instead of
            # in glob for backwards compatibility.
            is_dir = ref.is_dir()
            skip_because_hidden = self.exclude_hidden and ref.name.startswith(".")
            skip_because_bad_ext = (
                self.required_exts is not None and ref.suffix not in self.required_exts
            )
            skip_because_excluded = ref in rejected_files

            if (
                is_dir
                or skip_because_hidden
                or skip_because_bad_ext
                or skip_because_excluded
            ):
                continue
            else:
                all_files.add(ref)
        new_input_files = sorted(list(all_files))

        if len(new_input_files) == 0:
            raise ValueError(f"No files found in {input_dir}.")

        if self.num_files_limit is not None and self.num_files_limit > 0:
            new_input_files = new_input_files[0 : num_files_limit]

        # print total number of files added
        logger.debug(
            f"> [SimpleDirectoryReader] Total files added: {len(new_input_files)}")

        for f in new_input_files:
            list_files.append(str(f))
        return list_files

class HtmlFilesReader(BaseReader):
    """Simple web page reader.

    Reads pages from the web.

    Args:
        html_to_text (bool): Whether to convert HTML to text.
            Requires `html2text` package.

    """

    def __init__(self, html_to_text: bool = False):
        """Initialize with parameters."""
        try:
            import html2text  # noqa: F401
        except ImportError:
            raise ImportError(
                "`html2text` package not found, please run `pip install html2text`"
            )
        self._html_to_text = html_to_text

    def load_data(self, input_files, ):
        """Load data from the input directory.

        Args:
            urls (List[str]): List of URLs to scrape.

        Returns:
            List[Document]: List of documents.

        """
        if not isinstance(input_files, list):
            raise ValueError("input_files must be a list of strings.")
        documents = []
        for input_file in input_files:
            #response = requests.get(url, headers=None).text
            with open(input_file, "r", errors = "ignore", encoding='utf-8') as f:
                response = f.read()
            if self._html_to_text:
                import html2text

                response = html2text.html2text(response)

            doc = Document(text=response)
            doc.metadata = {'file_name': input_file}


            documents.append(doc)

        return documents

input_dir = "./omniscien.com"
lists_files = DirectorySearchSource().add_files(input_dir)
documents = HtmlFilesReader(html_to_text=True).load_data(input_files = lists_files)

def load_llm():
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    llm = LlamaCpp(
        model_path="/home/sira/sira_project/meta-Llama2/llama-2-7b-chat.ggmlv3.q8_0.bin",
        callback_manager=callback_manager,
        verbose=True,n_ctx = 4096, temperature = 0.1, max_tokens = 4096
    )
    return llm

llm_predictor = LLMPredictor(llm=LangChainLLM(llm = load_llm()))
embed_model = LangchainEmbedding(HuggingFaceEmbeddings(
        model_name = "sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs = {'device': 'cpu'}))
service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor, 
        chunk_size=1000, 
        embed_model = embed_model)

graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)

# NOTE: can take a while!
index = KnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=3,
    storage_context=storage_context,
    service_context=service_context,
    show_progress = True
)

storage_context = StorageContext.from_defaults(persist_dir="/home/sira/sira_project/DQA_demo/llama7b_vector_index_removeHTML")
index = load_index_from_storage(storage_context, service_context=service_context)

# Create knowledge graph with Omniscien remove HTML tag for GPU

In [None]:
from llama_index import(
    ServiceContext,
    StorageContext,
    SimpleDirectoryReader,
    LangchainEmbedding,
    VectorStoreIndex,
    load_index_from_storage,
    load_graph_from_storage,
    LLMPredictor,
    PromptHelper
    ) 
    
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index.llms import LangChainLLM
from llama_index.storage.storage_context import StorageContext

from typing import Callable, Dict, Generator, List, Optional, Type
from pathlib import Path
from llama_index.readers.base import BaseReader
from llama_index.schema import Document
import logging


logger = logging.getLogger(__name__)
class DirectorySearchSource():
    def __init__(
    self,
    num_files_limit: Optional[int] = None,
    exclude_hidden: bool = True,
    required_exts: Optional[List[str]]  = None,
    recursive : bool = True,):
    
        super().__init__()

        self.recursive = recursive
        self.exclude_hidden = exclude_hidden
        self.required_exts = required_exts
        self.num_files_limit = num_files_limit

    def add_files(self, input_dir):
        all_files = set()
        rejected_files = set()
        list_files = []

        file_refs: Generator[Path, None, None]
        if self.recursive:
            file_refs = Path(input_dir).rglob("*")
        else:
            file_refs = Path(input_dir).glob("*")
        for ref in file_refs:
            # Manually check if file is hidden or directory instead of
            # in glob for backwards compatibility.
            is_dir = ref.is_dir()
            skip_because_hidden = self.exclude_hidden and ref.name.startswith(".")
            skip_because_bad_ext = (
                self.required_exts is not None and ref.suffix not in self.required_exts
            )
            skip_because_excluded = ref in rejected_files

            if (
                is_dir
                or skip_because_hidden
                or skip_because_bad_ext
                or skip_because_excluded
            ):
                continue
            else:
                all_files.add(ref)
        new_input_files = sorted(list(all_files))

        if len(new_input_files) == 0:
            raise ValueError(f"No files found in {input_dir}.")

        if self.num_files_limit is not None and self.num_files_limit > 0:
            new_input_files = new_input_files[0 : self.num_files_limit]

        # print total number of files added
        logger.debug(
            f"> [SimpleDirectoryReader] Total files added: {len(new_input_files)}")

        for f in new_input_files:
            list_files.append(str(f))
        return list_files

class HtmlFilesReader(BaseReader):
    """Simple web page reader.

    Reads pages from the web.

    Args:
        html_to_text (bool): Whether to convert HTML to text.
            Requires `html2text` package.

    """

    def __init__(self, html_to_text: bool = False):
        """Initialize with parameters."""
        try:
            import html2text  # noqa: F401
        except ImportError:
            raise ImportError(
                "`html2text` package not found, please run `pip install html2text`"
            )
        self._html_to_text = html_to_text

    def load_data(self, input_files, ):
        """Load data from the input directory.

        Args:
            urls (List[str]): List of URLs to scrape.

        Returns:
            List[Document]: List of documents.

        """
        if not isinstance(input_files, list):
            raise ValueError("input_files must be a list of strings.")
        documents = []
        for input_file in input_files:
            #response = requests.get(url, headers=None).text
            with open(input_file, "r", errors = "ignore", encoding='utf-8') as f:
                response = f.read()
            if self._html_to_text:
                import html2text

                response = html2text.html2text(response)

            doc = Document(text=response)
            doc.metadata = {'file_name': input_file}


            documents.append(doc)

        return documents

input_dir = "./omniscien.com"
lists_files = DirectorySearchSource().add_files(input_dir)
documents = HtmlFilesReader(html_to_text=True).load_data(input_files = lists_files)

def load_llm():
    n_gpu_layers = 32 
    n_batch = 512  
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    llm = LlamaCpp(
        model_path="llama-2-7b-chat.ggmlv3.q8_0.bin",
        callback_manager=callback_manager,
        n_gpu_layers=n_gpu_layers,
        n_batch=n_batch,
        verbose=True,
        n_ctx = 4096, 
        temperature = 0.1, 
        max_tokens = 4096
    )
    return llm

llm_predictor = LLMPredictor(llm=LangChainLLM(llm = load_llm()))
embed_model = LangchainEmbedding(HuggingFaceEmbeddings(
        model_name = "sentence-transformers/all-MiniLM-L6-v2",
        model_kwargs = {'device': 'cuda'}))
service_context = ServiceContext.from_defaults(
        llm_predictor=llm_predictor, 
        chunk_size=1000, 
        embed_model = embed_model)

graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)

# NOTE: can take a while!
index = KnowledgeGraphIndex.from_documents(
    documents,
    max_triplets_per_chunk=3,
    storage_context=storage_context,
    service_context=service_context,
    show_progress = True
)

storage_context = StorageContext.from_defaults(persist_dir="./llama7b_vector_index_removeHTML")
index = load_index_from_storage(storage_context, service_context=service_context)

# Custom Retriever combining KG Index and VectorStore Index

In [100]:
import os
import openai
from llama_index.llms import OpenAI
from llama_index.embeddings import OpenAIEmbedding
os.environ["OPENAI_API_KEY"] = "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"
openai.api_key = os.environ["OPENAI_API_KEY"]

In [137]:
from llama_index import download_loader

WikipediaReader = download_loader("WikipediaReader")

loader = WikipediaReader()

documents = loader.load_data(pages=["2023 in science"], auto_suggest=False)

In [156]:
llm_predictor = LLMPredictor(llm=OpenAI(temperature=0, model_name="text-davinci-002", streaming=True))
embed_model = OpenAIEmbedding(embed_batch_size=10)
#embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name = "sentence-transformers/all-MiniLM-L6-v2",model_kwargs = {'device': 'cpu'}))
service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, chunk_size=512)

In [157]:
space_name = "llamaindex"
edge_types, rel_prop_names = ["relationship"], [
    "relationship"]  # default, could be omit if create from an empty kg
tags = ["entity"]  

In [158]:
graph_store = SimpleGraphStore(space_name=space_name,edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,)

storage_context = StorageContext.from_defaults(graph_store=graph_store)

kg_index = KnowledgeGraphIndex.from_documents(
    documents,
    storage_context = storage_context,
    service_context = service_context,
    max_triplets_per_chunk=10,
    space_name=space_name,
    edge_types=edge_types,
    rel_prop_names=rel_prop_names,
    tags=tags,
    include_embeddings=True,
    show_progress = True
)

Parsing documents into nodes: 100%|██████████| 1/1 [00:00<00:00,  6.67it/s]
Processing nodes:   0%|          | 0/21 [00:00<?, ?it/s]
[A
Generating embeddings: 100%|██████████| 8/8 [00:00<00:00,  8.28it/s]
Processing nodes:   5%|▍         | 1/21 [00:03<01:02,  3.15s/it]
[A
Generating embeddings: 100%|██████████| 11/11 [00:01<00:00,  7.78it/s]
Processing nodes:  10%|▉         | 2/21 [00:07<01:17,  4.06s/it]
[A
Generating embeddings: 100%|██████████| 10/10 [00:00<00:00, 12.62it/s]
Processing nodes:  14%|█▍        | 3/21 [00:11<01:05,  3.65s/it]
[A
Generating embeddings: 100%|██████████| 10/10 [00:01<00:00,  9.44it/s]
Processing nodes:  19%|█▉        | 4/21 [00:14<01:00,  3.54s/it]
[A
[A
Generating embeddings: 100%|██████████| 11/11 [00:01<00:00,  6.24it/s]
Processing nodes:  24%|██▍       | 5/21 [00:20<01:13,  4.56s/it]
[A
Generating embeddings: 100%|██████████| 11/11 [00:01<00:00,  8.46it/s]
Processing nodes:  29%|██▊       | 6/21 [00:26<01:12,  4.83s/it]
[A
Generating embeddings

In [159]:
vector_index = VectorStoreIndex.from_documents(documents, service_context= service_context)

In [166]:
# import QueryBundle
from llama_index import QueryBundle

# import NodeWithScore
from llama_index.schema import NodeWithScore

# Retrievers
from llama_index.retrievers import BaseRetriever, VectorIndexRetriever, KGTableRetriever

from typing import List


class CustomRetriever(BaseRetriever):
    """Custom retriever that performs both Vector search and Knowledge Graph search"""

    def __init__(
        self,
        vector_retriever: VectorIndexRetriever,
        kg_retriever: KGTableRetriever,
        mode: str = "OR",
    ) -> None:
        """Init params."""

        self._vector_retriever = vector_retriever
        self._kg_retriever = kg_retriever
        if mode not in ("AND", "OR"):
            raise ValueError("Invalid mode.")
        self._mode = mode

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Retrieve nodes given query."""

        vector_nodes = self._vector_retriever.retrieve(query_bundle)
        kg_nodes = self._kg_retriever.retrieve(query_bundle)

        vector_ids = {n.node.node_id for n in vector_nodes}
        kg_ids = {n.node.node_id for n in kg_nodes}

        combined_dict = {n.node.node_id: n for n in vector_nodes}
        combined_dict.update({n.node.node_id: n for n in kg_nodes})

        if self._mode == "AND":
            retrieve_ids = vector_ids.intersection(kg_ids)
        else:
            retrieve_ids = vector_ids.union(kg_ids)

        retrieve_nodes = [combined_dict[rid] for rid in retrieve_ids]
        return retrieve_nodes


In [167]:
from llama_index import get_response_synthesizer
from llama_index.query_engine import RetrieverQueryEngine

# create custom retriever
vector_retriever = VectorIndexRetriever(index=vector_index)
kg_retriever = KGTableRetriever(
    index=kg_index, retriever_mode="keyword", include_text=False
)
custom_retriever = CustomRetriever(vector_retriever, kg_retriever)

# create response synthesizer
response_synthesizer = get_response_synthesizer(
    service_context=service_context,
    response_mode="tree_summarize",
)


In [168]:
custom_query_engine = RetrieverQueryEngine(
    retriever=custom_retriever,
    response_synthesizer=response_synthesizer,
)

vector_query_engine = vector_index.as_query_engine()

kg_keyword_query_engine = kg_index.as_query_engine(
    # setting to false uses the raw triplets instead of adding the text from the corresponding nodes
    include_text=False,
    retriever_mode="keyword",
    response_mode="tree_summarize",
)

In [169]:
response = kg_keyword_query_engine.query("Tell me events about NASA")
display(Markdown(f"<b>{response}</b>"))

INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: Tell me events about NASA
INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['events', 'NASA']
INFO:llama_index.indices.knowledge_graph.retriever:> Extracted relationships: The following are knowledge triplets in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`
NASA ['study on UAP', 'mid-2023']


<b>
NASA is expected to release a study on Unidentified Aerial Phenomena (UAP) by mid-2023.</b>

In [170]:
response = vector_query_engine.query("Tell me events about NASA")
display(Markdown(f"<b>{response}</b>"))

<b>
NASA is expected to receive increased budgets in 2023, which will be used to fund various research topics and agencies. On 12 July, astronomers reported considerable success of the James Webb Space Telescope (JWST) after its first year of operations. On 14 July, the Indian Space Research Organisation (ISRO) successfully launched its Chandrayaan-3 spacecraft towards the Moon. On 19 July, astronomers reported the discovery of a bizarre 'two-faced' star, with one side made up of hydrogen and the other consisting of helium. On 25 July, a study published in Nature found that a collapse of the Atlantic meridional overturning circulation (AMOC) is highly likely this century, and may occur as early as 2025. On 26 July, DARPA, in collaboration with NASA, began work on the first in-orbit demonstration of a nuclear thermal rocket engine.</b>

In [171]:
response = custom_query_engine.query("Tell me events about NASA")
display(Markdown(f"<b>{response}</b>"))

INFO:llama_index.indices.knowledge_graph.retriever:> Starting query: Tell me events about NASA
INFO:llama_index.indices.knowledge_graph.retriever:> Query keywords: ['events', 'NASA']
INFO:llama_index.indices.knowledge_graph.retriever:> Extracted relationships: The following are knowledge triplets in max depth 2 in the form of `subject [predicate, object, predicate_next_hop, object_next_hop ...]`
NASA ['study on UAP', 'mid-2023']


<b>
NASA is scheduled to release the results of its study on Unidentified Aerial Phenomena (UAP) in mid-2023. The agency is also providing increased budgets for various fields, research topics and agencies, including the new Advanced Research Projects Agency for Health (ARPA-H). Additionally, NASA is collaborating with DARPA on the first in-orbit demonstration of a nuclear thermal rocket engine.</b>

In [136]:
## create graph
from pyvis.network import Network

g = kg_index.get_networkx_graph(200)
net = Network(notebook=True, cdn_resources="in_line", directed=True)
net.from_nx(g)
net.show("2023_Science_Wikipedia_KnowledgeGraph.html")

2023_Science_Wikipedia_KnowledgeGraph.html
