In [62]:
from llama_index import(
    ServiceContext,
    StorageContext,
    SimpleDirectoryReader,
    LangchainEmbedding,
    VectorStoreIndex,
    load_index_from_storage,
    load_graph_from_storage,
    LLMPredictor,
    PromptHelper
    )

# LLM, graph-store, and embedding imports
from llama_index.llms import LangChainLLM
from llama_index.graph_stores import SimpleGraphStore
from llama_index import (KnowledgeGraphIndex)
from llama_index.storage.storage_context import StorageContext
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp
from langchain.embeddings.huggingface import HuggingFaceEmbeddings

In [4]:
from typing import Callable, Dict, Generator, List, Optional, Type
from pathlib import Path
import logging
logger = logging.getLogger(__name__)


class DirectorySearchSource():
    """Collect the paths of files found under a directory, with simple filters.

    Args:
        num_files_limit: Maximum number of paths returned by ``add_files``.
            ``None`` or a non-positive value disables the limit.
        exclude_hidden: Skip entries whose own name starts with ``"."``.
        required_exts: If given, keep only files whose suffix (including the
            leading dot, e.g. ``".html"``) appears in this list.
        recursive: Descend into subdirectories when ``True``.
    """

    def __init__(
        self,
        num_files_limit: Optional[int] = None,
        exclude_hidden: bool = True,
        required_exts: Optional[List[str]] = None,
        recursive: bool = True,
    ):
        super().__init__()
        # No directory check here: the directory is an argument of
        # ``add_files``, not of the constructor. The previous check read an
        # undefined ``input_dir`` and only worked when a global of that name
        # happened to exist.
        self.recursive = recursive
        self.exclude_hidden = exclude_hidden
        self.required_exts = required_exts
        self.num_files_limit = num_files_limit

    def add_files(self, input_dir):
        """Return the filtered files under ``input_dir`` as sorted path strings.

        Args:
            input_dir: Directory to scan (``str`` or ``Path``).

        Returns:
            List[str]: Matching file paths, sorted as ``Path`` objects and
            truncated to ``num_files_limit`` when a positive limit is set.

        Raises:
            ValueError: If no file under ``input_dir`` passes the filters.
        """
        root = Path(input_dir)
        candidates = root.rglob("*") if self.recursive else root.glob("*")

        matched = set()
        for ref in candidates:
            # Hidden entries and directories are filtered manually rather
            # than through the glob pattern.
            if ref.is_dir():
                continue
            if self.exclude_hidden and ref.name.startswith("."):
                continue
            if (
                self.required_exts is not None
                and ref.suffix not in self.required_exts
            ):
                continue
            matched.add(ref)

        new_input_files = sorted(matched)
        if len(new_input_files) == 0:
            raise ValueError(f"No files found in {input_dir}.")

        if self.num_files_limit is not None and self.num_files_limit > 0:
            new_input_files = new_input_files[0 : self.num_files_limit]

        # Report the total number of files kept.
        logger.debug(
            f"> [DirectorySearchSource] Total files added: {len(new_input_files)}")

        return [str(path) for path in new_input_files]

In [6]:
from llama_index.readers.base import BaseReader
from llama_index.schema import Document

class HtmlFilesReader(BaseReader):
    """Reader that loads local HTML files into ``Document`` objects.

    Each file is read from disk as UTF-8 text (undecodable bytes are ignored)
    and becomes one document whose id is the file path.

    Args:
        html_to_text (bool): Whether to convert HTML to text.
            Requires `html2text` package.

    """

    def __init__(self, html_to_text: bool = False):
        """Initialize with parameters.

        Raises:
            ImportError: If ``html_to_text`` is true and `html2text` is not
                installed.
        """
        if html_to_text:
            # Fail early, but only when the conversion is actually requested;
            # reading raw HTML does not need the package.
            try:
                import html2text  # noqa: F401
            except ImportError as err:
                raise ImportError(
                    "`html2text` package not found, please run `pip install html2text`"
                ) from err
        self._html_to_text = html_to_text

    def load_data(self, input_files):
        """Load one document per file in ``input_files``.

        Args:
            input_files (List[str]): Paths of the files to read.

        Returns:
            List[Document]: List of documents, in the order of ``input_files``.

        Raises:
            ValueError: If ``input_files`` is not a list.

        """
        if not isinstance(input_files, list):
            raise ValueError("input_files must be a list of strings.")

        if self._html_to_text:
            # Imported once up front instead of on every loop iteration.
            import html2text

        documents = []
        for input_file in input_files:
            with open(input_file, "r", errors="ignore", encoding="utf-8") as f:
                response = f.read()
            if self._html_to_text:
                response = html2text.html2text(response)

            doc = Document(text=response)
            # Use the source path as the document id.
            doc.id_ = str(input_file)

            documents.append(doc)

        return documents

In [45]:
# Collect the files under the chosen directory, then read each one with
# HTML-to-text conversion enabled.
input_dir = "./omniscienSmall.com/about-us/company"
lists_files = DirectorySearchSource().add_files(input_dir)
documents = HtmlFilesReader(
    html_to_text=True,
).load_data(
    input_files=lists_files,
)

In [7]:
def load_llm(
    model_path="/home/sira/sira_project/DQA_demo/orca-mini-3b.ggmlv3.q4_0.bin",
    n_ctx=2048,
):
    """Create the LlamaCpp model used by the notebook.

    Args:
        model_path: Path to the model file passed to ``LlamaCpp``. The
            default is the path the notebook was originally run with;
            pass another path to run it on a different machine.
        n_ctx: Value forwarded as ``n_ctx`` to ``LlamaCpp``.

    Returns:
        The ``LlamaCpp`` instance, configured with a callback manager that
        holds a ``StreamingStdOutCallbackHandler`` and with ``verbose=True``.
    """
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    return LlamaCpp(
        model_path=model_path,
        n_ctx=n_ctx,
        callback_manager=callback_manager,
        verbose=True,
    )

In [8]:
# Embedding model wrapped for llama_index. The model name must not carry
# stray whitespace (the original literal ended with a space).
embed_model = LangchainEmbedding(
    HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
)

  from .autonotebook import tqdm as notebook_tqdm


In [9]:
# Wrap the LlamaCpp model, then bundle it with the embedding model into the
# service context used by the index cells below.
llm = LangChainLLM(load_llm())
service_context = ServiceContext.from_defaults(
    llm=llm,
    embed_model=embed_model,
    chunk_size=1000,
)

llama.cpp: loading model from /home/sira/sira_project/DQA_demo/orca-mini-3b.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 3200
llama_model_load_internal: n_mult     = 240
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_head_kv  = 32
llama_model_load_internal: n_layer    = 26
llama_model_load_internal: n_rot      = 100
llama_model_load_internal: n_gqa      = 1
llama_model_load_internal: rnorm_eps  = 1.0e-06
llama_model_load_internal: n_ff       = 8640
llama_model_load_internal: freq_base  = 10000.0
llama_model_load_internal: freq_scale = 1
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: model size = 3B
llama_model_load_internal: ggml ctx size =    0.07 MB
llama_model_load_internal: mem required  = 2194.73 MB (+  650.00 MB per state)
llama_new_context_with_model: kv self siz

In [None]:
# Storage context backed by a SimpleGraphStore for the knowledge graph.
graph_store = SimpleGraphStore()
storage_context = StorageContext.from_defaults(graph_store=graph_store)

# Build the knowledge-graph index from the loaded documents.
# NOTE: can take a while!
index = KnowledgeGraphIndex.from_documents(
    documents,
    service_context=service_context,
    storage_context=storage_context,
    max_triplets_per_chunk=2,
    show_progress=True,
)

In [None]:
# index.set_index_id("vector_index_graph")
# index.storage_context.persist("./llama_vector_graph_small")

In [42]:
# Reload the index persisted under ./llama_vector_graph_small.
# The former `root_id="<root_id>"` argument was an unfilled placeholder and is
# not a parameter of `load_index_from_storage`, so it has been removed.
# NOTE(review): if the persist directory ever holds more than one index,
# pass `index_id=` with the id used when persisting — confirm.
storage_context = StorageContext.from_defaults(persist_dir="./llama_vector_graph_small")
index = load_index_from_storage(storage_context, service_context=service_context)

In [43]:
# Query engine with retriever_mode="keyword", include_text=False and
# response_mode="tree_summarize".
query_engine = index.as_query_engine(
    retriever_mode="keyword",
    response_mode="tree_summarize",
    include_text=False,
)
response = query_engine.query("Tell me more about Philipp")

Llama.generate: prefix-match hit



Example:


llama_print_timings:        load time =   806.40 ms
llama_print_timings:      sample time =     2.60 ms /     5 runs   (    0.52 ms per token,  1920.86 tokens per second)
llama_print_timings: prompt eval time =  8409.02 ms /    85 tokens (   98.93 ms per token,    10.11 tokens per second)
llama_print_timings:        eval time =   447.19 ms /     4 runs   (  111.80 ms per token,     8.94 tokens per second)
llama_print_timings:       total time =  8892.63 ms
Llama.generate: prefix-match hit

llama_print_timings:        load time =   806.40 ms
llama_print_timings:      sample time =     0.50 ms /     1 runs   (    0.50 ms per token,  2016.13 tokens per second)
llama_print_timings: prompt eval time =  7081.04 ms /    72 tokens (   98.35 ms per token,    10.17 tokens per second)
llama_print_timings:        eval time =   110.18 ms /     1 runs   (  110.18 ms per token,     9.08 tokens per second)
llama_print_timings:       total time =  7211.72 ms


In [15]:
# Query engine with embedding_mode="hybrid", include_text=True and the
# top 5 similarity matches.
query_engine = index.as_query_engine(
    embedding_mode="hybrid",
    similarity_top_k=5,
    response_mode="tree_summarize",
    include_text=True,
)
# NOTE(review): the name is spelled "Philpp" here but "Philipp" in the
# previous query — confirm which is intended before re-running.
response = query_engine.query("Which book are writing by Philpp")

Llama.generate: prefix-match hit


For example: 'KEYWORDS: "Philpp", "writing"'.


llama_print_timings:        load time =   806.40 ms
llama_print_timings:      sample time =    12.25 ms /    18 runs   (    0.68 ms per token,  1469.99 tokens per second)
llama_print_timings: prompt eval time = 10657.37 ms /    87 tokens (  122.50 ms per token,     8.16 tokens per second)
llama_print_timings:        eval time =  2469.15 ms /    17 runs   (  145.24 ms per token,     6.88 tokens per second)
llama_print_timings:       total time = 13247.54 ms
Llama.generate: prefix-match hit


Next action(s) may include:
 - Ask for more context information
 - Check if any of the existing books have been written by Philpp.



llama_print_timings:        load time =   806.40 ms
llama_print_timings:      sample time =    20.13 ms /    33 runs   (    0.61 ms per token,  1639.10 tokens per second)
llama_print_timings: prompt eval time =  9136.66 ms /    75 tokens (  121.82 ms per token,     8.21 tokens per second)
llama_print_timings:        eval time =  4223.60 ms /    32 runs   (  131.99 ms per token,     7.58 tokens per second)
llama_print_timings:       total time = 13535.81 ms


In [None]:
from pyvis.network import Network

# Export the index as a networkx graph and render it with pyvis into
# example.html.
g = index.get_networkx_graph()
net = Network(
    directed=True,
    notebook=True,
    cdn_resources="in_line",
)
net.from_nx(g)
net.show("example.html")