# Import library

In [123]:
from llama_index import(
    GPTVectorStoreIndex,
    ServiceContext,
    LLMPredictor,
    PromptHelper,
    Document,
    VectorStoreIndex,
    LangchainEmbedding,
    StorageContext,
    load_index_from_storage,
    )


from langchain import OpenAI
from langchain.docstore.document import Document as LangchainDocument
from llama_index.node_parser import SimpleNodeParser

#scrap website
from bs4 import BeautifulSoup
import requests

# upload model 
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from llama_index.llms import LangChainLLM
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.embeddings.huggingface import HuggingFaceEmbeddings


# Scrap entire website 

All of function base on : https://www.youtube.com/watch?v=XJRoDEctAwA

In [124]:
def scrape(site):
    urls = []
    
    def scrape_helper(current_site):
        nonlocal urls

        r = requests.get(current_site)

        s = BeautifulSoup(r.text, "html.parser")
        #print(s.find_all("a"))
        for i in s.find_all("a"):
            if "href" in i.attrs:
                href = i.attrs["href"]

                if href.startswith("/") or href.startswith('#'):
                    full_url = site + href

                    if full_url not in urls:
                        urls.append(full_url)
                        scrape_helper(full_url)
    scrape_helper(site)
    return urls

guide process : https://gpt-index.readthedocs.io/en/latest/end_to_end_tutorials/usage_pattern.html

load local model integrate with langchain : https://gpt-index.readthedocs.io/en/latest/examples/llm/langchain.html,    https://python.langchain.com/docs/modules/model_io/models/llms/integrations/llamacpp

exceed context window of 512 : https://github.com/hwchase17/langchain/issues/3751


In [125]:
def load_llm(model_path):
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    llm_langchain = LlamaCpp(
    model_path= model_path, 
    callback_manager=callback_manager, 
    verbose=True, 
    n_ctx=2048) #define n-ctx for prevent exceed token error
    llm = LangChainLLM(llm=llm_langchain)
    return llm

Crate index and asking question : https://colab.research.google.com/drive/16QMQePkONNlDpgiltOi7oRQgmB8dU5fl?usp=sharing#scrollTo=5be63ef4

In [127]:
from llama_index import download_loader
url = "https://anaconda.org/conda-forge/attrs"
urls = scrape(url)
BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
loader = BeautifulSoupWebReader()
documents = loader.load_data(urls)
parser = SimpleNodeParser()

nodes = parser.get_nodes_from_documents(documents)

In [128]:
documents

[Document(id_='9b01fdfc-4171-4911-aa0f-5072043c80ec', embedding=None, metadata={'URL': 'https://anaconda.org/conda-forge/attrs/'}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, hash='a46355b6ba70c9057dbb1b051fa8c6a0b433d05f69ba08d3b40ab8f0c01e0795', text='\n\n\n\n\n\n :: Anaconda.org\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nMenu\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nGallery\nAbout\nAnaconda\nHelp\nDownload Anaconda\n\n\nSign In\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n        conda-forge\n      \n      /\n      \n        \n          packages\n        \n      \n      /\n      \nattrs\n\n23.1.0\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n              3\n            \n\n\n\n\n\n\n\n\nattrs is the Python package that will bring back the joy of writing classes by relieving you from the drudgery of implementing object protocols (aka dunder methods).\n\n\n\n\n\n\n\n\n            \n            Conda\n            \n          \n\nFiles\nLabels\nBadges\n\n\n\n

In [129]:
type(documents)

list

In [108]:
def load_document_to_gpt_vectorstore(url, model_path, model_emb_path):
    from llama_index import download_loader 

    urls = scrape(url)
    BeautifulSoupWebReader = download_loader("BeautifulSoupWebReader")
    loader = BeautifulSoupWebReader()
    documents = loader.load_data(urls)
    parser = SimpleNodeParser()

    nodes = parser.get_nodes_from_documents(documents)

    llm = load_llm(model_path)
    llm_predictor = LLMPredictor(llm = llm)
    embed_model = LangchainEmbedding(HuggingFaceEmbeddings(model_name=model_emb_path))


    max_input_size = 4096
    num_output = 512
    max_chunk_overlap = 0.20
    prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
    service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embed_model,
    prompt_helper=prompt_helper,
    )

    index = GPTVectorStoreIndex(nodes, service_context=service_context) 
    #index.save_to_disk("./gpt_index_docs_api_remotion_v2.json") #cant use save_to_disk replace with storage_context
    index.storage_context.persist(persist_dir="./llama_index_docs_api_v1") # create json file for index
    return index, service_context

In [88]:
# def chat(query, index): #No need to use
#     #index = VectorStoreIndex.load_from_disk("gpt_index_docs.json")
#     response = index.query(query)
#     print(response)
#     return response

URL Doc :  https://anaconda.org/conda-forge/attrs

model link : orca-mini-3b.ggmlv3.q4_0.bin (1.9GB) : https://gpt4all.io/index.html 

embedding model : https://huggingface.co/sentence-transformers/all-mpnet-base-v2

In [119]:
url = "https://anaconda.org/conda-forge/attrs"
model_path = "orca-mini-3b.ggmlv3.q4_0.bin"
model_emb_path = "sentence-transformers/all-mpnet-base-v2"

index, service_context = load_document_to_gpt_vectorstore(url= url, 
                                         model_path= model_path,
                                         model_emb_path=model_emb_path)

llama.cpp: loading model from orca-mini-3b.ggmlv3.q4_0.bin
llama_model_load_internal: format     = ggjt v3 (latest)
llama_model_load_internal: n_vocab    = 32000
llama_model_load_internal: n_ctx      = 2048
llama_model_load_internal: n_embd     = 3200
llama_model_load_internal: n_mult     = 240
llama_model_load_internal: n_head     = 32
llama_model_load_internal: n_layer    = 26
llama_model_load_internal: n_rot      = 100
llama_model_load_internal: ftype      = 2 (mostly Q4_0)
llama_model_load_internal: n_ff       = 8640
llama_model_load_internal: model size = 3B
llama_model_load_internal: ggml ctx size =    0.06 MB
llama_model_load_internal: mem required  = 2862.72 MB (+  682.00 MB per state)
llama_new_context_with_model: kv self size  =  650.00 MB
AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | 


In [120]:
storage_context = StorageContext.from_defaults(persist_dir="./llama_index_docs_api_v1")
index = load_index_from_storage(storage_context, service_context=service_context)

In [121]:
query_engine = index.as_query_engine(streaming=True, similarity_top_k=1, service_context=service_context)

In [122]:
response_stream = query_engine.query("What is attrs in python")
response_stream.print_response_stream()

 
:: Anaconda.org



Answer: attrs is a Python package that provides functionality to write concise and correct code while relieving you from implementing object protocols (aka dunder methods).


llama_print_timings:        load time =  1243.67 ms
llama_print_timings:      sample time =    33.06 ms /    46 runs   (    0.72 ms per token,  1391.49 tokens per second)
llama_print_timings: prompt eval time = 144285.21 ms /  1160 tokens (  124.38 ms per token,     8.04 tokens per second)
llama_print_timings:        eval time = 10546.94 ms /    45 runs   (  234.38 ms per token,     4.27 tokens per second)
llama_print_timings:       total time = 159050.27 ms
