In [1]:
from sentence_transformers import SentenceTransformer
from langchain.vectorstores import Milvus
from langchain.embeddings import HuggingFaceEmbeddings, SentenceTransformerEmbeddings
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import CharacterTextSplitter

loader = PyPDFLoader("Learning_Python.pdf") #wget https://images.samsung.com/is/content/samsung/assets/global/ir/docs/2023_con_quarter04_all.pdf
pages = loader.load_and_split()

  from tqdm.autonotebook import tqdm, trange


In [2]:
len(pages)

1180

In [3]:
local_embedding_model="all-MiniLM-L6-v2" #git clone https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2
embeddings = HuggingFaceEmbeddings(model_name=local_embedding_model)



In [4]:
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
docs = text_splitter.split_documents(pages)

In [5]:
vector_db = Milvus.from_documents(
    docs,
    embeddings,
    collection_name="Learning_Python",
    connection_args={"host": "local.dev.server", "port": "19530"},
)

In [6]:
len(docs)

1180

In [7]:
print(f"Default collection name - {vector_db.collection_name}")
print(f"Default search params - {vector_db.search_params}")
print(f"Default index params - {vector_db.index_params}")

Default collection name - Learning_Python
Default search params - {'metric_type': 'L2', 'params': {'ef': 10}}
Default index params - None


In [8]:
docs[1]

Document(page_content='FOURTH EDITION\nLearning Python\nMark Lutz\nBeijing •Cambridge •Farnham •Köln •Sebastopol •Taipei •Tokyo', metadata={'source': 'Learning_Python.pdf', 'page': 3})

In [9]:
vector_db.col

<Collection>:
-------------
<name>: Learning_Python
<description>: 
<schema>: {'auto_id': True, 'description': '', 'fields': [{'name': 'source', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 65535}}, {'name': 'page', 'description': '', 'type': <DataType.INT64: 5>}, {'name': 'text', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 65535}}, {'name': 'pk', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': True}, {'name': 'vector', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 384}}], 'enable_dynamic_field': False}

In [10]:
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.prompts import PromptTemplate

template = """Question: {question}
Answer: Let's work this out in a step by step way to be sure we have the right answer"""
prompt = PromptTemplate.from_template(template)
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

local_path = (
    "gpt4all-falcon-newbpe-q4_0.gguf"  #wget https://gpt4all.io/models/gguf/gpt4all-falcon-newbpe-q4_0.gguf
)
llm = LlamaCpp(
    model_path=local_path,
    temperature=0.75,
    max_tokens=2000,
    top_p=1,
    n_ctx=4096,
    callback_manager=callback_manager,
    verbose=True,
)

llama_model_loader: loaded meta data with 18 key-value pairs and 196 tensors from gpt4all-falcon-newbpe-q4_0.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = falcon
llama_model_loader: - kv   1:                               general.name str              = Falcon
llama_model_loader: - kv   2:                      falcon.context_length u32              = 2048
llama_model_loader: - kv   3:                  falcon.tensor_data_layout str              = jploski
llama_model_loader: - kv   4:                    falcon.embedding_length u32              = 4544
llama_model_loader: - kv   5:                 falcon.feed_forward_length u32              = 18176
llama_model_loader: - kv   6:                         falcon.block_count u32              = 32
llama_model_loader: - kv   7:                falcon.attention.head_count u32

In [11]:
question = """
Question: What is NASA's role in Space Technology?
"""
llm.invoke(question)

Answer: NASA's role in space technology involves developing and testing new technologies for space exploration, such as propulsion systems, spacecraft materials, and communication networks. They also collaborate with other organizations and countries to develop and advance technologies related to space travel and exploration.


llama_print_timings:        load time =    2727.86 ms
llama_print_timings:      sample time =       8.65 ms /    53 runs   (    0.16 ms per token,  6129.29 tokens per second)
llama_print_timings: prompt eval time =    3011.88 ms /    15 tokens (  200.79 ms per token,     4.98 tokens per second)
llama_print_timings:        eval time =    4951.11 ms /    52 runs   (   95.21 ms per token,    10.50 tokens per second)
llama_print_timings:       total time =    8032.17 ms /    67 tokens


"Answer: NASA's role in space technology involves developing and testing new technologies for space exploration, such as propulsion systems, spacecraft materials, and communication networks. They also collaborate with other organizations and countries to develop and advance technologies related to space travel and exploration."

In [12]:
from langchain.prompts import PromptTemplate
from langchain.chains import RetrievalQA

prompt_template = """Use the following pieces of context to answer the question at the end. If you don't find the answer in tne context provided or local db provided, just say that you don't know, don't try to make up an answer from your knowledge apart from the context.

{context}

Question: {question}
Helpful Answer:"""
QA_CHAIN_PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

chain_type_kwargs={
        "refine_prompt": QA_CHAIN_PROMPT
        }

qa_chain = RetrievalQA.from_chain_type(
    llm,
    chain_type="refine",
    retriever=vector_db.as_retriever(search_type="mmr", search_kwargs={"k": 1}),
    return_source_documents=False,
    callbacks=None,
    chain_type_kwargs={"refine_prompt": QA_CHAIN_PROMPT,"verbose":True},
    verbose=True
)

question = "What is SpaceX's role in Space Technology?"
result = qa_chain({"query": question}) # must be query
result["result"]



[1m> Entering new RetrievalQA chain...[0m


  warn_deprecated(




[1m> Entering new RefineDocumentsChain chain...[0m


[1m> Entering new LLMChain chain...[0m
Prompt after formatting:
[32;1m[1;3mContext information is below. 
------------
For more details on companies using Python today, see Python’s website at http://www
.python.org.
What Can I Do with Python?
In addition to 
being a well-designed programming language, Python is useful for ac-
complishing real-world tasks—the sorts of things developers do day in and day out.
It’s commonly used in a variety of domains, as a tool for scripting other components
and implementing standalone programs. In fact, as a general-purpose language,
Python’s roles are virtually unlimited: you can use it for everything from website de-
velopment and gaming to robotics and spacecraft control.
However, the most common Python roles currently seem to fall into a few broad cat-
egories. The next few sections describe some of Python’s most common applications
today, as well as tools used in each domain. We won’t b

Llama.generate: prefix-match hit


NASA's role in Space Technology is to develop and operate various spacecraft and technologies used in space exploration. They also collaborate with other agencies and organizations around the world to advance space research and technology.


llama_print_timings:        load time =    2727.86 ms
llama_print_timings:      sample time =       7.15 ms /    40 runs   (    0.18 ms per token,  5593.62 tokens per second)
llama_print_timings: prompt eval time =   50721.80 ms /   626 tokens (   81.03 ms per token,    12.34 tokens per second)
llama_print_timings:        eval time =    3011.17 ms /    39 runs   (   77.21 ms per token,    12.95 tokens per second)
llama_print_timings:       total time =   53829.23 ms /   665 tokens



[1m> Finished chain.[0m

[1m> Finished chain.[0m

[1m> Finished chain.[0m


"NASA's role in Space Technology is to develop and operate various spacecraft and technologies used in space exploration. They also collaborate with other agencies and organizations around the world to advance space research and technology."