In [5]:
# Environment setup for this notebook.
# Only the llama-cpp-python install below is active; the other commands are
# kept commented out as a record of packages that were installed or tried.
# As the pip output notes, the kernel may need a restart after installing.
# %pip install langchain
# %pip install -U langchain-community
# %pip install pymupdf
# %pip install sentence-transformers
# %pip install --upgrade pip
# %pip install chromadb
%pip install llama-cpp-python
# %pip uninstall llama-cpp-python
# # %pip install llama-cpp-python-cpu

Note: you may need to restart the kernel to use updated packages.


In [1]:
from langchain.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
from langchain.chains import RetrievalQA

In [2]:
# Read the source PDF (path is relative to the notebook's working directory)
# into LangChain documents via PyMuPDF.
PDF_PATH = "Virtual_characters.pdf"

loader = PyMuPDFLoader(PDF_PATH)
PDF_data = loader.load()

In [3]:
# Cut the loaded documents into small overlapping chunks for embedding.
# NOTE(review): 100-character chunks with a 5-character overlap are very
# small; retrieved context may miss related facts - confirm this is intended.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=5,
    chunk_size=100,
)
all_splits = text_splitter.split_documents(PDF_data)

In [4]:
# Embed every chunk with a sentence-transformers model running on the CPU and
# index the vectors in Chroma. Passing persist_directory stores the embeddings
# on disk (in ./db) rather than only in memory.
# NOTE(review): re-running this cell against an existing ./db presumably adds
# the same chunks again - confirm, and clear the directory if duplicates matter.
persist_directory = 'db'
model_name = "sentence-transformers/all-MiniLM-L6-v2"
model_kwargs = dict(device='cpu')

embedding = HuggingFaceEmbeddings(
    model_kwargs=model_kwargs,
    model_name=model_name,
)

vectordb = Chroma.from_documents(
    documents=all_splits,
    embedding=embedding,
    persist_directory=persist_directory,
)

  warn_deprecated(
  from tqdm.autonotebook import tqdm, trange


In [9]:
import os
from pathlib import Path

from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain_community.llms import LlamaCpp

# Location of the quantised GGUF weights. Set the LLAMA_MODEL_PATH environment
# variable to run the notebook on another machine; when it is unset, the
# previously hard-coded local path is used, so existing behaviour is unchanged.
path = os.environ.get(
    "LLAMA_MODEL_PATH",
    "C:\\Users\\user\\Downloads\\llama.cpp-master\\llama.cpp-master\\models\\Taiwan-LLM-7B\\Breeze-7B-Instruct-v0.1-Q4_K_M.gguf",
)

# Fail fast with an actionable message instead of a loader error from deep
# inside llama.cpp when the weights are missing.
if not Path(path).is_file():
    raise FileNotFoundError(
        f"GGUF model file not found: {path!r}. "
        "Set LLAMA_MODEL_PATH to the location of the model weights."
    )

# Drop CUDA_PATH from this process's environment before the model is created.
# NOTE(review): presumably a workaround for a CPU-only llama-cpp-python build;
# n_gpu_layers below still requests GPU offload - confirm which one is intended.
os.environ.pop('CUDA_PATH', None)

llm = LlamaCpp(
    model_path=path,
    n_gpu_layers=100,
    n_batch=512,
    n_ctx=2048,  # the model metadata reports a 4096-token context length
    f16_kv=True,
    # Stream generated tokens to stdout as they are produced.
    callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]),
    verbose=True,
)

llama_model_loader: loaded meta data with 25 key-value pairs and 291 tensors from C:\Users\user\Downloads\llama.cpp-master\llama.cpp-master\models\Taiwan-LLM-7B\Breeze-7B-Instruct-v0.1-Q4_K_M.gguf (version GGUF V3 (latest))
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = LLaMA v2
llama_model_loader: - kv   2:                           llama.vocab_size u32              = 32000
llama_model_loader: - kv   3:                       llama.context_length u32              = 4096
llama_model_loader: - kv   4:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   5:                          llama.block_count u32              = 32
llama_model_loader: - kv   6:                  llama.feed_forward_length u32              = 

In [10]:
from langchain.chains import LLMChain
from langchain.chains.prompt_selector import ConditionalPromptSelector
from langchain.prompts import PromptTemplate

# Prompt used when the model is a LlamaCpp instance: a <<SYS>> system section
# followed by an [INST] ... [/INST] instruction section.
# The indentation and trailing spaces inside the literals are part of the
# prompt text sent to the model, so they are spelled out explicitly here.
DEFAULT_LLAMA_SEARCH_PROMPT = PromptTemplate(
    template=(
        "<<SYS>> \n"
        "    You are a helpful assistant eager to assist with providing better Google search results.\n"
        "    <</SYS>> \n"
        "    \n"
        "    [INST] Provide an answer to the following question in 150 words. Ensure that the answer is informative, "
        "            relevant, and concise:\n"
        "            {question} \n"
        "    [/INST]"
    ),
    input_variables=["question"],
)

# Fallback prompt for any other model type: the same request as a single line
# of plain text (again with its embedded runs of spaces kept as-is).
DEFAULT_SEARCH_PROMPT = PromptTemplate(
    template=(
        "You are a helpful assistant eager to assist with providing better Google search results. "
        "        Provide an answer to the following question in about 150 words. Ensure that the answer is informative, "
        "        relevant, and concise: "
        "        {question}"
    ),
    input_variables=["question"],
)

# Choose the LlamaCpp-specific prompt when the model is a LlamaCpp instance,
# otherwise fall back to the generic one.
QUESTION_PROMPT_SELECTOR = ConditionalPromptSelector(
    conditionals=[
        (lambda model: isinstance(model, LlamaCpp), DEFAULT_LLAMA_SEARCH_PROMPT),
    ],
    default_prompt=DEFAULT_SEARCH_PROMPT,
)

# Resolve the prompt for the loaded model and display it as the cell output.
prompt = QUESTION_PROMPT_SELECTOR.get_prompt(llm)
prompt

PromptTemplate(input_variables=['question'], template='<<SYS>> \n    You are a helpful assistant eager to assist with providing better Google search results.\n    <</SYS>> \n    \n    [INST] Provide an answer to the following question in 150 words. Ensure that the answer is informative,             relevant, and concise:\n            {question} \n    [/INST]')

In [11]:
# Smoke-test the model without retrieval: fill the selected prompt with one
# question and run it through the LLM. The returned dict is the cell output.
question = "What is Taiwan known for?"
llm_chain = LLMChain(llm=llm, prompt=prompt)
llm_chain.invoke({"question": question})

  warn_deprecated(

llama_print_timings:        load time =   12709.92 ms
llama_print_timings:      sample time =       0.23 ms /     1 runs   (    0.23 ms per token,  4310.34 tokens per second)
llama_print_timings: prompt eval time =   12709.33 ms /    84 tokens (  151.30 ms per token,     6.61 tokens per second)
llama_print_timings:        eval time =       0.00 ms /     1 runs   (    0.00 ms per token,      inf tokens per second)
llama_print_timings:       total time =   12711.26 ms /    85 tokens


{'question': 'What is Taiwan known for?', 'text': ''}

In [14]:
# Expose the Chroma index as a retriever and wire it to the LLM in a
# RetrievalQA chain. chain_type="stuff" is the value passed to select how the
# retrieved chunks are combined; verbose=True prints chain start/finish markers.
retriever = vectordb.as_retriever()

qa = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    verbose=True,
)

In [15]:
# Ask a question about the PDF's content through the retrieval chain; the
# resulting dict (query + result) is displayed as the cell output.
query = "Tell me about Alison Hawk's career and age"
qa.invoke(input=query)



[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit


 Alison Hawk is 28 years old.


llama_print_timings:        load time =   12709.92 ms
llama_print_timings:      sample time =       2.48 ms /    12 runs   (    0.21 ms per token,  4832.86 tokens per second)
llama_print_timings: prompt eval time =   26708.41 ms /   163 tokens (  163.86 ms per token,     6.10 tokens per second)
llama_print_timings:        eval time =    3634.54 ms /    11 runs   (  330.41 ms per token,     3.03 tokens per second)
llama_print_timings:       total time =   30358.80 ms /   174 tokens



[1m> Finished chain.[0m


{'query': "Tell me about Alison Hawk's career and age",
 'result': ' Alison Hawk is 28 years old.'}