In [None]:
!pip install langchain llama-cpp-python faiss-cpu pypdf sentence-transformers langchain_community


In [None]:
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.llms import LlamaCpp
from langchain.chains import RetrievalQA




In [None]:
# Location of the PDF to index (placeholder path; point it at a real file).
PDF_PATH = "/your-pdf-file.pdf"

# Build the loader, read the file, and show how many documents came back.
loader = PyPDFLoader(PDF_PATH)
pages = loader.load()
len(pages)

2

In [22]:

# Break the loaded pages into smaller overlapping chunks for embedding.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # upper bound on the size of each chunk
    chunk_overlap=100,  # amount shared between consecutive chunks
)
docs = splitter.split_documents(pages)

# Report the resulting chunk count.
print(f"Split into {len(docs)} chunks.")


Split into 4 chunks.


In [23]:


# Sentence-transformers model used to embed every chunk.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Embed the chunks and index them in an in-memory FAISS store.
db = FAISS.from_documents(documents=docs, embedding=embeddings)

# Expose the store through the retriever interface with its default settings.
retriever = db.as_retriever()

In [None]:

# Settings for the local llama.cpp model, gathered in one place so they are
# easy to inspect and tweak before the (slow) model load.
llm_settings = dict(
    model_path="/mistral-7b-instruct-v0.1.Q2_K.gguf",  # Make sure path is correct
    n_ctx=2048,        # token context window
    n_batch=512,       # tokens processed per batch
    n_threads=8,       # CPU threads used for inference
    temperature=0.7,   # sampling temperature
    verbose=True,      # emit llama.cpp loader/perf logs
)

# Load the GGUF model with the settings above.
llm = LlamaCpp(**llm_settings)


llama_model_loader: loaded meta data with 20 key-value pairs and 291 tensors from ../personal/mistral-7b-instruct-v0.1.Q2_K.gguf (version GGUF V2)
llama_model_loader: Dumping metadata keys/values. Note: KV overrides do not apply in this output.
llama_model_loader: - kv   0:                       general.architecture str              = llama
llama_model_loader: - kv   1:                               general.name str              = mistralai_mistral-7b-instruct-v0.1
llama_model_loader: - kv   2:                       llama.context_length u32              = 32768
llama_model_loader: - kv   3:                     llama.embedding_length u32              = 4096
llama_model_loader: - kv   4:                          llama.block_count u32              = 32
llama_model_loader: - kv   5:                  llama.feed_forward_length u32              = 14336
llama_model_loader: - kv   6:                 llama.rope.dimension_count u32              = 128


llama_model_loader: - kv   7:                 llama.attention.head_count u32              = 32
llama_model_loader: - kv   8:              llama.attention.head_count_kv u32              = 8
llama_model_loader: - kv   9:     llama.attention.layer_norm_rms_epsilon f32              = 0.000010
llama_model_loader: - kv  10:                       llama.rope.freq_base f32              = 10000.000000
llama_model_loader: - kv  11:                          general.file_type u32              = 10
llama_model_loader: - kv  12:                       tokenizer.ggml.model str              = llama
llama_model_loader: - kv  13:                      tokenizer.ggml.tokens arr[str,32000]   = ["<unk>", "<s>", "</s>", "<0x00>", "<...
llama_model_loader: - kv  14:                      tokenizer.ggml.scores arr[f32,32000]   = [0.000000, 0.000000, 0.000000, 0.0000...
llama_model_loader: - kv  15:                  tokenizer.ggml.token_type arr[i32,32000]   = [2, 3, 3, 6, 6, 6, 6, 6, 6, 6, 6, 6, ...
llama_model_l

In [25]:

# Wire the local LLM and the FAISS retriever into a retrieval-augmented QA chain.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever)

# `Chain.run` is deprecated; `invoke` takes the inputs as a dict and returns a
# dict of outputs. RetrievalQA reads the question from "query" and places the
# answer under "result", so `response` is still the plain answer string.
response = qa_chain.invoke({"query": "History and Development from this this PDF?"})["result"]
print(response)


llama_perf_context_print:        load time =   84615.90 ms
llama_perf_context_print: prompt eval time =   84614.92 ms /   663 tokens (  127.62 ms per token,     7.84 tokens per second)
llama_perf_context_print:        eval time =   11931.75 ms /    55 runs   (  216.94 ms per token,     4.61 tokens per second)
llama_perf_context_print:       total time =   96630.59 ms /   718 tokens


 Offshore drilling began in the late 19th century in shallow waters, evolving dramatically in the mid-20th century with the introduction of floating rigs and deepwater technology. Today, offshore operations extend into ultra-deep waters using advanced techniques.
