INSTALLATIONS

In [None]:
pip install chromadb==0.4.13 InstructorEmbedding==1.0.1 langchain==0.0.305 sentence-transformers==2.2.2 torch==2.0.1 llama-cpp-python==0.2.11 pypdf langchainhub

IMPORTS

In [None]:
import os
import torch
from chromadb.config import Settings

In [None]:
import logging
from langchain.document_loaders import DirectoryLoader, PDFMinerLoader, PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import SentenceTransformerEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import Chroma

In [None]:
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.chains import RetrievalQAWithSourcesChain
from langchain.llms import LlamaCpp
from langchain.callbacks.manager import CallbackManager
from langchain.vectorstores import Chroma
from langchain.prompts import PromptTemplate
from huggingface_hub import hf_hub_download

GOOGLE DRIVE FOR SOURCE, MODEL, VECTOR DATABASE PATHS

In [None]:
from google.colab import drive

# Mount Google Drive so documents, model weights and the vector store live on Drive.
drive.mount('/content/drive')

# All project folders hang off a single parent directory on Drive.
parent_path = "/content/drive/MyDrive/Langchain project"
source_path, model_directory, vectordb_path = (
    os.path.join(parent_path, subfolder)
    for subfolder in ("documents", "models", "db")
)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Make sure the three working directories exist on Drive.
# exist_ok=True keeps this cell idempotent and removes the gap between
# checking for a directory and creating it.
for required_dir in (source_path, model_directory, vectordb_path):
    os.makedirs(required_dir, exist_ok=True)

In [None]:
# Name of the sentence-transformers model used to embed chunks for the vector store.
EMBEDDING_MODEL = "all-MiniLM-L12-v2"
# Hugging Face repository id and GGUF file name of the local LLM weights.
MODEL_NAME = "TheBloke/Mistral-7B-Instruct-v0.1-GGUF"
MODEL_FILE = "mistral-7b-instruct-v0.1.Q4_K_M.gguf"
# MODEL CONFIG
# NOTE(review): 4094 may be a typo for 4096 — confirm before relying on this value.
MAX_TOKEN_LENGTH = 4094 # 8192 is the max for Mistral-7B
# Number of model layers intended for GPU offload — presumably meant for
# llama.cpp's n_gpu_layers; confirm against the cell that builds the LLM.
N_GPU_LAYERS = 40

In [None]:
# PYTORCH DEVICE COMPATIBILITY
# Use the GPU when PyTorch can see one, otherwise stay on the CPU.
DEVICE_TYPE = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# DATABASE SETTINGS
# Chroma client configuration: persistent storage on, anonymized telemetry off.
CHROMA_SETTINGS = Settings(is_persistent=True, anonymized_telemetry=False)

LOADING DOCUMENTS

In [None]:
def load_docs(directory: str = source_path):
    """
    Load every PDF found under a directory (searched recursively).

    Files are read with PyPDFLoader through a multithreaded DirectoryLoader.
    The number of loaded documents is both logged and printed.

    Args:
        directory (str): Root folder to search for ``*.pdf`` files.

    Returns:
        list: The documents produced by the loader.
    """
    pdf_loader = DirectoryLoader(
        directory,
        glob="**/*.pdf",
        use_multithreading=True,
        loader_cls=PyPDFLoader,
    )
    loaded = pdf_loader.load()

    # One message for both sinks so the log and the cell output always agree.
    summary = f"Loaded {len(loaded)} documents from {directory}"
    logging.info(summary)
    print(summary)
    return loaded

In [None]:
# Read all PDFs from the Drive source folder.
documents = load_docs(directory=source_path)

Loaded 718 documents from /content/drive/MyDrive/Langchain project/documents


SPLITTING DOCUMENTS INTO CHUNKS

In [None]:
def split_docs(documents, chunk_size=1000, chunk_overlap=10):
    """
    Splits documents into smaller chunks for processing.

    Uses RecursiveCharacterTextSplitter with the given size and overlap.
    The same summary (input count and resulting chunk count) is sent to
    both the log and the cell output.

    Args:
        documents (list): List of documents to be split.
        chunk_size (int): Maximum size of each chunk, as measured by the splitter.
        chunk_overlap (int): The overlap between adjacent chunks.

    Returns:
        list: List of split documents.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    docs = text_splitter.split_documents(documents)

    # Build the message once so the log entry carries the chunk count too.
    message = f"Split {len(documents)} documents into {len(docs)} chunks"
    logging.info(message)
    print(message)
    return docs

In [None]:
# Chunk the loaded pages using the default size and overlap.
docs = split_docs(documents=documents)

Split 718 documents into 1734 chunks


BUILDING THE VECTOR DATABASE FROM EMBEDDINGS OF ALL CHUNKS

In [None]:
def builder(docs):
    """
    Embed the given chunks and store them in the persistent Chroma database.

    Each chunk receives a deterministic id derived from its source, page and
    text. Without explicit ids the store assigns random ones, so re-running
    this function against the same persist directory would add another copy
    of every chunk. With stable ids a re-run overwrites the existing entries
    instead (LangChain's Chroma wrapper writes with upsert — see the Chroma
    vectorstore API reference).

    Args:
        docs (list): Chunked documents exposing ``page_content`` and ``metadata``.

    Returns:
        Chroma: The vector store backed by ``vectordb_path``.
    """
    import hashlib  # local import keeps this cell self-contained

    embeddings = SentenceTransformerEmbeddings(
        model_name=EMBEDDING_MODEL,
        model_kwargs={"device": DEVICE_TYPE},
        cache_folder=model_directory
    )

    # Identical chunks (e.g. repeated headers or blank pages) hash to the same
    # digest; an occurrence counter keeps ids unique within a single batch
    # while staying independent of the order in which files were loaded.
    seen = {}
    ids = []
    for doc in docs:
        source = doc.metadata.get("source", "")
        page = doc.metadata.get("page", "")
        digest = hashlib.sha1(
            f"{source}|{page}|{doc.page_content}".encode("utf-8")
        ).hexdigest()
        occurrence = seen.get(digest, 0)
        seen[digest] = occurrence + 1
        ids.append(f"{digest}-{occurrence}")

    db = Chroma.from_documents(
        docs,
        embeddings,
        ids=ids,
        persist_directory=vectordb_path,
        client_settings=CHROMA_SETTINGS,
    )
    message = "Loaded Documents to Chroma DB Successfully"
    logging.info(message)
    print(message)
    return db

In [None]:
# Embed the chunks and populate the vector store on Drive.
DB = builder(docs=docs)

Loaded Documents to Chroma DB Successfully


BUILD LOCAL LLM (MISTRAL-7B-INSTRUCT VIA LLAMA.CPP)

In [None]:
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.llms import LlamaCpp

In [None]:
# Offload layers to the GPU only when CUDA is available; on a CPU-only runtime nothing is offloaded.
n_gpu_layers = N_GPU_LAYERS if DEVICE_TYPE == "cuda" else 0
# Prompt tokens evaluated per batch; must be between 1 and n_ctx.
# A value of 1 feeds the retrieved context one token at a time, which is very slow.
n_batch = 512
callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])

# Resolve the GGUF weights through the Hugging Face cache under model_directory
# instead of hard-coding an absolute snapshot path. The file is downloaded on
# first use and the cached copy is reused afterwards. The revision is pinned so
# the notebook keeps loading the same weights.
MODEL_REVISION = "731a9fc8f06f5f5e2db8a0cf9d256197eb6e05d1"
model_path = hf_hub_download(
    repo_id=MODEL_NAME,
    filename=MODEL_FILE,
    revision=MODEL_REVISION,
    cache_dir=model_directory,
)

llm = LlamaCpp(
    model_path=model_path,
    n_gpu_layers=n_gpu_layers,
    n_batch=n_batch,
    n_ctx=2048,
    f16_kv=True,  # MUST set to True, otherwise you will run into problem after a couple of calls
    callback_manager=callback_manager,  # streams generated tokens to stdout
    verbose=True,
)

AVX = 1 | AVX2 = 1 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | SSSE3 = 1 | VSX = 0 | 


RETRIEVAL

In [None]:
# Quick sanity check: fetch the chunks the store considers closest to a sample query.
retriever = DB.as_retriever()
retriever.get_relevant_documents(query="Pan Tompkins algorithm")

[Document(page_content='594 PATTERN CLASSIFICATION AND DIAGNOSTIC DECISION\nykis calculated as\nyk=f\uf8eb\n\uf8edJ∑\nj=1w#\njkx#\nj−θ#\nk\uf8f6\n\uf8f8, k= 1,2,...,K, (9.74)\nwhere\nx#\nj=f(I∑\ni=1wijxi−θj)\n, j= 1,2,...,J, (9.75)\nand\nf(β) =1\n1 + exp( −β). (9.76)\n1\n2\nI1\n2\nJ1\n2\nK\n Input layer Hidden layer Output layerw wjk ij\ni j k##\nx x y\nFigure 9.4 A two\xadlayer perceptron.\nIn the equations given above, θjandθ#\nkare node offsets; wijandw#\njkare node\nweights;xiare the elements of the pattern vectors (input parameters); and I,J, and\nKare the numbers of nodes in the input, hidden, and output layers, respectively. The\nweights and offsets are updated by\nw#\njk(n+1) =w#\njk(n)+η[yk(1−yk)(dk−yk)]x#\nj+α[w#\njk(n)−w#\njk(n−1)],(9.77)', metadata={'page': 637, 'source': '/content/drive/MyDrive/Langchain/documents/(IEEE Press series in biomedical engineering) Rangayyan, Rangaraj M - Biomedical Signal Analysis-Wiley (2015)_221104_153033 (1).pdf'}),
 Document(page_content='5

RETRIEVAL QA CHAIN

In [None]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# Prompt for the "stuff" chain: the retrieved chunks fill {context}, the user query fills {question}.
Template = """Use the following pieces of context to answer the question at the end.
Use 5 sentences maximum and keep the answer as technical as possible.
Always say "thanks for asking!" at the end of the answer.
{context}
Question: {question}
Helpful Answer:"""

rag_prompt_custom = PromptTemplate.from_template(Template)
# Extra keyword arguments handed to the chain so it uses the custom prompt.
chain_type_kwargs = dict(prompt=rag_prompt_custom)

In [None]:
# Retrieval QA chain: retrieved chunks are stuffed into the custom prompt and
# answered by the local LLM; only the answer text is returned.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=DB.as_retriever(),
    chain_type_kwargs=chain_type_kwargs,
    return_source_documents=False,
    verbose=True,
)

In [None]:
# Run one question through the chain; tokens stream to the cell output while generating.
question = "What is adaptive thresholding?"
output = qa_chain.run(question)



[1m> Entering new RetrievalQA chain...[0m


Llama.generate: prefix-match hit



Adaptive thresholding is a process that involves setting the detection threshold dynamically in response to changes in the input signal. In this case, the input signal is the segmented signal, which is generated by an AR system. The reference signal is the same as the primary input signal, but delayed by 7 samples (3.5 ms), and applied at the adaptive filter. The adaptive filter then acts as an adaptive AR model, continuously adapting its tap-weight vector to changes in the statistics of the input signal, in order to minimize the prediction error. Significant changes in the tap-weight vector or the prediction error may be used to mark points of prominent segmentation.
[1m> Finished chain.[0m


In [None]:
# Show the answer text returned by the chain.
print(output)


Adaptive thresholding is a process that involves setting the detection threshold dynamically in response to changes in the input signal. In this case, the input signal is the segmented signal, which is generated by an AR system. The reference signal is the same as the primary input signal, but delayed by 7 samples (3.5 ms), and applied at the adaptive filter. The adaptive filter then acts as an adaptive AR model, continuously adapting its tap-weight vector to changes in the statistics of the input signal, in order to minimize the prediction error. Significant changes in the tap-weight vector or the prediction error may be used to mark points of prominent segmentation.
