In [1]:
#from langchain_community.llms import Ollama
from langchain_ollama import OllamaLLM as Ollama, OllamaEmbeddings
from langchain_community.document_loaders import PyPDFLoader, TextLoader
from langchain.prompts import PromptTemplate
from langchain_community.vectorstores import DocArrayInMemorySearch
from langchain_community.embeddings import OllamaEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# for text splitting
from langchain_experimental.text_splitter import SemanticChunker
# import CharacterTextSplitter
from langchain_text_splitters import CharacterTextSplitter
# for embeddings
from langchain_huggingface import HuggingFaceEmbeddings
import torch

In [70]:
# ignore warnings
import warnings
warnings.filterwarnings("ignore")

In [71]:
# Pick the compute device: use CUDA when torch reports it as available, otherwise the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cuda


### Setting up the embeddings

For augmenting our query with context information effectively, we can 
1. embed the source material in a semantically meaningful vector space -- i.e., represent blocks of text with numerical vectors.
2. then do the same for the query.
3. compare the query's vector representation against all the vectors of the source material.
4. return the block of text that corresponds to the most similar vector.

A practical issue with embedding using Ollama is that we need a different model for the embedding and the llm. If we use our local Ollama server for both, it has to load the 2 different models alternately. If both models are sufficiently small (compared to system memory), one way to deal with this is by running the embedding model in a separate container. Or we can use a Huggingface model for embedding and ollama for llm. There are still issues with running multiple models in parallel locally. 

Ideally we could use a remote endpoint for the llm and run the embedding locally -- embedding large text corpora over http connection is not a good idea. However, this would mean we'd have to purchase compute from some api service. 

In [None]:
# Create the embeddings (OPTION 1, simplest): no base_url is passed, only the model name.
# Options 1-3 each assign `embeddings`; run only the one you intend to use.
# NOTE(review): `OllamaEmbeddings` is imported twice in the import cell (first from
# langchain_ollama, then from langchain_community.embeddings); the later import is the
# one bound to this name -- confirm that is the class you want.
embedding_model="all-minilm"  # alternative model: "mxbai-embed-small"
embeddings = OllamaEmbeddings(model=embedding_model)

In [72]:
# Create the embeddings (OPTION 2): a dedicated Ollama container serves the embedding model.
# Expects that container to be reachable on port 11435 (see launch_ollama.sh).
# NOTE(review): the import cell binds `OllamaEmbeddings` twice; the later
# langchain_community import is the one in effect here -- confirm that is intended.
embedding_model = "all-minilm"  # alternative model: "mxbai-embed-small"
embeddings = OllamaEmbeddings(
    model=embedding_model,
    base_url="http://localhost:11435",
)

In [46]:
# Create the embeddings (OPTION 3): run a Hugging Face model locally instead of using Ollama.
embedding_model = "thenlper/gte-small"  # alternative: "sentence-transformers/all-mpnet-base-v2"
model_kwargs = dict(device=device)  # place the model on the device selected earlier
encode_kwargs = dict(normalize_embeddings=False)  # leave the output vectors un-normalized
embeddings = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=embedding_model,
)

In [73]:
# Create the llm (assumes we have Ollama running in a container on default port 11434).
# `Ollama` is the alias given to langchain_ollama's OllamaLLM in the import cell.
llm = Ollama(model='llama3.2')

### Reading the document and loading it into a vector store

* loader loads the document, then splits it into chunks. I am using defaults here, but we could also get more control over the text splitting.
* We make a vector store from the documents, and index it for fast similarity search based on embedding vectors.
* finally we make a langchain `retriever` that can be directly invoked with queries -- it returns the most similar text block.

In [None]:
# Point a PDF loader at the document that will supply the context.
path = "./documents/"
# Plain-text alternative: loader = TextLoader(path + "sample.txt")
loader = PyPDFLoader(f"{path}benjamini_yekutieli_fdr.pdf")

In [75]:
# Baseline with defaults: no text splitter is passed, so the loader's default one is used.
# `pages`, `store` and `retriever` are assigned again further down once a custom
# splitter has been chosen, so this cell is only needed for the default-splitter variant.
pages = loader.load_and_split()
# Embed every chunk with `embeddings` and keep the vectors in an in-memory store.
store = DocArrayInMemorySearch.from_documents(pages, embedding=embeddings)
retriever = store.as_retriever(search_kwargs={"k": 1}) # number of chunks to return

In [76]:
# Splitter choice A: semantic chunking driven by the embedding model, min_chunk_size=500.
# Each splitter cell assigns `text_splitter`; run only the one you want to use.
text_splitter = SemanticChunker(embeddings=embeddings, min_chunk_size=500)

In [32]:
# Splitter choice B: character-based splitting; chunk sizes are measured with `len`.
# Each splitter cell assigns `text_splitter`; run only the one you want to use.
text_splitter = CharacterTextSplitter(
    length_function=len,
    chunk_overlap=20,
    chunk_size=200,
)

In [89]:
# Splitter choice C: character splitter built through the tiktoken-encoder constructor,
# using the "cl100k_base" encoding.
# Each splitter cell assigns `text_splitter`; run only the one you want to use.
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    chunk_size=500,
    chunk_overlap=20,
    encoding_name="cl100k_base",
)

In [90]:
# Load the PDF, then cut it into chunks with the splitter selected above.
# `load()` followed by `split_documents()` is the explicit form of
# `load_and_split(text_splitter=...)`, which the LangChain loader API marks as deprecated.
pages = text_splitter.split_documents(loader.load())
# Embed every chunk with `embeddings` and index the vectors in an in-memory store.
store = DocArrayInMemorySearch.from_documents(pages, embedding=embeddings)
# k=1: the retriever hands back a single chunk per query.
retriever = store.as_retriever(search_kwargs={"k": 1})

In [91]:
# Prompt template text: `{context}` and `{question}` are the two placeholders filled in
# when the prompt is rendered.
template = (
    "\n"
    "Answer the question based on the context provided if possible.\n"
    "Context: {context}\n"
    "Question: {question}\n"
)

In [92]:
# Wrap the raw template string in a PromptTemplate so it can be used as a step in a chain.
prompt = PromptTemplate.from_template(template)
def format_docs(docs):
    """Join the text of the given documents into one newline-separated string.

    Args:
        docs: iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined with ``"\\n"``; an empty string when
        ``docs`` is empty. Metadata is not included.
    """
    texts = [document.page_content for document in docs]
    return "\n".join(texts)

### Chaining the operations

See: https://python.langchain.com/docs/versions/migrating_chains/

Chains are a useful abstraction for a pipeline of common operations that most LLM applications need. 

Here we are:

1. invoking the retriever with our query, which returns the most relevant context (along with metadata, which we don't use here)
2. formatting the retrieved context (keeping just the text)
3. mapping the query and context into the prompt template.
4. passing the formatted prompt to llm
5. formatting the output with `StrOutputParser()`

In [80]:
# Call the retriever on its own with a sample query to inspect what it returns.
res=retriever.invoke("idea of fdr control") 

In [81]:
# Metadata attached to the first returned document.
res[0].metadata

{'producer': 'Acrobat Distiller 4.05 for Windows',
 'creator': 'DVIPSONE (32) 2.1.3 http://www.YandY.com',
 'creationdate': 'D:20011203123023',
 'title': 'FRANJIMSAOS\x029-4AOS260',
 'author': 'Dr. Mirko Janc (Tech Typeset) 427 1999 Feb 15 15:33:29',
 'subject': 'TeX output 2001.11.15:0819',
 'source': 'benjamini_yekutieli_fdr.pdf',
 'total_pages': 24,
 'page': 4,
 'page_label': '5'}

In [82]:
# Reduce the retrieved documents to their text only (metadata dropped).
format_docs(res)

'CONTROLLING THE FDR UNDER DEPENDENCY 1169\nIn Section 2 we discuss in more detail the FDR criterion, the historical\nbackground of the procedure and available results and review the relevant\nnotions of positive dependency. This section can be consulted as needed. In\nSection 3 we outline some important problems where it is natural to assume\nthat the conditions of Theorem 1.2 hold. In Section 4 we prove the theorem. In the course of the proof we provide an explicit expression for the FDR, from\nwhich many more new properties can be derived, both for the independent and\nthe dependent cases. Thus issues such as discrete test statistics, composite\nnull hypotheses, general step-up procedures and general dependency can be\naddressed. This is done in Section 5. In particular we prove there the following\ntheorem. Theorem 1.3. When the Benjamini Hochberg procedure is conducted with\nq//lparenOSCASB∑ m\ni=1\n1\ni/rparenOSCASBtaking the place of qin (1), it always controls the FDR at level 

In [66]:
#prompt.invoke({"context": format_docs(res), "question": "idea of fdr control?"}).text.splitlines()

In [83]:
# Direct retriever call with the same question that is used in the prompt preview cell.
retriever.invoke("What is the FDR?")

[Document(metadata={'producer': 'Acrobat Distiller 4.05 for Windows', 'creator': 'DVIPSONE (32) 2.1.3 http://www.YandY.com', 'creationdate': 'D:20011203123023', 'title': 'FRANJIMSAOS\x029-4AOS260', 'author': 'Dr. Mirko Janc (Tech Typeset) 427 1999 Feb 15 15:33:29', 'subject': 'TeX output 2001.11.15:0819', 'source': 'benjamini_yekutieli_fdr.pdf', 'total_pages': 24, 'page': 4, 'page_label': '5'}, page_content='CONTROLLING THE FDR UNDER DEPENDENCY 1169\nIn Section 2 we discuss in more detail the FDR criterion, the historical\nbackground of the procedure and available results and review the relevant\nnotions of positive dependency. This section can be consulted as needed. In\nSection 3 we outline some important problems where it is natural to assume\nthat the conditions of Theorem 1.2 hold. In Section 4 we prove the theorem. In the course of the proof we provide an explicit expression for the FDR, from\nwhich many more new properties can be derived, both for the independent and\nthe depend

In [84]:
# Preview of the pipeline up to (and including) the prompt: retrieve context, format it,
# fill the template -- the LLM is not called in this cell.
(
    {
        # the query goes to the retriever; the returned documents are reduced to text
        'context': retriever | format_docs,
        # the query itself fills the {question} slot
        'question': RunnablePassthrough(),
    }
    | prompt
).invoke("What is the FDR?")

StringPromptValue(text='\nAnswer the question based on the context provided if possible.\nContext: CONTROLLING THE FDR UNDER DEPENDENCY 1169\nIn Section 2 we discuss in more detail the FDR criterion, the historical\nbackground of the procedure and available results and review the relevant\nnotions of positive dependency. This section can be consulted as needed. In\nSection 3 we outline some important problems where it is natural to assume\nthat the conditions of Theorem 1.2 hold. In Section 4 we prove the theorem. In the course of the proof we provide an explicit expression for the FDR, from\nwhich many more new properties can be derived, both for the independent and\nthe dependent cases. Thus issues such as discrete test statistics, composite\nnull hypotheses, general step-up procedures and general dependency can be\naddressed. This is done in Section 5. In particular we prove there the following\ntheorem. Theorem 1.3. When the Benjamini Hochberg procedure is conducted with\nq//lparen

**Now chaining all the operations into one chain:**

In [None]:
# Full pipeline: build the prompt inputs, render the prompt, call the LLM, parse the output.
chain = (
    {
        # the query goes to the retriever; the returned documents are reduced to text
        'context': retriever | format_docs,
        # the query itself fills the {question} slot
        'question': RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser() # or JsonOutputParser() etc. for formatted output depending on the LLM used and the need
)

**invoking the chain with query/question:**

In [98]:
# Run the whole chain on one question; the result is kept in `response` for display below.
response=chain.invoke(
    "implement fdr control in multiple testing"
)

**and viewing the output:**

In [99]:
# Show the answer as a list of lines.
response.splitlines()

['Based on the context provided, to implement FDR control in multiple testing, you would need to follow these general steps:',
 '',
 '1. **Determine the number of independent tests**: Identify the total number of hypotheses being tested (R).',
 '',
 '2. **Compute the test statistic**: For each test, calculate a test statistic that determines whether a hypothesis is rejected.',
 '',
 '3. **Calculate the proportion of true null hypotheses rejected**: Calculate V/R, where V is the number of true null hypotheses rejected and R is the total number of hypotheses rejected.',
 '',
 '4. **Calculate the FDR value**: Calculate the FDR (False Discovery Rate) as E[Q], where Q = V/R if R > 0, or 0 otherwise.',
 '',
 '5. **Choose a significance level q**: Choose an FDR level q that you want to control, typically at a conventional level for α (e.g., 0.05).',
 '',
 '6. **Implement the step-up procedure**: Implement the step-up procedure, which rejects all hypotheses with a p-value less than or equal to