In [1]:
from langchain_ollama.chat_models import ChatOllama
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, ToolMessage, AIMessage
import os
from typing import Literal
from datetime import datetime
from pydantic import BaseModel


OLLAMA_CLIENT_BASE_URL = "http://localhost:11434"

# --- Initialize ChatOllama instance ---
# This is your main LLM instance that will be used for both direct queries and tool calls.
llm = ChatOllama(
    model="llama3.1:latest", # Ensure this model is pulled on your remote Ollama server
    temperature=0.0,
    base_url=OLLAMA_CLIENT_BASE_URL, # Crucial: points to your accessible remote server (via tunnel) [1, 2]
    api_key="ollama" # Dummy value, as Ollama doesn't use API keys
)

In [2]:
llm.invoke('test')

AIMessage(content='It looks like you just tested the chat! Is there anything else I can help with?', additional_kwargs={}, response_metadata={'model': 'llama3.1:latest', 'created_at': '2025-08-05T14:40:19.930809539Z', 'done': True, 'done_reason': 'stop', 'total_duration': 308796950, 'load_duration': 23694160, 'prompt_eval_count': 11, 'prompt_eval_duration': 15151909, 'eval_count': 19, 'eval_duration': 269191839, 'model_name': 'llama3.1:latest'}, id='run--19711994-95dd-4b9f-8890-2013cddd8566-0', usage_metadata={'input_tokens': 11, 'output_tokens': 19, 'total_tokens': 30})

## Text splitters

In [3]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

def text_splitter(data, chunk_size, chunk_overlap):
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    chunks = text_splitter.split_documents(data)
    return chunks

### Create the embedding model

In [4]:
from langchain.embeddings import HuggingFaceEmbeddings
model_path = "./models/"
embedding_model = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    cache_folder=model_path,
    model_kwargs={'device': 'cpu'}  # or 'cuda' if you have GPU
)

  embedding_model = HuggingFaceEmbeddings(
  from .autonotebook import tqdm as notebook_tqdm


In [5]:
embedding = embedding_model.embed_query("Your text here")
print(embedding[:10])
len(embedding)

[0.04398941248655319, -0.005811411887407303, -0.024250555783510208, 0.001176159013994038, 0.0230423491448164, -0.07438800483942032, 0.046700119972229004, 0.0926041379570961, 0.09619280695915222, 0.014831428416073322]


  return forward_call(*args, **kwargs)


384

In [6]:
!wget "https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/MZ9z1lm-Ui3YBp3SYWLTAQ/companypolicies.txt"
from langchain_community.document_loaders import TextLoader
loader = TextLoader("companypolicies.txt")
txt_data = loader.load()

--2025-08-05 16:40:27--  https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/MZ9z1lm-Ui3YBp3SYWLTAQ/companypolicies.txt
Resolving cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud (cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud)... 

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


169.63.118.104
Connecting to cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud (cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud)|169.63.118.104|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 15660 (15K) [text/plain]
Saving to: ‘companypolicies.txt.1’


2025-08-05 16:40:28 (213 MB/s) - ‘companypolicies.txt.1’ saved [15660/15660]



In [7]:
txt_data



In [8]:
chunks_txt = text_splitter(txt_data, 200, 20)

In [9]:
from langchain.vectorstores import Chroma
vectordb = Chroma.from_documents(chunks_txt, embedding_model)

  return forward_call(*args, **kwargs)


## Simple similarity search

In [10]:
query = "email policy"
retriever = vectordb.as_retriever()
docs = retriever.invoke(query)
docs

  return forward_call(*args, **kwargs)


[Document(metadata={'source': 'companypolicies.txt'}, page_content='3.\tInternet and Email Policy'),
 Document(metadata={'source': 'companypolicies.txt'}, page_content='Our Internet and Email Policy aims to promote safe, responsible usage of digital communication tools that align with our values and legal obligations. Each employee is expected to understand and'),
 Document(metadata={'source': 'companypolicies.txt'}, page_content='Our Internet and Email Policy is established to guide the responsible and secure use of these essential tools within our organization. We recognize their significance in daily business operations and'),
 Document(metadata={'source': 'companypolicies.txt'}, page_content='Confidentiality: Reserve email for the transmission of confidential information, trade secrets, and sensitive customer data only when encryption is applied. Exercise discretion when discussing')]

In [11]:
retriever = vectordb.as_retriever(search_kwargs={"k": 1})
docs = retriever.invoke(query)
docs

  return forward_call(*args, **kwargs)


[Document(metadata={'source': 'companypolicies.txt'}, page_content='3.\tInternet and Email Policy')]

## MMR Search

In [12]:
retriever = vectordb.as_retriever(search_type="mmr")
docs = retriever.invoke(query)
docs

  return forward_call(*args, **kwargs)


[Document(metadata={'source': 'companypolicies.txt'}, page_content='3.\tInternet and Email Policy'),
 Document(metadata={'source': 'companypolicies.txt'}, page_content='Confidentiality: Reserve email for the transmission of confidential information, trade secrets, and sensitive customer data only when encryption is applied. Exercise discretion when discussing'),
 Document(metadata={'source': 'companypolicies.txt'}, page_content='Review of Policy: This policy will be reviewed periodically to ensure its alignment with evolving legal requirements and best practices for maintaining a healthy and safe workplace.'),
 Document(metadata={'source': 'companypolicies.txt'}, page_content='individual found to be in violation of this policy.')]

In [13]:
retriever = vectordb.as_retriever(
    search_type="similarity_score_threshold", search_kwargs={"score_threshold": 0.4}
)
docs = retriever.invoke(query)
docs

  return forward_call(*args, **kwargs)


[Document(metadata={'source': 'companypolicies.txt'}, page_content='3.\tInternet and Email Policy'),
 Document(metadata={'source': 'companypolicies.txt'}, page_content='Our Internet and Email Policy aims to promote safe, responsible usage of digital communication tools that align with our values and legal obligations. Each employee is expected to understand and'),
 Document(metadata={'source': 'companypolicies.txt'}, page_content='Our Internet and Email Policy is established to guide the responsible and secure use of these essential tools within our organization. We recognize their significance in daily business operations and'),
 Document(metadata={'source': 'companypolicies.txt'}, page_content='Confidentiality: Reserve email for the transmission of confidential information, trade secrets, and sensitive customer data only when encryption is applied. Exercise discretion when discussing')]

### Multi-Query Retriever

In [14]:
from langchain_community.document_loaders import PyPDFLoader
loader = PyPDFLoader("https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/ioch1wsxkfqgfLLgmd-6Rw/langchain-paper.pdf")
pdf_data = loader.load()

In [15]:
pdf_data[1]

Document(metadata={'producer': 'PyPDF', 'creator': 'Microsoft Word', 'creationdate': '2023-12-31T03:50:13+00:00', 'author': 'IEEE', 'moddate': '2023-12-31T03:52:06+00:00', 'title': 's8329 final', 'source': 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/ioch1wsxkfqgfLLgmd-6Rw/langchain-paper.pdf', 'total_pages': 6, 'page': 1, 'page_label': '2'}, page_content='LangChain helps us to unlock the ability to harness the \nLLM’s immense potential in tasks such as document analysis, \nchatbot development, code analysis, and countless other \napplications. Whether your desire is to unlock deeper natural \nlanguage understanding , enhance data, or circumvent \nlanguage barriers through translation, LangChain is ready to \nprovide the tools and programming support you need to do \nwithout it that it is not only difficult but also fresh for you. Its \ncore functionalities encompass: \n1. Context-Aware Capabilities: LangChain facilitates the \ndevelopment of applications that ar

In [16]:
# Split
chunks_pdf = text_splitter(pdf_data, 500, 20)

# VectorDB
ids = vectordb.get()["ids"]# We need to delete existing embeddings from previous documents and then store current document embeddings in.
vectordb = Chroma.from_documents(documents=chunks_pdf, embedding=embedding_model)

In [17]:
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain_core.messages import HumanMessage

query = "What does the paper say about langchain?"

retriever = MultiQueryRetriever.from_llm(
    retriever=vectordb.as_retriever(), llm=llm
)

# Pass the query to the retriever
docs = retriever.invoke(query)

# Log the generated queries (if supported by the retriever)
for i, doc in enumerate(docs):
    print(f"Generated Query {i + 1}: {doc.page_content}")

docs

Generated Query 1: 1.	Code of Conduct
Generated Query 2: any potential violations of this code and support the investigation of such matters.
Generated Query 3: 2.	Recruitment Policy
Generated Query 4: question (Fig. 4b). 
• MindGuide Chatbot's AI response to the 
subsequent human message, followed by another 
mental health question from the human (Fig. 4c). 
• MindGuide Chatbot's AI response after 
analyzing the latest human message (Fig. 4d). 
 
   s 
                                                         (a)      (b) 
      
                                                         (c)      (d)
Generated Query 5: LangChain helps us to unlock the ability to harness the 
LLM’s immense potential in tasks such as document analysis, 
chatbot development, code analysis, and countless other 
applications. Whether your desire is to unlock deeper natural 
language understanding , enhance data, or circumvent 
language barriers through translation, LangChain is ready to 
provide the tools and

  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


[Document(metadata={'source': 'companypolicies.txt'}, page_content='1.\tCode of Conduct'),
 Document(metadata={'source': 'companypolicies.txt'}, page_content='any potential violations of this code and support the investigation of such matters.'),
 Document(metadata={'source': 'companypolicies.txt'}, page_content='2.\tRecruitment Policy'),
 Document(metadata={'producer': 'PyPDF', 'creationdate': '2023-12-31T03:50:13+00:00', 'author': 'IEEE', 'moddate': '2023-12-31T03:52:06+00:00', 'creator': 'Microsoft Word', 'total_pages': 6, 'source': 'https://cf-courses-data.s3.us.cloud-object-storage.appdomain.cloud/ioch1wsxkfqgfLLgmd-6Rw/langchain-paper.pdf', 'page': 4, 'page_label': '5', 'title': 's8329 final'}, page_content="question (Fig. 4b). \n• MindGuide Chatbot's AI response to the \nsubsequent human message, followed by another \nmental health question from the human (Fig. 4c). \n• MindGuide Chatbot's AI response after \nanalyzing the latest human message (Fig. 4d). \n \n   s \n            

## Self Querying Rertrievers

In [18]:
!pip install lark

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [19]:
from langchain_core.documents import Document
from langchain.chains.query_constructor.base import AttributeInfo
from langchain.retrievers.self_query.base import SelfQueryRetriever
from lark import lark

In [20]:
docs = [
    Document(
        page_content="A bunch of scientists bring back dinosaurs and mayhem breaks loose",
        metadata={"year": 1993, "rating": 7.7, "genre": "science fiction"},
    ),
    Document(
        page_content="Leo DiCaprio gets lost in a dream within a dream within a dream within a ...",
        metadata={"year": 2010, "director": "Christopher Nolan", "rating": 8.2},
    ),
    Document(
        page_content="A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea",
        metadata={"year": 2006, "director": "Satoshi Kon", "rating": 8.6},
    ),
    Document(
        page_content="A bunch of normal-sized women are supremely wholesome and some men pine after them",
        metadata={"year": 2019, "director": "Greta Gerwig", "rating": 8.3},
    ),
    Document(
        page_content="Toys come alive and have a blast doing so",
        metadata={"year": 1995, "genre": "animated"},
    ),
    Document(
        page_content="Three men walk into the Zone, three men walk out of the Zone",
        metadata={
            "year": 1979,
            "director": "Andrei Tarkovsky",
            "genre": "thriller",
            "rating": 9.9,
        },
    ),
]

In [21]:
metadata_field_info = [
    AttributeInfo(
        name="genre",
        description="The genre of the movie. One of ['science fiction', 'comedy', 'drama', 'thriller', 'romance', 'action', 'animated']",
        type="string",
    ),
    AttributeInfo(
        name="year",
        description="The year the movie was released",
        type="integer",
    ),
    AttributeInfo(
        name="director",
        description="The name of the movie director",
        type="string",
    ),
    AttributeInfo(
        name="rating", description="A 1-10 rating for the movie", type="float"
    ),
]

In [22]:
vectordb = Chroma.from_documents(docs, embedding_model)

In [23]:
document_content_description = "Brief summary of a movie."

retriever = SelfQueryRetriever.from_llm(
    llm,
    vectordb,
    document_content_description,
    metadata_field_info,
)

In [24]:
retriever.invoke("I want to watch a movie rated higher than 8.5")

  return forward_call(*args, **kwargs)


[Document(metadata={'rating': 8.6, 'year': 2006, 'director': 'Satoshi Kon'}, page_content='A psychologist / detective gets lost in a series of dreams within dreams within dreams and Inception reused the idea'),
 Document(metadata={'year': 1979, 'genre': 'thriller', 'director': 'Andrei Tarkovsky', 'rating': 9.9}, page_content='Three men walk into the Zone, three men walk out of the Zone')]

In [27]:
retriever.invoke("Has Greta Gerwig directed any movies about women")

  return forward_call(*args, **kwargs)


[]

In [29]:
retriever.invoke("Whats a highly rated (above 8.5) science fiction film?")

  return forward_call(*args, **kwargs)


[]

### Parent Document Retriever

When splitting documents for retrieval, there are often conflicting desires:

1. You may want to have small documents so that their embeddings can most accurately reflect their meaning. If the documents are too long, the embeddings can lose meaning.
2. You want to have long enough documents so that the context of each chunk is retained.

The `ParentDocumentRetriever` strikes that balance by splitting and storing small chunks of data. During retrieval, it first fetches the small chunks but then looks up the parent IDs for those chunks and returns those larger documents.


In [30]:
from langchain.retrievers import ParentDocumentRetriever
from langchain_text_splitters import CharacterTextSplitter
from langchain.storage import InMemoryStore

In [31]:
# Set two splitters. One is with big chunk size (parent) and one is with small chunk size (child)
parent_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20, separator='\n')
child_splitter = CharacterTextSplitter(chunk_size=200, chunk_overlap=20, separator='\n')

In [33]:
vectordb = Chroma(
    collection_name="split_parents", embedding_function=embedding_model
)
#vectordb = Chroma.from_documents(documents=chunks_pdf, embedding=watsonx_embedding())
# The storage layer for the parent documents
store = InMemoryStore()

  vectordb = Chroma(


In [34]:
retriever = ParentDocumentRetriever(
    vectorstore=vectordb,
    docstore=store,
    child_splitter=child_splitter,
    parent_splitter=parent_splitter,
)

In [38]:
retriever.add_documents(txt_data)
len(list(store.yield_keys()))
sub_docs = vectordb.similarity_search("smoking policy")
print(sub_docs[0].page_content)
retrieved_docs = retriever.invoke("smoking policy")
print(retrieved_docs[0].page_content)

Created a chunk of size 223, which is longer than the specified 200
Created a chunk of size 274, which is longer than the specified 200
Created a chunk of size 262, which is longer than the specified 200
Created a chunk of size 282, which is longer than the specified 200
Created a chunk of size 262, which is longer than the specified 200
Created a chunk of size 270, which is longer than the specified 200
Created a chunk of size 224, which is longer than the specified 200
Created a chunk of size 325, which is longer than the specified 200
Created a chunk of size 300, which is longer than the specified 200
Created a chunk of size 216, which is longer than the specified 200
Created a chunk of size 226, which is longer than the specified 200
Created a chunk of size 235, which is longer than the specified 200
Created a chunk of size 300, which is longer than the specified 200
Created a chunk of size 294, which is longer than the specified 200
Created a chunk of size 234, which is longer tha

5.	Smoking Policy
5.	Smoking Policy
Policy Purpose: The Smoking Policy has been established to provide clear guidance and expectations concerning smoking on company premises. This policy is in place to ensure a safe and healthy environment for all employees, visitors, and the general public.
Designated Smoking Areas: Smoking is only permitted in designated smoking areas, as marked by appropriate signage. These areas have been chosen to minimize exposure to secondhand smoke and to maintain the overall cleanliness of the premises.
Smoking Restrictions: Smoking inside company buildings, offices, meeting rooms, and other enclosed spaces is strictly prohibited. This includes electronic cigarettes and vaping devices.
Compliance with Applicable Laws: All employees and visitors must adhere to relevant federal, state, and local smoking laws and regulations.


  return forward_call(*args, **kwargs)
