# Imports

In [36]:
import os
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.schema import Document
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate

from load import *

# Model and Vectorstore Init

In [28]:
def initialize_chat_ollama():
    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
    return ChatOllama(
        base_url="http://localhost:11434",
        model="qwen3",
        temperature=0.5,
        callback_manager=callback_manager
    )
chat_model = initialize_chat_ollama()

# Initialize Ollama Embeddings
def initialize_embeddings():
    return OllamaEmbeddings(
        base_url="http://localhost:11434",
        model="qwen3"
    )
embeddings = initialize_embeddings()

  chat_model = initialize_chat_ollama()


# Helper Funcions

## Load and process documents

In [29]:
def load_documents(file_paths):
    documents = []
    for file_path in file_paths:
        content = read_file(file_path)
        documents.append(Document(page_content=content, metadata={"source": file_path}))
    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    return text_splitter.split_documents(documents)


## Creating a vector store


In [30]:
def create_vector_store(documents, embeddings):
    return Chroma.from_documents(documents, embeddings)

## Creating a RAG chain

In [37]:
def create_rag_chain_with_memory(chat_model, vector_store):
    system_template = "<|im_start|>system\nYou are a helpful AI assistant that provides clear and concise information based on the given context and chat history.<|im_end|>"
    system_message_prompt = SystemMessagePromptTemplate.from_template(system_template)
    human_template = "<|im_start|>user\nContext: {context}\nChat History: {chat_history}\nQuestion: {question}<|im_end|>"
    human_message_prompt = HumanMessagePromptTemplate.from_template(human_template)
    assistant_template = "<|im_start|>assistant\n"
    chat_prompt = ChatPromptTemplate.from_messages([
        system_message_prompt,
        human_message_prompt,
        assistant_template
    ])

    memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

    return ConversationalRetrievalChain.from_llm(
        llm=chat_model,
        retriever=vector_store.as_retriever(),
        memory=memory,
        combine_docs_chain_kwargs={"prompt": chat_prompt}
    )

# Checking if Qwen's Embeddings work

In [32]:
system_prompt = "You are a helpful AI assistant that provides clear and concise information."
user_prompt = "Write step by step the answer to 2x+3=7."
full_prompt = f'''<|im_start|>system
{system_prompt}<|im_end|>
<|im_start|>user
{user_prompt}<|im_end|>
<|im_start|>assistant
'''
print("Testing ChatOllama with prompt:")
print(full_prompt)
response = chat_model.predict(full_prompt)
print("\nChatOllama Response:")
print(response)

Testing ChatOllama with prompt:
<|im_start|>system
You are a helpful AI assistant that provides clear and concise information.<|im_end|>
<|im_start|>user
Write step by step the answer to 2x+3=7.<|im_end|>
<|im_start|>assistant

Certainly! Here is a step-by-step guide to solving the equation \(2x + 3 = 7\):

### Step 1: Subtract 3 from both sides of the equation.
This step aims to isolate the term containing the variable on one side of the equation.

\[2x + 3 - 3 = 7 - 3\]

Simplifying this, we get:

\[2x = 4\]

### Step 2: Divide both sides by 2.
This step isolates \(x\) and gives us its value.

\[\frac{2x}{2} = \frac{4}{2}\]

Simplifying this, we get:

\[x = 2\]

So the solution to the equation \(2x + 3 = 7\) is \(x = 2\).
ChatOllama Response:
Certainly! Here is a step-by-step guide to solving the equation \(2x + 3 = 7\):

### Step 1: Subtract 3 from both sides of the equation.
This step aims to isolate the term containing the variable on one side of the equation.

\[2x + 3 - 3 = 7 - 

In [33]:
file_paths = ["samples/sample.pptx", "samples/sample.txt"]
documents = load_documents(file_paths)

In [34]:
documents

[Document(metadata={'source': 'samples/sample.pptx'}, page_content='Various Image Segmentation Techniques:\nDifferent types of Image Segmentation Techniques\nThresholding technique segmentation\nHistogram based segmentation\nRegion based segmentation\nEdge based segmentation\nClustering based segmentation\nMorphological Transforms and\nTexture based segmentation approaches\nThresholding technique segmentation\nSegmentation algorithms based on thresholding approach are suitable for images where there is distinct difference between object and background.\nMain Goal: divide an image into two distinct regions (object and background) directly based on intensity values and their properties\nTypes: Global, Variable, Multiple'),
 Document(metadata={'source': 'samples/sample.pptx'}, page_content='Original coins image\n2)Histogram based segmentation\nHistogram of an image is a plot between intensity levels. \nDeep valleys are used to separate different peaks of histogram. \nHistogram peaks are t

In [35]:
vector_store = create_vector_store(documents, embeddings)

rag_chain = create_rag_chain(chat_model, vector_store)

rag_query = "What are the topics discussed in these documents? Write it in points"
rag_response = rag_chain.run(rag_query)
print(f"\nRAG Query: {rag_query}")
print(f"RAG Response:\n{rag_response}")

- Clustering-based segmentation techniques and their effectiveness for image processing.
- Types of clustering methods, with a focus on selecting the best proven to work well with image processing.
- Texture-based segmentation approaches, explaining how texture features help divide images into regions with similar characteristics.
- Morphological transforms-based segmentation, detailing operations like erosion, dilation, opening, closing, white and black tophat, skeletonize, and convex hull, which process images based on shapes rather than pixel intensities.
RAG Query: What are the topics discussed in these documents? Write it in points
RAG Response:
- Clustering-based segmentation techniques and their effectiveness for image processing.
- Types of clustering methods, with a focus on selecting the best proven to work well with image processing.
- Texture-based segmentation approaches, explaining how texture features help divide images into regions with similar characteristics.
- Morpho

In [39]:
rag_chain = create_rag_chain_with_memory(chat_model, vector_store)

queries = [
    "<|im_start|>What is the main topic of the documents?<|im_end|>",
    "<|im_start|>Can you provide more details about that?<|im_end|>",
    "<|im_start|>How does this relate to the previous information you gave me?<|im_end|>"
]

for i, query in enumerate(queries, 1):
    print(f"\nQuery {i}: {query}")
    response = rag_chain({"question": query})
    print(f"Response {i}: {response['answer']}")



Query 1: <|im_start|>What is the main topic of the documents?<|im_end|>
The main topics of these documents are related to testing a file reading function. The documents provide various sections such as overview, features, implementation details, and conclusions that all revolve around ensuring the functionality of a file reading test framework.Response 1: The main topics of these documents are related to testing a file reading function. The documents provide various sections such as overview, features, implementation details, and conclusions that all revolve around ensuring the functionality of a file reading test framework.

Query 2: <|im_start|>Can you provide more details about that?<|im_end|>
 Can you provide more details about the main topics of these documents related to testing a file reading function?Certainly! Here are more detailed breakdowns of the main topics in the documents:

1. **Overview**:
   - Purpose and scope of the document (testing a file reading function).
   - 