# Imports

In [9]:
import os

from langchain.chains import RetrievalQA
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, SystemMessagePromptTemplate
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_core.callbacks import CallbackManager, StreamingStdOutCallbackHandler
from langchain_core.vectorstores import InMemoryVectorStore

from load import *

# Model and Vectorstore Init

In [25]:
def initialize_chat_ollama(base_url="http://localhost:11434", model="qwen3", temperature=0.5):
    """Create the ChatOllama chat model used throughout the notebook.

    Args:
        base_url: URL of the Ollama server to talk to.
        model: Name of the Ollama model to run.
        temperature: Sampling temperature passed to the model.

    Returns:
        A ``ChatOllama`` instance with a ``StreamingStdOutCallbackHandler``
        attached as a callback.
    """
    # `callbacks` replaces the deprecated `callback_manager` keyword; the
    # handler list is passed directly instead of wrapping it in a manager.
    return ChatOllama(
        base_url=base_url,
        model=model,
        temperature=temperature,
        callbacks=[StreamingStdOutCallbackHandler()],
    )
chat_model = initialize_chat_ollama()

# Initialize Ollama Embeddings
def initialize_embeddings(base_url="http://localhost:11434", model="qwen3"):
    """Create the OllamaEmbeddings client used to embed documents and queries.

    Args:
        base_url: URL of the Ollama server to talk to.
        model: Name of the Ollama model used to produce embeddings.

    Returns:
        An ``OllamaEmbeddings`` instance bound to ``base_url`` and ``model``.
    """
    return OllamaEmbeddings(base_url=base_url, model=model)
embeddings = initialize_embeddings()

  chat_model = initialize_chat_ollama()


# Helper Functions

## Load and process documents

In [None]:
def load_documents(file_paths):
    """Read every file in ``file_paths`` and return it split into chunks.

    Each file's content (as returned by ``read_file``) is wrapped in a
    ``Document`` whose metadata records the originating path under
    ``"source"``; the documents are then split with a
    ``CharacterTextSplitter`` (chunk size 1000, no overlap).

    Args:
        file_paths: Iterable of paths to read.

    Returns:
        The result of ``CharacterTextSplitter.split_documents`` on the
        loaded documents.
    """
    loaded = [
        Document(page_content=read_file(path), metadata={"source": path})
        for path in file_paths
    ]
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    chunks = splitter.split_documents(loaded)
    return chunks


In [17]:

def create_vector_store(documents, embeddings):
    """Build a Chroma vector store from ``documents`` using ``embeddings``.

    Args:
        documents: Documents to index.
        embeddings: Embedding model handed to Chroma.

    Returns:
        The store returned by ``Chroma.from_documents``.
    """
    store = Chroma.from_documents(documents=documents, embedding=embeddings)
    return store

# Create RAG chain
def create_rag_chain(chat_model, vector_store):
    """Build a "stuff" RetrievalQA chain that answers from ``vector_store``.

    Args:
        chat_model: Chat model used to generate the answer.
        vector_store: Vector store whose ``as_retriever()`` supplies context.

    Returns:
        A ``RetrievalQA`` chain whose prompt takes ``{context}`` and
        ``{question}``.
    """
    # Only role-tagged system/human templates are used. A bare string given to
    # ChatPromptTemplate.from_messages is treated as an additional human
    # message, so the former trailing "<|im_start|>assistant" entry was sent
    # as user text. The hand-written ChatML markers are dropped as well: the
    # chat model already sends structured messages, so the markers would only
    # reach the model as literal text.
    system_message_prompt = SystemMessagePromptTemplate.from_template(
        "You are a helpful AI assistant that provides clear and concise "
        "information based on the given context."
    )
    human_message_prompt = HumanMessagePromptTemplate.from_template(
        "Context: {context}\n\nQuestion: {question}"
    )
    chat_prompt = ChatPromptTemplate.from_messages([
        system_message_prompt,
        human_message_prompt,
    ])

    return RetrievalQA.from_chain_type(
        llm=chat_model,
        chain_type="stuff",
        retriever=vector_store.as_retriever(),
        chain_type_kwargs={"prompt": chat_prompt}
    )

# Checking if Qwen's Chat Model and Embeddings work

In [13]:
# Smoke test: send one prompt to the chat model and print the reply.
system_prompt = "You are a helpful AI assistant that provides clear and concise information."
user_prompt = "Write step by step the answer to 2x+3=7."
# NOTE(review): the ChatML markers below are sent as literal text inside a
# single human message; consider passing structured (role, content) messages
# instead — confirm against the model's chat template.
full_prompt = f'''<|im_start|>system
{system_prompt}<|im_end|>
<|im_start|>user
{user_prompt}<|im_end|>
<|im_start|>assistant
'''
print("Testing ChatOllama with prompt:")
print(full_prompt)
# `predict` is deprecated; `invoke` returns a message object whose `.content`
# holds the reply text that `predict` used to return directly.
response = chat_model.invoke(full_prompt).content
print("\nChatOllama Response:")
print(response)

Testing ChatOllama with prompt:
<|im_start|>system
You are a helpful AI assistant that provides clear and concise information.<|im_end|>
<|im_start|>user
Write step by step the answer to 2x+3=7.<|im_end|>
<|im_start|>assistant



  response = chat_model.predict(full_prompt)


Sure, let's solve the equation \(2x + 3 = 7\) step by step:

### Step 1: Subtract 3 from both sides of the equation.
\[2x + 3 - 3 = 7 - 3\]
This simplifies to:
\[2x = 4\]

### Step 2: Divide both sides by 2.
\[\frac{2x}{2} = \frac{4}{2}\]
This simplifies to:
\[x = 2\]

So, the solution to the equation \(2x + 3 = 7\) is \(x = 2\).
ChatOllama Response:
Sure, let's solve the equation \(2x + 3 = 7\) step by step:

### Step 1: Subtract 3 from both sides of the equation.
\[2x + 3 - 3 = 7 - 3\]
This simplifies to:
\[2x = 4\]

### Step 2: Divide both sides by 2.
\[\frac{2x}{2} = \frac{4}{2}\]
This simplifies to:
\[x = 2\]

So, the solution to the equation \(2x + 3 = 7\) is \(x = 2\).


In [None]:
# Smoke test for the embedding model: index one sample text in an in-memory
# vector store and run a similarity query against it.
# (InMemoryVectorStore is imported in the notebook's top import cell.)
text = read_file("samples/sample.pptx")

vectorstore = InMemoryVectorStore.from_texts(
    [text],
    embedding=embeddings,
)

# Use the vectorstore as a retriever
retriever = vectorstore.as_retriever()

# Retrieve the most similar text
retrieved_documents = retriever.invoke("What are the types of Image segmentation Techniques?")

# Show the top retrieved document's content as the cell output
retrieved_documents[0].page_content

In [14]:
# Sample files to index for the RAG demo.
file_paths = [
    "samples/sample.pptx",
    "samples/sample.txt",
]
# Read the files and split them into chunks for the vector store.
documents = load_documents(file_paths)

In [15]:
# Display the chunk list returned by load_documents as the cell output.
documents

[Document(metadata={'source': 'samples/sample.pptx'}, page_content='Various Image Segmentation Techniques:\nDifferent types of Image Segmentation Techniques\nThresholding technique segmentation\nHistogram based segmentation\nRegion based segmentation\nEdge based segmentation\nClustering based segmentation\nMorphological Transforms and\nTexture based segmentation approaches\nThresholding technique segmentation\nSegmentation algorithms based on thresholding approach are suitable for images where there is distinct difference between object and background.\nMain Goal: divide an image into two distinct regions (object and background) directly based on intensity values and their properties\nTypes: Global, Variable, Multiple'),
 Document(metadata={'source': 'samples/sample.pptx'}, page_content='Original coins image\n2)Histogram based segmentation\nHistogram of an image is a plot between intensity levels. \nDeep valleys are used to separate different peaks of histogram. \nHistogram peaks are t

In [26]:
# Index the chunked documents and build the retrieval chain on top of them.
vector_store = create_vector_store(documents, embeddings)

rag_chain = create_rag_chain(chat_model, vector_store)

rag_query = "What are the topics discussed in these documents? Write it in points"
# `Chain.run` is deprecated; `invoke` takes the RetrievalQA input under the
# "query" key and returns a dict whose "result" entry is the answer text.
rag_response = rag_chain.invoke({"query": rag_query})["result"]
print(f"\nRAG Query: {rag_query}")
print(f"RAG Response:\n{rag_response}")

- Clustering-based segmentation techniques and their success rates in image processing.
- Types of clustering methods, including selecting the best for image processing.
- Texture-based segmentation approaches focusing on texture features and regions division based on similarity.
- Morphological transforms-based segmentation with operations such as erosion, dilation, opening, closing, white tophat, black tophat, skeletonize, and convex hull.
RAG Query: What are the topics discussed in these documents? Write it in points
RAG Response:
- Clustering-based segmentation techniques and their success rates in image processing.
- Types of clustering methods, including selecting the best for image processing.
- Texture-based segmentation approaches focusing on texture features and regions division based on similarity.
- Morphological transforms-based segmentation with operations such as erosion, dilation, opening, closing, white tophat, black tophat, skeletonize, and convex hull.
