In [None]:
# Empty placeholder; no later cell visible in this notebook reads `api_key`.
api_key = ""

In [None]:
# Use a pipeline as a high-level helper
from transformers import pipeline

# Build a text-generation pipeline for the Granite instruct checkpoint.
pipe = pipeline("text-generation", model="ibm-granite/granite-3.3-2b-instruct")
# Chat-style input: a list of {"role", "content"} message dicts (a single user turn here).
messages = [
    {"role": "user", "content": "Who are you?"},
]
# Last expression of the cell, so the notebook displays the pipeline's return value.
pipe(messages)

In [None]:
from vllm import LLM, SamplingParams
from vllm.assets.image import ImageAsset
from huggingface_hub import hf_hub_download
from PIL import Image

# Hugging Face repo id of the Granite vision checkpoint passed to vLLM.
model_path = "ibm-granite/granite-vision-3.3-2b"

# Instantiate the vLLM engine for that checkpoint; no other engine options are set.
model = LLM(
    model=model_path,
)

# Decoding settings: temperature 0.2 and at most 64 generated tokens.
sampling_params = SamplingParams(
    temperature=0.2,
    max_tokens=64,
)

# Define the question we want to answer and format the prompt
image_token = "<image>"
system_prompt = "<|system|>\nA chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions.\n"

question = "What is the highest scoring model on ChartQA and what is its score?"
# Prompt layout: system block, a user turn holding the image placeholder and the
# question, then an open assistant turn for the model to complete.
prompt = f"{system_prompt}<|user|>\n{image_token}\n{question}\n<|assistant|>\n"
# The hub download below is disabled, so example.png must already exist in the
# working directory or Image.open fails.
# img_path = hf_hub_download(repo_id=model_path, filename='example.png')
image = Image.open("example.png").convert("RGB")
print(image)

# Build the inputs to vLLM; the image is passed as `multi_modal_data`.
inputs = {
    "prompt": prompt,
    "multi_modal_data": {
        "image": image,
    }
}

# Run generation and print the text of the first completion of the first request.
outputs = model.generate(inputs, sampling_params=sampling_params)
print(f"Generated text: {outputs[0].outputs[0].text}")


In [None]:
import os
from getpass import getpass

# Assigning "" here used to overwrite a valid key that was already exported in
# the environment. Keep an existing key and prompt (without echo) only when it
# is missing, so the secret never has to be typed into the notebook source.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("GROQ_API_KEY: ")

In [None]:
from langchain_groq import ChatGroq

# Module-level Groq chat model; the chains defined outside the RAG class
# (agent_chunker, generate_queries, final_rag_chain) all pipe through this `llm`.
# temperature=0, no explicit token or time limit, up to 2 retries.
llm = ChatGroq(
    model="openai/gpt-oss-20b",
    temperature=0,
    max_tokens=None,
    reasoning_format="parsed",
    timeout=None,
    max_retries=2,
    # other params...
)


In [None]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser


async def agent_chunker(text,MAX_CHUNK_TOKENS=500):
   """Split ``text`` into cleaned, self-contained semantic chunks using the LLM.

   The text is sent to the module-level ``llm`` with a prompt that asks for a
   JSON array of strings; the reply is parsed with ``JsonOutputParser``.

   Args:
      text: Raw paragraph(s) to segment.
      MAX_CHUNK_TOKENS: Per-chunk token limit interpolated into the prompt. The
         model is only instructed to respect it; nothing here enforces it.

   Returns:
      The parsed chunks (a list of strings when the model follows the requested
      format). Empty or whitespace-only input returns ``[]`` without calling the
      model; a missing parse result becomes ``[]`` and a bare string is wrapped
      in a one-element list.
   """
   # Nothing to segment: skip the model round-trip entirely.
   if not text or not text.strip():
      return []

   semantic_chunking_prompt = """You are an expert in text preprocessing and semantic segmentation for NLP pipelines.
   Your task is to transform the given paragraph into semantic chunks that preserve meaning
   while ensuring each chunk is independent and self-contained.

   ### RULES

   1. Cleaning
      - Remove unwanted special characters: @, #, $, %, ^, &, *, ~, |, _, multiple spaces, or formatting artifacts.
      - Keep punctuation that improves readability and sentence clarity.
      - Normalize spacing after punctuation.

   2. Semantic Chunking
      - Group sentences so that each chunk expresses ONE coherent idea or closely related set of ideas.
      - Preserve the original phrasing whenever possible; avoid unnecessary rewording.
      - Make each chunk self-contained:
         - Replace pronouns ("it", "he", "she", "they", "this", "that") with the full entity name.
         - Add necessary context modifiers to nouns or statements so they can be understood out of context.

   3. Chunk Size Constraint (Token-Based)
      - Each chunk must be ≤ {MAX_CHUNK_TOKENS} tokens.
      - If a chunk exceeds the limit, split it into smaller, still meaningful chunks.

   4. Output Format
      - Output must be only a valid JSON array of strings.
      - Each string is one semantic chunk.
      - Do not include explanations or other text.

   ---

   ### Example Input:
   Apple Inc. announced a new iPhone yesterday. It features an improved camera and longer battery life.
   The device will be available in stores next month along with new accessories.

   ### Example Output (MAX_CHUNK_TOKENS = 20):
   [
   "Apple Inc. announced a new iPhone yesterday. The new iPhone features an improved camera and longer battery life.",
   "The new iPhone will be available in stores next month along with new accessories."
   ]

   ---

   Now process the following paragraph:
   {text}
   """
   # {MAX_CHUNK_TOKENS} and {text} are the only template variables; the prompt
   # must not contain any other curly braces or from_template would treat them
   # as variables too.
   prompt = ChatPromptTemplate.from_template(semantic_chunking_prompt)
   chain_chunks = prompt | llm | JsonOutputParser()
   chunks = await chain_chunks.ainvoke({"MAX_CHUNK_TOKENS":MAX_CHUNK_TOKENS,"text":text})

   # The parser hands back whatever JSON the model produced, so normalise the
   # two shapes that would break callers iterating over the result.
   if chunks is None:
      return []
   if isinstance(chunks, str):
      return [chunks]
   return chunks

In [None]:
# import bs4
# from langchain import hub
# from langchain_community.document_loaders import WebBaseLoader
# from langchain_core.documents import Document
# from langchain_text_splitters import RecursiveCharacterTextSplitter
# # from langgraph.graph import START, StateGraph
# from typing_extensions import List, TypedDict

# loader = WebBaseLoader(
#     web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
#     bs_kwargs=dict(
#         parse_only=bs4.SoupStrainer(
#             class_=("post-content", "post-title", "post-header")
#         )
#     ),
# )
# docs = loader.load()

In [None]:
from langchain_community.document_loaders import DirectoryLoader

In [None]:
# Load the files under ./AHC_AGENT as LangChain documents, with a progress bar
# and multithreaded loading enabled.
loader = DirectoryLoader("./AHC_AGENT",show_progress=True,use_multithreading=True)
docs = loader.load()
# NOTE(review): a bare `docs` renders every loaded document in the cell output;
# consider `len(docs)` or a slice before sharing the notebook.
docs

In [98]:
# Take the raw text of the second loaded document.
# NOTE(review): another cell rebinds `text` to a hard-coded sample paragraph, so
# the value later cells see depends on the order the cells were executed in.
text = docs[1].page_content
text

'Lorem ipsum\n\nLorem ipsum dolor sit amet, consectetur adipiscing\n\nelit. Nunc ac faucibus odio.\n\nVestibulum neque massa, scelerisque sit amet ligula eu, congue molestie mi. Praesent ut varius sem. Nullam at porttitor arcu, nec lacinia nisi. Ut ac dolor vitae odio interdum condimentum. Vivamus dapibus sodales ex, vitae malesuada ipsum cursus convallis. Maecenas sed egestas nulla, ac condimentum orci. Mauris diam felis, vulputate ac suscipit et, iaculis non est. Curabitur semper arcu ac ligula semper, nec luctus nisl blandit. Integer lacinia ante ac libero lobortis imperdiet. Nullam mollis convallis ipsum, ac accumsan nunc vehicula vitae. Nulla eget justo in felis tristique fringilla. Morbi sit amet tortor quis risus auctor condimentum. Morbi in ullamcorper elit. Nulla iaculis tellus sit amet mauris tempus fringilla.\n\nMaecenas mauris lectus, lobortis et purus mattis, blandit dictum tellus.\n\nMaecenas non lorem quis tellus placerat varius.\n\nNulla facilisi.\n\nAenean congue fring

In [96]:
# Split the text on blank lines and show how many pieces that produces.
text_list = text.split("\n\n")
len(text_list)

378

In [100]:
len(text)

50362

In [97]:
text = 'What is a Sample Text Paragraph?\nA sample text paragraph is a written section that illustrates a specific idea or theme. It typically includes a topic sentence, supporting details, and a concluding sentence. Sample paragraphs can be used for various purposes, such as demonstrating writing style, presenting information, or providing examples in educational contexts.\n\nStructure of a Sample Text Paragraph\nTopic Sentence: This is the first sentence that states the main idea of the paragraph. It sets the tone and direction for the rest of the text.\nSupporting Sentences: These sentences provide evidence, examples, or explanations that bolster the main idea. They are essential for developing the paragraph’s theme and providing clarity.\nConcluding Sentence: This wraps up the paragraph, reinforcing the main idea and providing a transition to the next point or paragraph.\nImportance of Sample Text Paragraphs\nSample text paragraphs are vital for several reasons:\n\nClarity: Well-structured paragraphs help convey ideas clearly, making it easier for readers to understand the content.\nOrganization: They provide a framework for organizing thoughts, which is essential for effective communication.\nEngagement: Engaging paragraphs capture the reader’s interest, encouraging them to continue reading.\nTips for Writing Effective Sample Text Paragraphs\nBe Concise: Keep sentences clear and to the point. Avoid unnecessary jargon that might confuse the reader.\nUse Active Voice: Active voice makes writing more dynamic and engaging. For example, instead of saying, “The book was read by the student,” say, “The student read the book.”\nVary Sentence Length: Mix short and long sentences to maintain a natural flow and keep the reader’s interest.\nEdit and Revise: Always review your paragraphs for clarity, coherence, and grammatical accuracy. 
This process helps ensure that your writing is polished and professional.\nExamples of Sample Text Paragraphs\nExample 1: Descriptive Paragraph\n\nThe sun dipped below the horizon, casting a warm golden glow over the tranquil lake. The water shimmered like a thousand diamonds, reflecting the hues of orange and pink that painted the sky. A gentle breeze rustled the leaves of the nearby trees, creating a soothing melody that echoed through the quiet evening. In that moment, time seemed to stand still, inviting anyone who witnessed it to pause and appreciate the beauty of nature.\n\nExample 2: Expository Paragraph\n\nRenewable energy is an essential component of sustainable development. Unlike fossil fuels, which are finite and contribute to environmental degradation, renewable energy sources such as solar, wind, and hydroelectric power offer a clean and sustainable alternative. These energy sources are abundant and can significantly reduce greenhouse gas emissions, helping to combat climate change. By investing in renewable energy, countries can create jobs, enhance energy security, and promote environmental sustainability.\n\nConclusion\nUnderstanding and utilizing sample text paragraphs is crucial for effective writing. By mastering their structure and purpose, writers can enhance clarity, organization, and engagement in their work. Whether you are a student, a professional, or a casual writer, these skills will serve you well in all your writing endeavors.\n\nFrequently Asked Questions (FAQs)\n1. What is the purpose of a sample text paragraph?\n\nThe purpose of a sample text paragraph is to illustrate a specific idea or theme clearly and effectively, serving as a model for writers.\n\n2. How do I know if my paragraph is effective?\n\nAn effective paragraph should have a clear topic sentence, supporting details that are relevant and concise, and a strong concluding sentence that ties everything together.\n\n3. 
Can I use sample text paragraphs in academic writing?\n\nYes, sample text paragraphs can be used in academic writing to provide examples, illustrate points, and demonstrate proper writing techniques.\n\n4. What are common mistakes to avoid in writing paragraphs?\n\nCommon mistakes include overly long sentences, lack of focus on the main idea, and failure to provide adequate supporting details.\n\n5. How can I improve my paragraph writing skills?\n\nTo improve your paragraph writing skills, practice writing regularly, read well-structured texts, and seek feedback from peers or mentors.'

In [104]:
# Chunk only the tail of `text` (characters from offset 30000 onward) with a
# 1000-token limit per chunk.
# NOTE(review): `text` is bound in two different cells (the loaded document and a
# hard-coded sample paragraph); the slice is empty whenever the live `text` is
# shorter than 30000 characters - confirm which value is intended.
chunk = await agent_chunker(text=text[30000:],MAX_CHUNK_TOKENS=1000)

In [105]:
# Print each returned chunk on its own line for a visual check.
for c in chunk:
    print(c)

Donec ante est, blandit sit amet tristique vel, lacinia pulvinar arcu. Pellentesque scelerisque fermentum erat, id posuere justo pulvinar ut. Cras id eros sed enim aliquam lobortis. Sed lobortis nisl ut eros efficitur tincidunt. Cras justo mi, porttitor quis mattis vel, ultricies ut purus. Ut facilisis et lacus eu cursus.
In eleifend velit vitae libero sollicitudin euismod.
Fusce vitae vestibulum velit. Pellentesque vulputate lectus quis pellentesque commodo. Aliquam erat volutpat. Vestibulum in egestas velit. Pellentesque fermentum nisl vitae fringilla venenatis. Etiam id mauris vitae orci maximus ultricies. Cras fringilla ipsum magna, in fringilla dui commodo a.
Etiam vehicula luctus fermentum. In vel metus congue, pulvinar lectus vel, fermentum dui. Maecenas ante orci, egestas ut aliquet sit amet, sagittis a magna. Aliquam ante quam, pellentesque ut dignissim quis, laoreet eget est. Aliquam erat volutpat. Class aptent taciti sociosqu ad litora torquent per conubia nostra, per incept

In [None]:
chunks = [
    "A sample text paragraph is a written section that illustrates a specific idea or theme. A sample text paragraph typically includes a topic sentence, supporting details, and a concluding sentence. Sample paragraphs can be used for various purposes, such as demonstrating writing style, presenting information, or providing examples in educational contexts.",
    "Topic Sentence: The Topic Sentence is the first sentence that states the main idea of the paragraph. The Topic Sentence sets the tone and direction for the rest of the text. Supporting Sentences: Supporting Sentences provide evidence, examples, or explanations that bolster the main idea. Supporting Sentences are essential for developing the paragraph’s theme and providing clarity. Concluding Sentence: The Concluding Sentence wraps up the paragraph, reinforcing the main idea and providing a transition to the next point or paragraph.",
    "Sample text paragraphs are vital for several reasons. Clarity: Well-structured paragraphs help convey ideas clearly, making it easier for readers to understand the content. Organization: Sample text paragraphs provide a framework for organizing thoughts, which is essential for effective communication. Engagement: Engaging paragraphs capture the reader’s interest, encouraging them to continue reading.",
    "Be Concise: Keep sentences clear and to the point. Avoid unnecessary jargon that might confuse the reader. Use Active Voice: Active voice makes writing more dynamic and engaging. For example, instead of saying, “The book was read by the student,” say, “The student read the book.” Vary Sentence Length: Mix short and long sentences to maintain a natural flow and keep the reader’s interest. Edit and Revise: Always review paragraphs for clarity, coherence, and grammatical accuracy. The Edit and Revise process helps ensure that writing is polished and professional.",
    "The sun dipped below the horizon, casting a warm golden glow over the tranquil lake. The water shimmered like a thousand diamonds, reflecting the hues of orange and pink that painted the sky. A gentle breeze rustled the leaves of the nearby trees, creating a soothing melody that echoed through the quiet evening. In that moment, time seemed to stand still, inviting anyone who witnessed the scene to pause and appreciate the beauty of nature.",
    "Renewable energy is an essential component of sustainable development. Unlike fossil fuels, which are finite and contribute to environmental degradation, renewable energy sources such as solar, wind, and hydroelectric power offer a clean and sustainable alternative. Renewable energy sources are abundant and can significantly reduce greenhouse gas emissions, helping to combat climate change. By investing in renewable energy, countries can create jobs, enhance energy security, and promote environmental sustainability.",
    "Understanding and utilizing sample text paragraphs is crucial for effective writing. By mastering their structure and purpose, writers can enhance clarity, organization, and engagement in their work. Whether the reader is a student, a professional, or a casual writer, these skills will serve the reader well in all writing endeavors.",
    "What is the purpose of a sample text paragraph? The purpose of a sample text paragraph is to illustrate a specific idea or theme clearly and effectively, serving as a model for writers.",
    "How does the reader know if the reader's paragraph is effective? An effective paragraph should have a clear topic sentence, supporting details that are relevant and concise, and a strong concluding sentence that ties everything together.",
    "Can the reader use sample text paragraphs in academic writing? Yes, sample text paragraphs can be used in academic writing to provide examples, illustrate points, and demonstrate proper writing techniques.",
    "What are common mistakes to avoid in writing paragraphs? Common mistakes include overly long sentences, lack of focus on the main idea, and failure to provide adequate supporting details.",
    "How can the reader improve the reader's paragraph writing skills? To improve the reader's paragraph writing skills, practice writing regularly, read well-structured texts, and seek feedback from peers or mentors."
]
print(len(chunks))

In [None]:
from langchain_ollama import OllamaEmbeddings

# Embedding function backed by an Ollama model; passed to the Chroma stores
# created in the following cells.
# NOTE(review): "qwen3:1.7b" looks like a general chat model rather than a
# dedicated embedding model - confirm this is the intended choice.
embeddings = OllamaEmbeddings(model="qwen3:1.7b")

In [None]:
from langchain_chroma import Chroma

# Chroma collection created without a persist_directory (presumably not saved
# to disk - verify against the Chroma docs).
# NOTE(review): the next cell rebinds `vector_store` to a persisted store with
# the same collection name, so this instance is superseded when cells run in order.
vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
)

In [None]:
from langchain_chroma import Chroma

# Chroma collection "example_collection" stored under ./chroma_langchain_db and
# embedded with the module-level `embeddings`. This is the `vector_store` the
# later add/search cells operate on.
vector_store = Chroma(
    collection_name="example_collection",
    embedding_function=embeddings,
    persist_directory="./chroma_langchain_db",
)

In [None]:
from langchain_core.documents import Document
from uuid import uuid4

# Wrap every chunk in a Document tagged with the "tweet" source that the
# similarity-search filter relies on. Each document gets its own UUID string as
# id: the previous hard-coded `id=1` gave every document the same identifier.
documents = [
    Document(
        page_content=chunk_text,
        metadata={"source": "tweet"},
        id=str(uuid4()),
    )
    for chunk_text in chunks
]

In [None]:
for chunk in chunks:
    print(chunk)

In [None]:
# One fresh random UUID string per document, used as the ids in the store.
uuids = [str(uuid4()) for _ in range(len(documents))]

# NOTE(review): the ids are regenerated on every run, so re-running this cell
# presumably adds the same chunks again under new ids - confirm and consider
# deterministic ids. The result is kept in `data` but not read by any cell
# visible here.
data = await vector_store.aadd_documents(documents=documents, ids=uuids)

In [None]:
# Fetch the 2 closest chunks for the query, restricted to documents whose
# metadata "source" is "tweet", and print their text.
# NOTE(review): "Perpose" looks like a typo for "Purpose" - confirm whether the
# misspelling is a deliberate robustness check.
results = vector_store.similarity_search(
    "Perpose of sample text ?",
    k=2,
    filter={"source": "tweet"},
)
for res in results:
    print(f"{res.page_content}")

In [None]:
from langchain.prompts import ChatPromptTemplate

# RAG-Fusion: Related
# Prompt asking the model for several search queries related to {question}, to
# be answered in JSON.
# NOTE(review): `template` is rebound by later cells; only `prompt_rag_fusion`
# keeps this text.
template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n
Generate multiple search queries related to: {question} \n
Output json format:"""
prompt_rag_fusion = ChatPromptTemplate.from_template(template)

In [None]:
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
# from langchain_openai import ChatOpenAI

# Chain: RAG-Fusion prompt -> module-level `llm` -> JSON parser.
# NOTE(review): a later cell rebinds `generate_queries` to a newline-splitting
# variant, so which chain is live depends on execution order.
generate_queries = (
    prompt_rag_fusion 
    | llm
    | JsonOutputParser() 
    # | (lambda x: x.split("\n"))
)

In [None]:
# Try the query generator on a two-part question; the parsed result is shown as
# the cell output. `question` is a notebook-global that other cells also assign.
question = "what is leave policy? and list of of leaves ?"
generate_queries.invoke({"question": question})

In [None]:
import os
from getpass import getpass

# SECURITY: a Groq API key was hard-coded in this cell. It has been removed from
# the source; the key that was committed must be treated as compromised and
# revoked in the Groq console.
# Use the key from the environment and prompt (without echo) only when it is
# missing, so the secret is never stored in the notebook.
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass("GROQ_API_KEY: ")

class Prompt_store:
  """Default prompts, model names and storage settings for the RAG pipeline.

  Everything is a class attribute, so a subclass or an instance can override
  individual values without touching the others.
  """

  # Prompt that asks the model to split a compound query into standalone
  # questions and reply with a JSON array of strings. `{question}` is the only
  # template variable; the text must not contain any other curly braces because
  # it is compiled with ChatPromptTemplate.from_template.
  # NOTE(review): the prompt asks for questions "exactly" as they appear in the
  # input, yet its own example rewords the second question - confirm which
  # behaviour is wanted.
  DECOMPOSITION_PROMPT = """**Prompt for Question‑Splitting (JSON‑only Output)**
You are an assistant that extracts individual questions from a single user query that may contain multiple questions.  
Your task: split the query into the smallest logical chunks, each representing a single question, and each question should be a standalone question.

Input:  
{question}

Output: a JSON array of strings, each string being one extracted question, **exactly** as it appears in the input (maintain original wording). No other text or explanation should be included.  
If the input contains only one question, return a JSON array with that single question.

Example  
Input:  
"Can you explain how blockchain works, and also tell me the best way to invest in cryptocurrencies?"

Output:  
```json
[
  "Can you explain how blockchain works?",
  "What is the best way to invest in cryptocurrencies?"
]```"""

  # Answer-generation prompt with two template variables, {question} and
  # {context}. Spelling and grammar of the instruction sentence were corrected
  # ("retrieved", "maximum and keep") so the model receives a clean instruction.
  FINAL_ANSWER_TEMPLATE = """You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
  Question: {question}
  Context: {context}
  Answer:
"""

  # Postgres connection URL; in the code visible here it is only referenced by
  # commented-out PGVector code.
  # NOTE(review): the URL embeds a username and password - presumably local
  # development defaults; load it from the environment for anything shared.
  connection = "postgresql+psycopg://langchain:langchain@localhost:6024/langchain"
  # Default Ollama embedding model name.
  EMBEDDING_MODEL_NAME = "qwen3:1.7b"
  # Default Groq chat model name.
  LLM_MODEL_NAME = "openai/gpt-oss-20b"
  # Default vector-store collection name.
  COLLECTION_NAME = "NEST"





In [None]:
from langchain.globals import set_debug
# Explicitly switch LangChain's global debug flag off.
set_debug(False)

In [None]:
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_postgres import PGVector
from langchain_ollama import OllamaEmbeddings
from langchain.load import dumps, loads
from langchain.prompts import ChatPromptTemplate
from langchain_chroma import Chroma

class RAG(Prompt_store):
    """Question-decomposition RAG pipeline over a persisted Chroma collection.

    Flow of ``rag()``: split the user question into sub-questions with the LLM,
    retrieve documents for every sub-question, merge the results without
    duplicates, then answer the original question from that context.

    Model names, prompts and the collection name default to the values on
    ``Prompt_store`` and can be overridden per instance via the constructor.
    """

    def __init__(self,llm_model_name=None,embedding_model_name=None,DECOMPOSITION_PROMPT=None,FINAL_ANSWER_TEMPLATE=None,COLLECTION_NAME=None):
        """Override selected ``Prompt_store`` defaults for this instance only.

        Any argument left as ``None`` (or otherwise falsy) keeps the class-level
        default.
        """
        super().__init__()
        if llm_model_name:
            self.LLM_MODEL_NAME = llm_model_name

        if embedding_model_name:
            self.EMBEDDING_MODEL_NAME = embedding_model_name

        if DECOMPOSITION_PROMPT:
            self.DECOMPOSITION_PROMPT = DECOMPOSITION_PROMPT

        if FINAL_ANSWER_TEMPLATE:
            self.FINAL_ANSWER_TEMPLATE = FINAL_ANSWER_TEMPLATE

        if COLLECTION_NAME:
            self.COLLECTION_NAME = COLLECTION_NAME

    def get_llm(self,model_name=None):
        """Return a new ``ChatGroq`` client for ``model_name`` (default: ``LLM_MODEL_NAME``)."""
        if not model_name:
            model_name = self.LLM_MODEL_NAME

        return ChatGroq(
            model=model_name,
            temperature=0,
            max_tokens=None,
            reasoning_format="parsed",
            timeout=None,
            max_retries=2
        )

    def get_embeddings(self,model_name=None):
        """Return ``OllamaEmbeddings`` for ``model_name`` (default: ``EMBEDDING_MODEL_NAME``)."""
        if not model_name:
            model_name = self.EMBEDDING_MODEL_NAME
        return OllamaEmbeddings(model=model_name)

    def get_vector_store(self,collection_name=None,model_name=None):
        """Open the Chroma collection persisted under ``./chroma_langchain_db``.

        Args:
            collection_name: Collection to open (default: ``COLLECTION_NAME``).
            model_name: Embedding model name (default: ``EMBEDDING_MODEL_NAME``).
        """
        if not model_name:
            model_name = self.EMBEDDING_MODEL_NAME

        if not collection_name:
            collection_name = self.COLLECTION_NAME
        # PGVector alternative, kept for reference:
        # vector_store = PGVector(
        #     embeddings=self.get_embeddings(model_name),
        #     collection_name=collection_name,
        #     connection=self.connection,
        #     use_jsonb=True,
        # )
        # return vector_store
        return Chroma(
            collection_name=collection_name,
            embedding_function=self.get_embeddings(model_name),
            persist_directory="./chroma_langchain_db",
        )

    async def upload_documents(self,documents,uuids):
        """Add ``documents`` to this instance's collection under the ids ``uuids``.

        Returns:
            Whatever ``aadd_documents`` returns (previously discarded).
        """
        # Write to the store configured on this instance. The previous code used
        # a notebook-global `vector_store`, which fails on a fresh kernel and
        # otherwise writes to whatever collection that global points at instead
        # of the collection this instance retrieves from.
        return await self.get_vector_store().aadd_documents(documents=documents, ids=uuids)

    def get_retriever(self,collection_name=None):
        """Return a retriever over ``collection_name`` (default: ``COLLECTION_NAME``)."""
        if not collection_name:
            collection_name = self.COLLECTION_NAME

        return self.get_vector_store(collection_name).as_retriever()

    async def retriever_batch_document(self,question_list:list):
        """Retrieve documents for every question; returns one result list per question."""
        return await self.get_retriever().abatch(question_list)

    async def decomposition_query(self, question,decomposition_prompt_template=None):
        """Ask the LLM to split ``question`` into sub-questions.

        Args:
            question: The user's (possibly multi-part) question.
            decomposition_prompt_template: Prompt with a ``{question}`` variable
                (default: ``DECOMPOSITION_PROMPT``).

        Returns:
            The JSON value parsed from the model reply; a list of strings when
            the model follows the prompt.
        """
        if not decomposition_prompt_template:
            decomposition_prompt_template = self.DECOMPOSITION_PROMPT

        decomposition_prompt = ChatPromptTemplate.from_template(decomposition_prompt_template)
        generate_queries_decomposition = decomposition_prompt | self.get_llm() | JsonOutputParser()
        return await generate_queries_decomposition.ainvoke({"question":question})

    # NOTE: the class used to define `retriever_document` twice; the first
    # (single-query) definition was silently shadowed by this one and has been
    # removed. Callers always got this decomposition-based behaviour.
    async def retriever_document(self, question):
        """Retrieve the de-duplicated documents for all sub-questions of ``question``.

        Falls back to retrieving for ``question`` itself when decomposition
        yields no usable sub-question.
        """
        sub_queries = await self.decomposition_query(question=question)
        # The parser returns whatever JSON the model produced, so keep only
        # non-empty strings before handing them to the retriever as a batch.
        if isinstance(sub_queries, str):
            sub_queries = [sub_queries]
        elif not isinstance(sub_queries, list):
            sub_queries = []
        sub_queries = [query for query in sub_queries if isinstance(query, str) and query.strip()]
        if not sub_queries:
            sub_queries = [question]
        all_results = await self.retriever_batch_document(sub_queries)
        return self.get_unique_union(all_results)

    def get_unique_union(self,documents: list[list]):
        """Flatten per-query result lists into one list without duplicate documents.

        Documents are compared by their serialised form. First-seen order is
        kept (the previous ``set()`` round-trip returned them in arbitrary
        order, discarding the retriever's ranking and making runs differ).
        """
        serialised = [dumps(doc) for sublist in documents for doc in sublist]
        return [loads(doc) for doc in dict.fromkeys(serialised)]

    async def rag_response(self,question,context,final_answer_template=None):
        """Answer ``question`` from ``context`` with the final-answer prompt.

        Args:
            question: The user's question.
            context: Retrieved context, interpolated into the prompt as-is.
            final_answer_template: Prompt with ``{question}`` and ``{context}``
                variables (default: ``FINAL_ANSWER_TEMPLATE``).

        Returns:
            The model's answer as a string.
        """
        if not final_answer_template:
            final_answer_template = self.FINAL_ANSWER_TEMPLATE

        # The leftover debug `print(context)` was removed: it dumped every
        # retrieved document into the cell output on each call.
        prompt = ChatPromptTemplate.from_template(final_answer_template)
        final_rag_chain = prompt | self.get_llm() | StrOutputParser()

        return await final_rag_chain.ainvoke({"context":context,"question":question})

    async def rag(self,question,metadata=None):
        """Run the full pipeline for ``question`` and return the answer string.

        Args:
            question: The user's question.
            metadata: Currently unused; accepted for interface compatibility.
                Defaults to ``None`` instead of a shared mutable ``{}``.
        """
        context = await self.retriever_document(question=question)
        return await self.rag_response(question=question,context=context)

In [None]:
# Pipeline instance using every Prompt_store default (no overrides passed).
rag = RAG()

In [None]:
# Run the full pipeline (decompose -> retrieve -> answer) on a two-part question.
# question = "What are your favorite hobbies, and how did you get into them"
question = "Perpose of sample text and Use of Perpose of sample text ?"
final_response = await rag.rag(question=question)

In [None]:
print(final_response)

In [None]:
# Prompt that answers {question} from a supplied {context}.
# NOTE(review): this rebinds the notebook-globals `template` and `prompt`, which
# earlier cells also assign.
template = """Answer the following question based on this context:

{context}

Question: {question}
"""

prompt = ChatPromptTemplate.from_template(template)

# Chain: prompt -> module-level `llm` -> plain string.
final_rag_chain = (
    prompt
    | llm
    | StrOutputParser()
)

# NOTE(review): `ai` is passed as the context before any cell visible here
# assigns it, and is then overwritten with the answer string. On a fresh kernel
# this raises NameError, and re-running the cell feeds the previous answer back
# in as context - confirm what the context was meant to be.
ai = final_rag_chain.invoke({"context":ai,"question":question})

In [None]:
print(ai)

In [None]:
# NOTE(review): when cells run in order, `ai` is the string produced by the
# previous chain (it ends in StrOutputParser) and a str has no `.invoke`. This
# line presumably dates from when `ai` was a retrieval chain - confirm or remove.
rag.get_unique_union(ai.invoke({"question":question}))

In [None]:
# Multi-query prompt: asks the model for five rewordings of {question},
# separated by newlines. Rebinds the notebook-global `template` once more.
template = """You are an AI language model assistant. Your task is to generate five 
different versions of the given user question to retrieve relevant documents from a vector 
database. By generating multiple perspectives on the user question, your goal is to help
the user overcome some of the limitations of the distance-based similarity search. 
Provide these alternative questions separated by newlines. Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(template)

from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# Chain: multi-query prompt -> module-level `llm` -> string -> list of lines.
# Replaces the JSON-based `generate_queries` defined in an earlier cell.
# Splitting on "\n" keeps empty strings wherever the reply contains blank lines.
generate_queries = (
    prompt_perspectives 
    | llm
    | StrOutputParser() 
    | (lambda x: x.split("\n"))
)

In [None]:
generate_queries.invoke({"question": question})