In [None]:
from pathlib import Path
import fitz


def load_pdf(file_path):
    """Extract the plain text of every page in a PDF.

    Args:
        file_path: Path of the PDF file, passed straight to ``fitz.open``.

    Returns:
        A single string made of each page's ``get_text()`` output,
        concatenated in page order with no separator added between pages.
    """
    # The context manager closes the document (and its file handle) even if
    # text extraction fails on one of the pages.
    with fitz.open(file_path) as doc:
        return "".join(page.get_text() for page in doc)

# Read both source papers, then merge them into one corpus string with a
# single newline between the two documents.
pdf1_text = load_pdf("/2501.00881v1.pdf")
pdf2_text = load_pdf("/2501.18033v1.pdf")
all_text = "\n".join([pdf1_text, pdf2_text])

In [None]:
import tiktoken

def chunk_text(text, chunk_size=300, chunk_overlap=50):
    """Split ``text`` into overlapping chunks measured in tokens.

    The text is tokenized with tiktoken's ``cl100k_base`` encoding, cut into
    windows of at most ``chunk_size`` tokens whose starts are
    ``chunk_size - chunk_overlap`` tokens apart, and each window is decoded
    back to a string.

    Args:
        text: The string to split.
        chunk_size: Maximum number of tokens per chunk; must be positive.
        chunk_overlap: Number of tokens shared by consecutive chunks; must be
            non-negative and smaller than ``chunk_size``.

    Returns:
        A list of chunk strings in document order. Empty when ``text``
        produces no tokens.

    Raises:
        ValueError: If ``chunk_size`` is not positive, or ``chunk_overlap`` is
            outside ``[0, chunk_size)``. Such values would make the window
            stride zero/negative or leave gaps between chunks.
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= chunk_overlap < chunk_size:
        raise ValueError(
            "chunk_overlap must satisfy 0 <= chunk_overlap < chunk_size"
        )

    tokenizer = tiktoken.get_encoding("cl100k_base")
    tokens = tokenizer.encode(text)

    stride = chunk_size - chunk_overlap
    chunks = []
    for start in range(0, len(tokens), stride):
        window = tokens[start:start + chunk_size]
        chunks.append(tokenizer.decode(window))
        # Once a window reaches the end of the token list, any further window
        # would only repeat tokens already emitted, so stop here.
        if start + chunk_size >= len(tokens):
            break
    return chunks

# Chunk the combined text of both PDFs using chunk_text's default
# chunk_size and chunk_overlap.
chunks = chunk_text(all_text)


In [None]:
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import Chroma

# Sentence-transformers model used to embed both the chunks and the queries.
embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Build the vector store from the chunks produced above. Opening the
# persisted directory without inserting anything leaves retrieval dependent
# on whatever an earlier run happened to store in "db".
# Deterministic ids are intended to keep re-runs from piling up duplicate
# entries -- NOTE(review): confirm add/upsert-by-id semantics for the
# installed Chroma version; entries left by older runs are not removed.
db = Chroma.from_texts(
    texts=chunks,
    embedding=embedding_model,
    ids=[f"chunk-{index}" for index in range(len(chunks))],
    persist_directory="db",
)


In [None]:
from transformers import pipeline
from langchain.llms import HuggingFacePipeline

# Step 1: Create a Transformers pipeline
# max_new_tokens is set explicitly: without a generation length the recorded
# answers in this notebook stop mid-sentence, which is consistent with the
# library's short default output length.
flan_pipeline = pipeline(
    "text2text-generation",
    model="google/flan-t5-base",
    tokenizer="google/flan-t5-base",
    max_new_tokens=256,
)

# Step 2: Wrap it with LangChain
llm = HuggingFacePipeline(pipeline=flan_pipeline)


Device set to use cpu


In [None]:
# RetrievalQA is not imported by any earlier cell, so import it here to keep
# the notebook runnable on a fresh kernel.
from langchain.chains import RetrievalQA

# Question-answering chain: the retriever pulls chunks from the Chroma store
# and the wrapped Flan-T5 pipeline generates the answer.
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever())

In [None]:
# PromptTemplate is not imported by any earlier cell, so import it here to
# keep the notebook runnable on a fresh kernel.
from langchain.prompts import PromptTemplate

# Prompt with two placeholders: the retrieved context and the user question.
# NOTE(review): in the visible cells this template is defined but never
# passed to the QA chain or the LLM -- confirm whether it should be wired in.
prompt_template = PromptTemplate(
    input_variables=["context", "question"],
    template="""
You are a helpful AI assistant. Use the following context to answer the question.

Context:
{context}

Question: {question}
Answer:"""
)



In [None]:
def answer_question(query):
    """Answer ``query`` with the retrieval-augmented QA chain.

    ``qa_chain`` is built with its own retriever, so it fetches the relevant
    chunks itself. A separate similarity search here would only repeat that
    work and have its result thrown away.

    Args:
        query: The question to ask, as a string.

    Returns:
        Whatever ``qa_chain.run(query)`` returns (the generated answer).
    """
    return qa_chain.run(query)

In [None]:
# Demo query: definition of vertical AI agents; prints the returned answer.
print(answer_question("What are vertical AI agents?"))


robots


In [None]:
# Demo query: architectural layers of a vertical AI agent system; prints the
# returned answer.
print(answer_question("What are the core architectural layers of a vertical AI agent system as described by Dr. Bousetouane?"))


a logical layer


In [None]:
# Demo query: agentic systems and enterprise decision-making; prints the
# returned answer.
print(answer_question("Explain how agentic systems are designed to improve decision-making in enterprise workflows."))


Agentic systems are designed to improve decision-making in enterprise workflows.


In [None]:
# Demo query: diffusion models versus GANs for image generation; prints the
# returned answer.
print(answer_question("What are the main advantages of using diffusion models over GANs for image generation?"))


GANs are more efficient at generating images.


In [None]:
# Demo query: a real-world vision-language model use case; prints the
# returned answer.
print(answer_question("Describe a real-world use case of vision-language models mentioned in the paper by Dr. Bousetouane."))


A vision-language model is used to describe the visual system. The model is a computer program


In [None]:
# Demo query: integrating vision-based generative models into agentic
# systems; prints the returned answer.
print(answer_question("How can vision-based generative models be integrated into agentic systems for task automation?"))

Vision-based generative models can be integrated into agentic systems for task automation by utilizing 
