In [12]:
from langchain_community.document_loaders import UnstructuredPowerPointLoader
def pptx_parser(file_path):
    """
    Load a PowerPoint file through UnstructuredPowerPointLoader.

    Args:
        file_path (str): Location of the PowerPoint file to read.

    Returns:
        list: The documents produced by the loader's ``load()`` call.
    """
    # Build the loader and load in a single expression; nothing else is cached.
    return UnstructuredPowerPointLoader(file_path).load()
    

In [13]:
import os
# Extensions parse_file accepts by default (lowercase, without the leading dot).
available_extentions = ['pptx', 'ppt']

def parse_file(file_path, extentions=available_extentions):
    """
    Parses a file based on its extension and returns its content.

    Args:
        file_path (str): Path to the file to parse.
        extentions (list): Allowed extensions, lowercase and without the
            leading dot. Defaults to ``available_extentions``.

    Returns:
        list: The result of the parser selected for the extension
        (``pptx_parser`` for 'pptx' and 'ppt').

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the extension is not in ``extentions``, or is allowed
            but has no parser wired up here.
    """
    # Check if file exists
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File not found: {file_path}")
    # Use splitext rather than split('.'): it only inspects the final path
    # component, so dots in directory names ("../data/notes") or a bare file
    # literally named "pptx" are not mistaken for an extension.
    file_extension = os.path.splitext(file_path)[1][1:].lower()
    if file_extension not in extentions:
        raise ValueError(f"Unsupported file extension: {file_extension}")
    if file_extension in ['pptx', 'ppt']:
        return pptx_parser(file_path)
    # Reached only when the caller allowed an extension that has no parser.
    raise ValueError(f"Unsupported file extension: {file_extension}")

In [14]:
# Sample deck; the path is relative to the notebook's working directory.
file_path = "../data/Introduction to generative AI.pptx"
# `documents` is reused by the chunking cell further down.
documents = parse_file(file_path)
# Print every parsed document in full (this produces a long output).
for doc in documents:
    print(doc)

page_content='Introduction to Generative AI



Khaoula ALLAK

GDG Mentor



Table of Contents

01

What is Generative AI ?

02

Fundamentals of Large Language Models 

03

How to customize the LLM  ? 

04

Practice



01

What is Generative AI ?



Evolution of AI 

What matters

 to us today !



Evolution of AI Use Cases

Predictive AI

Generative AI

Multimodal

Generative AI

Text, Image & Code Generation

Text & Code Rewriting & Formatting

Summarization

Extractive Q&A

Image & Video Descriptions

Regression & Classification

Forecasting

Sentiment Analysis

Entity Extraction

Object Detection

Natural Image Understanding 

Spatial Reasoning and Logic

Mathematical Reasoning in Visual Contexts

Video Question Answering

Automatic Speech Recognition & Translation



02

Fundamental of LLMs



What is LLM? 

An LLM is a computer program that has been fed enough examples to able to recognize and interpret human language or other types of complex data.

LLM are very large models that

In [23]:
# chunking the documents
from langchain.text_splitter import RecursiveCharacterTextSplitter
def chunk_documents(documents, chunk_size=1000, chunk_overlap=200):
    """
    Break documents into smaller pieces with RecursiveCharacterTextSplitter.

    Args:
        documents (list): Documents to split.
        chunk_size (int): Maximum chunk length, measured with ``len``.
        chunk_overlap (int): Amount of overlap between neighbouring chunks,
            measured the same way.

    Returns:
        list: The result of the splitter's ``split_documents`` call.
    """
    # Collect the splitter settings first so they are easy to inspect.
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
        "length_function": len,
    }
    splitter = RecursiveCharacterTextSplitter(**splitter_options)
    chunks = splitter.split_documents(documents)
    return chunks

# Chunk the parsed documents using chunk_documents' default size and overlap.
chunked_documents = chunk_documents(documents)
# Bare last expression: the notebook displays the full list of chunks.
chunked_documents

[Document(metadata={'source': '../data/Introduction to generative AI.pptx'}, page_content='Introduction to Generative AI\n\n\n\nKhaoula ALLAK\n\nGDG Mentor\n\n\n\nTable of Contents\n\n01\n\nWhat is Generative AI ?\n\n02\n\nFundamentals of Large Language Models \n\n03\n\nHow to customize the LLM  ? \n\n04\n\nPractice\n\n\n\n01\n\nWhat is Generative AI ?\n\n\n\nEvolution of AI \n\nWhat matters\n\n to us today !\n\n\n\nEvolution of AI Use Cases\n\nPredictive AI\n\nGenerative AI\n\nMultimodal\n\nGenerative AI\n\nText, Image & Code Generation\n\nText & Code Rewriting & Formatting\n\nSummarization\n\nExtractive Q&A\n\nImage & Video Descriptions\n\nRegression & Classification\n\nForecasting\n\nSentiment Analysis\n\nEntity Extraction\n\nObject Detection\n\nNatural Image Understanding \n\nSpatial Reasoning and Logic\n\nMathematical Reasoning in Visual Contexts\n\nVideo Question Answering\n\nAutomatic Speech Recognition & Translation\n\n\n\n02\n\nFundamental of LLMs\n\n\n\nWhat is LLM? \n\nAn LL

In [34]:
# indexing the documents using QDrant
from langchain_qdrant import QdrantVectorStore
from langchain_nvidia_ai_endpoints import NVIDIAEmbeddings
import os, dotenv
from datetime import datetime
dotenv.load_dotenv()

# Qdrant endpoint; overridable through the QDRANT_URL environment variable.
QDRANT_URL = os.getenv("QDRANT_URL", "http://localhost:6333")
# Embedding model name handed to NVIDIAEmbeddings by the helpers below.
EMBEDDING_MODEL_NAME = "nv-embed-v1"

def index_documents(documents, collection_name=None, qdrant_url=QDRANT_URL, embedding_model_name=EMBEDDING_MODEL_NAME):
    """
    Indexes documents using QDrant vector store.

    Args:
        documents (list): List of documents to be indexed.
        collection_name (str | None): Name of the QDrant collection. When
            omitted (None), a name of the form ``pptx_collection<unix-seconds>``
            is generated at call time.
        qdrant_url (str): URL of the QDrant instance.
        embedding_model_name (str): Name of the embedding model to use.

    Returns:
        The vector store returned by ``QdrantVectorStore.from_documents``.
    """
    # Build the timestamped default here rather than in the signature: a
    # default expression is evaluated once, when the function is defined, so
    # every default call would otherwise reuse one stale collection name.
    if collection_name is None:
        collection_name = "pptx_collection" + str(int(datetime.now().timestamp()))
    # NOTE(review): confirm that `model_name` is the keyword NVIDIAEmbeddings
    # expects for selecting the model; this cannot be verified from this notebook.
    vectorstore = QdrantVectorStore.from_documents(
        documents,
        embedding=NVIDIAEmbeddings(model_name=embedding_model_name, nvidia_api_key=os.getenv("NVIDIA_API_KEY")),
        collection_name=collection_name,
        url=qdrant_url,
        prefer_grpc=True,
    )
    return vectorstore

# get vectorstore
def get_vectorstore(collection_name="pptx_collection", qdrant_url=QDRANT_URL, embedding_model_name=EMBEDDING_MODEL_NAME):
    """
    Open an already-existing QDrant collection as a vector store.

    Args:
        collection_name (str): Name of the collection to open.
        qdrant_url (str): URL of the QDrant instance.
        embedding_model_name (str): Model name passed to NVIDIAEmbeddings.

    Returns:
        The vector store returned by
        ``QdrantVectorStore.from_existing_collection``.
    """
    # Build the embedding client first, then attach it to the collection.
    embedder = NVIDIAEmbeddings(model_name=embedding_model_name)
    store = QdrantVectorStore.from_existing_collection(
        embedding=embedder,
        url=qdrant_url,
        collection_name=collection_name,
    )
    return store

In [35]:
# Example usage: index the chunks into an explicitly named collection ("genai")
# rather than relying on index_documents' default collection name.
# NOTE(review): re-running this cell presumably writes the same chunks into
# "genai" again — confirm whether duplicates are acceptable.
vectorstore = index_documents(chunked_documents, collection_name="genai")
print(f"Indexed {len(chunked_documents)} documents into QDrant collection '{vectorstore.collection_name}'.")

  client = QdrantClient(**client_options)


Indexed 10 documents into QDrant collection 'genai'.


In [36]:
# Re-open the "genai" collection by name; this rebinds `vectorstore` to the
# store returned by get_vectorstore.
vectorstore = get_vectorstore(collection_name="genai")
print(f"Retrieved vectorstore with collection name: {vectorstore.collection_name}")

  client = QdrantClient(


Retrieved vectorstore with collection name: genai


In [37]:
# =============================================================================
# Multi-Agent Lab Generation Workflow
#
# Roles:
#  - planner_agent: decide between a QCM (multiple-choice quiz, i.e. MCQ) and a coding exercise, and pick the difficulty level
#  - retriever_agent: split documents, embed chunks, retrieve relevant ones
#  - qcm_generator_agent: generate MCQ questions/options/correct_answer
#  - code_generator_agent: produce coding exercise description and stub
#  - test_generator_agent: create unit tests as input/output pairs
#  - executor_agent: run the code against tests and capture results
#  - evaluator_agent: check test outcomes; if failure, trigger refinement loop
#
# Workflow:
#  1. load_documents(paths: List[str]) -> raw_texts
#  2. chunk_texts(raw_texts) -> chunks
#  3. embed_and_store(chunks) in vector DB
#  4. planner = planner_agent(user_query, metadata)
#     if planner.task == "qcm":
#         chunks = retriever_agent(planner.topic)
#         qcms = qcm_generator_agent(chunks, planner.difficulty)
#         evaluator_agent.validate_qcm(qcms)
#     elif planner.task == "code":
#         chunks = retriever_agent(planner.topic)
#         stub = code_generator_agent(chunks, planner.difficulty)
#         tests = test_generator_agent(stub, chunks)
#         result = executor_agent.run_tests(stub, tests)
#         evaluator_agent.loop_until_pass(stub, tests, result)
#
# Each agent should be implemented as a separate function or class method.
# Use LangChain + LangGraph for orchestration; vector DB for chunk retrieval;
# code sandbox for execution.
#
# Convention:
#  - Clear function signatures for each agent
#  - Use meaningful names and type hints
#  - Keep each agent focused on its responsibility
#
# Goals:
#  - Modular, testable, and easy-to-debug pipeline
#  - Support iterative refinement via evaluator loops
# =============================================================================

In [None]:
def generate_lab(user_query: str, task: str, difficulty: str, metadata: dict):
    """
    Entry point for lab generation (placeholder, not implemented yet).

    Args:
        user_query (str): Presumably the user's request — TODO confirm once implemented.
        task (str): Presumably "qcm" or "code", following the workflow notes
            in this notebook — TODO confirm.
        difficulty (str): Presumably the requested difficulty level — TODO confirm.
        metadata (dict): Additional context; schema not defined yet.

    Returns:
        None: The body is still a stub and performs no work.
    """
    # Stub: explicit None until the agent workflow is wired in.
    return None