# In [3]:
from pptx import Presentation
from pptx.util import Inches, Pt

# --------------------------
# Section 1 – Introduction and Overview (10 slides)
# --------------------------
# Slide data for Section 1. Every entry is a dict with exactly two string
# keys, "title" and "content"; the first entry has empty content.
# Multi-line content is written as adjacent string literals split at each
# "\n" so one bullet sits on one source line; the resulting strings are
# unchanged.
section1 = [
    {"title": heading, "content": body}
    for heading, body in (
        ("Generative AI Training: From LLM Fundamentals to Advanced Transformer Architectures", ""),
        ("Welcome",
         "Welcome to our comprehensive training on Generative AI.\n"
         "- Understand the core concepts of Large Language Models (LLMs)\n"
         "- Explore the inner workings of transformer architectures\n"
         "- Learn fine tuning, optimization, and deployment techniques"),
        ("About Us",
         "Quickscale AI - Industrializing ML Projects\n"
         "- Software Development\n"
         "- Consulting\n"
         "- Training"),
        ("Training Objectives",
         "- Demystify LLM fundamentals\n"
         "- Explain transformer architecture clearly\n"
         "- Equip you with fine tuning and optimization methods\n"
         "- Prepare you for production-grade deployments"),
        ("Who Should Attend?",
         "- Data Scientists & ML Engineers\n"
         "- AI Researchers & Practitioners\n"
         "- Technical Managers & Architects"),
        ("Agenda Overview",
         "1. LLM Fundamentals and Prompt Engineering\n"
         "2. Tokenization, Embeddings, and Transformer Blocks\n"
         "3. Fine Tuning and Low-Rank Adaptation (LoRA)\n"
         "4. Training Optimization & Numerical Precision\n"
         "5. Model Deployment & Inference Optimization\n"
         "6. Advanced Topics & Model Evaluation\n"
         "7. Conclusion & Future Perspectives"),
        ("Why Generative AI?",
         "- Rapid evolution of language models\n"
         "- Broad industrial applications\n"
         "- Transforming data interaction and automation"),
        ("Learning Outcomes",
         "By the end of this training you will be able to:\n"
         "- Explain how LLMs work internally\n"
         "- Design and fine tune LLM-based systems\n"
         "- Optimize models for production environments"),
        ("Training Format",
         "- A mix of theory, diagrams, and practical examples\n"
         "- Hands-on exercises and case studies\n"
         "- Integrated Q&A sessions throughout"),
        ("Let’s Begin!",
         "Prepare to dive deep into the world of Generative AI.\n"
         "- No prior deep expertise required\n"
         "- Bring your curiosity and questions!"),
    )
]

# --------------------------
# Section 2 – LLM Fundamentals and Prompt Engineering (30 slides: slides 11-40)
# --------------------------
# Slide data for Section 2. Every entry is a dict with exactly two string
# keys, "title" and "content"; line breaks inside content are real "\n"
# characters.
section2 = [
    {"title": "What is an LLM?", "content": "A Large Language Model is a neural network trained on vast text corpora to predict the next token in a sequence."},
    {"title": "LLMs at a Glance", "content": "- Trained on trillions of tokens\n- Statistical models - not \"thinking\" machines\n- Power applications from chatbots to code assistants"},
    {"title": "LLM Limitations", "content": "- Do not \"think\" like humans\n- Prone to hallucinations and factual errors\n- Rely solely on learned patterns from data"},
    {"title": "Core Terminology", "content": "- Token: A subword unit\n- Prompt: Input instruction for the model\n- Completion: Model-generated text\n- Sampling: Process of choosing the next token"},
    {"title": "Base LLM vs. Instruct LLM", "content": "- Base LLM: Trained to predict text\n- Instruct LLM: Fine tuned to follow instructions and answer queries"},
    {"title": "The Text Generation Process", "content": "Text -> Tokenization -> Neural Network -> Sampling -> Completion"},
    {"title": "How Tokenization Works", "content": "Converts text into tokens via a lookup table.\nTokens may be subwords, not necessarily whole words."},
    {"title": "The Role of the Tokenizer", "content": "- Uses methods like Byte-Pair Encoding (BPE)\n- Ensures a consistent mapping during training and inference"},
    {"title": "Introduction to Prompt Engineering", "content": "Write effective prompts as if drafting a detailed email.\nClear instructions lead to better completions."},
    {"title": "Prompt Engineering Best Practices", "content": "- Use role priming: \"You are an expert in...\"\n- Provide context and desired output format\n- Keep instructions clear and concise"},
    # The prompt's two sentences are separated by a real newline. A doubled
    # backslash here would put the two characters "\" and "n" on the slide.
    {"title": "Example Prompt", "content": "\"You are a senior software engineer who values simplicity.\nPlease summarize the following article using its full title without referring to it as 'this article'.\""},
    {"title": "How Sampling Works", "content": "- The neural network outputs a probability distribution over tokens\n- Temperature, Top-K, and Top-P parameters adjust randomness"},
    {"title": "Temperature and Creativity", "content": "- Lower temperature = more deterministic outputs\n- Higher temperature = more creative, varied responses"},
    {"title": "Top-K Sampling Explained", "content": "- Select only the K most probable tokens\n- Rescale probabilities to sum to 1\n- Sample from this constrained set"},
    {"title": "Top-P (Nucleus) Sampling", "content": "- Accumulate probabilities until a threshold is met\n- Dynamically select tokens to balance coherence and diversity"},
    {"title": "LLMs in Decision-Making", "content": "Use LLMs as evaluators (\"LLM as Judge\") to assess tone, clarity, and technical accuracy."},
    {"title": "Automated Evaluation Examples", "content": "- Check for culturally neutral language\n- Verify technical precision\n- Evaluate emotional tone"},
    {"title": "RAG: Retrieval-Augmented Generation", "content": "- Retrieve relevant documents and inject into prompts\n- Enhances answer accuracy with external data"},
    {"title": "Productivity with LLMs", "content": "- Generate boilerplate code and regex patterns\n- Draft emails, summaries, and test cases automatically\n- Accelerate your workflow by delegating repetitive tasks"},
    {"title": "Asynchronous Prompting Tip", "content": "Fire your prompt and continue working - don't wait for the model to \"type.\""},
    {"title": "Mindset for Success", "content": "Treat every \"failed\" interaction as a stepping stone toward breakthrough performance."},
    {"title": "Recap: LLM Fundamentals", "content": "- Definition, limitations, and core components\n- How prompt engineering shapes outputs\n- Sampling strategies to balance accuracy and creativity"},
    {"title": "Interactive Q&A", "content": "[Leave space for audience questions about LLM basics]"},
    {"title": "Transition to Deep Dive", "content": "Next, we explore how text is transformed into meaningful numerical representations."},
    {"title": "Why Internal Workings Matter", "content": "Understanding internal processes enables better fine tuning and error mitigation."},
    {"title": "Overview of What's Next", "content": "- Token Embeddings\n- Positional Encodings\n- Transformer Blocks and Self-Attention"},
    {"title": "Summary of Section 2", "content": "Grasp the basics of LLMs and effective prompting. Prepare for a deep dive into model internals."},
    {"title": "Quick Quiz", "content": "Q: What is the difference between a Base LLM and an Instruct LLM?\n[Leave space for audience reflection]"},
    {"title": "Key Takeaway", "content": "A well-crafted prompt is as important as the underlying model's capability."},
    {"title": "Section Transition", "content": "Now, let’s uncover how text is transformed into meaningful numerical representations."},
]

# --------------------------
# Section 3 – Tokenization and Embeddings (30 slides: slides 41-70)
# --------------------------
# Slide data for Section 3. Every entry is a dict with exactly two string
# keys, "title" and "content". Multi-line content is written as adjacent
# string literals split at each "\n"; the resulting strings are unchanged.
section3 = [
    {"title": heading, "content": body}
    for heading, body in (
        ("What Are Embeddings?", "Embeddings are numerical vectors that capture the semantic meaning of tokens."),
        ("How Tokens Become Numbers", "Text is split into tokens, then mapped to vectors using a lookup table."),
        ("Token Embedding Layer", "Each token is assigned a unique vector learned during training via gradient descent."),
        ("The Lookup Table",
         "- A matrix with one row per token\n"
         "- The embedding dimensions are chosen by the model designer"),
        ("Example: The \"age\" Token", "The token \"age\" receives an embedding that reflects its various contextual uses."),
        ("Uncontextualized Embeddings",
         "- These embeddings are fixed vectors without contextual information\n"
         "- Later layers add contextualization"),
        ("Byte-Pair Encoding (BPE) Overview", "A method to split text into subword units by iteratively merging frequent byte pairs."),
        ("How BPE Works",
         "- Start with individual bytes\n"
         "- Merge the most frequent pairs\n"
         "- Continue until the vocabulary size is reached"),
        ("Benefits of BPE",
         "- Avoids out-of-vocabulary tokens\n"
         "- Provides flexible granularity: subwords or word parts"),
        ("Recap: Tokenization Process", "Text -> Tokenization (BPE) -> Token IDs -> Lookup in Embedding Table"),
        ("The Need for Positional Information", "Tokens have no inherent order; we add positional embeddings to encode sequence order."),
        ("What Are Positional Embeddings?", "Numerical vectors representing the position of a token within a sequence."),
        ("How Positional Embeddings Work",
         "- A second lookup table indexed by token position\n"
         "- Added element-wise to token embeddings"),
        ("Combining Embeddings", "Final input representation = Token Embedding + Positional Embedding"),
        ("Visual Diagram (Text Version)", "[Diagram: 'Token Embedding' + 'Positional Embedding' = 'Input Vector']"),
        ("Example of Positional Encoding",
         "Position 1: [0.12, 0.85, ...]\n"
         "Position 2: [0.34, 0.67, ...]\n"
         "Their sum encodes token order."),
        ("Maximum Context Length", "The positional embedding table is defined up to a maximum sequence length."),
        ("Handling Longer Sequences", "Tokens beyond the maximum length are truncated or processed with special rules."),
        ("Recap: Embeddings",
         "- Token embeddings convert text into vectors\n"
         "- Positional embeddings add order information\n"
         "- Together, they form the input for the transformer"),
        ("Interactive Example", "[Space for an example showing tokenization of a sample sentence]"),
        ("Key Takeaway", "Understanding embeddings is fundamental to grasping transformer operations."),
        ("Transition to Transformer Blocks", "Next, we explore how embeddings are processed inside a transformer."),
        ("Summary of Section 3",
         "- Creation of token embeddings\n"
         "- Importance of positional encoding\n"
         "- Foundation for subsequent transformer operations"),
        ("Quick Check",
         "Q: Why do we add positional embeddings?\n"
         "[Leave space for audience response]"),
        ("Essential Concept", "Embeddings capture meaning; positional encodings capture order."),
        ("Moving Forward", "Let’s now dive into transformer blocks and self-attention."),
        ("Section Transition Note", "From static embeddings to dynamic, contextual representations."),
        ("Recap Reminder", "Embeddings set the stage for the powerful attention mechanisms ahead."),
        ("Audience Reflection",
         "How might changing the embedding dimensions affect model performance?\n"
         "[Space for thought]"),
        ("End of Section 3", "Ready to explore transformer blocks? Let’s move on!"),
    )
]

# --------------------------
# Section 4 – Transformer Architecture and Self-Attention (planned 50 slides: slides 71-120; the list below currently holds 45 entries)
# --------------------------
# Slide data for Section 4. This list holds 45 entries, each a dict with
# exactly two string keys, "title" and "content". Multi-line content is
# written as adjacent string literals split at each "\n"; the resulting
# strings are unchanged.
section4 = [
    {"title": heading, "content": body}
    for heading, body in (
        ("Introduction to Transformer Blocks", "Transformer blocks are the core building units of modern LLMs."),
        ("Components of a Transformer Block",
         "- Self-Attention Mechanism\n"
         "- Residual (Skip) Connections and Layer Normalization\n"
         "- Feed-Forward Neural Network (MLP)"),
        ("What is Self-Attention?", "A mechanism that allows each token to attend to all previous tokens in a sequence."),
        ("Self-Attention Process Overview",
         "- Compute Query (Q), Key (K), and Value (V) matrices\n"
         "- Calculate attention scores using Q and K\n"
         "- Apply softmax normalization\n"
         "- Weight V to produce the output"),
        ("Query, Key, Value Definitions",
         "- Q: The token's 'request' for information\n"
         "- K: The keys used to match queries\n"
         "- V: The content to be aggregated"),
        ("Calculation of Attention Scores",
         "Attention Score = Softmax((Q x K^T) / sqrt(d_k))\n"
         "- d_k is the dimension of the key vectors"),
        ("Masking in Self-Attention", "For autoregressive models, tokens can only attend to previous tokens."),
        ("Why Masking?",
         "- Ensures that tokens do not 'see' future tokens\n"
         "- Preserves causality in text generation"),
        ("Multiplying by the Value Matrix",
         "Output = Attention Scores x V\n"
         "- Aggregates information weighted by the attention scores"),
        ("Visual Diagram (Text Version)", "[Diagram: Input -> Q, K, V -> Attention Scores -> Weighted Sum = Output]"),
        ("Multi-Head Attention Concept",
         "- Split Q, K, V into multiple 'heads'\n"
         "- Perform self-attention in parallel\n"
         "- Concatenate results for a richer representation"),
        ("Benefits of Multi-Head Attention",
         "- Captures diverse patterns in data\n"
         "- Enhances model capacity without excessive cost"),
        ("Residual (Skip) Connections",
         "- Add the original input to the output of the attention layer\n"
         "- Helps preserve gradient flow and information"),
        ("Layer Normalization", "Applied after attention and feed-forward layers for training stability."),
        ("Feed-Forward Neural Network (MLP)",
         "- Two linear transformations with an activation (ReLU/GELU) in between\n"
         "- Introduces non-linearity and further transforms features"),
        ("Transformer Block Diagram (Text Version)", "[Diagram: Input -> Self-Attention -> Add & Norm -> Feed-Forward -> Add & Norm -> Output]"),
        ("Detailed Steps of a Transformer Block",
         "1. Input embeddings enter the block\n"
         "2. Self-attention computes contextual representations\n"
         "3. Residual connection adds the original input\n"
         "4. Normalization stabilizes the output\n"
         "5. MLP refines the representation\n"
         "6. Second residual and normalization yield the final output"),
        ("Recap: Transformer Block Components",
         "- Self-Attention\n"
         "- Multi-Head Mechanism\n"
         "- Residual & Norm Layers\n"
         "- Feed-Forward MLP"),
        ("Example Calculation: Q, K, V",
         "Assume input X; then:\n"
         "Q = X x W(Q), K = X x W(K), V = X x W(V)"),
        ("Softmax Normalization Example", "Transform raw scores into probabilities that sum to 1 per token."),
        ("Importance of the Scaling Factor", "Dividing by sqrt(d_k) prevents overly large dot product values."),
        ("Attention Masking Example", "Display a matrix where future token positions are masked (set to -infinity before softmax)."),
        ("Multi-Head Attention in Practice",
         "Split and compute heads separately, then recombine:\n"
         "Output = Concat(head1, head2, ..., head_n) x W(O)"),
        ("Recap: Self-Attention",
         "- Computes contextualized representations using Q, K, V\n"
         "- Applies softmax and masking for autoregressive modeling"),
        ("Interactive Exercise", "[Space for a short exercise: 'Calculate a simple attention score using dummy vectors.']"),
        ("Summary of Transformer Architecture", "Stacking multiple transformer blocks builds deep, contextual representations."),
        ("How Many Transformer Blocks?",
         "- Models like GPT-3 use dozens of blocks\n"
         "- Depth influences performance and resource requirements"),
        ("Final Output: Unembedding Layer",
         "Transforms the final hidden state into logits over the vocabulary.\n"
         "- Uses a learned matrix to map features back to tokens."),
        ("Recap: End-to-End Flow", "Input Embeddings -> Transformer Blocks -> Unembedding -> Token Prediction"),
        ("Key Takeaway on Architecture", "Understanding transformers is key to both generation and fine tuning."),
        ("Transition to Fine Tuning", "Now that we've examined the internals, we move on to adapting models for specific tasks."),
        ("Quick Quiz",
         "Q: What is the purpose of residual connections?\n"
         "[Leave space for audience reflection]"),
        ("Essential Concept", "Multi-head attention captures various patterns simultaneously, boosting model capacity."),
        ("Review of Section 4", "Recap: Transformer block components, self-attention details, and layer integration."),
        ("Section Transition Note", "Next, we discuss fine tuning and instruction adaptation for task-specific performance."),
        ("Practical Tip", "Focus on the interplay of Q, K, V and the impact of masking."),
        ("Interactive Reflection", "Consider how altering the number of heads might affect output diversity."),
        ("Recap Reminder", "A strong grasp of transformer architecture supports better fine tuning and troubleshooting."),
        ("Audience Q&A", "[Space reserved for questions on transformer internals]"),
        ("End of Section 4", "We now have a solid understanding of transformer internals."),
        ("Transition Slide", "Let's move to fine tuning and instruction adaptation for specific tasks."),
        ("Section Transition Visual", "[Visual summary diagram of transformer internals leading into fine tuning]"),
        ("Key Insight", "Mastering complex internals empowers practical model customization."),
        ("Final Thought for This Section", "Focus on the mechanics now; application strategies follow next."),
        ("Section Divider", "-----------------------------"),
    )
]
# --------------------------
# Section 5 – Fine Tuning and Prompt Adaptation (40 slides: slides 121-160)
# --------------------------
# Slide data for Section 5. Every entry is a dict with exactly two string
# keys, "title" and "content". Multi-line content is written as adjacent
# string literals split at each "\n"; the resulting strings are unchanged.
section5 = [
    {"title": heading, "content": body}
    for heading, body in (
        ("Introduction to Fine Tuning", "Fine tuning adapts a pre-trained LLM to a specific task or domain."),
        ("Why Fine Tune?",
         "- Improve performance on niche tasks\n"
         "- Adapt to new data or formats\n"
         "- Align the model with specific values and guidelines"),
        ("Fine Tuning Approaches",
         "Option A: Continue pre-training on domain-specific raw texts\n"
         "Option B: Instruction fine tuning using Q&A pairs"),
        ("Example Fine Tuning Prompt",
         "\"[INST] What is the sum of two plus two? [/INST]\n"
         "Answer: Four\"\n"
         "- Embeds task-specific behavior into model weights."),
        ("Role of Prompt Engineering in Fine Tuning", "Combine robust prompts with weight adaptation to achieve consistent outputs."),
        ("Fine Tuning Process Overview",
         "1. Prepare a curated dataset\n"
         "2. Choose a fine tuning strategy\n"
         "3. Adjust hyperparameters (batch size, learning rate)\n"
         "4. Train and evaluate performance"),
        ("Benefits of Fine Tuning",
         "- More reliable responses\n"
         "- Reduced need for lengthy prompts\n"
         "- Faster inference on specific tasks"),
        ("Instruction Fine Tuning", "Use example instruction-response pairs to teach the model desired behaviors."),
        ("Data Collection for Fine Tuning", "Build a high-quality dataset with clear instruction-response pairs."),
        ("Monitoring Fine Tuning", "Track metrics such as loss and perplexity during training."),
        ("Example: Fine Tuning for Code Generation", "A dataset of programming tasks with correct code outputs improves model coding skills."),
        ("Transition to Low-Rank Adaptation", "An efficient alternative to full fine tuning is Low-Rank Adaptation (LoRA)."),
        ("Introduction to LoRA", "LoRA adjusts only a small set of parameters using low-rank matrices while freezing base weights."),
        ("Core Principle of LoRA",
         "Approximate a full weight matrix with two smaller matrices:\n"
         "W ≈ W0 + ΔW, where ΔW = A x B (A in R^(d x r), B in R^(r x d))"),
        ("Advantages of LoRA",
         "- Fewer trainable parameters\n"
         "- Faster fine tuning\n"
         "- Merged adapters yield no inference penalty"),
        ("Visual Diagram: LoRA", "[Diagram: Base weight matrix (frozen) + low-rank update layers]"),
        ("Comparison: Full Fine Tuning vs. LoRA",
         "- Full fine tuning: updates all parameters\n"
         "- LoRA: updates only low-rank matrices\n"
         "- Trade-off between speed and performance"),
        ("Implementation Example (Text)", "\"In our LoRA implementation, base model weights remain frozen while low-rank adapters are inserted into transformer layers.\""),
        ("Fine Tuning Hyperparameters",
         "Key settings:\n"
         "- Batch size\n"
         "- Learning rate and scheduler\n"
         "- Number of training epochs"),
        ("Best Practices for Fine Tuning",
         "- Use a validation set to avoid overfitting\n"
         "- Monitor performance metrics closely\n"
         "- Iteratively adjust hyperparameters"),
        ("Recap: Fine Tuning and LoRA",
         "- Fine tuning adapts models to specific tasks\n"
         "- LoRA offers an efficient, low-parameter alternative"),
        ("Interactive Exercise", "[Space for: 'Draft a prompt for a fine tuning dataset in your domain.']"),
        ("Key Takeaway", "A well-tuned model delivers superior performance with fewer resources."),
        ("Transition to Training Optimization", "Next, we cover techniques for efficient training."),
        ("Summary of Section 5", "Review: Fine tuning objectives, approaches, and LoRA benefits."),
        ("Common Pitfalls in Fine Tuning",
         "- Overfitting\n"
         "- Data quality issues\n"
         "- Excessive parameter updates"),
        ("Mitigation Strategies",
         "- Regularization\n"
         "- Early stopping\n"
         "- Cross-validation"),
        ("Case Study: Fine Tuning Success Story", "A brief example showcasing improved performance after fine tuning."),
        ("Future Trends in Fine Tuning", "Emerging techniques and research directions in model adaptation."),
        ("Q&A on Fine Tuning", "[Space for interactive questions from the audience]"),
        ("Resource Links for Fine Tuning", "- Key papers, GitHub repos, and tutorials for further reading."),
        ("Checklist for Fine Tuning Readiness",
         "- Data quality verification\n"
         "- Hyperparameter tuning plan\n"
         "- Validation strategy"),
        ("Summary: Fine Tuning Recap", "Reiterate the importance of targeted adaptation."),
        ("Interactive Reflection", "[Space for audience to share lessons learned from fine tuning experiments]"),
        ("Transition Note", "Moving on to Training Optimization and Numerical Precision."),
        ("Quick Tip", "Document your fine tuning experiments meticulously."),
        ("Best Practice Reminder", "Always validate on a held-out dataset."),
        ("Closing Thought on Fine Tuning", "Iterative improvement is key to effective model adaptation."),
        ("Final Q&A on Fine Tuning", "[Space reserved for audience questions]"),
        ("End of Section 5", "Transitioning now to Training Optimization."),
    )
]
# --------------------------
# Section 6 – RAG: Concept and Pipeline Stages (50 slides: slides 161-210)
# --------------------------
# Slide data for Section 6. Every entry is a dict with exactly two string
# keys, "title" and "content". Multi-line content is written as adjacent
# string literals split at each "\n"; the resulting strings are unchanged.
section6 = [
    {"title": heading, "content": body}
    for heading, body in (
        ("RAG Overview", "Retrieval-Augmented Generation (RAG) enhances LLM outputs by incorporating external data."),
        ("Context & Motivation", "Standalone LLMs can hallucinate and have outdated knowledge. RAG adds dynamic context."),
        ("Advantages of RAG",
         "- Dynamic updates\n"
         "- Increased reliability\n"
         "- Better grounding of responses"),
        ("RAG Pipeline Stages", "RAG consists of Ingestion, Indexing, Retrieval, and Generation steps."),
        ("Ingestion: Overview", "Ingestion involves incorporating and structuring textual data from various sources."),
        ("Ingestion: Data Sources", "Data can come from PDFs, databases, web documents, and more."),
        ("Ingestion: Cleaning and Normalization", "Clean and normalize text to ensure consistency."),
        ("Ingestion: Chunking", "Divide large documents into manageable text chunks."),
        ("Indexing: Overview", "Indexing transforms ingested text into a vector-based searchable format."),
        ("Indexing: Vector Databases", "Common tools include FAISS, Milvus, Pinecone, Elastic, and Qdrant."),
        ("Indexing: Embedding Dimensions", "Choosing the right embedding dimension is crucial for accurate retrieval."),
        ("Indexing: Index Structures", "Techniques like IVF and HNSW optimize search speed and accuracy."),
        ("Retrieval: Overview", "Retrieval searches the vector index for relevant text chunks using semantic similarity."),
        ("Retrieval: Top-K Search", "Retrieve the top-k most similar text chunks based on similarity scores."),
        ("Retrieval: Similarity Scoring", "Use cosine similarity or Euclidean distance to rank relevance."),
        ("Retrieval: Re-ranking", "Optionally re-rank results to further improve precision."),
        ("Generation: Overview", "Generation injects retrieved context into the LLM prompt for improved outputs."),
        ("Generation: Context Injection", "Integrate external text into prompts to ground the model's responses."),
        ("Generation: Prompt Engineering", "Apply techniques such as stuffing, chain-of-thought, and refinement."),
        ("Overall RAG Architecture", "Combines an LLM, a vector database, and an orchestrator layer."),
        ("Retriever + Reader Pattern", "Commonly, a retriever finds context and a reader generates the final answer."),
        ("Schema Overview", "A well-designed schema shows data flow from ingestion to generation."),
        ("Dynamic Updates", "RAG allows updates to external context without re-training the LLM."),
        ("Improved Reliability", "Grounding responses in external data reduces hallucinations."),
        ("Better Grounding", "External context helps generate fact-based answers."),
        ("System Orchestration", "An orchestrator manages retrieval, selection, and prompt injection."),
        ("Integration Challenges", "Integrating a vector database with an LLM requires careful engineering."),
        ("Data Privacy Considerations", "Ensure that external data is handled securely and confidentially."),
        ("Cost Considerations", "Factor in hosting costs for the vector database and LLM API calls."),
        ("Maintenance in RAG Systems", "Regularly update embeddings and data sources to maintain accuracy."),
        ("RAG vs. Standalone LLM", "RAG supplements LLMs with dynamic context, overcoming inherent limitations."),
        ("Use Case: Chatbots", "Chatbots can use RAG to access internal knowledge bases for accurate responses."),
        ("Use Case: Intelligent Search", "Enhance search engines with context-aware retrieval for better results."),
        ("Use Case: Customer Support", "Deploy RAG to create dynamic FAQs and support systems."),
        ("Use Case: Expert Systems", "Medical and legal systems can benefit from RAG's up-to-date information."),
        ("When to Use RAG", "Adopt RAG when external, dynamic context is required to supplement LLMs."),
        ("Limitations of RAG",
         "- Cost: Hosting vector databases and LLM API calls\n"
         "- Maintenance: Regular updates needed\n"
         "- Confidentiality: Handle sensitive data with care"),
        ("Best Practices: Source Attribution", "Always attribute sources to ensure transparency in generated responses."),
        ("Best Practices: Post-Processing", "Incorporate human validation or additional quality checks if needed."),
        ("Best Practices: Quality Monitoring", "Set up feedback loops and evaluation metrics to continuously monitor output quality."),
        ("RAG Pipeline Recap", "Ingestion, Indexing, Retrieval, and Generation form the core of RAG systems."),
        ("Hands-On Overview", "Plan a session to build your first RAG system using a Wikipedia dataset."),
        ("Hands-On: Ingestion", "Learn to clean, normalize, and chunk text data from diverse sources."),
        ("Hands-On: Indexing", "Build a vector index using FAISS or similar tools."),
        ("Hands-On: Retrieval", "Implement semantic search to retrieve top-k relevant text chunks."),
        ("Hands-On: Generation", "Inject retrieved context into prompts for improved answer generation."),
        ("Packaging RAG Modules", "Modularize ingestion, indexing, retrieval, and generation for reusability."),
        ("Deployment Considerations", "Discuss API and Dockerization techniques for deploying RAG systems."),
        ("RAG Resources",
         "Articles & Blogs: 'Attention is All You Need', 'Language Models are Few-Shot Learners'\n"
         "Research Papers: Latest arXiv publications on RAG\n"
         "Videos: Hugging Face YouTube channel, conference talks"),
        ("RAG Summary and Takeaways", "RAG enhances LLMs by integrating external, dynamic context. It offers increased reliability and better grounding of responses."),
    )
]

# --------------------------
# Section 7 – RAG: Practical Use Cases, Limitations, and Best Practices (30 slides: slides 211-240)
# --------------------------
# Section 7 slide data: 30 (title, content) pairs, each expanded into the
# {"title": ..., "content": ...} dict shape read by the slide-building loop.
section7 = [
    {"title": heading, "content": body}
    for heading, body in (
        ("Why Use RAG?", "RAG addresses standalone LLM limitations by providing dynamic external context."),
        ("Standalone LLM Limitations", "LLMs can produce outdated or incorrect information due to training cutoffs and hallucinations."),
        ("Advantages of RAG Revisited", "- Dynamic updates\n- Increased reliability\n- Better grounding of responses"),
        ("Use Case: Chatbots", "Integrate RAG to provide accurate, context-rich responses from internal knowledge bases."),
        ("Use Case: Intelligent Search", "Enhance search engines with semantic retrieval for context-aware results."),
        ("Use Case: Customer Support", "Deploy RAG to create dynamic FAQs and responsive support systems."),
        ("Use Case: Expert Systems", "Medical and legal applications benefit from up-to-date, specialized data."),
        ("When to Deploy RAG", "Use RAG when LLMs need external, real-time data to supplement their outputs."),
        ("Cost Considerations", "Be mindful of expenses from vector database hosting and LLM API usage."),
        ("Maintenance Challenges", "Regularly update embeddings and verify data source reliability."),
        ("Confidentiality Concerns", "Handle sensitive data with care when integrating external sources."),
        ("Best Practice: Source Attribution", "Always cite data sources to ensure transparency in responses."),
        ("Best Practice: Post-Processing", "Apply human validation or additional quality checks as needed."),
        ("Best Practice: Quality Monitoring", "Implement feedback loops and evaluation metrics to monitor output quality."),
        ("RAG in Production", "Consider integration challenges and orchestration for reliable deployment."),
        ("Case Study: Chatbot Integration", "Example: A chatbot using RAG to access and synthesize internal data."),
        ("Case Study: Intelligent Search Engine", "Example: A search engine that retrieves and merges context from multiple sources."),
        ("Balancing Cost and Performance", "Evaluate trade-offs between operational costs and response quality."),
        ("Regular Updates", "Continuously refresh data sources and embeddings to maintain accuracy."),
        ("Scaling RAG Systems", "Address challenges of large datasets and efficient retrieval."),
        ("Integration with LLMs", "Seamlessly combine LLMs with vector databases and orchestrators."),
        ("System Architecture Overview", "[Diagram: LLM + Vector Database + Orchestrator (Retriever + Reader)]"),
        ("API and Dockerization", "Plan for deployment using APIs and containerization techniques."),
        ("User Feedback Integration", "Leverage user input to continuously refine retrieval and generation."),
        ("Data Freshness", "Ensure your RAG system stays current with the latest information."),
        ("Ethical Considerations", "Evaluate the ethical implications of using external data sources."),
        ("Future Trends in RAG Usage", "Emerging improvements in vector indexing and dynamic context updates."),
        ("Resources for RAG Best Practices", "Refer to articles, research papers, and tutorials on RAG implementations."),
        ("Summary of Practical RAG Use Cases", "RAG is ideal for chatbots, search engines, customer support, and expert systems."),
        ("Key Takeaways on RAG Usage", "RAG enhances LLM reliability through external context but requires careful cost, maintenance, and ethical management."),
    )
]

# --------------------------
# Section 8 – Advanced RAG Architecture and Resources (49 slides defined below; planned as 50: slides 241-290)
# --------------------------
# Section 8 slide data: 49 (title, content) pairs, each expanded into the
# {"title": ..., "content": ...} dict shape read by the slide-building loop.
section8 = [
    {"title": heading, "content": body}
    for heading, body in (
        ("Advanced RAG Architecture Overview", "Deep dive into RAG systems that integrate LLMs with vector databases."),
        ("RAG System Components", "Key modules: Ingestion, Indexing, Retrieval, Generation, and Orchestration."),
        ("Ingestion Module Details", "Techniques for cleaning, normalizing, and chunking diverse textual data."),
        ("Indexing Module Details", "Convert text chunks into vectors using advanced embedding models."),
        ("Indexing Structures", "Utilize IVF, HNSW, and other methods for efficient vector search."),
        ("Retrieval Module Techniques", "Implement semantic search with top-k retrieval and similarity scoring."),
        ("Retrieval: Re-ranking Strategies", "Apply re-ranking to further refine retrieved results."),
        ("Generation Module Integration", "Inject retrieved context into prompts to ground LLM outputs."),
        ("Prompt Engineering for RAG", "Tailor prompts to effectively incorporate external context."),
        ("Overall System Orchestration", "Coordinate all modules for a seamless RAG pipeline."),
        ("Architecture Diagram", "[Schema: LLM + Vector Database + Orchestrator (Retriever + Reader)]"),
        ("Dynamic Context Injection", "Techniques for updating context dynamically without re-training the LLM."),
        ("Handling Data Updates", "Strategies to update indices and embeddings as new data becomes available."),
        ("Scalability Considerations", "Ensure the system can scale with increasing data and user demand."),
        ("Cloud Integration", "Leverage cloud platforms to host vector databases and LLM APIs."),
        ("API Orchestration", "Manage API calls and data flow using orchestration tools."),
        ("Advanced Retrieval Techniques", "Incorporate multi-modal retrieval and enhanced similarity metrics."),
        ("Evaluation Metrics for RAG", "Measure success with accuracy, relevance, and latency metrics."),
        ("Feedback Loops in RAG", "Integrate user feedback to continuously refine the system."),
        ("Post-Processing in RAG", "Apply quality checks and human validation to generated outputs."),
        ("Case Study: Advanced RAG Implementation", "Review a real-world example of an advanced RAG system."),
        ("Hands-On: Building a RAG Pipeline", "Overview of a session to build a RAG system using a Wikipedia dataset."),
        ("Hands-On: Ingestion and Indexing", "Practical steps for ingesting data and building a vector index."),
        ("Hands-On: Retrieval Techniques", "Implement semantic search with top-k retrieval and re-ranking."),
        ("Hands-On: Generation Techniques", "Integrate retrieved context into LLM prompts for enhanced outputs."),
        ("Modular Design in RAG", "Design each module to be reusable and maintainable."),
        ("Schema Design Best Practices", "Create a clear schema to outline the RAG pipeline."),
        ("Orchestration Tools", "Overview of tools such as Airflow or Kubeflow for managing RAG workflows."),
        ("Security in RAG Systems", "Implement measures to secure data and API communications."),
        ("Resource Planning", "Plan for computing resources, storage, and scalability in RAG systems."),
        ("Optimizing RAG Performance", "Techniques to improve retrieval speed and generation quality."),
        ("Integration Challenges", "Discuss common challenges when integrating vector databases with LLMs."),
        ("Maintenance in Advanced RAG", "Regularly update embeddings, indices, and monitor system performance."),
        ("Cost Optimization in RAG", "Balance performance with cost by optimizing API calls and storage."),
        ("Vector Database Tuning", "Adjust parameters in FAISS/Milvus for optimal retrieval."),
        ("Advanced Prompt Engineering", "Use techniques like chain-of-thought and prompt refinement in RAG."),
        ("Evaluation Framework for RAG", "Set up a comprehensive framework to continuously evaluate the system."),
        ("User Experience in RAG", "Ensure the system delivers coherent, contextually accurate responses."),
        ("Case Study: RAG in Expert Systems", "Example: Medical or legal systems using RAG for specialized data."),
        ("RAG in Real-World Deployments", "Discuss success stories and lessons learned from deployed RAG systems."),
        ("Resource Slide: Articles & Blogs", "Refer to 'Attention is All You Need', 'Language Models are Few-Shot Learners', and official blogs from OpenAI, Hugging Face, etc."),
        ("Resource Slide: Research Papers", "List recent arXiv publications on RAG, instruction tuning, and lightweight fine tuning."),
        ("Resource Slide: Videos & Tutorials", "Check out the Hugging Face YouTube channel, conference talks (ACL, NeurIPS), and demos on LangChain."),
        ("Hands-On Session Overview", "Plan for a 1.45-hour session to build your first RAG system on Wikipedia data."),
        ("Hands-On: API and Dockerization", "Plan for a 2-hour session to create an API for your RAG system and deploy it using Docker."),
        ("Future Trends in RAG", "Emerging trends include improved vector indexing and real-time context updates."),
        ("Challenges and Limitations", "Discuss ongoing challenges such as cost, maintenance, and data confidentiality."),
        ("Best Practices Recap", "Review key practices: source attribution, post-processing, and quality monitoring."),
        ("Advanced RAG Recap", "Reinforce the key components and strategies of advanced RAG architecture and resources."),
    )
]

# --------------------------
# Section 9 – Conclusion and Future Perspectives on RAG (10 slides: slides 291-300)
# --------------------------
# Section 9 slide data: 10 (title, content) pairs, each expanded into the
# {"title": ..., "content": ...} dict shape read by the slide-building loop.
section9 = [
    {"title": heading, "content": body}
    for heading, body in (
        ("RAG Overall Training Recap", "Review the key concepts of RAG and its integration with LLMs."),
        ("Key Learnings on RAG", "Understand the RAG pipeline: Ingestion, Indexing, Retrieval, and Generation."),
        ("Advantages of RAG Revisited", "Dynamic context, increased reliability, and better grounding for responses."),
        ("Practical Use Cases of RAG", "Ideal for chatbots, intelligent search, customer support, and expert systems."),
        ("Challenges and Limitations", "Consider cost, maintenance, and confidentiality issues in RAG systems."),
        ("Best Practices for RAG", "Source attribution, post-processing, and continuous quality monitoring are essential."),
        ("Future Directions in RAG", "Emerging improvements in vector indexing, dynamic updates, and orchestration."),
        ("Resources and Further Reading", "Refer to key articles, research papers, and video tutorials for deep dives into RAG."),
        ("Next Steps", "Plan hands-on projects and explore RAG deployment in your organization."),
        ("Thank You and Closing", "Thank you for your participation!\nFor more information, contact Quickscale AI.\nKeep innovating in the field of RAG and Generative AI."),
    )
]


# Concatenate the section lists, in order, into the slide sequence for the deck.
# NOTE(review): only section6-section9 are combined here; confirm whether the
# earlier sections are meant to be included in this output file.
slides_data = section6 + section7 + section8 + section9

# NOTE(review): the deck was planned as 300 slides, but the count is not
# enforced here; check len(slides_data) manually if the total matters.

# Destination of the generated deck (adjust the path as necessary).
OUTPUT_PATH = "/Users/pierre/Documents/Quickscale/Formation_slides/Generative_AI_Training_Quickscale_AI.pptx"

# --------------------------
# Create the presentation and add each slide
# --------------------------
import os

prs = Presentation()

# Layout 1 (Title and Content) is the same for every slide, so it is looked
# up once instead of on each loop iteration.
content_layout = prs.slide_layouts[1]

for slide_data in slides_data:
    slide = prs.slides.add_slide(content_layout)
    slide.shapes.title.text = slide_data["title"]
    # Placeholder 1 receives the slide's body text.
    slide.placeholders[1].text = slide_data["content"]

# Create the target directory first so a missing folder does not abort the
# save after the whole deck has already been built.
os.makedirs(os.path.dirname(OUTPUT_PATH), exist_ok=True)
prs.save(OUTPUT_PATH)
