In [55]:
from langchain_ollama import ChatOllama
from langchain_groq import ChatGroq
from langchain.prompts import ChatPromptTemplate, PromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate
from typing import List, Dict, Any
from typing_extensions import TypedDict
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langgraph.graph import StateGraph, START, END
from IPython.display import Image, display
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_core.output_parsers import PydanticOutputParser
import os
from dotenv import load_dotenv
# from langchain.document_loaders import PyMuPDFLoader
from typing import List, Dict, Any, Optional
import fitz
from pydantic import BaseModel, Field
load_dotenv()

True

In [56]:
# Name the LangSmith project for this notebook session (plain string: there is nothing to interpolate).
os.environ["LANGSMITH_PROJECT"] = "MineD 2025"

In [57]:
# import requests

# API_URL = "https://api-inference.huggingface.co/models/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B"
# headers = {
#     "Authorization": "Bearer <HF_API_TOKEN>",  # token redacted: load it from the environment (e.g. os.environ["HF_API_TOKEN"]) and revoke the key that was committed here
#     "Content-Type": "application/json",
#    "x-wait-for-model": "true"
# }
# data = {
#     "inputs": "Hey, give some idea about creating a podcast from res paper summary "
# }
# response = requests.post(API_URL, headers=headers, json=data)
# print(response.json())

In [58]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Keyword settings for the Gemini chat model that the rest of the notebook uses as `llm`.
_gemini_settings = dict(
    model="gemini-1.5-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)
llm = ChatGoogleGenerativeAI(**_gemini_settings)

In [59]:
# from langchain_huggingface import HuggingFaceEndpoint

# llm = HuggingFaceEndpoint(
#     # repo_id="mistralai/Mixtral-8x7B-Instruct-v0.1",
#     repo_id="meta-llama/Meta-Llama-3-8B-Instruct",
#     task="text-generation", 
#     do_sample=False,
# )


In [60]:
# llm = ChatGroq(model="gemma2-9b-it")

In [61]:
# Pydantic model for the metadata and summaries that the LLM extracts from a paper.
# Only `conclusion` is currently active; the other fields are kept below, commented out.
class ResPaperText(BaseModel):
    # authors: str = Field(..., description="List of authors of the research paper")
    # title: str = Field(..., description="Title of the research paper")
    # submission_date: str = Field(..., description="Submission date of the research paper")
    # keywords: List[str] = Field(..., description="List of keywords associated with the research paper")
    # references: List[str] = Field(..., description="List of references cited in the research paper")
    # abstract: str = Field(..., description="Abstract of the research paper")
    # Required field (declared with `...`, i.e. no default).
    conclusion: str = Field(..., description="Conclusion of the research paper")

In [62]:
# Pydantic model for one slide of the generated presentation.
# `title` is required; `bullet_points`, `notes` and `images` default to None.
# NOTE(review): documented with `#` comments rather than a class docstring, because a
# docstring would presumably be emitted into the JSON schema used for the parser's
# format instructions and so change the prompt - confirm before converting.
class SlideContent(BaseModel):
    title: str = Field(..., description="Title of the particular slide")
    bullet_points: Optional[List[str]] = Field(None, description="Content in bullet points form for the slide")
    notes: Optional[str] = Field(None, description="Additional notes for the slide")
    images: Optional[List[str]] = Field(None, description="List of relevant image paths for the slide")

# Pydantic model for the whole presentation: title-slide details plus the ordered slides.
# All four fields are required (declared with `...`). This model is handed to the
# JsonOutputParser that produces the format instructions for the slide-generation prompt.
class PPTPresentation(BaseModel):
    title: str = Field(..., description="Title of the presentation")
    authors: List[str] = Field(..., description="List of authors of the presentation")
    institution: str = Field(..., description="Institution associated with the presentation")
    slides: List[SlideContent] = Field(..., description="List of slides, in the presentation,which are SlideContent schemas.")

In [63]:
class ResPaperExtractState(TypedDict):
    """State dictionary passed between the nodes of the paper-to-slides graph.

    A TypedDict only declares key names and value types; it cannot carry default
    values, so the keys are declared without assignments. Keys a node has not
    produced yet are simply absent from the state.
    """
    pdf_path: Optional[str]  # Path to the PDF file; read by load_pdf
    extracted_text: Optional[str]  # Full extracted text from the PDF; written by load_pdf
    extracted_images: Optional[Dict[str, str]]  # "FigN" label -> extracted image path; written by load_pdf
    slides_content: Optional[List[Dict[str, str]]]  # Prepared content for PowerPoint slides
    metadata: str
    # Written by get_data. At runtime the value is a plain dict shaped like
    # PPTPresentation (the recorded `type(...)` output in this notebook is dict).
    ppt_object: PPTPresentation

In [64]:
# import fitz
# doc = fitz.open(r"C:\Users\milap\OneDrive\Desktop\CLG\3rd YR\SEM VI\mined_2025\lib\server\Milap_Tathya_ICC_June_2025.pdf")

In [65]:
def load_pdf(state: ResPaperExtractState) -> Dict[str, Any]:
    """Extract the text and the embedded images of the PDF named in the state.

    Args:
        state: Graph state; only ``state["pdf_path"]`` is read.

    Returns:
        A partial state update with two keys:
        ``extracted_text`` - the text of all pages joined with newlines, and
        ``extracted_images`` - a dict mapping ``"Fig1"``, ``"Fig2"``, ... (numbered
        in extraction order across the whole document) to the path of the image
        file written under the ``extracted_images`` folder.

    Side effects:
        Creates the ``extracted_images`` folder in the current working directory
        if needed and writes one file per embedded image into it.
    """
    pdf_path = state["pdf_path"]
    doc = fitz.open(pdf_path)  # Load the PDF only once

    extracted_text = []
    extracted_images = {}
    output_folder = "extracted_images"

    try:
        os.makedirs(output_folder, exist_ok=True)

        img_cntr = 1  # running figure number across all pages
        for page_number, page in enumerate(doc, start=1):
            extracted_text.append(page.get_text("text"))

            for img_index, img in enumerate(page.get_images(full=True), start=1):
                xref = img[0]  # first entry of the image record is passed to extract_image
                base_image = doc.extract_image(xref)
                img_filename = f"{output_folder}/page_{page_number}_img_{img_index}.{base_image['ext']}"

                with open(img_filename, "wb") as img_file:
                    img_file.write(base_image["image"])

                extracted_images[f"Fig{img_cntr}"] = img_filename
                img_cntr += 1
    finally:
        # Always release the document handle, even if extraction fails part-way.
        doc.close()

    # Combine text from all pages
    full_text = "\n".join(extracted_text)

    return {"extracted_text": full_text, "extracted_images": extracted_images}

In [66]:
# condenser_instruction = """ 
# You are an AI assistant specialized in processing research papers. 

# Here is the text extracted from a research paper: {extracted_text}

# When tasked with extracting information from the provided text, follow these guidelines, and structure the content accordingly:
# 1. **Metadata Extraction:** Identify and extract:
#    - Authors  
#    - Title  
#    - Submission Date  
#    - Keywords  
#    - References (return as a list) 

# 2. **Text Structuring:** Organize the content into:
#    - Abstract  
#    - Conclusion  
#    - Body (as a list of sections or paragraphs)  

# Ensure the extracted content is well-structured, concise, and retains essential details.

# """
# parser = PydanticOutputParser(pydantic_object=ResPaperText)

# condenser_template = ChatPromptTemplate(
#    messages=[("system", condenser_instruction),
#    ("human", "Extract the details from the given text")],
#    input_variables=["extracted_text"],
#    partial_variables={"format_instructions": parser.get_format_instructions()},
# )
# # summarizer = """ 
# # Please provide a concise summary of the following research text, highlighting the main points, key findings, and conclusions. 
# # Focus on summarizing the purpose of the study, the methods used, and the significant results, while avoiding unnecessary details. The text is as follows: {extracted_text}
# # """
# summarizer = """
# "You are an expert at creating PowerPoint presentations. Generate a PowerPoint (PPT) presentation that summarizes a research paper. Follow these guidelines:"

# Title Slide:

# Include the title of the research paper.
# Mention the author(s) and the institution (if available).
# Introduction Slide:

# Summarize the research problem and objectives.
# Highlight the motivation behind the study.
# Methods Slide:

# Briefly explain the research methodology.
# Mention key techniques, datasets, or experimental setups used.
# Results Slide:

# Summarize the major findings of the study.
# Use bullet points or simple visuals (graphs, tables) to illustrate key results.
# Discussion/Analysis Slide:

# Explain the significance of the results.
# Compare findings with previous research (if applicable).
# Conclusion Slide:

# Summarize key takeaways from the research.
# Mention potential future work or applications of the study.
# References Slide:

# Include citations or sources (if necessary).
# Additional Instructions:

# Keep the slides concise with minimal text (bullet points preferred).
# Use visuals like diagrams, graphs, or charts where applicable.
# Maintain a professional and visually appealing slide design.

# Here is the given text: {extracted_text}
# """


# # Initialize the Output Parser
# parser = PydanticOutputParser(pydantic_object=PPTPresentation)
# summarizer_temp = PromptTemplate(
#    template=summarizer,
#    input_variables=["extracted_text"],
#    partial_variables={"format_instructions": parser.get_format_instructions()},
# )
# def get_data(state: ResPaperExtractState):
#    extracted_text = state["extracted_text"]
#    #  structured_llm = llm.with_structured_output(ResPaperText)
#    #  condenser_prompt = condenser_template.format(extracted_text=extracted_text)
#    #  response = structured_llm.invoke(condenser_prompt)
#    response = llm.invoke(summarizer_temp.format(extracted_text=extracted_text))
#    ppt_object = parser.invoke(response)

#    return {"ppt_object": ppt_object}

In [67]:
# System message: slide-by-slide instructions for the LLM. The `{format_instructions}`
# placeholder at the end is filled from the parser when `chat_prompt` is built below.
system_message = SystemMessagePromptTemplate.from_template(
    """You are an expert in creating PowerPoint presentations. Generate a structured PowerPoint (PPT) presentation 
    that summarizes a research paper based on the provided extracted text. Follow these instructions:
    
    Remember that the objective of this PPT is for a third party to understand the key points of the research paper, and 
    give them a gist of the research paper.

    - Title Slide: Include the research paper title, authors, and institution.
    - Introduction Slide: Summarize the problem, objectives, and motivation.
    - Methods Slide: Briefly explain the methodology, datasets, and experimental setup.
    - Results Slide: Summarize key findings with bullet points. Mention any visuals (graphs, tables) found from the extracted text. You should definetly mention in the presentation any figures related to a performance metric or tables that are mentioned in the extracted text.
    - Graphics: Include any images of graphs or charts or other images, relevant to the results,or images depicting a performance metric,
      that are mentioned in the extracted text. You can find such images by looking for any captions that mention figures or tables. 
      It is necessary to name this slide as Graphics.
      Note that you should only mention the image number, like Fig1, Fig2, etc...
      Include only relevant image names.
    - Discussion Slide: Explain the significance of results and compare with prior work.
    - Conclusion Slide: Summarize key takeaways and potential future work.
    - References Slide: Include citations if available.

    Additional Guidelines:
    - Keep slides concise (use bullet points).
    - Maintain a professional and visually appealing slide design.
    - Give the text in markdown format.
    - Each slide should have rich information content, summarizing the information related to the particular slide heading, 
    and also include some content that is related to the slide heading but not directly mentioned in the extracted text.
    - Also keep in mind that the text for each slide should not be too lengthy, and should be concise and to the point.

    {format_instructions}
    """
)

# Human Message: Supplies extracted text from the research paper
human_message = HumanMessagePromptTemplate.from_template("Here is the extracted text:\n\n{extracted_text}")

# JSON parser configured with the PPTPresentation schema. The recorded notebook output
# shows that the parsed result is a plain dict, not a PPTPresentation instance.
parser = JsonOutputParser(pydantic_object=PPTPresentation)
# Combine into a structured chat prompt; `format_instructions` is pre-filled here, so
# only `extracted_text` has to be supplied when the prompt is invoked.
chat_prompt = ChatPromptTemplate(
    messages=[system_message, human_message],
    partial_variables={"format_instructions": parser.get_format_instructions()}
)

def get_data(state):
    """Generate the slide content for the extracted paper text.

    Pipes the module-level `chat_prompt`, `llm` and `parser` together and invokes
    the resulting chain with ``state["extracted_text"]``.

    Returns:
        A partial state update ``{"ppt_object": <parsed chain output>}``. With the
        JSON parser defined in this notebook the parsed output is a plain dict.
    """
    chain_input = {"extracted_text": state["extracted_text"]}
    return {"ppt_object": (chat_prompt | llm | parser).invoke(chain_input)}

In [68]:
# Linear two-node graph: START -> "pdf-2-text" -> "text-condensation" -> END.
builder = StateGraph(ResPaperExtractState)

for _node_name, _node_fn in (("pdf-2-text", load_pdf), ("text-condensation", get_data)):
    builder.add_node(_node_name, _node_fn)

for _edge_start, _edge_end in (
    (START, "pdf-2-text"),
    ("pdf-2-text", "text-condensation"),
    ("text-condensation", END),
):
    builder.add_edge(_edge_start, _edge_end)

graph = builder.compile()

In [69]:
# Folder holding the sample papers. Set the MINED_PDF_DIR environment variable to run
# the notebook on another machine; the fallback is the folder the notebook was written against.
PDF_DIR = os.environ.get(
    "MINED_PDF_DIR",
    r"C:\Users\milap\OneDrive\Desktop\CLG\3rd YR\SEM VI\mined_2025\lib\server",
)
path1 = os.path.join(PDF_DIR, "Milap_Tathya_ICC_June_2025.pdf")
path2 = os.path.join(PDF_DIR, "STORM.pdf")
path3 = os.path.join(PDF_DIR, "SuFIA.pdf")
path4 = os.path.join(PDF_DIR, "ankit review.pdf")

# Run the whole graph (PDF extraction, then slide generation) on the first paper.
state_output = graph.invoke({"pdf_path": path1})

In [70]:
# Inspect the text that load_pdf extracted from the PDF.
state_output["extracted_text"]

'ConvNeXt-based Multi-Class Hydrocarbon Spill\nClassification in Hyperspectral Imagery\nMilap Patel, Tathya Patel, Anuja Nair, Member, IEEE, Tarjni Vyas, Shivani Desai,\nSudeep Tanwar, Senior Member, IEEE\nDepartment of Computer Science and Engineering, School of Technology, Nirma University, Ahmedabad, Gujarat, India\nEmails: 22bce186@nirmauni.ac.in, 22bce352@nirmauni.ac.in, anuja.nair@nirmauni.ac.in,\ntarjni.vyas@nirmauni.ac.in, shivani.desai@nirmauni.ac.in, sudeep.tanwar@nirmauni.ac.in\nAbstract—This paper proposes a new approach of hydrocarbon\nspill detection using hyperspectral imaging (HSI) and fine-tuning\nConvNeXt convolutional neural network (CNN). Hydrocarbon\nspill hyperspectral dataset (HSHD) containing 124 HSIs into four\nclasses-cleans, gasoline, motor oil, and thinner is used in the\ntraining as well as testing phase. To overcome the computational\ncomplexity associated with the high spatial dimensions of HSIs\n(1024 × 1024 × 20), instead of resizing, each image is divi

In [71]:
# Check the runtime type of the generated slide data (the recorded output is dict).
type(state_output["ppt_object"])

dict

In [72]:
# Keep the generated presentation dict under a shorter name for the cells below.
ppt_content = state_output["ppt_object"]
# print(ppt_content)
# Show the second generated slide (index 1) as a spot check.
print(ppt_content["slides"][1])

{'title': 'Methods', 'bullet_points': ['Dataset: Hydrocarbon Spill Hyperspectral Dataset (HSHD) with 124 HSIs (1024x1024x20) categorized into four classes: clean, gasoline, motor oil, and thinner.', 'Methodology: Patch-based approach dividing each HSI into 16 smaller patches (256x256x20) to manage computational complexity while preserving spatial-spectral information.', 'Model: Fine-tuned ConvNeXt CNN architecture adapted for 20 spectral channels and multi-class classification.', 'Training: AdamW optimizer, CrossEntropyLoss function, 20 epochs, learning rate scheduler, and early stopping to prevent overfitting.', 'Additional Context:  The patch-based approach is a common technique to handle large hyperspectral images, balancing computational efficiency and information preservation.  Other CNN architectures were also considered and compared.'], 'notes': None, 'images': None}


In [78]:
# Inspect the "FigN" -> image path mapping produced by load_pdf.
state_output["extracted_images"]

{'Fig1': 'extracted_images/page_2_img_1.png',
 'Fig2': 'extracted_images/page_3_img_1.png',
 'Fig3': 'extracted_images/page_4_img_1.png',
 'Fig4': 'extracted_images/page_5_img_1.png',
 'Fig5': 'extracted_images/page_5_img_2.png',
 'Fig6': 'extracted_images/page_5_img_3.png',
 'Fig7': 'extracted_images/page_6_img_1.png'}

In [79]:
# Inspect every generated slide dict.
ppt_content["slides"]

[{'title': 'Introduction',
  'bullet_points': ['Problem: Inefficient and inaccurate hydrocarbon spill detection using traditional methods.',
   'Objectives: Develop a deep learning-based approach for accurate and rapid hydrocarbon spill classification using hyperspectral imagery (HSI).',
   'Motivation:  HSI offers superior spectral and spatial resolution for oil spill identification, enabling quicker response and minimizing environmental damage. Traditional methods lack the accuracy and speed needed for effective response.',
   'Additional Context: The increasing frequency and severity of oil spills necessitate the development of advanced detection technologies for effective environmental protection and mitigation of economic losses.'],
  'notes': None,
  'images': None},
 {'title': 'Methods',
  'bullet_points': ['Dataset: Hydrocarbon Spill Hyperspectral Dataset (HSHD) with 124 HSIs (1024x1024x20) categorized into four classes: clean, gasoline, motor oil, and thinner.',
   'Methodolog

In [105]:
from pptx import Presentation
from pptx.util import Pt, Inches
from pptx.enum.text import PP_ALIGN, MSO_ANCHOR, MSO_AUTO_SIZE
from pptx.dml.color import RGBColor

class ThemeConfig:
    """Colour and font settings for one named presentation theme.

    After construction, ``self.theme`` is a dict with the keys ``background``,
    ``title`` and ``body`` (RGBColor values) and ``title_font`` and ``body_font``
    (font names). An unknown theme name falls back to the "minimal" theme.
    """

    def __init__(self, name="modern"):
        def palette(background, title, body, title_font, body_font):
            # Build one theme entry from RGB triples and font names.
            return {
                "background": RGBColor(*background),
                "title": RGBColor(*title),
                "body": RGBColor(*body),
                "title_font": title_font,
                "body_font": body_font,
            }

        catalogue = {
            # Dark gray background, gold titles, light gray body text
            "modern": palette((30, 30, 30), (255, 215, 0), (200, 200, 200), "Montserrat", "Lato"),
            # Wheat background, saddle brown titles, dim gray body text
            "vintage": palette((245, 222, 179), (139, 69, 19), (105, 105, 105), "Georgia", "Times New Roman"),
            # White background, navy blue titles, dark gray body text
            "corporate": palette((255, 255, 255), (0, 51, 102), (51, 51, 51), "Arial", "Verdana"),
            # Light gray background, dark gray titles, slightly lighter gray body text
            "minimal": palette((240, 240, 240), (50, 50, 50), (80, 80, 80), "Helvetica", "Sans-Serif"),
            # Black background, red titles, white body text
            "bold": palette((0, 0, 0), (255, 0, 0), (255, 255, 255), "Impact", "Arial Black"),
        }

        if name in catalogue:
            self.theme = catalogue[name]
        else:
            self.theme = catalogue["minimal"]

def apply_background(slide, color):
    """Give the slide a solid background fill of the given color."""
    bg_fill = slide.background.fill
    bg_fill.solid()  # switch to a solid fill before assigning its colour
    bg_fill.fore_color.rgb = color

# Picture boxes for a graphics slide, keyed by the number of images shown (at most
# three). Each entry is ([(left, top), ...], (width, height)), all in inches.
_GRAPHICS_LAYOUTS = {
    1: ([(1.5, 1.5)], (7, 5)),
    2: ([(1, 2), (5.5, 2)], (4, 3)),
    3: ([(1, 1.5), (5, 1.5), (3, 4)], (3.5, 2.5)),
}


def _joined(value, separator):
    """Return `value` as one string; a list or tuple is joined with `separator`."""
    if value is None:
        return ""
    if isinstance(value, str):
        return value
    return separator.join(str(item) for item in value)


def _style_paragraph(paragraph, text, size_pt, font_name, color, alignment=None):
    """Set a paragraph's text, font size, font name, colour and (optionally) alignment."""
    paragraph.text = text
    paragraph.font.size = Pt(size_pt)
    paragraph.font.name = font_name
    paragraph.font.color.rgb = color
    if alignment is not None:
        paragraph.alignment = alignment


def _duplicates_title_slide(slide_data, presentation_title):
    """Tell whether a generated slide merely repeats the title slide."""
    heading = (slide_data.get("title") or "").strip().lower()
    return heading in {"title", "title slide"} or heading == (presentation_title or "").strip().lower()


def _resolve_images(slide_data, image_mapping):
    """Return (label, path) pairs for the slide's figures whose image files exist."""
    resolved = []
    for label in slide_data.get("images") or []:
        # "Fig. 1" / "Fig 1" -> "Fig1", the key format used by the image mapping.
        key = str(label).replace(".", "").replace(" ", "")
        path = (image_mapping or {}).get(key)
        if path and os.path.exists(path):
            resolved.append((str(label), path))
    return resolved


def _add_captioned_images(slide, labelled_images, palette):
    """Place up to three pictures on the slide, each with its label centred below it."""
    shown = labelled_images[:3]
    positions, (box_width, box_height) = _GRAPHICS_LAYOUTS[len(shown)]
    width, height = Inches(box_width), Inches(box_height)

    for (label, path), (left_in, top_in) in zip(shown, positions):
        left, top = Inches(left_in), Inches(top_in)
        slide.shapes.add_picture(path, left, top, width=width, height=height)

        # One-inch-wide caption box centred under the picture.
        caption = slide.shapes.add_textbox(
            int(left + width / 2 - Inches(0.5)),
            int(top + height + Inches(0.2)),
            Inches(1),
            Inches(0.5),
        )
        _style_paragraph(
            caption.text_frame.paragraphs[0], label, 14,
            palette["body_font"], palette["body"], PP_ALIGN.CENTER,
        )


def create_ppt_from_dict(ppt_data: dict, image_mapping: dict, theme_name: str="default", output_file: str = "presentation.pptx"):
    """Render a presentation dict into a themed .pptx file.

    Args:
        ppt_data: Dict with the keys ``title``, ``authors``, ``institution`` and
            ``slides``; each slide is a dict with ``title`` and optionally
            ``bullet_points`` and ``images`` (either may be missing or None).
        image_mapping: Maps figure labels such as ``"Fig1"`` to image file paths.
        theme_name: Name passed to ThemeConfig; unknown names use its fallback theme.
        output_file: Path of the .pptx file to write.

    Behaviour:
        * A title slide is built from ``title``, ``authors`` and ``institution``.
        * Every entry of ``slides`` becomes one slide. The first entry is skipped
          only when it merely repeats the title slide (its heading is "Title",
          "Title Slide" or the presentation title).
        * A slide whose heading contains "graphics" or "graphs slide" and which has
          at least one image that resolves to an existing file is rendered as a
          picture slide (up to three captioned images). Any other slide is rendered
          as a titled bullet slide; "References" slides use a smaller font.

    Side effects:
        Writes ``output_file`` and prints a confirmation line.
    """
    prs = Presentation()
    palette = ThemeConfig(theme_name).theme

    # --- Title slide -------------------------------------------------------
    title_slide = prs.slides.add_slide(prs.slide_layouts[0])  # Title slide layout
    apply_background(title_slide, palette["background"])
    _style_paragraph(
        title_slide.shapes.title.text_frame.paragraphs[0], ppt_data['title'], 40,
        palette["title_font"], palette["title"], PP_ALIGN.CENTER,
    )

    # Subtitle: authors on the first line, institution on the second. clear()
    # leaves one empty paragraph behind, so that paragraph is reused for the
    # authors instead of adding a new one after it.
    subtitle_frame = title_slide.placeholders[1].text_frame
    subtitle_frame.clear()
    _style_paragraph(
        subtitle_frame.paragraphs[0], _joined(ppt_data['authors'], ", "), 18,
        palette["body_font"], palette["body"], PP_ALIGN.CENTER,
    )
    _style_paragraph(
        subtitle_frame.add_paragraph(), _joined(ppt_data['institution'], ", "), 16,
        palette["body_font"], palette["body"], PP_ALIGN.CENTER,
    )
    subtitle_frame.auto_size = MSO_AUTO_SIZE.SHAPE_TO_FIT_TEXT
    subtitle_frame.word_wrap = True

    # --- Content slides ----------------------------------------------------
    for slide_index, slide_data in enumerate(ppt_data["slides"] or []):
        if slide_index == 0 and _duplicates_title_slide(slide_data, ppt_data['title']):
            continue

        heading = slide_data.get("title") or ""
        heading_lower = heading.lower()

        labelled_images = []
        if "graphics" in heading_lower or "graphs slide" in heading_lower:
            labelled_images = _resolve_images(slide_data, image_mapping)

        if labelled_images:
            # Blank layout: the pictures and captions are positioned manually.
            slide = prs.slides.add_slide(prs.slide_layouts[6])
            apply_background(slide, palette["background"])
            _add_captioned_images(slide, labelled_images, palette)
            continue

        # Title-and-content layout for every other slide, including a graphics
        # slide that has no usable image.
        slide = prs.slides.add_slide(prs.slide_layouts[1])
        apply_background(slide, palette["background"])
        _style_paragraph(
            slide.shapes.title.text_frame.paragraphs[0], heading, 32,
            palette["title_font"], palette["title"],
        )

        body_frame = slide.placeholders[1].text_frame
        body_frame.clear()  # Remove default placeholder text

        bullet_points = slide_data.get("bullet_points") or []
        if bullet_points:
            body_frame.word_wrap = True
            body_frame.auto_size = MSO_AUTO_SIZE.SHAPE_TO_FIT_TEXT

        # Reference lists tend to be long, so they get a smaller font.
        font_size = 12 if "references" in heading_lower else 20
        for point_index, point in enumerate(bullet_points):
            paragraph = body_frame.paragraphs[0] if point_index == 0 else body_frame.add_paragraph()
            _style_paragraph(paragraph, str(point), font_size, palette["body_font"], palette["body"])

    # Save PowerPoint file
    prs.save(output_file)
    print(f"PowerPoint presentation saved as {output_file}")

# Build the deck from the generated slides with the "modern" theme and save it as hydrocarbon_2.pptx.
create_ppt_from_dict(ppt_content, state_output["extracted_images"], "modern", "hydrocarbon_2.pptx")

PowerPoint presentation saved as hydrocarbon_2.pptx


In [93]:
from pptx import Presentation
from pptx.util import Inches
import os

def create_ppt_from_dict(ppt_data: dict, image_mapping: dict, theme_name: str="default", output_file: str = "presentation.pptx"):
    """Render the slides of a presentation dict into a .pptx file (simplified variant).

    NOTE(review): this cell redefines `create_ppt_from_dict`, which the notebook also
    defines in the cell with the themed title slide and captions. Whichever cell
    runs last wins; running the notebook top to bottom leaves this simpler variant
    active. Consider keeping only one definition.

    Args:
        ppt_data: Dict with a ``slides`` list; each slide is a dict with ``title``
            and optionally ``bullet_points`` and ``images`` (either may be None).
        image_mapping: Maps figure labels such as ``"Fig1"`` to image file paths.
        theme_name: Name passed to ThemeConfig; only the background colour is used.
        output_file: Path of the .pptx file to write.

    Behaviour:
        A slide whose heading contains "graphics" or "graphs slide" and which has at
        least one image resolving to an existing file becomes a picture slide with
        up to three images. Every other slide becomes a titled bullet slide.

    Side effects:
        Writes ``output_file`` and prints a confirmation line.
    """
    prs = Presentation()
    theme = ThemeConfig(theme_name)

    # Picture boxes keyed by image count: ([(left, top), ...], (width, height)) in inches.
    picture_layouts = {
        1: ([(1.5, 1.5)], (7, 5)),
        2: ([(1, 2), (5.5, 2)], (4, 3)),
        3: ([(1, 1.5), (5, 1.5), (3, 4)], (3.5, 2.5)),
    }

    for slide_data in ppt_data["slides"] or []:
        title_text = slide_data.get("title") or ""
        title_lower = title_text.lower()

        # Resolve figure labels to existing files; `images` may be missing or None.
        image_paths = []
        if "graphics" in title_lower or "graphs slide" in title_lower:
            for fig in slide_data.get("images") or []:
                path = (image_mapping or {}).get(str(fig).replace(".", "").replace(" ", ""))
                if path and os.path.exists(path):
                    image_paths.append(path)

        if image_paths:
            # Blank layout: the pictures are positioned manually.
            slide = prs.slides.add_slide(prs.slide_layouts[6])
            apply_background(slide, theme.theme["background"])

            shown = image_paths[:3]
            positions, (box_width, box_height) = picture_layouts[len(shown)]
            for img_path, (left_in, top_in) in zip(shown, positions):
                slide.shapes.add_picture(
                    img_path, Inches(left_in), Inches(top_in),
                    width=Inches(box_width), height=Inches(box_height),
                )
            continue

        # Standard text slide (also used for a graphics slide without usable images,
        # because the blank layout has no content placeholder to fall back on).
        slide = prs.slides.add_slide(prs.slide_layouts[1])
        apply_background(slide, theme.theme["background"])
        slide.shapes.title.text = title_text

        bullet_points = slide_data.get("bullet_points") or []
        if bullet_points:
            text_frame = slide.placeholders[1].text_frame
            text_frame.clear()
            for point_index, point in enumerate(bullet_points):
                # clear() leaves one empty paragraph; reuse it for the first bullet
                # so the list does not start with a blank entry.
                paragraph = text_frame.paragraphs[0] if point_index == 0 else text_frame.add_paragraph()
                paragraph.text = str(point)

    prs.save(output_file)
    print(f"PowerPoint presentation saved as {output_file}")

# Example call: writes hydrocarbon_2.pptx, the same output file name used by the earlier call in this notebook.
create_ppt_from_dict(ppt_content, state_output["extracted_images"], "modern", "hydrocarbon_2.pptx")

PowerPoint presentation saved as hydrocarbon_2.pptx
