In [1]:
from typing import Annotated, Sequence, TypedDict, List, Dict
from dotenv import load_dotenv  
from langchain_core.messages import BaseMessage
from langgraph.graph.message import add_messages
from langgraph.graph import StateGraph, END, START
from langchain_groq import ChatGroq
from langgraph.prebuilt import ToolNode
import os
import json
import requests
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import MSO_ANCHOR
from io import BytesIO


In [2]:
# Pull variables from a local .env file into the process environment.
load_dotenv()

GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Assigning None into os.environ fails with "TypeError: str expected, not NoneType",
    # which hides the real problem; fail early with an actionable message instead.
    raise RuntimeError(
        "GROQ_API_KEY is not set. Add it to your .env file or export it "
        "before running this notebook."
    )
os.environ["GROQ_API_KEY"] = GROQ_API_KEY

# Chat model handle; no API key is passed explicitly here.
# NOTE(review): presumably ChatGroq picks the key up from GROQ_API_KEY in the environment — confirm.
llm = ChatGroq(model="llama-3.1-8b-instant")

In [3]:
class AgentState(TypedDict):
    """Schema of the state dictionary used by the graph built in this notebook."""
    # Message list, annotated with the `add_messages` reducer from langgraph.
    messages: Annotated[Sequence[BaseMessage], add_messages]
    # Filled by load_json_to_agent_state from the JSON file's 'subtopics' key.
    subtopic: List[str]
    # One dict per slide, built by load_json_to_agent_state.
    # NOTE(review): the loader defaults "slide_no" to 0 (int) and "is_blank_slide"
    # to False (bool), so the values are not all str as this annotation states.
    slide_segments: List[Dict[str, str]]
    # Set to "" by load_json_to_agent_state; no node visible in this notebook writes it.
    ppt_output_path: str
    # Written by narration_to_audio: one entry per slide, either a .wav path or "".
    audio_output_path: List[str]

In [4]:
def load_json_to_agent_state(json_path: str) -> AgentState:
    """Build an initial ``AgentState`` from a slide-script JSON file.

    The file is expected to hold a JSON object. Its ``slide_segments`` entries
    are normalised so that every slide dict carries the same set of keys, with
    defaults filled in for anything missing.

    Args:
        json_path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        An ``AgentState`` with empty ``messages``, the file's ``subtopics``
        (or ``[]``), the normalised slides, an empty ``ppt_output_path`` and
        an empty ``audio_output_path`` list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    complete_slide_segments = []
    # `or []` also covers an explicit null in the JSON, not just a missing key.
    for slide in data.get('slide_segments') or []:
        # "test_position" is the key this loader has always read, so it still
        # takes precedence. NOTE(review): it looks like a typo; fall back to
        # "content_position" / "text_position" so correctly named input is not
        # silently dropped — confirm the real key with the JSON producer.
        content_position = slide.get(
            "test_position",
            slide.get("content_position", slide.get("text_position", "")),
        )
        complete_slide = {
            "slide_no": slide.get("slide_no", 0),
            "subtopic": slide.get("subtopic", ""),
            "content_to_display": slide.get("content_to_display", ""),
            "narration_script": slide.get("narration_script", ""),
            "is_blank_slide": slide.get("is_blank_slide", False),
            "image_address": slide.get("image_address", ""),
            "video_address": slide.get("video_address", ""),
            "image_position": slide.get("image_position", ""),
            "content_position": content_position,
        }
        complete_slide_segments.append(complete_slide)

    return AgentState(
        messages=[],
        subtopic=data.get('subtopics') or [],
        slide_segments=complete_slide_segments,
        ppt_output_path="",
        # Declared in AgentState; initialise it so the returned state is complete.
        audio_output_path=[],
    )


In [6]:
!pip install TTS

Collecting numpy>=1.24.3 (from TTS)
  Obtaining dependency information for numpy>=1.24.3 from https://files.pythonhosted.org/packages/3f/6b/5610004206cf7f8e7ad91c5a85a8c71b2f2f8051a0c0c4d5916b76d6cbb2/numpy-1.26.4-cp311-cp311-win_amd64.whl.metadata
  Using cached numpy-1.26.4-cp311-cp311-win_amd64.whl.metadata (61 kB)
  Obtaining dependency information for numpy>=1.24.3 from https://files.pythonhosted.org/packages/d8/ec/ebef2f7d7c28503f958f0f8b992e7ce606fb74f9e891199329d5f5f87404/numpy-1.24.4-cp311-cp311-win_amd64.whl.metadata
  Using cached numpy-1.24.4-cp311-cp311-win_amd64.whl.metadata (5.6 kB)
Using cached numpy-1.24.4-cp311-cp311-win_amd64.whl (14.8 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.3.1
    Uninstalling numpy-2.3.1:
      Successfully uninstalled numpy-2.3.1


ERROR: Could not install packages due to an OSError: [WinError 5] Access is denied: 'C:\\Users\\Nikhi\\anaconda3\\Lib\\site-packages\\~-mpy.libs\\libscipy_openblas64_-13e2df515630b4a41f92893938845698.dll'
Consider using the `--user` option or check the permissions.



In [9]:
import os
from TTS.api import TTS

# NOTE(review): custom_path is not referenced by any cell visible in this notebook.
custom_path = "../models/tacotron2-DDC"

# Load the Tacotron2-DDC LJSpeech model on CPU (gpu=False), without a progress bar.
tts = TTS(
    model_name="tts_models/en/ljspeech/tacotron2-DDC",
    progress_bar=False,
    gpu=False,
)


 > tts_models/en/ljspeech/tacotron2-DDC is already downloaded.
 > vocoder_models/en/ljspeech/hifigan_v2 is already downloaded.
 > Using model: Tacotron2
 > Setting up Audio Processor...
 | > sample_rate:22050
 | > resample:False
 | > num_mels:80
 | > log_func:np.log
 | > min_level_db:-100
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:20
 | > fft_size:1024
 | > power:1.5
 | > preemphasis:0.0
 | > griffin_lim_iters:60
 | > signal_norm:False
 | > symmetric_norm:True
 | > mel_fmin:0
 | > mel_fmax:8000.0
 | > pitch_fmin:1.0
 | > pitch_fmax:640.0
 | > spec_gain:1.0
 | > stft_pad_mode:reflect
 | > max_norm:4.0
 | > clip_norm:True
 | > do_trim_silence:True
 | > trim_db:60
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:2.718281828459045
 | > hop_length:256
 | > win_length:1024
 > Model's reduction rate `r` is set to: 1
 > Vocoder Model: hifigan
 > Setting up Audio P

In [None]:
def narration_to_audio(state: dict) -> dict:
    """Synthesise one WAV file per slide narration and record the file paths.

    For every entry of ``state["slide_segments"]`` the ``narration_script`` is
    passed to the module-level ``tts`` object and written to
    ``../assets/audio/Slide<slide_no>.wav``. Slides whose narration is missing,
    ``None`` or whitespace-only are skipped. A failure on one slide is logged
    and does not stop the remaining slides.

    Args:
        state: Graph state. ``slide_segments`` is read (a missing or ``None``
            value is treated as no slides); ``audio_output_path`` is written.

    Returns:
        The same ``state`` object, with ``audio_output_path`` set to a list
        holding one entry per slide: the WAV path on success, ``""`` when the
        slide was skipped or synthesis failed.
    """
    output_dir = "../assets/audio"
    os.makedirs(output_dir, exist_ok=True)

    updated_audio_paths = []
    # 1-based position is only a fallback file number for slides lacking "slide_no".
    for position, slide in enumerate(state.get("slide_segments") or [], start=1):
        slide_no = slide.get("slide_no", position)
        narration = slide.get("narration_script") or ""

        # Whitespace-only text has nothing to speak; skip it rather than
        # sending it to the TTS engine.
        if not narration.strip():
            updated_audio_paths.append("")
            continue

        speech_file_path = os.path.join(output_dir, f"Slide{slide_no}.wav")

        try:
            # Generate audio using local TTS model
            tts.tts_to_file(text=narration, file_path=speech_file_path)
        except Exception as e:
            # Deliberate best-effort: keep going so one bad slide does not
            # discard the audio already produced for the others.
            print(f"[ERROR] Failed to generate audio for slide {slide_no}: {e}")
            updated_audio_paths.append("")
        else:
            updated_audio_paths.append(speech_file_path)

    state["audio_output_path"] = updated_audio_paths
    return state

In [12]:
def load_data_node(state: AgentState) -> AgentState:
    """Graph node: return the state loaded from the slide-script JSON.

    The incoming ``state`` is not read; the result comes entirely from
    ``load_json_to_agent_state``.
    """
    script_path = '../assets/scripts/slide_segments.json'
    return load_json_to_agent_state(script_path)

In [13]:
# Two-step linear pipeline: START -> LoadData -> NarrationToAudio -> END.
graph = StateGraph(AgentState)

# Register the nodes in the same order as before.
for node_name, node_fn in (
    ("LoadData", load_data_node),
    ("NarrationToAudio", narration_to_audio),
):
    graph.add_node(node_name, node_fn)

# Wire the nodes into a single chain.
for edge_start, edge_end in (
    (START, "LoadData"),
    ("LoadData", "NarrationToAudio"),
    ("NarrationToAudio", END),
):
    graph.add_edge(edge_start, edge_end)

compiled_graph = graph.compile()


In [14]:
# Invoke with an empty input; the LoadData node returns the state read from the JSON file.
final_state = compiled_graph.invoke({})
audio_paths = final_state['audio_output_path']
print("Generated Audio Paths:", audio_paths)

 > Text splitted to sentences.
['Hello and welcome back to our channel.', "Today, we're going to talk about something that's really crucial for businesses to succeed in today's fast-paced and competitive market: supply chain management.", 'So, what is supply chain management?', "Well, it's the coordination of a business's entire production flow, from sourcing raw materials to delivering a finished item.", 'This involves a complex network of suppliers, manufacturers, distributors, retailers, wholesalers, and customers, all working together to get products to the end-users.', "It's a delicate dance of logistics, where every step is crucial to ensure that products reach their destination on time.", 'Effective supply chain management is all about optimizing this network to ensure that everything gets where it needs to be, when it needs to be there, and as smoothly as possible.', 'It involves obtaining the necessary components, manufacturing the product, storing it, transporting it, and get

In [13]:
# NOTE(review): in the cells visible here the loader sets ppt_output_path to ""
# and no node assigns it afterwards, so this is expected to print an empty line.
ppt_output_path = final_state['ppt_output_path']
print(ppt_output_path)


