In [17]:

import vertexai
import os
# Path to your service account key file
key_path = r"stoked-forest-447811-u4-ecf33505a9e7.json"

# Set the e
# nvironment variable
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = key_path
LOCATION="us-central1"
vertexai.init(location=LOCATION)

In [18]:
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
os.environ["TAVILY_API_KEY"] = os.getenv("TAVILY_API_KEY")
os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")

In [19]:
os.environ["LANGCHAIN_PROJECT"] = "playground"
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [20]:

from langchain_community.tools.tavily_search import TavilySearchResults

tavily_tool = TavilySearchResults(max_results=5)

In [21]:
from langchain_core.tools import tool
from google.cloud import texttospeech
from pathlib import Path
import os

@tool
def text_to_speech(text: str, voice: str, output_filename: str) -> str:
    """
    Converts text to speech using Google Cloud's Text-to-Speech API, saving the audio file locally with a dynamic filename.

    Args:
        text (str): The text to convert to speech.
        voice (str): The voice to use for speech synthesis. This should be in the format:
                     - "en-US-Wavenet-D" (for US English, WaveNet voice)
                     - "en-GB-Standard-A" (for UK English, Standard voice)
                     - See https://cloud.google.com/text-to-speech/docs/voices for more options.
        output_filename (str): The name for the output audio file, including file extension (e.g., 'output.mp3').

    Returns:
        str: The path to the saved audio file.
    """
    # Set up Google Cloud credentials (ensure environment variable is set)
    

    # Initialize the client
    client = texttospeech.TextToSpeechClient()

    # Set the text input
    synthesis_input = texttospeech.SynthesisInput(text=text)

    # Build the voice request
    voice_config = texttospeech.VoiceSelectionParams(
        language_code=voice[:5],  # Extract language code (e.g., "en-US" from "en-US-Wavenet-D")
        name=voice,  # Use the full voice name
    )

    # Set the audio configuration
    audio_config = texttospeech.AudioConfig(
        audio_encoding=texttospeech.AudioEncoding.MP3
    )

    # Perform the text-to-speech request
    response = client.synthesize_speech(
        input=synthesis_input, voice=voice_config, audio_config=audio_config
    )

    # Save the audio to a file
    speech_file_path = Path(output_filename)
    with open(speech_file_path, "wb") as out:
        out.write(response.audio_content)

    return str(speech_file_path)

In [22]:
from langchain_core.tools import tool
from pydub import AudioSegment
from typing import List

@tool
def edit_podcast_audio(segments: List[str], pauses_between_segments: int = 1000, output_filename: str = "final_podcast_episode.mp3") -> str:
    """
    Edits a podcast episode by combining audio segments with specified pauses between them, ensuring consistent volume.

    Args:
        segments (List[str]): List of paths to audio segment files.
        pauses_between_segments (int): Duration of pause between segments in milliseconds. Default is 1000.
        output_filename (str): The name for the output podcast file, including file extension (e.g., 'episode.mp3').

    Returns:
        str: The path to the saved podcast episode.
    """
    podcast_episode = AudioSegment.silent(duration=0)  # Initialize an empty audio segment

    for segment_path in segments:
        segment = AudioSegment.from_file(segment_path)  # Load the segment
        podcast_episode += AudioSegment.silent(duration=pauses_between_segments) + segment  # Append with pause

    podcast_episode = podcast_episode.normalize()  # Normalize volume
    podcast_episode.export(output_filename, format='mp3')  # Export the edited podcast

    return output_filename


In [23]:
# Define the tools we want to use
tools = [
    tavily_tool,  # Built-in search tool via Tavily
    text_to_speech,  # Our custom text to speech tool
    edit_podcast_audio # Audio Mix Tool
]


In [24]:

from langgraph.prebuilt import ToolExecutor
tool_executor = ToolExecutor(tools)

  tool_executor = ToolExecutor(tools)


In [25]:

from langchain_google_genai import ChatGoogleGenerativeAI

model = ChatGoogleGenerativeAI(model = "gemini-2.0-flash-exp", temperature=0, streaming=True)

In [26]:
import json
from langchain_core.messages import (
    AIMessage,
    BaseMessage,
    ChatMessage,
    FunctionMessage,
    HumanMessage,
)
from langchain.tools.render import format_tool_to_openai_function
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langgraph.graph import END, StateGraph
from langgraph.prebuilt.tool_executor import ToolExecutor, ToolInvocation
from google.generativeai import GenerativeModel  # Import Gemini's GenerativeModel

def create_agent(gemini_model, tools, system_message: str):
    """
    Create an agent using Google's Gemini model.

    Args:
        gemini_model: An instance of Google's GenerativeModel (Gemini).
        tools: A list of tools the agent can use.
        system_message (str): The system message to guide the agent's behavior.

    Returns:
        A configured agent that can interact with Gemini and use tools.
    """
    # Convert tools to a format Gemini can understand (e.g., descriptions)
    tool_descriptions = "\n".join([f"- {tool.name}: {tool.description}" for tool in tools])

    # Define the prompt template
    prompt = ChatPromptTemplate.from_messages(
        [
            (
                "system",
                "You are a helpful AI assistant, collaborating with other assistants."
                " Use the provided tools to progress towards answering the question."
                " If you are unable to fully answer, that's OK, another assistant with different tools "
                " will help where you left off. Execute what you can to make progress."
                " If you or any of the other assistants have the final answer or deliverable,"
                " prefix your response with FINAL ANSWER so the team knows to stop."
                " You have access to the following tools:\n{tool_descriptions}\n{system_message}",
            ),
            MessagesPlaceholder(variable_name="messages"),
        ]
    )
    prompt = prompt.partial(system_message=system_message)
    prompt = prompt.partial(tool_descriptions=tool_descriptions)

    # Bind the prompt to the Gemini model
    agent = prompt | gemini_model

    return agent

In [27]:

import operator
from typing import Annotated, List, Sequence, Tuple, TypedDict, Union

from langchain.agents import create_openai_functions_agent
from langchain.tools.render import format_tool_to_openai_function
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder

from typing_extensions import TypedDict


# This defines the object that is passed between each node
# in the graph. We will create different nodes for each agent and tool
class AgentState(TypedDict):
    messages: Annotated[Sequence[BaseMessage], operator.add]

In [28]:
import functools

def agent_node(state, agent, name):
  result = agent.invoke(state)
  if isinstance(result, FunctionMessage):
    pass
  else:
    result = HumanMessage(**result.dict(exclude={"type", "name"}), name=name)
  return {
      "messages": [result],
      "sender": name
  }

#Podcast Planner Agent and Node
podcast_planner_agent = create_agent(
    model,
    [tavily_tool, text_to_speech],
    system_message="""
You are tasked with creating a structured script for a podcast episode. The script should consist of a series of interactions between the host and the guest based on the provided topic and information from the research.

For each part of the dialogue, clearly specify whether it's the host speaking or the guest. Also, assign a suitable voice model for text-to-speech conversion for each segment. Use the following voice models based on the character:

- Host segments: Use the 'alloy' voice model.
- Guest segments: Use the 'fable' voice model.

The output should be a list where each item is a dictionary with keys 'speaker', 'text', and 'voice', indicating the speaker (host or guest), their line of dialogue, and the voice model to use.

Example output format:
[
    {"speaker": "host", "text": "Welcome to our podcast, where we explore the latest in technology.", "voice": "alloy"},
    {"speaker": "guest", "text": "Thank you for having me, it's great to be here to share my experiences.", "voice": "fable"},
    {"speaker": "host", "text": "Can you tell us about your current project?", "voice": "alloy"},
    {"speaker": "guest", "text": "Certainly! I've been working on a new AI platform that...", "voice": "fable"},
    ...
]

Your task is to generate a similar structured script, ensuring each dialogue segment between the host and guest is well-defined and allocates the appropriate voice model for the text-to-speech conversion process.
"""

)

podcast_planner_node = functools.partial(agent_node, agent=podcast_planner_agent, name="Podcast Plannner")

#Research Agent

research_agent = create_agent(
    model,
    [tavily_tool],
    system_message="You should provide accurate data for both the Podcast Planner to use"

)
research_node = functools.partial(agent_node, agent=research_agent, name="Researcher")

#Editor Agent
editor_agent = create_agent(
    model,
    [tavily_tool],
    system_message="""
You are the Editor, tasked with a critical review of the podcast script before it goes to audio production. Your review must focus on three key areas:

1. Flow and Clarity: Ensure that the dialogue between the host and guest flows naturally and is clear for listeners. The script must be optimized for text-to-speech conversion, paying close attention to pronunciation, pacing, and expression.

2. File System Uniqueness: Verify that the filenames suggested for each audio segment are unique and follow a logical naming convention. This is crucial to avoid overwriting files and to ensure seamless integration in the final podcast episode.

3. Content Quality and Rewrites: Assess the content for its informational value, engagement, and suitability for the podcast's audience. You have the authority to rewrite parts of the dialogue to enhance clarity, engagement, or factual accuracy. Your goal is to refine the script to a point where it translates effectively into an engaging audio experience.

After your review, you may either approve the script for audio production or make necessary adjustments. If adjustments are made, clearly indicate the changes and provide updated filenames if necessary. Your input will directly influence the quality of the final podcast episode.
"""
)
editor_node = functools.partial(agent_node, agent=editor_agent, name="Editor")


#Audio Mixer Agent
audio_agent = create_agent(
    model,
    [text_to_speech, edit_podcast_audio],
    system_message="""
You are responsible for producing the final audio for the podcast episode. Take the structured script provided by the Podcast Planner, which contains segments marked with 'speaker' (either 'host' or 'guest'), the 'text' for each segment, and the 'voice' model to use.

For each segment, use the 'text_to_speech' tool to generate audio, specifying the 'text' and 'voice' as provided. Ensure each segment is saved as a separate audio file.

After generating all segments, use the 'edit_podcast_audio' tool to combine these audio files into one seamless podcast episode. The audio files should be combined in the order they are provided in the script, with appropriate pauses between segments to simulate a natural conversation flow.

Your output should be the path to the final combined podcast episode audio file.
"""

)
audio_node = functools.partial(agent_node, agent=audio_agent, name="Audio")

     

In [29]:
def tool_node(state):
    """This runs tools in the graph

    It takes in an agent action and calls that tool and returns the result."""
    messages = state["messages"]
    # Based on the continue condition
    # we know the last message involves a function call
    last_message = messages[-1]
    # We construct an ToolInvocation from the function_call
    tool_input = json.loads(
        last_message.additional_kwargs["function_call"]["arguments"]
    )
    # We can pass single-arg inputs by value
    if len(tool_input) == 1 and "__arg1" in tool_input:
        tool_input = next(iter(tool_input.values()))
    tool_name = last_message.additional_kwargs["function_call"]["name"]
    action = ToolInvocation(
        tool=tool_name,
        tool_input=tool_input,
    )
    # We call the tool_executor and get back a response
    response = tool_executor.invoke(action)
    # We use the response to create a FunctionMessage
    function_message = FunctionMessage(
        content=f"{tool_name} response: {str(response)}", name=action.tool
    )
    # We return a list, because this will get added to the existing list
    return {"messages": [function_message]}

In [30]:
def router(state):
    # This is the router
    messages = state["messages"]
    last_message = messages[-1]
    if "function_call" in last_message.additional_kwargs:
        # The previus agent is invoking a tool
        return "call_tool"
    if "FINAL ANSWER" in last_message.content:
        # Any agent decided the work is done
        return "end"
    return "continue"


In [31]:
from langgraph.graph import END, StateGraph

# Assuming AgentState, tool_node, and all other nodes are defined elsewhere in your code.

# Initialize the graph with the state type
workflow = StateGraph(AgentState)

# Add nodes for each part of the workflow
workflow.add_node("Researcher", research_node)
workflow.add_node("Podcast Planner", podcast_planner_node)
workflow.add_node("Editor", editor_node)  # Editor node added
workflow.add_node("Audio", audio_node)

# Node to handle tool invocations
workflow.add_node("call_tool", tool_node)

# Define the flow from Researcher to Podcast Planner
workflow.add_conditional_edges(
    "Researcher",
    router,
    {"continue": "Podcast Planner", "call_tool": "call_tool", "end": END},
)

# Define the flow from Podcast Planner to Editor
workflow.add_conditional_edges(
    "Podcast Planner",
    router,
    {"continue": "Editor", "call_tool": "call_tool", "end": END},
)

# Define the flow from Editor to Audio
workflow.add_conditional_edges(
    "Editor",
    router,
    {"continue": "Audio", "call_tool": "call_tool", "end": END},
)

# Define the flow for the Audio node
workflow.add_conditional_edges(
    "Audio",
    router,
    {"continue": END, "call_tool": "call_tool", "end": END},  # After Audio, the process ends or calls a tool
)

# Define how the graph transitions back from calling a tool
workflow.add_conditional_edges(
    "call_tool",
    lambda state: state["sender"],  # Routes back to the original agent who invoked the tool
    {
        "Researcher": "Researcher",
        "Podcast Planner": "Podcast Planner",
        "Editor": "Editor",
        "Audio": "Audio",
    },
)

# Set the entry point for the graph to "Researcher"
workflow.set_entry_point("Researcher")

# Compile the graph
graph = workflow.compile()

In [None]:

from langchain_core.messages import HumanMessage

# Start the graph with an initial message that represents the podcast topic or question
initial_message = HumanMessage(
    content="Research the upcoming 2024 superbowl, some sub plots about Pat Mahomes, Kelce and Taylor swift as well as well as the halftime performance"
)

# Stream through the graph, processing each step according to the defined workflow
for state in graph.stream(
    {"messages": [initial_message], "sender": "User"},  # Initial state with the message from the user
    {"recursion_limit": 150}  # Set a limit to prevent infinite loops
):
    print(state)  # Print out the state at each step to observe the progress
    print("----")

# This loop will go through the research, planning, audio production, and potentially tool invocation steps,
# depending on how your nodes and router logic are set up.



{'Researcher': {'messages': [HumanMessage(content='```tool_code\n{"tavily_search_results_json": {"query": "2024 Super Bowl, Patrick Mahomes, Travis Kelce, Taylor Swift, halftime show"}}\n```', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': [{'category': 'HARM_CATEGORY_HATE_SPEECH', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_DANGEROUS_CONTENT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_HARASSMENT', 'probability': 'NEGLIGIBLE', 'blocked': False}, {'category': 'HARM_CATEGORY_SEXUALLY_EXPLICIT', 'probability': 'NEGLIGIBLE', 'blocked': False}]}, name='Researcher', id='run-5419c168-bfb4-49cd-9b29-84af3c3f2004-0', tool_calls=[], invalid_tool_calls=[], usage_metadata={'input_tokens': 182, 'output_tokens': 42, 'total_tokens': 224, 'input_token_details': {'cache_read': 0}})]}}
----
