<a href="https://colab.research.google.com/github/Saloni1707/TrainModel/blob/main/TTSagentLangraph.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
from typing import TypedDict
from langgraph.graph import StateGraph,END
from IPython.display import display,Audio,Markdown
from openai import OpenAI
from getpass import getpass
import os
# Ask for the API key interactively (getpass hides the typed value) and store
# it in the process environment under OPENAI_API_KEY.
# NOTE(review): presumably this is what the OpenAI() client created later in
# the notebook picks up, since no key is passed to it explicitly — confirm.
os.environ["OPENAI_API_KEY"]=getpass("Enter your key: ")

Enter your key: ··········


In [27]:
# Single OpenAI client shared by every node function in this notebook.
# No credentials are passed here; presumably the client reads OPENAI_API_KEY
# from the environment — confirm against the OpenAI SDK documentation.
client = OpenAI()
class AgentState(TypedDict):
    """State dictionary passed between the nodes of the TTS graph."""

    # Raw text supplied by the caller.
    input_text: str
    # Text after the content-type specific rewrite step.
    processed_text: str
    # Synthesised speech as raw bytes.
    audio_data: bytes
    # Path of the temporary audio file, when one is written.
    audio_path: str
    # Label used for routing and voice selection.
    content_type: str

In [28]:
###Node functions
import io
import tempfile
def classify_input(state:AgentState) -> AgentState:
    """Classify ``state["input_text"]`` and store the label in ``state["content_type"]``.

    The model is asked to answer with one of 'general', 'poem', 'news' or
    'joke'. Its reply is free text, so it is normalised here: the first word
    of the reply that matches an allowed label wins, and anything else
    (including an empty or missing reply) falls back to 'general'. This
    guarantees the stored label is always one of the four allowed values.

    Args:
        state: Graph state; only ``input_text`` is read.

    Returns:
        The same ``state`` object with ``content_type`` updated.
    """
    allowed_labels = ("general", "poem", "news", "joke")

    response = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role":"system","content":"Classify the content as one of:'general','poem','news','joke'."},
            {"role":"user","content":state["input_text"]},
        ],
    )

    # The message content may be None; treat that like an empty reply.
    reply = (response.choices[0].message.content or "").strip().lower()

    # Replace every non-letter with a space so replies such as "'poem'",
    # "poem." or "category: joke" still yield a clean label.
    words = "".join(ch if ch.isalpha() else " " for ch in reply).split()
    state["content_type"] = next(
        (word for word in words if word in allowed_labels),
        "general",
    )
    return state

def process_general(state:AgentState) -> AgentState:
    """Pass general content through unchanged.

    Copies ``input_text`` into ``processed_text`` on the given state and
    returns that same state object.
    """
    original_text = state["input_text"]
    state["processed_text"] = original_text
    return state

def process_poems(state:AgentState) -> AgentState:
    """Ask the chat model to rewrite the input text as a short poem.

    The stripped reply is stored in ``state["processed_text"]`` and the same
    state object is returned.
    """
    prompt = [
        {"role":"system","content":"Rewrite the following text as a short, beautiful poem:"},
        {"role":"user","content":state["input_text"]},
    ]
    completion = client.chat.completions.create(model="gpt-4o-mini", messages=prompt)
    poem = completion.choices[0].message.content
    state["processed_text"] = poem.strip()
    return state

def process_news(state: AgentState) -> AgentState:
    """Ask the chat model to rewrite the input text in a news-anchor style.

    The stripped reply is stored in ``state["processed_text"]`` and the same
    state object is returned.
    """
    prompt = [
        {"role": "system", "content": "Rewrite the following text in a formal news anchor style:"},
        {"role": "user", "content": state["input_text"]},
    ]
    completion = client.chat.completions.create(model="gpt-4o-mini", messages=prompt)
    bulletin = completion.choices[0].message.content
    state["processed_text"] = bulletin.strip()
    return state

def process_joke(state:AgentState) -> AgentState:
    """Ask the chat model to turn the input text into a short joke.

    The stripped reply is stored in ``state["processed_text"]`` and the same
    state object is returned.
    """
    prompt = [
        {"role":"system","content":"Turn the following text into a short, funny joke:"},
        {"role":"user","content":state["input_text"]},
    ]
    completion = client.chat.completions.create(model="gpt-4o-mini", messages=prompt)
    joke = completion.choices[0].message.content
    state["processed_text"] = joke.strip()
    return state

def text_to_speech(state:AgentState,save_file:bool=False) -> AgentState:
    """Synthesise ``state["processed_text"]`` to speech with the OpenAI TTS API.

    The voice is chosen from ``state["content_type"]``; labels without a
    dedicated voice use "alloy".

    Args:
        state: Graph state; ``processed_text`` and ``content_type`` are read.
        save_file: When true, the audio is also written to a temporary
            ``.mp3`` file and its path is stored in ``state["audio_path"]``.

    Returns:
        The same ``state`` object with ``audio_data`` (raw bytes) set, and
        ``audio_path`` set when ``save_file`` is true.
    """
    voice_map = {
        "general":"alloy",
        "poem":"nova",
        "news":"onyx",
        "joke":"shimmer",
    }
    voice = voice_map.get(state["content_type"],"alloy")

    # Keep the streaming response open only while reading from it, so the
    # connection is released before any state update or disk I/O happens.
    with client.audio.speech.with_streaming_response.create(
        model="tts-1",
        voice=voice,
        input=state["processed_text"],
    ) as response:
        audio_bytes = b"".join(response.iter_bytes())

    state["audio_data"] = audio_bytes

    if save_file:
        # delete=False keeps the file on disk after the handle is closed so
        # the stored path stays usable by the caller.
        with tempfile.NamedTemporaryFile(delete=False,suffix=".mp3") as temp_audio:
            temp_audio.write(audio_bytes)
        state["audio_path"] = temp_audio.name
    return state

In [33]:
# @title Default title text
# LangGraph pipeline: classify -> content-specific rewrite -> text to speech.
workflow=StateGraph(AgentState)
workflow.add_node("classify_input",classify_input)
workflow.add_node("process_general",process_general)
workflow.add_node("process_poems",process_poems)
workflow.add_node("process_news",process_news)
workflow.add_node("process_joke",process_joke)
workflow.add_node("text_to_speech",text_to_speech)

# Set the entry point of the graph.
workflow.set_entry_point("classify_input")

# Map each content type label to the node that rewrites that kind of text.
content_routes = {
    "general":"process_general",
    "poem":"process_poems",
    "news":"process_news",
    "joke":"process_joke"
}
workflow.add_conditional_edges(
    "classify_input",
    # Route any label that has no dedicated node through the "general" path
    # instead of failing on an unknown key.
    lambda x: x["content_type"] if x["content_type"] in content_routes else "general",
    content_routes,
)

# Every rewrite node feeds the text-to-speech node.
workflow.add_edge("process_general","text_to_speech")
workflow.add_edge("process_poems","text_to_speech")
workflow.add_edge("process_news","text_to_speech")
workflow.add_edge("process_joke","text_to_speech")

# Text to speech is the last step; mark it explicitly as the end of the graph.
workflow.add_edge("text_to_speech",END)

# Compile the graph into the runnable application.
app = workflow.compile()

In [30]:
import re
def filename(text, max_length=20):
    """Convert text to a valid and concise filename stem.

    The text is lower-cased, every character that is not a word character,
    whitespace or hyphen is dropped, and runs of whitespace/hyphens collapse
    to a single underscore. Leading and trailing underscores are removed
    (including one left behind by truncation), so the result never starts or
    ends with a separator.

    Args:
        text: Arbitrary input text.
        max_length: Maximum number of characters kept from the cleaned text.

    Returns:
        The cleaned stem, or "untitled" when nothing usable remains (for
        example empty or punctuation-only input). The fallback is returned
        as-is and is not shortened to ``max_length``.
    """
    sanitized = re.sub(r'[^\w\s-]', '', text.lower())
    sanitized = re.sub(r'[-\s]+', '_', sanitized)
    # Trim before truncating so leading separators do not use up the length
    # budget, then trim again in case the cut landed right after a separator.
    sanitized = sanitized.strip('_')[:max_length].rstrip('_')
    return sanitized or "untitled"

In [31]:
def run_tts(input_text:str,content_type:str,save_file:bool=True):
    """Run the compiled TTS graph on ``input_text``, play the audio and optionally save it.

    Args:
        input_text: Text to convert to speech.
        content_type: Label used to seed the graph state and to name the
            saved file. The graph's classification step writes its own value
            into the state, so the printed content type comes from the graph
            result while the file name uses this argument.
        save_file: When true, write the audio to ``../audio/`` as an mp3 and
            show a Markdown download link; otherwise only report that nothing
            was saved.

    Returns:
        The final state dictionary returned by the graph.
    """
    # Invoke the compiled graph (``app``) with a fully initialised state.
    result = app.invoke({
        "input_text":input_text,
        "processed_text":"",
        "audio_data":b"",  # bytes placeholder, matching AgentState.audio_data
        "audio_path":"",
        "content_type":content_type
    })
    print(f"Type of your content:{result['content_type']}")
    print(f"Processed Text:{result['processed_text']}")

    display(Audio(result['audio_data'],autoplay=True))
    if save_file:
        audio_dir = os.path.join('..','audio')
        os.makedirs(audio_dir,exist_ok=True)
        filen = filename(input_text)
        file_name = f"{content_type}_{filen}.mp3"
        file_path = os.path.join(audio_dir,file_name)
        with open(file_path,'wb') as f:
            f.write(result['audio_data'])
        print(f"Audio saved at:{file_path}")
        # Relative link intended for viewing the notebook on GitHub.
        github_relative_path = f"../audio/{file_name}"
        display(Markdown(f"[Download {content_type} audio: {filen}]({github_relative_path})"))
        print("Audio playback not supported here")
    else:
        print("Audio not saved")

    return result

In [None]:
# One sample text per content type label; each is run through the pipeline.
examples = {
    "general": "The quick brown fox jumps over the lazy dog.",
    "poem": "Roses are red, violets are blue, AI is amazing, and so are you!",
    "news": "Breaking news: Scientists discover a new species of deep-sea creature in the Mariana Trench.",
    "joke": "Why don't scientists trust atoms? Because they make up everything!",
}

# Run every sample through run_tts, saving the audio for each one.
for content_type, text in examples.items():
    print(f"\nProcessing example for {content_type} content:")
    print(f"Input text: {text}")
    result = run_tts(text, content_type, save_file=True)
    print("-" * 30)

print("All downloads processed!")

