# News Podcaster Demo
Use OpenAI Assistant to generate a script then convert it into audio scripts


In [1]:
from dotenv import load_dotenv
load_dotenv()

True

### Fetch new articles

In [4]:
import requests
import json
import os
import random

news_api = os.getenv("NEWS_API")

categories = [
    "business",
    "crime",
    "domestic",
    "education",
    "entertainment",
    "environment",
    "food",
    "health",
    "other",
    "politics",
    "science",
    "sports",
    "technology",
    "top",
    "tourism",
    "world",
]

countries = ["nz", "au", "us", "hk"]


category = random.choice(categories)
country = random.choice(countries)

print(country)
print(category)


nz
business


In [3]:
# url = f"https://newsapi.org/v2/top-headlines?category={category}&country={country}&apiKey={news_api}"
url = f"https://newsdata.io/api/1/news?category={category}&country={country}&full_content=1&apikey={news_api}"
response = requests.get(url)
response_json = response.json()
print(json.dumps(response_json, indent=4))

{
    "status": "success",
    "totalResults": 709,
    "results": [
        {
            "article_id": "293c67b37d3f7cc750a4123e7f37ba10",
            "title": "Aussie sevens dominate again",
            "link": "https://www.brisbanetimes.com.au/sport/rugby-union/aussie-sevens-dominate-again-20231210-p5eqch.html?ref=rss&utm_medium=rss&utm_source=rss_sport",
            "keywords": [
                "Sport / Rugby Union"
            ],
            "creator": null,
            "video_url": null,
            "description": "Australia's men and women enjoyed a near-perfect first day of the Sevens in Cape Town, topping their respective pools.",
            "content": "Aussie sevens dominate again Australia's men and women enjoyed a near-perfect first day of the Sevens in Cape Town, topping their respective pools.",
            "pubDate": "2023-12-10 19:48:02",
            "image_url": null,
            "source_id": "brisbanetimes",
            "source_priority": 189315,
            "count

In [12]:
from pydantic import BaseModel, Field
from typing import Optional

# Define the NewsArticle model
class NewsArticle(BaseModel):
    """
    NewsArticle model represents an article fetched from the news API.
    """
    id: str
    title: str
    link: str
    description: Optional[str] = Field(None)
    content: Optional[str] = Field(None)



In [13]:
articles = []

for article in response_json["results"]:
    article_obj = NewsArticle(
        id=article["article_id"],
        title=article["title"],
        url=article["link"],
        description=article["description"],
        content=article['content'],
    )
    articles.append(article_obj)


print(articles[0])

id='efe98270bcaddf3d86e29fcc25db035e' title='Impact of Imperfect Timekeeping on Quantum Control And Computing' url='https://hackaday.com/2023/12/09/impact-of-imperfect-timekeeping-on-quantum-control-and-computing/' description='In classical control theory, both open-loop and closed-loop control systems are commonly used. These systems are well understood and rather straightforward, controlling everything from washing machines to industrial equipment to …read more' content='In classical control theory, both open-loop and closed-loop control systems are commonly used. These systems are well understood and rather straightforward, controlling everything from washing machines to industrial equipment to the classical computing devices that make today’s society work. When trying to transfer this knowledge to the world of quantum control theory, however, many issues arise. The most pertinent ones involve closed-loop quantum control and the clocking of quantum computations. With physical limita

In [109]:
# from bs4 import BeautifulSoup
# articles = []

# for article in response_json['articles']:
#     article_url = article['url']
#     article_response = requests.get(article_url)
#     soup = BeautifulSoup(article_response.text, 'html.parser')
#     content = soup.find_all('p')
#     article_content = ' '.join([p.text for p in content])
#     article_obj = NewsArticle(title=article['title'], url=article['url'], description=article['description'], content=article_content)
#     articles.append(article_obj)



In [None]:
# from markdownify import MarkdownConverter

# for article in articles:
#     article.title = MarkdownConverter(heading_style="atx").convert(article.title)
#     article.description = MarkdownConverter(heading_style="atx").convert(article.description) if article.description else ""
#     article.content = MarkdownConverter(heading_style="atx").convert(article.content) if article.content else ""

# print(len(articles))
# print(articles[0])

In [14]:
import tiktoken
encoding = tiktoken.encoding_for_model("gpt-4-turbo")

total_token = len(encoding.encode(str(articles)))

print(f"total_token: {total_token}") 
print(f"total estimated cost: {total_token/1000*0.01}")

total_token: 7230
total estimated cost: 0.0723


### Generate characters

In [15]:
from pydantic import BaseModel, Field

class PodcastCharacter(BaseModel):
    """A model representing a podcast character"""

    default_voice: str = Field(..., description="Default voice for the podcaster")
    voice_options: list[str] = Field(
        ..., description="Available voice_options options of the podcaster"
    )
    name: str = Field(..., description="Name of the podcaster")
    role: str = Field(
        ..., description="The primary role of the character in the podcast"
    )
    interest: str = Field(..., description="Main area of interest or expertise")
    political_view: str = Field(..., description="Political leanings of the character")
    specialty: str = Field(..., description="Specific area of focus or specialty")
    description: str = Field(..., description="A brief description about the character")



# Creating instances of the PodcastCharacter model with descriptions for each character
alexa_reef = PodcastCharacter(
    default_voice="Nova",
    voice_options=["Shimmer", "Nova"],
    name="Alexa Reef",
    role="Environmental Journalist",
    interest="Marine Biology",
    political_view="Left-Leaning",
    specialty="Climate Change and Conservation",
    description="Passionate about environmental issues, Alexa brings a scientific perspective to discussions on climate change and sustainability.",
)

raj_cybertech = PodcastCharacter(
    default_voice="Echo",
    voice_options=["Alloy", "Echo", "Fable", "Onyx"],
    name="Raj Cybertech",
    role="Technology Reporter",
    interest="Tech Trends",
    political_view="Moderate",
    specialty="Impact of Technology on Society",
    description="With a keen eye on emerging technologies, Raj delves into how tech innovations influence modern society and economy.",
)

michael_neuro_johnson = PodcastCharacter(
    default_voice="Fable",
    voice_options=["Alloy", "Echo", "Fable", "Onyx"],
    name="Michael Neuro Johnson",
    role="Sports Commentator",
    interest="Sports and Fitness",
    political_view="Right-Leaning",
    specialty="Personal Responsibility in Sports",
    description="A former athlete, Michael offers insights into the sports world, emphasizing personal achievement and fitness.",
)

lena_logic = PodcastCharacter(
    default_voice="Shimmer",
    voice_options=["Shimmer", "Nova"],
    name="Lena Logic",
    role="Investigative Journalist",
    interest="International Relations",
    political_view="Balanced",
    specialty="World News and Political Affairs",
    description="Lena provides a nuanced and analytical perspective on global political affairs, valuing balanced reporting.",
)

elijah_byte = PodcastCharacter(
    default_voice="Onyx",
    voice_options=["Alloy", "Echo", "Fable", "Onyx"],
    name="Elijah Byte",
    role="Political Commentator",
    interest="Economics",
    political_view="Conservative",
    specialty="Economic and Political Debate",
    description="Elijah offers a conservative viewpoint on political and economic issues, often sparking lively debates.",
)

sophia_algorithm = PodcastCharacter(
    default_voice="Nova",
    voice_options=["Shimmer", "Nova"],
    name="Sophia Algorithm",
    role="Cultural Critic",
    interest="Social Justice",
    political_view="Progressive",
    specialty="Social and Cultural Impact",
    description="Sophia addresses social and cultural topics with a progressive lens, advocating for social justice and inclusivity.",
)

david_data = PodcastCharacter(
    default_voice="Alloy",
    voice_options=["Alloy", "Echo", "Fable", "Onyx"],
    name="David Data",
    role="Financial Analyst",
    interest="Market Trends",
    political_view="Centrist",
    specialty="Economic News Analysis",
    description="David brings a data-driven approach to economic analysis, focusing on market trends and financial policies.",
)

nora_neural = PodcastCharacter(
    default_voice="Shimmer",
    voice_options=["Shimmer", "Nova"],
    name="Nora Neural",
    role="Freelance Journalist",
    interest="Human Rights",
    political_view="Humanitarian",
    specialty="Global Health Issues",
    description="Nora highlights the human stories behind the news, focusing on human rights and global health concerns.",
)

# List of character instances
characters = [
    alexa_reef,
    raj_cybertech,
    michael_neuro_johnson,
    lena_logic,
    elijah_byte,
    sophia_algorithm,
    david_data,
    nora_neural,
]

characters  # Displaying the list of character instances with descriptions


[PodcastCharacter(default_voice='Nova', voice_options=['Shimmer', 'Nova'], name='Alexa Reef', role='Environmental Journalist', interest='Marine Biology', political_view='Left-Leaning', specialty='Climate Change and Conservation', description='Passionate about environmental issues, Alexa brings a scientific perspective to discussions on climate change and sustainability.'),
 PodcastCharacter(default_voice='Echo', voice_options=['Alloy', 'Echo', 'Fable', 'Onyx'], name='Raj Cybertech', role='Technology Reporter', interest='Tech Trends', political_view='Moderate', specialty='Impact of Technology on Society', description='With a keen eye on emerging technologies, Raj delves into how tech innovations influence modern society and economy.'),
 PodcastCharacter(default_voice='Fable', voice_options=['Alloy', 'Echo', 'Fable', 'Onyx'], name='Michael Neuro Johnson', role='Sports Commentator', interest='Sports and Fitness', political_view='Right-Leaning', specialty='Personal Responsibility in Sports

### Selecting Character and Articles

In [23]:
from pydantic import BaseModel, validator

class SelectedCharacter(BaseModel):
    name: str
    voice: str

    @validator('name')
    def validate_name(cls, v):
        valid_names = [character.name for character in characters]
        if v not in valid_names:
            raise ValueError(f'Name must be one of the characters from the characters list: {valid_names}')
        return v

    @validator('voice')
    def validate_voice(cls, v, values):
        if 'name' in values:
            character = next((char for char in characters if char.name == values['name']), None)
            if character and v not in character.voice_options:
                raise ValueError(f"Voice must be within the voice_options list for {values['name']}. Voice Options: {character.voice_options}")
        return v
    


class SelectedArticle(BaseModel):
    id: str = Field(..., description="The ID of the selected article.")
    title: str = Field(..., description="Example: 'Resident of home that exploded near DC as police tried to execute search warrant identified as suspect and presumed dead - CNN'")
    rationale: str = Field(..., description="A summary of the podcast show's content and themes.")


    @validator('id')
    def validate_id(cls, v):
        article_ids = [article.id for article in articles]
        if v not in article_ids:
            raise ValueError(f'ID must be one of the articles from the articles list: {article_ids}')
        return v
    
    # @validator('title')
    # def validate_title(cls, v):
    #     article_titles = [article.title for article in articles]
    #     if v not in article_titles:
    #         raise ValueError(f'Title must be one of the articles from the articles list: {article_titles}')
    #     return v







/var/folders/m3/5kvp29j54zb5q123n2f50z_40000gp/T/ipykernel_30834/4094992767.py:7: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  @validator('name')
/var/folders/m3/5kvp29j54zb5q123n2f50z_40000gp/T/ipykernel_30834/4094992767.py:14: PydanticDeprecatedSince20: Pydantic V1 style `@validator` validators are deprecated. You should migrate to Pydantic V2 style `@field_validator` validators, see the migration guide for more details. Deprecated in Pydantic V2.0 to be removed in V3.0. See Pydantic V2 Migration Guide at https://errors.pydantic.dev/2.4/migration/
  @validator('voice')
/var/folders/m3/5kvp29j54zb5q123n2f50z_40000gp/T/ipykernel_30834/4094992767.py:30: PydanticDeprecatedSince20: Pydantic V1 style `@validato

In [27]:
# Example usage of SelectedCharacter
selected_character = SelectedCharacter(name="Nora Neural", voice="Shimmer")
selected_article = SelectedArticle(
    id=articles[0].id,
    title=articles[0].title,
    rationale="Nora is a good fit for the article",
)

### Generate guidelines

In [28]:
from typing import List

# Define the model using Pydantic
class GuidelinePoint(BaseModel):
    """
    A model representing a guideline point for the podcast.
    Each guideline point has a category and a list of points.
    """
    category: str
    points: List[str]

# Creating instances of the model
guidelines = [
    GuidelinePoint(
        category="News and Current Events",
        points=[
            "Focus on the most recent and impactful stories.",
            "Provide a brief background for context, especially for ongoing issues.",
            "Highlight the implications of the news on society, economy, or specific communities.",
            "Include diverse perspectives and avoid bias in reporting."
        ]
    ),
    GuidelinePoint(
        category="Technology and Innovations",
        points=[
            "Explain complex technical concepts in layman's terms.",
            "Discuss the potential impact of new technologies on everyday life.",
            "Explore both the benefits and challenges associated with technological advancements.",
            "Include expert opinions or predictions about future trends."
        ]
    ),
    GuidelinePoint(
        category="Environmental and Sustainability Topics",
        points=[
            "Emphasize the relevance of environmental issues to the listener's daily life.",
            "Discuss both local and global perspectives on sustainability.",
            "Offer practical tips or solutions for living a more sustainable lifestyle.",
            "Highlight success stories and positive developments in environmental conservation."
        ]
    ),
    GuidelinePoint(
        category="Cultural and Social Issues",
        points=[
            "Address cultural trends, social movements, and major events in arts and entertainment.",
            "Respect and represent diverse viewpoints and cultural backgrounds.",
            "Discuss the broader societal impact of cultural phenomena.",
            "Incorporate historical context where relevant to understand current trends."
        ]
    )
    # More categories can be added similarly
]

guidelines # Display the guidelines list



[GuidelinePoint(category='News and Current Events', points=['Focus on the most recent and impactful stories.', 'Provide a brief background for context, especially for ongoing issues.', 'Highlight the implications of the news on society, economy, or specific communities.', 'Include diverse perspectives and avoid bias in reporting.']),
 GuidelinePoint(category='Technology and Innovations', points=["Explain complex technical concepts in layman's terms.", 'Discuss the potential impact of new technologies on everyday life.', 'Explore both the benefits and challenges associated with technological advancements.', 'Include expert opinions or predictions about future trends.']),
 GuidelinePoint(category='Environmental and Sustainability Topics', points=["Emphasize the relevance of environmental issues to the listener's daily life.", 'Discuss both local and global perspectives on sustainability.', 'Offer practical tips or solutions for living a more sustainable lifestyle.', 'Highlight success st

### Generate a show

In [29]:
# Define the Show model
class PodcastShow(BaseModel):
    """
    A model representing a podcast show. It includes the show description, host, guests, guidelines, and news articles.
    """
    show_description: str = Field(..., description="A summary of the podcast show's content and themes.")
    host_name: SelectedCharacter = Field(..., description="The host's name must be one of the Podcast Characters.")
    guest_names: List[SelectedCharacter] = Field(..., description="The guest must be one of the Podcast Characters.")
    guidelines: GuidelinePoint = Field(..., description="The guideline for the podcast show.")
    news_articles: List[SelectedArticle] = Field(..., description="The list of news articles to be discussed in the show.")
    show_rationale: str = Field(
        ..., 
        description="Provides the reasoning behind the selection of specific host and guests, articles, and guidelines for each article selected, ensuring a cohesive theme and relevant discussion."
    )

    from pydantic import model_validator
    @model_validator(mode='after')
    def check_guest_voices(self) -> 'PodcastShow':
        
        guests = self.guest_names
        host = self.host_name
        participants = guests + [host]
        voices = set()
        for person in participants:
            if person.voice in voices:
                raise ValueError('Guests must have unique voices.')
            voices.add(person.voice)
        return self


In [31]:
# Example use case of PodcastShow
def example_use_case():
    # Create an instance of PodcastShow with example data
    example_show = PodcastShow(
        show_description="Today's discussion will revolve around the impact of technology on society.",
        host_name=SelectedCharacter(
            voice="Alloy",
            name="Raj Cybertech",
        ),
        guest_names=[
            SelectedCharacter(
                voice="Shimmer",
                name="Alexa Reef",
            ),
            SelectedCharacter(voice="Echo", name="Michael Neuro Johnson"),
        ],
        guidelines=GuidelinePoint(
            category="Technology and Innovations",
            points=[
                "Explain complex technical concepts in layman's terms.",
                "Discuss the potential impact of new technologies on everyday life.",
                "Explore both the benefits and challenges associated with technological advancements.",
                "Include expert opinions or predictions about future trends.",
            ],
        ),
        news_articles=[
            SelectedArticle(
                id=articles[0].id,
                title=articles[0].title,
                rationale="This article is great because Raj is great at talking tech"
            )
        ],
        show_rationale="We've selected Raj Cybertech as the host to provide a balanced and expert perspective on the technological theme of today's show. Alexa Reef will be our guest to discuss the environmental implications of tech advancements.",
    )
    return example_show


example_use_case()

PodcastShow(show_description="Today's discussion will revolve around the impact of technology on society.", host_name=SelectedCharacter(name='Raj Cybertech', voice='Alloy'), guest_names=[SelectedCharacter(name='Alexa Reef', voice='Shimmer'), SelectedCharacter(name='Michael Neuro Johnson', voice='Echo')], guidelines=GuidelinePoint(category='Technology and Innovations', points=["Explain complex technical concepts in layman's terms.", 'Discuss the potential impact of new technologies on everyday life.', 'Explore both the benefits and challenges associated with technological advancements.', 'Include expert opinions or predictions about future trends.']), news_articles=[SelectedArticle(id='efe98270bcaddf3d86e29fcc25db035e', title='Impact of Imperfect Timekeeping on Quantum Control And Computing', rationale='This article is great because Raj is great at talking tech')], show_rationale="We've selected Raj Cybertech as the host to provide a balanced and expert perspective on the technological 

In [75]:
from llama_index.llms import OpenAI
from llama_index.program import OpenAIPydanticProgram

prompt_template_str = """
You are a podcast writer for a show named 'PodGen'. Generate content for today's show with a host, a list of guests (min 1 guest, max 3 guests), guidelines and news articles (min 1 article, max 5 articles). The show should pick the relevant guest, host and guideline based on the news articles provided.
The articles should all share a cohensive theme and can be linked together in a wider topic.
Use the following available options:
Host: {host}

Guests: {guests}

Guidelines: {guidelines}

News Articles: {articles}
"""
program = OpenAIPydanticProgram.from_defaults(
    llm=OpenAI(model="gpt-4-1106-preview"),
    output_cls=PodcastShow,
    prompt_template_str=prompt_template_str,
    verbose=True,
)

In [76]:
output = program(host=characters, guests=characters, guidelines=guidelines, articles=articles)


Function call: PodcastShow with args: {
  "show_description": "Today's episode of 'PodGen' dives into the fascinating world of technological advancements and their impact on society. We'll explore the latest in quantum computing, the cultural influence of gaming, advancements in space exploration, and the ethical considerations of technological progress.",
  "host_name": {
    "name": "Raj Cybertech",
    "voice": "Echo"
  },
  "guest_names": [
    {
      "name": "Lena Logic",
      "voice": "Shimmer"
    },
    {
      "name": "David Data",
      "voice": "Alloy"
    }
  ],
  "guidelines": {
    "category": "Technology and Innovations",
    "points": [
      "Explain complex technical concepts in layman's terms.",
      "Discuss the potential impact of new technologies on everyday life.",
      "Explore both the benefits and challenges associated with technological advancements.",
      "Include expert opinions or predictions about future trends."
    ]
  },
  "news_articles": [
    

In [None]:

selected_articles = [article for article in articles if article.id in [selected_article.id for selected_article in output.news_articles]]

In [77]:
template = f"""You help write podcast scripts for a show called "Podgen". Your objective is to write a podcast script based on the show's description, characters, guideline and the articles.
Show description: {output.show_description}
Show rationale: {output.show_rationale}
----
Character details:
Host: {output.host_name} 
Guest(s): {output.guest_names}
----
Distill these stories into easy-to-understand narratives, following these guidelines:
{output.guidelines}
In additional to the guidelines, you must not talk about each story individually. Instead, you must bring all these stories together into one narrative. I.e. each guest must be able to link a article with another article.
----
The script should be a conversation between the interviewer (Host) and the guest(s), with no additional information like headers or subheaders. Host and Guest dialogues should be clearly marked. Example:
Raj Cybertech: <Message>
Nora Neural: <Message>
---
You must use all the news articles provided. The news articles are as followed:
{selected_articles}
Each article was selected for a reason. The reason are as followed:
{output.news_articles}
---
At the end of the show, ask the audience if they have any questions for the host and guest regarding to the content of the show.
"""

In [78]:
from llama_index.llms import OpenAI

response = OpenAI(model="gpt-4-1106-preview", temperature=0.1).complete(template)

print(response)

Raj Cybertech: Welcome to PodGen, where we decode the complex web of technology and its societal ripples. I'm your host, Raj Cybertech, and today we're threading the needle through quantum computing, the cultural tapestry of gaming, the vastness of space exploration, and the roadways of the future with Tesla's Cybertruck. Joining me are Lena Logic, with her finger on the pulse of world news, and David Data, our economic analyst. Let's dive in. Lena, quantum computing is a term that buzzes with potential. Can you simplify this for us?

Lena Logic: Absolutely, Raj. Imagine a computer that doesn't just work in ones and zeros, but in a state that can be both at the same time. That's the crux of quantum computing. However, a recent article highlighted a hiccup in this tech—timekeeping. Quantum computers need precise timing to function, but the more we push the clock rate, the more accuracy suffers. It's a delicate dance of speed and precision.

David Data: And that precision is crucial, not

In [48]:
def split_text_by_speaker(text, host_name, guest_names):
    speaker_text = {}
    lines = [line for line in text.strip().split("\n") if line != '']
    for i, line in enumerate(lines):
        if ": " in line:
            speaker, speech = line.split(": ", 1)
            if host_name in speaker:
                speaker_key = f"{i} {host_name}"
            elif any(guest_name in speaker for guest_name in guest_names):
                speaker_key = f"{i} {speaker}"
            else:
                continue  # Skip lines that don't match the host or any guest
            speaker_text[speaker_key] = speech
    return speaker_text

host_name = output.host_name.name
guest_names = [guest.name for guest in output.guest_names]
speaker_map = split_text_by_speaker(str(response), host_name, guest_names)


In [49]:
speaker_map

{'0 Raj Cybertech': "Welcome to PodGen, where we decode the tech shaping our world. I'm your host, Raj Cybertech, and today we're diving into the pulse of technological innovation. Joining me are two brilliant minds: Lena Logic, with her analytical prowess on global affairs, and David Data, our economic analysis maestro. Welcome to the show!",
 '1 Lena Logic': 'Thanks for having me, Raj. Excited to be here!',
 '2 David Data': 'Pleasure to be on board, Raj. Looking forward to our discussion.',
 '3 Raj Cybertech': "Let's kick things off with quantum computing. A recent article highlighted the impact of imperfect timekeeping on quantum control and computing. Lena, can you break this down for us in layman's terms?",
 '4 Lena Logic': "Absolutely, Raj. Quantum computing is like upgrading from a bicycle to a sports car in the computing world. But this sports car is so advanced, it needs a super precise clock to work correctly. The article discusses how even tiny errors in timekeeping can caus

In [None]:
# def strip_source_and_url_from_string(speaker_map):
#     import re
#     for key, value in speaker_map.items():
#         speaker_map[key] = re.sub(r'\[Source\]\(.*\)', '', value).strip()
#     return speaker_map

# speaker_map = strip_source_and_url_from_string(speaker_map)
# speaker_map


In [70]:
import os
import requests
import io
import tempfile

def text_to_audio(file_name, text, voice):
    response = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers={
            "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}",
        },
        json={
            "model": "tts-1",
            "input": text,
            "voice": f'{voice if voice else "alloy"}',
        },
    )

    # audio_file_path = f"{file_name}.wav"
    # with open(audio_file_path, "wb") as audio_file:
    #     for chunk in response.iter_content(chunk_size=1024 * 1024):
    #         audio_file.write(chunk)

    # # To play the audio in Jupyter after saving
    # Audio(audio_file_path)
    # Check if the request was successful
    if response.status_code != 200:
        raise Exception("Request failed with status code")
    # ...
    # Create an in-memory bytes buffer
    audio_bytes_io = io.BytesIO()

    # Write audio data to the in-memory bytes buffer
    for chunk in response.iter_content(chunk_size=1024 * 1024):
        audio_bytes_io.write(chunk)

    # Important: Seek to the start of the BytesIO buffer before returning
    audio_bytes_io.seek(0)

    # Save audio to a temporary file
    with tempfile.NamedTemporaryFile(delete=False, prefix=file_name, suffix=".wav") as tmpfile:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            tmpfile.write(chunk)
        audio_filename = tmpfile.name

    return audio_filename, audio_bytes_io

In [65]:
def extract_speaker_name(string):
    name = string.split(' ', 1)[1]
    return name

def get_voice_for_speaker(speaker):
    if speaker in output.host_name.name:
        voice = output.host_name.voice
    else:
        for guest in output.guest_names:
            if guest.name == speaker:
                voice = guest.voice
    return voice


In [72]:
audio_bytes_combined = io.BytesIO()
temp_files = []
for item in speaker_map.items():
    speaker_name = extract_speaker_name(item[0])
    # Assuming get_voice_for_speaker is a function that retrieves the voice for the given speaker
    speaker_voice = get_voice_for_speaker(speaker_name)
    audio_filename, audio_bytes_io = text_to_audio(
        item[0], item[1], speaker_voice.lower()
    )
    audio_bytes_combined.write(audio_bytes_io.read())
    temp_files.append(audio_filename)
audio_bytes_combined.seek(0)

# # Delete all the temporary files after audio combined
for temp_file in temp_files:
    os.remove(temp_file)

In [None]:
# audio_bytes_combined = io.BytesIO()
# temp_files = []
# for item in speaker_map.items():
#     if 'host' in item[0]:
#         audio_filename, audio_bytes_io = text_to_audio(item[0], item[1], "alloy")
#     elif 'guest' in item[0]:
#         audio_filename, audio_bytes_io = text_to_audio(item[0], item[1], "nova")
#     audio_bytes_combined.write(audio_bytes_io.read())
#     temp_files.append(audio_filename)
# audio_bytes_combined.seek(0)

# # Delete all the temporary files after audio combined
# for temp_file in temp_files:
#     os.remove(temp_file)

In [73]:
print(os.getcwd())
os.chdir("../")
print(os.getcwd())

/Users/Chris_Pang/Developer/Code_Repository/llm_podgen/src
/Users/Chris_Pang/Developer/Code_Repository/llm_podgen


In [74]:
# Save the combined audio to a file in the output folder
# Check if the output directory exists, if not, create it
from datetime import date

audio_file = f'podgen_news_{country}_{category}_{date.today()}'
output_dir = f'output/podgen_news'
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

with open(os.path.join(output_dir, f'{audio_file}.wav'), 'wb') as f:
    f.write(audio_bytes_combined.read())
audio_bytes_combined.seek(0)

0