# News Podcaster Demo
Use an OpenAI model to generate a podcast script from the day's top news headlines, then convert that script into audio with OpenAI text-to-speech.


In [61]:
# Load environment variables (python-dotenv; e.g. from a local .env file) so the
# keys read later with os.getenv ("NEWS_API", "OPENAI_API_KEY") are available.
from dotenv import load_dotenv
load_dotenv()

True

In [62]:
import json
import os  # required by os.getenv below; it was previously only imported in a later cell

import requests

# NewsAPI key is read from the environment (populated by load_dotenv above).
news_api = os.getenv("NEWS_API")

# Top headlines for the ABC News (AU) source.
url = f"https://newsapi.org/v2/top-headlines?sources=abc-news-au&apiKey={news_api}"

# Bound the wait and fail loudly on HTTP errors, so an error payload is never
# handed to the scraping cell (which expects an 'articles' key).
response = requests.get(url, timeout=30)
response.raise_for_status()
response_json = response.json()
print(json.dumps(response_json, indent=4))


{
    "status": "ok",
    "totalResults": 10,
    "articles": [
        {
            "source": {
                "id": "abc-news-au",
                "name": "ABC News (AU)"
            },
            "author": "Chris Calcino",
            "title": "Cairns artist David Hudson recalls Island of Dr Moreau experience with Marlon Brando and Val Kilmer",
            "description": "David Hudson has led an extraordinary life, but his stint as a half-human, half-bison hybrid alongside the Godfather star and Val Kilmer while filming The Island of Dr Moreau\u00a0remains his strangest claim to fame.",
            "url": "http://www.abc.net.au/news/2023-04-03/david-hudson-bison-man-brando-island-doctor-moreau-cairns/102171602",
            "urlToImage": "https://live-production.wcms.abc-cdn.net.au/80c7fd8e780bcaa4cddfa095c6fdf2a1?impolicy=wcms_crop_resize&cropH=1080&cropW=1920&xPos=0&yPos=0&width=862&height=485",
            "publishedAt": "2023-04-03T00:40:54Z",
            "content": "David Hu

In [38]:
from bs4 import BeautifulSoup

# Download every headline's page and rebuild its body text from the <p> tags.
# Each record keeps the API's title/url/description next to the scraped text.
articles = []

for article in response_json['articles']:
    page = requests.get(article['url'])
    parsed_page = BeautifulSoup(page.text, 'html.parser')
    paragraph_texts = (paragraph.text for paragraph in parsed_page.find_all('p'))
    articles.append({
        'title': article['title'],
        'url': article['url'],
        'description': article['description'],
        'content': ' '.join(paragraph_texts),
    })



In [41]:
# Inspect the first scraped article as a quick sanity check.
print(json.dumps(articles[0], indent=4))

{
    "title": "Cairns artist David Hudson recalls Island of Dr Moreau experience with Marlon Brando and Val Kilmer",
    "url": "http://www.abc.net.au/news/2023-04-03/david-hudson-bison-man-brando-island-doctor-moreau-cairns/102171602",
    "description": "David Hudson has led an extraordinary life, but his stint as a half-human, half-bison hybrid alongside the Godfather star and Val Kilmer while filming The Island of Dr Moreau\u00a0remains his strangest claim to fame.",
}


In [43]:
from markdownify import MarkdownConverter

# Run the three text fields of every article through markdownify (ATX-style
# headings). The articles are updated in place; 'url' is left untouched.
for article in articles:
    for field in ('title', 'description', 'content'):
        article[field] = MarkdownConverter(heading_style="atx").convert(article[field])

print(json.dumps(articles[0], indent=4))

{
    "title": "Cairns artist David Hudson recalls Island of Dr Moreau experience with Marlon Brando and Val Kilmer",
    "url": "http://www.abc.net.au/news/2023-04-03/david-hudson-bison-man-brando-island-doctor-moreau-cairns/102171602",
    "description": "David Hudson has led an extraordinary life, but his stint as a half-human, half-bison hybrid alongside the Godfather star and Val Kilmer while filming The Island of Dr Moreau\u00a0remains his strangest claim to fame.",
}


In [48]:
import tiktoken

# Rough size/cost check of the article payload before it is sent to the model.
encoding = tiktoken.encoding_for_model("gpt-4-turbo")

article_tokens = encoding.encode(str(articles))
total_token = len(article_tokens)
estimated_cost = total_token/1000*0.01  # 0.01 per 1,000 tokens (rate hard-coded here)

print(f"total_token: {total_token}")
print(f"total estimated cost: {estimated_cost}")

total_token: 21355
total estimated cost: 0.21355000000000002


In [50]:
from llama_index.llms import OpenAI

# Client for the model that writes the podcast script.
script_llm = OpenAI(model="gpt-4-1106-preview")

# NOTE(review): the prompt text is kept verbatim (including its typos, e.g. "site"
# for "cite"), because any change to it can change what the model returns.
script_prompt = f"""You help write podcast scripts for a podcast show called "Podgen". Your objective is to handpick a few selected stories within the given list and distil them into easy to understand stories. You  will write a script that is a conversation between an interviewer and a guest.
    The output format should be in markdown:
    Host: <Message>
    Guest: <Message>
    Host: <Message>
    ----
    Characters:
    Host: The host name is Alan, he displays insight, understanding, and tries to relate the concepts to other ideas. 
    Guest: The guest name is Taylor. He is a a commentoator and jouranlist specalised in discussing nuance within the news.
    ----
    Remember to always site your sources (i.e. url) in the markdown.
    The top news articles today are:
    {str(articles)}                    
"""

response = script_llm.complete(script_prompt)

print(response)

```markdown
Host: Welcome to another episode of Podgen, where we distill the news into digestible stories. I'm your host, Alan, and today we're joined by Taylor, a commentator and journalist known for discussing the nuances within the news. Taylor, it's great to have you on the show.

Guest: Thanks for having me, Alan. It's always a pleasure to dive into the stories that are shaping our world.

Host: Let's start with a rather unique story from the world of cinema. David Hudson, a Cairns artist, had an extraordinary experience working on the set of "The Island of Dr. Moreau" alongside Marlon Brando and Val Kilmer. Taylor, can you share more about this?

Guest: Absolutely, Alan. David Hudson's life is filled with remarkable experiences, but his role as a half-human, half-bison hybrid in the mid-1990s film stands out as his strangest claim to fame. The film, based on H.G. Wells' novel, featured Hudson in elaborate prosthetics, working closely with the legendary Marlon Brando. Despite the 

In [53]:
def split_text_by_speaker(text):
    """Turn a ``Speaker: message`` transcript into a dict of speeches.

    The text is stripped, split on newlines, and empty lines are discarded. Every
    remaining line that contains ``": "`` is split at the first occurrence: the
    part before it is the speaker label, the part after it is the speech. Lines
    without ``": "`` are skipped but still count towards the numbering.

    Args:
        text: The transcript as a single string.

    Returns:
        A dict mapping ``"<index>_host"`` (label contains ``"Host"``) or
        ``"<index>_guest"`` (any other label) to the speech, where ``<index>`` is
        the line's position among the non-empty lines.
    """
    non_empty_lines = (row for row in text.strip().split("\n") if row != '')

    utterances = {}
    for position, row in enumerate(non_empty_lines):
        label, separator, spoken = row.partition(": ")
        if not separator:
            # No "Speaker: " prefix on this line; nothing to record.
            continue
        role = 'host' if "Host" in label else 'guest'
        utterances[f"{position}_{role}"] = spoken

    return utterances

# Parse the generated script into a {"<line index>_<host|guest>": speech} mapping.
speaker_map = split_text_by_speaker(str(response))

In [54]:
speaker_map

{'1_host': "Welcome to another episode of Podgen, where we distill the news into digestible stories. I'm your host, Alan, and today we're joined by Taylor, a commentator and journalist known for discussing the nuances within the news. Taylor, it's great to have you on the show.",
 '2_guest': "Thanks for having me, Alan. It's always a pleasure to dive into the stories that are shaping our world.",
 '3_host': 'Let\'s start with a rather unique story from the world of cinema. David Hudson, a Cairns artist, had an extraordinary experience working on the set of "The Island of Dr. Moreau" alongside Marlon Brando and Val Kilmer. Taylor, can you share more about this?',
 '4_guest': "Absolutely, Alan. David Hudson's life is filled with remarkable experiences, but his role as a half-human, half-bison hybrid in the mid-1990s film stands out as his strangest claim to fame. The film, based on H.G. Wells' novel, featured Hudson in elaborate prosthetics, working closely with the legendary Marlon Bran

In [55]:
def strip_source_and_url_from_string(speaker_map):
    """Remove markdown ``[source](url)`` citations from every speech, in place.

    Args:
        speaker_map: Dict mapping a key to a speech string. Its values are
            overwritten with the cleaned text.

    Returns:
        The same ``speaker_map`` object, with each value stripped of citations
        and of leading/trailing whitespace.
    """
    import re

    # Match one citation at a time. The URL part accepts ordinary characters plus
    # one level of balanced parentheses (e.g. ".../Foo_(film)"). The previous
    # greedy ``.*`` ran to the LAST ")" on the line, deleting any speech that sat
    # between two citations or before a later parenthesis.
    citation = re.compile(r'\[source\]\((?:[^()]|\([^()]*\))*\)')

    for key, value in speaker_map.items():
        # Re-assigning an existing key while iterating is safe: the dict size never changes.
        speaker_map[key] = citation.sub('', value).strip()
    return speaker_map

# Drop "[source](url)" citations before the speeches are sent to text-to-speech below.
speaker_map = strip_source_and_url_from_string(speaker_map)
speaker_map


{'1_host': "Welcome to another episode of Podgen, where we distill the news into digestible stories. I'm your host, Alan, and today we're joined by Taylor, a commentator and journalist known for discussing the nuances within the news. Taylor, it's great to have you on the show.",
 '2_guest': "Thanks for having me, Alan. It's always a pleasure to dive into the stories that are shaping our world.",
 '3_host': 'Let\'s start with a rather unique story from the world of cinema. David Hudson, a Cairns artist, had an extraordinary experience working on the set of "The Island of Dr. Moreau" alongside Marlon Brando and Val Kilmer. Taylor, can you share more about this?',
 '4_guest': "Absolutely, Alan. David Hudson's life is filled with remarkable experiences, but his role as a half-human, half-bison hybrid in the mid-1990s film stands out as his strangest claim to fame. The film, based on H.G. Wells' novel, featured Hudson in elaborate prosthetics, working closely with the legendary Marlon Bran

In [56]:
import openai
import os
import requests
import io
import tempfile

def text_to_audio(file_name, text, voice):
    """Synthesize ``text`` with the OpenAI speech endpoint and return the audio two ways.

    Args:
        file_name: Prefix for the temporary file that receives the audio.
        text: The text to speak.
        voice: Voice name sent to the API; any falsy value falls back to "onyx".

    Returns:
        A ``(audio_filename, audio_bytes_io)`` tuple: the path of a temporary file
        (created with ``delete=False``, so the caller is responsible for removing
        it) and an ``io.BytesIO`` positioned at offset 0. Both hold the response body.

    Raises:
        Exception: If the API answers with any status other than 200. The message
            carries the status code and the start of the response body.
    """
    response = requests.post(
        "https://api.openai.com/v1/audio/speech",
        headers={
            "Authorization": f"Bearer {os.getenv('OPENAI_API_KEY')}",
        },
        json={
            "model": "tts-1",
            "input": text,
            "voice": f'{voice if voice else "onyx"}',
        },
    )

    # Surface the actual status and (truncated) error body; the old message
    # said "with status code" but never included it.
    if response.status_code != 200:
        raise Exception(
            f"Request failed with status code {response.status_code}: {response.text[:500]}"
        )

    # Read the body once and reuse it for both outputs, instead of iterating the
    # response twice.
    audio_data = response.content

    # A BytesIO built from bytes starts at offset 0, ready for the caller to read.
    audio_bytes_io = io.BytesIO(audio_data)

    # NOTE(review): the request does not set a response format, so the body is in
    # whatever format the API defaults to; confirm it matches the ".wav" suffix.
    with tempfile.NamedTemporaryFile(delete=False, prefix=file_name, suffix=".wav") as tmpfile:
        tmpfile.write(audio_data)
        audio_filename = tmpfile.name

    return audio_filename, audio_bytes_io

In [57]:
# Synthesize every speech (host -> "alloy", guest -> "nova") and append the audio
# bytes to one in-memory buffer, in script order.
# NOTE(review): segments are joined by raw byte concatenation; whether the result is
# a valid single audio stream depends on the format the API returns. Confirm.
audio_bytes_combined = io.BytesIO()
temp_files = []
try:
    for segment_key, segment_text in speaker_map.items():
        if 'host' in segment_key:
            segment_voice = "alloy"
        elif 'guest' in segment_key:
            segment_voice = "nova"
        else:
            # Previously an unmatched key silently reused the previous segment's
            # variables (or hit a NameError on the first item).
            raise ValueError(f"Unrecognised speaker key: {segment_key!r}")

        audio_filename, audio_bytes_io = text_to_audio(segment_key, segment_text, segment_voice)
        # Record the file straight away so it is cleaned up even if a later step fails.
        temp_files.append(audio_filename)
        audio_bytes_combined.write(audio_bytes_io.read())

    # Rewind so the next cell reads the combined audio from the beginning.
    audio_bytes_combined.seek(0)
finally:
    # Delete all the temporary files, whether or not every segment succeeded.
    for temp_file in temp_files:
        os.remove(temp_file)

In [58]:
# Move the working directory up one level; the relative "output/..." path used
# by the save cell is resolved from there.
# NOTE: the move is relative to the current directory, so re-running this cell
# climbs one more level each time.
starting_dir = os.getcwd()
print(starting_dir)
os.chdir("../")
print(os.getcwd())

/Users/Chris_Pang/Developer/Code_Repository/llm_podgen/src
/Users/Chris_Pang/Developer/Code_Repository/llm_podgen


In [59]:
# Save the combined audio to a dated folder under output/ (relative to the
# current working directory).
from datetime import date

output_dir = f'output/daily_news_{date.today()}'
# exist_ok=True creates the folder only when needed, without the separate
# exists-check (which could race with another process creating it).
os.makedirs(output_dir, exist_ok=True)

output_path = os.path.join(output_dir, 'openai_tts_combined_audio.wav')

# Rewind first so the whole buffer is written even if something read from it
# since it was filled.
audio_bytes_combined.seek(0)
with open(output_path, 'wb') as f:
    f.write(audio_bytes_combined.read())
# Leave the buffer rewound for any later reader.
audio_bytes_combined.seek(0)

0