# Getting Started
Podcastfy: Your GenAI-Powered Companion for Transforming Multi-Source Text into Captivating Audio Conversations

In [None]:
import os
from podcastfy.content_parser.content_extractor import ContentExtractor
from podcastfy.content_generator import ContentGenerator
from podcastfy.text_to_speech import TextToSpeech
from podcastfy.utils.config import load_config

In [None]:
%pip install ipython
from IPython.display import Audio, display

def embed_audio(audio_file):
	"""
	Shows a playable audio widget for the given file inside the notebook.

	A confirmation line is printed once the widget has been displayed. Any
	exception raised along the way is reported with print() rather than
	propagated to the caller.

	Args:
		audio_file (str): Path to the audio file.
	"""
	try:
		# Build the widget first, then hand it to the notebook display machinery.
		player = Audio(audio_file)
		display(player)
		print(f"Audio player embedded for: {audio_file}")
	except Exception as e:
		print(f"Error embedding audio: {str(e)}")

## Setup

The project uses a combination of a `.env` file for managing API keys and sensitive information, and a `config.yaml` file for non-sensitive configuration settings. Follow these steps to set up your configuration:

1. Create a `.env` file in the root directory of the project.
2. Add your API keys and other sensitive information to the `.env` file. For example:

   ```
   JINA_API_KEY=your_jina_api_key_here
   GEMINI_API_KEY=your_gemini_api_key_here
   ELEVENLABS_API_KEY=your_elevenlabs_api_key_here
   OPENAI_API_KEY=your_openai_api_key_here
   ```

3. The `config.yaml` file in the root directory contains non-sensitive configuration settings. You can modify this file to adjust parameters such as output directories, text-to-speech settings, and content generation options. By default, generated audio is written to `data/audio` and transcripts to `data/transcripts`.

The application will automatically load the environment variables from `.env` and the configuration settings from `config.yaml` when it runs.

See [Configuration](usage/config.md) if you would like to further customize settings.

In [5]:
# Load configuration (API keys come from `.env`, other settings from `config.yaml`)
config = load_config()

content_extractor = ContentExtractor(config.JINA_API_KEY)
content_generator = ContentGenerator(config.GEMINI_API_KEY)
tts_openai = TextToSpeech(api_key=config.OPENAI_API_KEY)
# The ElevenLabs backend needs its own key; passing the OpenAI key here was a copy-paste slip.
tts_11labs = TextToSpeech(model='elevenlabs', api_key=config.ELEVENLABS_API_KEY)

## Generate podcast from URL

This code demonstrates the process of generating a podcast from a single URL:
1. Extract content from the URL
2. Generate a Q&A transcript from the extracted content
3. Convert the transcript to speech using OpenAI's TTS model
4. Save the generated audio file to data/audio

In [6]:
# Steps 1-2: extract the page content and turn it into a Q&A transcript
url = "www.souzatharsis.com"
content = content_extractor.extract_content(url)
transcript = content_generator.generate_qa_content(content)

# Preview only the beginning of the transcript
print("Generated transcript:")
print(transcript[:100])

# Steps 3-4: convert the transcript to speech and save it in the configured audio directory
audio_dir = config.get('output_directories')['audio']
audio_file = os.path.join(audio_dir, "single_url_audio.mp3")
tts_openai.convert_to_speech(transcript, audio_file)
print(f"Generated audio saved to: {audio_file}")

Generated transcript:
<Person1> "Welcome to Podcastfy - Your Personal GenAI Podcast! I'm really excited to be talking abou
[('"Welcome to Podcastfy - Your Personal GenAI Podcast! I\'m really excited to be talking about Tharsis Souza today, an incredible computer scientist making waves in the tech world! He\'s making a real impact."', '"Oh, I\'ve heard his name! Isn\'t he deeply involved with data-driven products? What\'s he up to these days?"'), ('"Totally! He\'s super passionate about that. Right now, he\'s leading the charge as the Senior Vice President of Product Management and Modeling Engineering at Two Sigma Investments. Plus, he\'s also a lecturer at the prestigious Columbia University. Talk about impressive!"', '"Wow! Columbia University and Two Sigma Investments, that\'s a pretty powerful combination. He must be teaching some pretty high-level stuff. What\'s his area of expertise at Columbia?"'), ('"Well, he\'s actually a faculty member in their Master of Science in Applied An

In [7]:
# Embed the audio file generated in the previous cell.
# `audio_file` is the path that cell assigned, so it must have been run first.
embed_audio(audio_file)

Audio player embedded for: ./data/audio/single_url_audio.mp3


## Generate podcast from multiple sources

In [None]:
def generate_audio_from_multiple_sources(file_path):
	"""
	Generates audio from multiple sources using a file containing URLs.

	Every non-blank line of the file is treated as one URL. URLs containing
	'youtube.com' are handed to the extractor's YouTube transcript method,
	all others to its generic content extraction. The extracted texts are
	joined into one document, turned into a Q&A transcript and converted to
	speech with the "openai" TTS model.

	Args:
		file_path (str): Path to a text file with one URL per line.

	Returns:
		str: Path of the generated audio file.
	"""
	content_extractor = ContentExtractor(config.JINA_API_KEY)
	content_generator = ContentGenerator(config.GEMINI_API_KEY)
	tts = TextToSpeech(model="openai", api_key=config.OPENAI_API_KEY)

	with open(file_path, 'r') as file:
		# Drop blank lines so an empty string is never sent to the extractor.
		urls = [url for url in (line.strip() for line in file) if url]

	contents = []
	for url in urls:
		if 'youtube.com' in url:
			content = content_extractor.extract_youtube_transcript(url)
		else:
			content = content_extractor.extract_content(url)
		contents.append(content)

	combined_content = "\n\n".join(contents)
	transcript = content_generator.generate_qa_content(combined_content)

	audio_file = os.path.join(config.get('output_directories')['audio'], "multi_source_audio.mp3")
	tts.convert_to_speech(transcript, audio_file)
	print(f"Generated audio saved to: {audio_file}")
	# Returning the path lets callers embed or post-process the result.
	return audio_file

def generate_audio_from_multiple_sources_with_pdf(file_path, pdf_path):
	"""
	Generates audio from multiple sources using a file containing URLs and a PDF.

	Every non-blank line of the URL file is treated as one URL. URLs
	containing 'youtube.com' are handed to the extractor's YouTube
	transcript method, all others to its generic content extraction. The
	PDF content is appended after the URL contents, everything is joined
	into one document, turned into a Q&A transcript and converted to speech
	with the "openai" TTS model.

	Args:
		file_path (str): Path to a text file with one URL per line.
		pdf_path (str): Path to the PDF whose content is added to the sources.

	Returns:
		str: Path of the generated audio file.
	"""
	content_extractor = ContentExtractor(config.JINA_API_KEY)
	content_generator = ContentGenerator(config.GEMINI_API_KEY)
	tts = TextToSpeech(model="openai", api_key=config.OPENAI_API_KEY)

	with open(file_path, 'r') as file:
		# Drop blank lines so an empty string is never sent to the extractor.
		urls = [url for url in (line.strip() for line in file) if url]

	contents = []
	for url in urls:
		if 'youtube.com' in url:
			content = content_extractor.extract_youtube_transcript(url)
		else:
			content = content_extractor.extract_content(url)
		contents.append(content)

	# The PDF goes last, after all URL-based sources.
	pdf_content = content_extractor.extract_pdf_content(pdf_path)
	contents.append(pdf_content)

	combined_content = "\n\n".join(contents)
	transcript = content_generator.generate_qa_content(combined_content)

	audio_file = os.path.join(config.get('output_directories')['audio'], "multi_source_with_pdf_audio.mp3")
	tts.convert_to_speech(transcript, audio_file)
	print(f"Generated audio saved to: {audio_file}")
	# Returning the path lets callers embed or post-process the result.
	return audio_file

def generate_transcript_only(urls):
	"""
	Generates only a transcript (without audio) from URLs.

	The content of every URL is extracted, the texts are joined into one
	document and a Q&A transcript is generated from it. The transcript is
	written to "transcript_only.txt" in the configured transcripts
	directory, which is created if it does not exist yet.

	Args:
		urls (Iterable[str]): URLs whose content should be used.

	Returns:
		str: Path of the transcript file that was written.
	"""
	content_extractor = ContentExtractor(config.JINA_API_KEY)
	content_generator = ContentGenerator(config.GEMINI_API_KEY)

	contents = [content_extractor.extract_content(url) for url in urls]
	combined_content = "\n\n".join(contents)
	transcript = content_generator.generate_qa_content(combined_content)

	transcripts_dir = config.get('output_directories')['transcripts']
	# open() does not create missing directories; without this a fresh
	# checkout fails with FileNotFoundError on the first run.
	if transcripts_dir:
		os.makedirs(transcripts_dir, exist_ok=True)

	transcript_file = os.path.join(transcripts_dir, "transcript_only.txt")
	with open(transcript_file, 'w') as file:
		file.write(transcript)
	print(f"Generated transcript saved to: {transcript_file}")
	return transcript_file

def generate_audio_from_transcript(transcript_file, new_transcript=None):
	"""
	Converts a saved transcript, and optionally an extra transcript string, to audio.

	The file at `transcript_file` is always read and converted. When
	`new_transcript` is truthy it is converted as well, into a second
	audio file.

	Args:
		transcript_file (str): Path to the transcript file to read.
		new_transcript (str, optional): Additional transcript text to convert.
	"""
	def audio_path(filename):
		# Resolve a file name inside the configured audio output directory.
		return os.path.join(config.get('output_directories')['audio'], filename)

	tts = TextToSpeech(model="openai", api_key=config.OPENAI_API_KEY)

	# Transcript stored on disk
	with open(transcript_file, 'r') as handle:
		saved_text = handle.read()

	audio_file = audio_path("audio_from_existing_transcript.mp3")
	tts.convert_to_speech(saved_text, audio_file)
	print(f"Generated audio from existing transcript saved to: {audio_file}")

	# Nothing more to do unless an extra transcript was supplied
	if not new_transcript:
		return

	audio_file = audio_path("audio_from_new_transcript.mp3")
	tts.convert_to_speech(new_transcript, audio_file)
	print(f"Generated audio from new transcript saved to: {audio_file}")

def generate_audio_in_multiple_languages(transcript, languages=("French", "Portuguese")):
	"""
	Generates audio in multiple languages.

	For each requested language the transcript is translated with
	ContentGenerator.translate_content and the translation is converted to
	speech by the "elevenlabs" TTS model, passing the language name as the
	`voice` argument.

	Args:
		transcript (str): Source transcript to translate.
		languages (Iterable[str], optional): Language names. Each name is
			used as the translation target, as the `voice` argument and,
			lower-cased with spaces replaced by underscores, as the prefix
			of the output file name ("<language>_audio.mp3"). Defaults to
			French and Portuguese.

	Returns:
		list[str]: Paths of the generated audio files, in the order of `languages`.
	"""
	tts = TextToSpeech(model="elevenlabs", api_key=config.ELEVENLABS_API_KEY)
	content_generator = ContentGenerator(config.GEMINI_API_KEY)
	audio_dir = config.get('output_directories')['audio']

	audio_files = []
	for language in languages:
		translated_transcript = content_generator.translate_content(transcript, language)
		file_stem = language.lower().replace(" ", "_")
		audio_file = os.path.join(audio_dir, f"{file_stem}_audio.mp3")
		tts.convert_to_speech(translated_transcript, audio_file, voice=language)
		print(f"Generated {language} audio saved to: {audio_file}")
		audio_files.append(audio_file)

	return audio_files

# Example usage: calls each of the helper functions defined above in turn.
# NOTE: the file paths and example.com URLs below are placeholders; replace
# them with real inputs before running.
if __name__ == "__main__":

	# Podcast from the URLs listed in a text file
	generate_audio_from_multiple_sources("path/to/urls.txt")

	# Same URL list, plus the content of a PDF document
	generate_audio_from_multiple_sources_with_pdf("path/to/urls.txt", "path/to/document.pdf")

	# Transcript only; the returned file path is reused by the next call
	transcript_file = generate_transcript_only(["https://example.com/article1", "https://example.com/article2"])

	# Audio from the saved transcript file and from an inline transcript string
	generate_audio_from_transcript(transcript_file, "This is a new transcript for testing.")

	# French and Portuguese audio from a short sample transcript
	sample_transcript = "This is a sample transcript for multilingual audio generation."
	generate_audio_in_multiple_languages(sample_transcript)


