# YouTube Video URL Fetcher

This script demonstrates how to use the YouTube Data API to fetch video URLs from a specific YouTube channel. The script performs the following tasks:

1. Retrieves the channel ID for a given channel name.
2. Retrieves the playlist ID for the uploads playlist of the channel.
3. Fetches all video IDs from the playlist.
4. Converts video IDs into YouTube video URLs.
5. Saves the list of video URLs to a specified file.

Place your YouTube API key in a `.env` file as shown below:

```dotenv
YOUTUBE_API_KEY=your_youtube_api_key_here
```

In [None]:
import os
from dotenv import load_dotenv
from googleapiclient.discovery import build

# Pull settings from a local .env file into the process environment.
load_dotenv()

# Credentials and target channel.
# The API key comes from the environment; it is None when the variable is unset.
api_key = os.environ.get("YOUTUBE_API_KEY")
# Name of the YouTube channel to search for.
channel_name = 'TelekomHilfe-Videos'

def get_channel_id(api_key, channel_name):
    """
    Retrieves the channel ID for a given channel name using the YouTube Data API.

    The lookup is a search query restricted to channels with ``maxResults=1``;
    the ID of the first (and only) returned item is used.
    NOTE(review): the returned channel is not compared against
    ``channel_name``, so a similarly named channel could be picked up --
    confirm this is acceptable for the channels being queried.

    Args:
        api_key (str): The API key for authenticating with the YouTube Data API.
        channel_name (str): The name of the YouTube channel to search for.

    Returns:
        str or None: The channel ID if found, otherwise None.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)

    # Search for the channel by name
    request = youtube.search().list(
        part='snippet',
        q=channel_name,
        type='channel',
        maxResults=1,
    )
    response = request.execute()

    # Use .get() so a response that carries no 'items' key is treated the same
    # as an empty result list instead of raising KeyError.
    items = response.get('items')
    if not items:
        return None
    return items[0]['id']['channelId']

def get_playlist_id(api_key, channel_id):
    """
    Retrieves the playlist ID for the uploads playlist of a given channel ID.

    Args:
        api_key (str): The API key for authenticating with the YouTube Data API.
        channel_id (str): The ID of the YouTube channel.

    Returns:
        str or None: The playlist ID for uploads if found, otherwise None.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)

    # Get the channel details, including the uploads playlist ID
    request = youtube.channels().list(
        part='contentDetails',
        id=channel_id,
    )
    response = request.execute()

    # Indexing response['items'] directly would raise KeyError when the key
    # is absent (presumably what an unknown channel ID produces -- TODO
    # confirm); .get() keeps the documented "None when not found" contract.
    items = response.get('items')
    if not items:
        return None

    # Walk the nested structure defensively so a partial resource also
    # results in None rather than KeyError.
    related = items[0].get('contentDetails', {}).get('relatedPlaylists', {})
    return related.get('uploads')

def get_video_ids_from_playlist(api_key, playlist_id):
    """
    Collects the video ID of every item in a playlist, following pagination.

    Args:
        api_key (str): The API key for authenticating with the YouTube Data API.
        playlist_id (str): The ID of the playlist to fetch video IDs from.

    Returns:
        list of str: The video IDs, in the order the API returned them.
    """
    service = build('youtube', 'v3', developerKey=api_key)
    collected = []

    # First page: up to 50 items per request
    page_request = service.playlistItems().list(
        part='contentDetails',
        playlistId=playlist_id,
        maxResults=50
    )

    # list_next() yields the request for the following page, or a falsy
    # value once there are no more pages, which ends the loop.
    while page_request:
        page = page_request.execute()
        for entry in page.get('items', []):
            collected.append(entry['contentDetails']['videoId'])
        page_request = service.playlistItems().list_next(page_request, page)

    return collected

def get_video_urls(video_ids):
    """
    Builds the watch-page URL for each video ID.

    Args:
        video_ids (list of str): The video IDs to turn into URLs.

    Returns:
        list of str: One full YouTube URL per input ID, in input order.
    """
    urls = []
    for vid in video_ids:
        urls.append(f"https://www.youtube.com/watch?v={vid}")
    return urls

def save_urls_to_file(urls, file_path):
    """
    Saves a list of video URLs to a specified file, one URL per line.

    Any missing parent directories of ``file_path`` are created first. An
    existing file at ``file_path`` is overwritten.

    Args:
        urls (list of str): The list of URLs to save.
        file_path (str): The path to the file where URLs will be saved.
    """
    # os.path.dirname() returns '' for a bare filename and os.makedirs('')
    # raises FileNotFoundError, so only create a directory when there is one.
    directory = os.path.dirname(file_path)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Explicit UTF-8 so the output does not depend on the platform's locale.
    with open(file_path, 'w', encoding='utf-8') as file:
        file.writelines(url + '\n' for url in urls)

def main():
    """
    Runs the whole pipeline: resolve the channel, find its uploads playlist,
    collect the video URLs and write them to a file.

    Prints a short status message for each outcome and returns nothing.
    """
    # Step 1: channel name -> channel ID
    channel_id = get_channel_id(api_key, channel_name)
    if not channel_id:
        print("Channel not found.")
        return

    # Step 2: channel ID -> uploads playlist ID
    playlist_id = get_playlist_id(api_key, channel_id)
    if not playlist_id:
        print("Playlist not found.")
        return

    # Step 3: playlist -> video IDs -> URLs
    video_ids = get_video_ids_from_playlist(api_key, playlist_id)
    video_urls = get_video_urls(video_ids)

    # Step 4: persist the URLs and report how many were written
    output_file = 'data/video_urls.txt'
    save_urls_to_file(video_urls, output_file)
    print(f"Fetched {len(video_urls)} video URLs and saved to {output_file}")


# Run the pipeline only when this code is executed as the main module.
if __name__ == "__main__":
    main()


# Downloading and Processing Audio Files from YouTube

This notebook automates extracting the audio track from YouTube videos and converting it to MP3 format, which is useful for collecting and processing audio content for various purposes.


### Libraries Used

- **`os`**: Provides a way to interact with the operating system, including file and directory operations.
- **`yt_dlp`**: The Python module of yt-dlp, a tool for downloading videos from YouTube and other video platforms; used here to download each video's audio and convert it to MP3.
- **`re`**: Supports regular expression operations to manipulate and validate filenames.

### Code Workflow

1. **Read Video URLs**:
   - Video URLs are read from `data/video_urls.txt`, with each URL stripped of extra whitespace.

2. **Configure `yt-dlp`**:
   - Set up options for `yt-dlp` to download the best available audio format and convert it to MP3 format.
   - The output files are saved in the directory `data/mp3_files/`.

3. **Download and Process Audio**:
   - For each URL, the script extracts video information and downloads the audio.
   - The audio file is then saved as an MP3 file using a sanitized filename.
   - If an error occurs during download, it is caught and reported.
   - After downloading, the script verifies the presence of the MP3 file and prints a confirmation message.

4. **Completion**:
   - A message is printed to indicate that all processes have been completed.

### Error Handling

- Errors during the download process are caught and printed, allowing the script to continue processing other URLs even if one fails.
- The existence of the MP3 file is checked to confirm successful download.

In [None]:
import os
import yt_dlp
import re

# Function to create a safe filename by removing or replacing invalid characters
# Characters that are dropped outright: \ / * ? : " < > |
_FORBIDDEN_CHARS = re.compile(r'[\\/*?:"<>|]')
# Any run of whitespace, which becomes a single underscore
_WHITESPACE_RUN = re.compile(r'\s+')


def safe_filename(filename):
    """
    Turn an arbitrary title into a lowercase, filesystem-friendly name.

    Forbidden characters are removed, each run of whitespace becomes one
    underscore, the result is cut to at most 255 characters and lowercased.
    """
    without_forbidden = _FORBIDDEN_CHARS.sub("", filename)
    underscored = _WHITESPACE_RUN.sub('_', without_forbidden)
    return underscored[:255].lower()

# Read video URLs from 'data/video_urls.txt'. Blank lines (for example a
# trailing empty line) are skipped so they are not handed to yt-dlp as URLs.
input_file_path = "data/video_urls.txt"
with open(input_file_path, "r", encoding="utf-8") as file:
    video_urls = [line.strip() for line in file if line.strip()]

# yt-dlp options for downloading and processing audio files
ydl_opts = {
    # Download the best available audio format
    'format': 'bestaudio/best',
    # Post-process the downloaded file to extract audio and convert to MP3
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'mp3',
        'preferredquality': '192',
    }],
    # Output template for the downloaded files
    'outtmpl': 'data/mp3_files/%(title)s.%(ext)s',
    # Suppress verbose output
    'quiet': True,
}

# The options never change, so a single downloader is reused for every URL.
with yt_dlp.YoutubeDL(ydl_opts) as ydl:
    for video_url in video_urls:
        try:
            # Extract information and download the video
            info_dict = ydl.extract_info(video_url, download=True)
            # Path yt-dlp derived from the output template; the audio
            # post-processor replaces the media extension with '.mp3'.
            downloaded_path = os.path.splitext(ydl.prepare_filename(info_dict))[0] + ".mp3"
        except Exception as e:
            # Report the failure and carry on with the remaining URLs
            print(f"Download error: {e}")
            continue

        # Fall back to the video ID when no title is available, so that
        # safe_filename() always receives a string.
        title = info_dict.get('title') or info_dict.get('id') or 'untitled'
        # Create a safe filename from the video title
        safe_title = safe_filename(title)
        # Define the path for the MP3 file
        mp3_filename = f"data/mp3_files/{safe_title}.mp3"

        # yt-dlp names the file after the raw title, which generally differs
        # from the sanitized name. Move the file to the sanitized path so the
        # check below looks at the file that was actually produced.
        if (os.path.abspath(downloaded_path) != os.path.abspath(mp3_filename)
                and os.path.exists(downloaded_path)):
            os.replace(downloaded_path, mp3_filename)

        # Check if the MP3 file has been successfully created
        if not os.path.exists(mp3_filename):
            print(f"File not found: {mp3_filename}")
            continue

        # Print a success message with the path of the downloaded MP3 file
        print(f"MP3 file successfully downloaded: {mp3_filename}")

# Indicate that all processes have completed
print("All operations completed.")


# MP3 Files Transcription

This notebook transcribes MP3 audio files using the Whisper model. It performs the following steps:

1. **Conversion of MP3 Files to WAV Format**:
   - MP3 files are converted to WAV format as Whisper works more efficiently with WAV files for transcription.

2. **Transcription of WAV Files**:
   - The converted WAV files are processed using the Whisper model to generate transcriptions.

3. **Saving Transcriptions**:
   - The transcriptions are saved as text files in a specified directory.

In [None]:
# transcribe_mp3_files.ipynb

import os
import whisper
from pydub import AudioSegment

# Directory layout used by the transcription steps.
# Source MP3 files to be processed:
mp3_folder = 'data/mp3_files'
# Destination for the converted WAV files:
wav_folder = 'data/wav_files'
# Destination for the text transcriptions:
transcript_folder = 'data/transcripts'

# Make sure both output directories exist; existing ones are left untouched.
os.makedirs(wav_folder, exist_ok=True)
os.makedirs(transcript_folder, exist_ok=True)

# Whisper "base" model, loaded once and shared by every transcription call.
model = whisper.load_model("base")

# Function to convert MP3 file to WAV format
def convert_mp3_to_wav(mp3_path, wav_path):
    """
    Convert an MP3 file to WAV format using the AudioSegment module.

    Parameters:
    mp3_path (str): Path to the input MP3 file.
    wav_path (str): Path where the output WAV file will be saved.
    """
    audio = AudioSegment.from_mp3(mp3_path)  # Load MP3 file
    # export() returns the file handle of the file it wrote; close it
    # explicitly instead of leaving that to garbage collection, so handles do
    # not accumulate when many files are converted in a row.
    exported = audio.export(wav_path, format='wav')  # Export as WAV file
    exported.close()

# Function to transcribe audio file in WAV format
def transcribe_audio(wav_path):
    """
    Run the module-level Whisper ``model`` on a WAV file and return its text.

    Parameters:
    wav_path (str): Path to the WAV file to be transcribed.

    Returns:
    str: The value stored under the 'text' key of the transcription result.
    """
    return model.transcribe(wav_path)['text']

# Process each MP3 file in the specified folder. The listing is sorted so the
# processing order is the same on every run.
for mp3_file in sorted(os.listdir(mp3_folder)):
    # Split off the real extension; str.replace('.mp3', ...) would also
    # rewrite a '.mp3' that appears in the middle of a filename.
    base_name, extension = os.path.splitext(mp3_file)
    if extension.lower() != '.mp3':  # Accept '.mp3' regardless of case
        continue

    mp3_path = os.path.join(mp3_folder, mp3_file)  # Full path to the MP3 file
    wav_path = os.path.join(wav_folder, base_name + '.wav')  # Full path to the WAV file
    transcript_path = os.path.join(transcript_folder, base_name + '.txt')  # Full path to the transcript file

    try:
        # Convert MP3 file to WAV format
        convert_mp3_to_wav(mp3_path, wav_path)
        # Transcribe the WAV file
        transcript = transcribe_audio(wav_path)
    except Exception as e:
        # One unreadable file should not abort the whole batch; report it and
        # move on to the next file.
        print(f"Transcription failed for {mp3_file}: {e}")
        continue

    # Save the transcription to a text file. UTF-8 is set explicitly so that
    # non-ASCII characters are written correctly whatever the platform's
    # default encoding is.
    with open(transcript_path, 'w', encoding='utf-8') as f:
        f.write(transcript)

    # Print a message indicating that the transcription has been saved
    print(f"Transcription saved for: {mp3_file}")
