In [15]:
from youtube_transcript_api import YouTubeTranscriptApi
from pytube import Playlist, YouTube
import json
import re

def sanitize_filename(filename):
    """Return *filename* with file-system-hostile characters removed.

    The characters stripped are backslash, forward slash, ``*``, ``?``,
    ``:``, double quote, ``<``, ``>`` and ``|``. Everything else, including
    whitespace, is left untouched.
    """
    forbidden_chars = re.compile(r'[\\/*?:"<>|]')
    cleaned = forbidden_chars.sub("", filename)
    return cleaned

def fetch_transcripts(playlist_url):
    """Fetch the transcript of every video in a YouTube playlist.

    Processing is best-effort per video: a video whose title or transcript
    cannot be retrieved is reported on stdout and does not abort the rest
    of the playlist.

    Args:
        playlist_url: URL handed to pytube's ``Playlist``.

    Returns:
        A list of ``(title, transcript)`` tuples, in the order yielded by
        ``playlist.video_urls``. ``title`` is the sanitized video title (or
        the video ID when the title could not be read) and ``transcript``
        is the value returned by ``YouTubeTranscriptApi.get_transcript``.
        Videos without a retrievable transcript are omitted.
    """
    playlist = Playlist(playlist_url)
    all_transcripts = []

    for video_url in playlist.video_urls:
        yt = YouTube(video_url)  # Use pytube's YouTube object to access video details
        video_id = yt.video_id  # Get the video ID

        # Reading the title can itself raise; previously that escaped the
        # loop and lost every remaining video. Fall back to the video ID.
        try:
            video_title = sanitize_filename(yt.title)
        except Exception as e:
            print(f"Could not fetch title for video {video_id}: {e}")
            video_title = video_id

        try:
            transcript = YouTubeTranscriptApi.get_transcript(video_id)
        except Exception as e:
            print(f"Could not fetch transcript for video {video_title} ({video_id}): {e}")
        else:
            all_transcripts.append((video_title, transcript))
            print(f"Transcript fetched for video: {video_title}")

    return all_transcripts

def save_transcripts_to_json(transcripts, filename):
    """Write transcripts to *filename* as a JSON array and report it on stdout.

    Each ``(title, transcript)`` pair in *transcripts* becomes one
    single-key object ``{title: transcript}`` in the array. The file is
    written as UTF-8 with four-space indentation, and non-ASCII characters
    are stored unescaped.
    """
    with open(filename, mode="w", encoding="utf-8") as out:
        records = []
        for video_title, segments in transcripts:
            records.append({video_title: segments})
        json.dump(records, out, indent=4, ensure_ascii=False)

    print(f"Transcripts saved to {filename}")

def save_transcripts_to_txt(transcripts, filename):
    """Write transcripts to *filename* as plain text and report it on stdout.

    For every ``(title, transcript)`` pair the file gets a ``*title*``
    heading followed by a blank line, then the ``'text'`` value of each
    transcript entry on its own line, then two blank lines. The file is
    written as UTF-8.
    """
    with open(filename, mode="w", encoding="utf-8") as out:
        for video_title, segments in transcripts:
            out.write(f"*{video_title}*\n\n")
            out.writelines(f"{segment['text']}\n" for segment in segments)
            out.write("\n\n")

    print(f"Text transcripts saved to {filename}")


# Playlists to export; each entry is a watch URL carrying a `list=` parameter.
playlist_urls = [
    'https://www.youtube.com/watch?v=Ab-1wMFj3DA&list=PLMcG1Hs2JbcsGGJ84BtG2fClp7SF7K9jU',
    'https://www.youtube.com/watch?v=xoJ6vmK9m3Q&list=PLMcG1Hs2JbcsyDndXARl6TVtBRCal0VHD',
]

# Export each playlist to one JSON file and one text file. Both are named
# after the first video for which a transcript was actually fetched.
for url in playlist_urls:
    transcripts = fetch_transcripts(url)

    # Nothing to write for this playlist: report it and move on.
    if not transcripts:
        print(f"No transcripts fetched for playlist: {url}")
        continue

    first_video_title = transcripts[0][0]  # Title half of the first (title, transcript) pair
    json_filename = f"transcript_{sanitize_filename(first_video_title)}.json"
    txt_filename = f"transcript_{sanitize_filename(first_video_title)}.txt"

    save_transcripts_to_json(transcripts, json_filename)
    save_transcripts_to_txt(transcripts, txt_filename)

Transcript fetched for video: PI System Basics - Course Introduction
Transcript fetched for video: PI System Basics - What can the PI System do
Transcript fetched for video: PI System Basics - PI System Components
Transcript fetched for video: PI System Basics - Building Blocks of the PI System
Transcript fetched for video: PI System Basics - PI Time
Transcript fetched for video: PI System Basics - Building Blocks in Action
Transcript fetched for video: PI System Basics - Learning Platform Overview
Transcript fetched for video: PI System Basics - YouTube Channel Overview
Transcript fetched for video: PI System Basics - myOSIsoft and Customer Portal Overview
Transcript fetched for video: PI System Basics - PI Square and feedback.osisoft.com
Transcript fetched for video: OSIsoft PI Basics- Map of the PI System
Transcript fetched for video: OSIsoft PI Basics- What are PI Assets, PI Attributes and PI Tags
Transcript fetched for video: OSIsoft PI Basics- Time in the PI System
Transcript fet