In [1]:
import os
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter
from fpdf import FPDF
from time import time
from pytube import YouTube, Playlist
from concurrent.futures import ThreadPoolExecutor, as_completed

In [2]:
video_id = "rZ1vPXK2bTA"

def download_transcript(title, vid):
    try:
        transcript = YouTubeTranscriptApi.get_transcript(vid)
        formatter = TextFormatter()
        text_formatted = formatter.format_transcript(transcript)
        
        pdf = FPDF()
        pdf.add_page()
        pdf.set_font('Arial', size=8)
        pdf.write(5, text_formatted)
        pdf.output(title + ".pdf")
    except Exception as e:
        print(e)

In [3]:
video_list = [
		{'title': 'Communications', 'vid': 'MfqPp37KGBQ&t=283s'},
    ]


In [7]:
for item in video_list:
    print(item['title'], " : ", item['vid'])
    download_transcript(item['title'], item['vid'])

Communications  :  MfqPp37KGBQ&t=283s


In [11]:
prompt = """Please summarize the main topics and key points discussed in the following communication video. 
Identify any relevant subtopics or supporting arguments. Extract actionable learning points and provide practical tips for applying these concepts in communication.

<Text>
"""

In [13]:
# playlist_link = "https://www.youtube.com/playlist?list=PL6vMAFPIKMUgV3udSPPsX8DNiEojRUbZK" # Advanced English
playlist_link = "https://www.youtube.com/watch?list=PLgQfCCkY17g3H02NsG8Zpan6vVorDqdKH" # Professional English
sub_dir = "Jefferson_Fisher"

if not os.path.exists(sub_dir):
    os.makedirs(sub_dir)

video_links = Playlist(playlist_link).video_urls

start = time()

def get_video_title(link):
    title = YouTube(link).title
    download_transcript(title, link)
    return [title, link]

def download_transcript(title, url):
    try:
        vid = url.split("watch?v=")[1]
        transcript = YouTubeTranscriptApi.get_transcript(vid)
        formatter = TextFormatter()
        text_formatted = formatter.format_transcript(transcript)
        
        file_name = os.path.join(sub_dir, title + ".txt")
        with open(file_name, "w") as f:
            f.write(prompt + text_formatted + "\n</Text>")

        # pdf = FPDF()
        # pdf.add_page()
        # pdf.set_font('Arial', size=8)
        # pdf.write(5, text_formatted)
        # title = title.replace("?", "")
        # title = title.replace("|", "")
        # file_name = os.path.join(sub_dir, title + ".pdf")
        # pdf.output(file_name)
    except Exception as e:
        print(e)

processes = []
with ThreadPoolExecutor(max_workers=10) as executor:
    for url in video_links:
        processes.append(executor.submit(get_video_title, url))

video_titles = []
for task in as_completed(processes):
    video_titles.append(task.result())
    print(task.result())


print(f'Time taken: {time() - start}')

['Jefferson Fisher Compilation Part 30', 'https://www.youtube.com/watch?v=6_Z4LAVnYmw']
['Jefferson Fisher Compilation Part 33', 'https://www.youtube.com/watch?v=X7cH0gUg7fc']
['Jefferson Fisher Compilation Part 17', 'https://www.youtube.com/watch?v=Lz3DA5xcPKM']
['Jefferson Fisher Compilation Part 8', 'https://www.youtube.com/watch?v=wHTzIHd6rsM']
['Jefferson Fisher Compilation Part 5', 'https://www.youtube.com/watch?v=TcdEXLc_NdU']
['Jefferson Fisher Compilation Part 1', 'https://www.youtube.com/watch?v=B6c8ZbVMQJo']
['Jefferson Fisher Compilation Part 34', 'https://www.youtube.com/watch?v=IOCvuch68uA']
['Jefferson Fisher Compilation Part 15', 'https://www.youtube.com/watch?v=7l-gUal9FWk']
['Jefferson Fisher Compilation Part 7', 'https://www.youtube.com/watch?v=81NK7a0kYCs']
['Jefferson Fisher Compilation Part 12', 'https://www.youtube.com/watch?v=KJKVl31uIao']
['Jefferson Fisher Compilation Part 25', 'https://www.youtube.com/watch?v=OsovWJGePes']
['Jefferson Fisher Compilation Part 