# Get YouTube transcripts

- Install the [youtube-transcript-api](https://pypi.org/project/youtube-transcript-api/) package, along with `requests` and `beautifulsoup4`, which the import cell below also needs.

In [1]:
from urllib.parse import parse_qs, urlparse

import requests
from bs4 import BeautifulSoup
from youtube_transcript_api import YouTubeTranscriptApi
from youtube_transcript_api.formatters import TextFormatter

In [2]:
def get_title(url, timeout=30):
    """Fetch a page and return its ``<title>`` text without the YouTube suffix.

    Args:
        url: Address of the page to download (a YouTube watch page in this
            notebook).
        timeout: Seconds passed to ``requests.get`` as its ``timeout``
            argument, so a stalled connection cannot block the whole batch
            indefinitely.

    Returns:
        The title with newlines removed, surrounding whitespace stripped and
        a trailing ``"- YouTube"`` removed.

    Raises:
        requests.HTTPError: Propagated from ``raise_for_status()`` on a
            4xx/5xx response.
        ValueError: If the page contains no ``<title>`` element.
    """
    resp = requests.get(url, timeout=timeout)
    resp.raise_for_status()

    soup = BeautifulSoup(resp.text, "html.parser")
    title_tag = soup.find("title")
    if title_tag is None:
        # Fail with a message naming the page instead of an AttributeError
        # on ``None.text``.
        raise ValueError(f"no <title> element found at {url}")

    title = title_tag.text.replace("\n", "").strip()

    # Only drop the suffix at the end; a plain ``replace`` would also eat a
    # "- YouTube" that is part of the video's own title.
    suffix = "- YouTube"
    if title.endswith(suffix):
        title = title[: -len(suffix)].rstrip()
    return title


def get_video_id(url):
    """Extract the YouTube video ID from a video URL.

    Handles the standard watch-page form (``...?v=<id>``; other query
    parameters such as ``t=`` are ignored), ``youtu.be/<id>`` short links,
    and ``/embed/<id>`` or ``/shorts/<id>`` paths.

    Args:
        url: Video URL as a string.

    Returns:
        The video ID string.

    Raises:
        ValueError: If no video ID can be found in ``url``.
    """
    parsed_url = urlparse(url)

    # Watch URL: the ID is the first value of the ``v`` query parameter.
    ids = parse_qs(parsed_url.query).get("v")
    if ids:
        return ids[0]

    host = (parsed_url.hostname or "").lower()
    path_parts = [part for part in parsed_url.path.split("/") if part]

    # Short link: https://youtu.be/<id>
    if host in ("youtu.be", "www.youtu.be") and path_parts:
        return path_parts[0]

    # Path-based forms: /embed/<id> and /shorts/<id>
    if len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts"):
        return path_parts[1]

    raise ValueError(f"could not find a YouTube video id in {url!r}")


def get_transcript(url):
    """Return the transcript of the video at ``url`` as formatted text.

    The video ID is taken from ``url`` with ``get_video_id``; the transcript
    listing for that ID is asked for a transcript matching the language
    codes ``en`` / ``en-GB``, which is then fetched and rendered with
    ``TextFormatter``.

    Args:
        url: Video URL accepted by ``get_video_id``.

    Returns:
        Whatever ``TextFormatter.format_transcript`` produces for the
        fetched transcript.
    """
    available = YouTubeTranscriptApi.list_transcripts(get_video_id(url))
    english = available.find_transcript(["en", "en-GB"])
    return TextFormatter().format_transcript(english.fetch())


def append_transcript(url):
    """Save a video's transcript to disk and add a link to it in README.md.

    Writes ``transcripts/<video_id>.md`` (replacing any existing file) with
    the title underlined by ``---``, the source URL as an autolink, a blank
    line, and the transcript text. Then appends one Markdown list item
    linking to that file to ``README.md`` in the current working directory.

    Note: the README is opened in append mode, so calling this twice for the
    same video adds two index entries.

    Args:
        url: Video URL; passed to ``get_title``, ``get_video_id`` and
            ``get_transcript``.
    """
    import os  # local import so this cell does not depend on the import cell changing

    title = get_title(url)
    video_id = get_video_id(url)
    transcript = get_transcript(url)

    # A fresh checkout may not have the output directory yet.
    os.makedirs("transcripts", exist_ok=True)

    # Explicit UTF-8: titles and transcripts routinely contain non-ASCII
    # characters, and the platform default encoding may not handle them.
    with open(f"transcripts/{video_id}.md", "w", encoding="utf-8") as file:
        file.write(title + "\n---\n")
        file.write(f"<{url}>")
        file.write("\n\n")
        file.write(transcript)

    with open("README.md", "a", encoding="utf-8") as f:
        # Inline link syntax is [text](target); [text][label] is a reference
        # link and would not resolve to the transcript file.
        f.write(f"- [{title}](./transcripts/{video_id}.md)\n")

## Actually run it

In [3]:
# Watch-page URLs of the videos to process. Several carry a `t=` start
# offset; get_video_id reads only the `v` parameter, so the offset does not
# affect which video is fetched, but the URL is written to the transcript
# file exactly as listed here.
urls = [
    "https://www.youtube.com/watch?v=aChUZZOp654",
    "https://www.youtube.com/watch?v=1gm6UfD9qSc",
    "https://www.youtube.com/watch?v=nUb5O7lQKbE",
    "https://www.youtube.com/watch?v=yZkIoU4OO5A",
    "https://www.youtube.com/watch?v=1poWV0OLSMA",
    "https://www.youtube.com/watch?v=kZV-tnx6BN8",
    "https://www.youtube.com/watch?v=tU1y7hBSgiY",
    "https://www.youtube.com/watch?v=TvmYngoL8RQ",
    "https://www.youtube.com/watch?v=45Ja8pJU73s",
    "https://www.youtube.com/watch?v=nYFb43tEylk&t=424s",
    "https://www.youtube.com/watch?v=ipB6P1uzNYM",
    "https://www.youtube.com/watch?v=iI09bbH22vQ",
    "https://www.youtube.com/watch?v=fEXLftYYV8o",
    "https://www.youtube.com/watch?v=FzOY2tHyZ4w&t=531s",
    "https://www.youtube.com/watch?v=cSEgP4VNynQ",
    "https://www.youtube.com/watch?v=kEZZCQTSSAg",
    "https://www.youtube.com/watch?v=SIzFhnZ32Y0&t=1246s",
    "https://www.youtube.com/watch?v=lFwJF-_SUr0",
    "https://www.youtube.com/watch?v=jtCbIbi2dHI",
    "https://www.youtube.com/watch?v=11ZSXVZbQbA",
    "https://www.youtube.com/watch?v=v4QyfElo1fs",
    "https://www.youtube.com/watch?v=1eK0eidOA_U",
    "https://www.youtube.com/watch?v=aQOKa61YBYc",
    "https://www.youtube.com/watch?v=BoYBLn9hRqs&t=911s",
    "https://www.youtube.com/watch?v=ckBWoRHkUZ4",
    "https://www.youtube.com/watch?v=aTFsBqhpLes",
    "https://www.youtube.com/watch?v=Vyhn49uXyHk&t=330s",
    "https://www.youtube.com/watch?v=avM-XsaTBIc",
    "https://www.youtube.com/watch?v=F4wrUP4c5P4",
    "https://www.youtube.com/watch?v=j5rygXblZJU&t=302s",
    "https://www.youtube.com/watch?v=dQiLVamJRdk",
    "https://www.youtube.com/watch?v=er2voEn8ZDU&t=401s",
    "https://www.youtube.com/watch?v=QE4NT8MY-Zo",
    "https://www.youtube.com/watch?v=bVqGgvMnp4M",
    "https://www.youtube.com/watch?v=QAzE-_ocf1U",
    "https://www.youtube.com/watch?v=D8zqp3CgcbA",
    "https://www.youtube.com/watch?v=WeA-IqDEfMw",
    "https://www.youtube.com/watch?v=-3mB4MBITEI",
    "https://www.youtube.com/watch?v=ROBoq7kYuNE",
    "https://www.youtube.com/watch?v=CbHkdAv0XWA",
    "https://www.youtube.com/watch?v=MHZdX7WtdWc",
    "https://www.youtube.com/watch?v=6LGeBcUZ9YA",
    "https://www.youtube.com/watch?v=DLv2Fzhktb0&t=542s",
    "https://www.youtube.com/watch?v=_qiwccma8Hs",
    "https://www.youtube.com/watch?v=y1LUkfpTtu0",
    "https://www.youtube.com/watch?v=po3oGIicu-8&t=745s",
    "https://www.youtube.com/watch?v=7QYlExoEpvA",
    "https://www.youtube.com/watch?v=iGL7LtgC_0I",
    "https://www.youtube.com/watch?v=lrRdRxAqMHc&t=250s",
    "https://www.youtube.com/watch?v=O9YnLFrM7Fs&t=804s",
    "https://www.youtube.com/watch?v=e0kQ5WqjcU8&t=1490s",
    "https://www.youtube.com/watch?v=bgjvLQu5NlE&t=23s",
    "https://www.youtube.com/watch?v=53lTlpjM2sQ&t=63s",
    "https://www.youtube.com/watch?v=3X84AYoNEQw",
    "https://www.youtube.com/watch?v=WkYqHWThIpA&t=459s",
    "https://www.youtube.com/watch?v=3oFV88PzEFE",
    "https://www.youtube.com/watch?v=_Py8JOi3REg&t=677s",
    "https://www.youtube.com/watch?v=8jAMOAmYCJI",
    "https://www.youtube.com/watch?v=6Kv1e00rCbc",
    "https://www.youtube.com/watch?v=xb3IxAr4RCo",
    "https://www.youtube.com/watch?v=bQ-ta6s931c&t=133s",
    "https://www.youtube.com/watch?v=GKA2COJRt6M",
]
# Process the videos one at a time. The URL is printed before the work
# starts, so if a call raises, the last printed line identifies the video
# that failed; the loop stops there and later URLs are not processed.
# NOTE: append_transcript opens README.md in append mode, so re-running
# this cell adds a second index entry for every video already processed.
for url in urls:
    print(url)
    append_transcript(url)

https://www.youtube.com/watch?v=aChUZZOp654
https://www.youtube.com/watch?v=1gm6UfD9qSc
https://www.youtube.com/watch?v=nUb5O7lQKbE
https://www.youtube.com/watch?v=yZkIoU4OO5A
https://www.youtube.com/watch?v=1poWV0OLSMA
https://www.youtube.com/watch?v=kZV-tnx6BN8
https://www.youtube.com/watch?v=tU1y7hBSgiY
https://www.youtube.com/watch?v=TvmYngoL8RQ
https://www.youtube.com/watch?v=45Ja8pJU73s
https://www.youtube.com/watch?v=nYFb43tEylk&t=424s
https://www.youtube.com/watch?v=ipB6P1uzNYM
https://www.youtube.com/watch?v=iI09bbH22vQ
https://www.youtube.com/watch?v=fEXLftYYV8o
https://www.youtube.com/watch?v=FzOY2tHyZ4w&t=531s
https://www.youtube.com/watch?v=cSEgP4VNynQ
https://www.youtube.com/watch?v=kEZZCQTSSAg
https://www.youtube.com/watch?v=SIzFhnZ32Y0&t=1246s
https://www.youtube.com/watch?v=lFwJF-_SUr0
https://www.youtube.com/watch?v=jtCbIbi2dHI
https://www.youtube.com/watch?v=11ZSXVZbQbA
https://www.youtube.com/watch?v=v4QyfElo1fs
https://www.youtube.com/watch?v=1eK0eidOA_U
https://ww