In [1]:
# Import API keys
from dotenv import load_dotenv
import os

load_dotenv()  # loads .env from the current working directory

True

In [2]:
from __future__ import annotations

import json
import time
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Dict, Iterable, List, Optional, Tuple

from yt_dlp import YoutubeDL
from yt_dlp.utils import DownloadError
from youtube_transcript_api import YouTubeTranscriptApi

from datetime import datetime, timezone

# Keep exception imports robust across youtube-transcript-api versions.
# Each class is resolved on its own: a single `from ... import (...)` fails as a whole
# when any one name is missing, and aliasing every name to `Exception` would make the
# first `except (...)` clause that lists them catch *all* errors.
try:
    import youtube_transcript_api._errors as _ytt_errors  # type: ignore
except Exception:  # pragma: no cover
    _ytt_errors = None  # type: ignore


def _transcript_error(name: str) -> type:
    """Return the library exception class called `name`, or a unique placeholder.

    The placeholder is a fresh `Exception` subclass that this notebook never raises,
    so `except` clauses naming it stay valid without matching unrelated errors.
    """
    found = getattr(_ytt_errors, name, None)
    if isinstance(found, type) and issubclass(found, BaseException):
        return found
    return type(name, (Exception,), {})


NoTranscriptFound = _transcript_error("NoTranscriptFound")
TranscriptsDisabled = _transcript_error("TranscriptsDisabled")
VideoUnavailable = _transcript_error("VideoUnavailable")
RequestBlocked = _transcript_error("RequestBlocked")
IpBlocked = _transcript_error("IpBlocked")
TooManyRequests = _transcript_error("TooManyRequests")

In [3]:
def parse_yyyymmdd(s: str) -> datetime:
    """Turn a compact ``YYYYMMDD`` string (e.g. "20240115") into midnight UTC of that day."""
    year, month, day = (int(s[lo:hi]) for lo, hi in ((0, 4), (4, 6), (6, 8)))
    return datetime(year, month, day, tzinfo=timezone.utc)

def upload_dt_from_meta(video_meta: Dict[str, Any]) -> Optional[datetime]:
    """
    Derive a UTC upload datetime from a video metadata dict.

    Prefers "upload_date" when it is an 8-digit string (YYYYMMDD); otherwise falls
    back to a numeric "timestamp" (unix seconds). Returns None when neither is usable.
    """
    raw_date = video_meta.get("upload_date")
    looks_like_date = isinstance(raw_date, str) and len(raw_date) == 8 and raw_date.isdigit()
    if looks_like_date:
        return parse_yyyymmdd(raw_date)

    epoch = video_meta.get("timestamp")
    return (
        datetime.fromtimestamp(epoch, tz=timezone.utc)
        if isinstance(epoch, (int, float))
        else None
    )

def ydl_extract_video_date(video_url: str) -> Optional[datetime]:
    """
    Look up a single video's upload datetime (UTC) via a full yt-dlp metadata extraction.

    Nothing is downloaded. Returns None when yt-dlp raises DownloadError or when the
    extracted info carries neither a usable "upload_date" nor a numeric "timestamp".
    """
    ydl_opts = {
        "quiet": True,
        "skip_download": True,
        "no_warnings": True,
    }
    try:
        with YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(video_url, download=False) or {}
    except DownloadError:
        # private / needs auth / unavailable / etc.
        return None

    # Same "upload_date, then timestamp" rule as for flat playlist entries; reuse the
    # shared helper so the two code paths cannot drift apart.
    return upload_dt_from_meta(info)

In [4]:
def is_playlist_url(url: str) -> bool:
    """Return True when the URL contains "list=" or "/playlist".

    This covers both /playlist?list=... and watch?v=...&list=... forms.
    """
    return any(marker in url for marker in ("list=", "/playlist"))

def iter_playlists_from_source(source_url: str) -> Iterable[Dict[str, Any]]:
    """
    Yield playlist descriptors (playlist_title / playlist_url / playlist_id) for a source.

    A URL that already looks like a playlist yields exactly one descriptor built from
    its flat yt-dlp info; anything else is treated as a channel /podcasts tab and its
    podcast playlists are enumerated.
    """
    if not is_playlist_url(source_url):
        yield from iter_podcast_playlists(source_url)
        return

    flat_info = ydl_extract_flat(source_url)
    title = flat_info.get("title") or "playlist"
    yield dict(
        playlist_title=title,
        playlist_url=source_url,
        playlist_id=flat_info.get("id"),
    )

In [5]:
def ydl_extract_flat(url: str) -> Dict[str, Any]:
    """
    List a URL's entries (podcast playlists / videos) with yt-dlp in "flat" mode.

    No media is downloaded. Raises RuntimeError when yt-dlp returns no info at all.
    """
    extractor_args = {
        "youtube": {"playlist_ajax": "true"},
        "youtubetab": {"approximate_date": "true"},
    }
    options = {
        "quiet": True,
        "skip_download": True,
        "extract_flat": True,
        "ignoreerrors": True,
        "extractor_args": extractor_args,
    }
    with YoutubeDL(options) as downloader:
        result = downloader.extract_info(url, download=False)
        if result is not None:
            return result
        raise RuntimeError(f"yt-dlp returned no info for {url}")


def normalize_url(u: str) -> str:
    """
    Make a possibly-relative YouTube URL absolute.

    - Strings starting with "http" are returned unchanged.
    - Protocol-relative strings ("//host/path") get an "https:" scheme; previously
      they were glued onto the YouTube host, producing a broken URL.
    - Anything else is resolved against https://www.youtube.com.
    """
    if u.startswith("http"):
        return u
    if u.startswith("//"):
        return "https:" + u
    if u.startswith("/"):
        return "https://www.youtube.com" + u
    return "https://www.youtube.com/" + u


def playlist_url_from_entry(entry: Dict[str, Any]) -> Optional[str]:
    """
    Work out a playlist URL for a yt-dlp entry (fields vary by extractor/version).

    Checks "url" then "webpage_url" for a non-empty string that, once normalized,
    contains "list=" or "/playlist". Failing that, a non-empty string "id" is turned
    into a /playlist?list=<id> URL. Returns None if nothing fits.
    """
    candidates = (entry.get(field) for field in ("url", "webpage_url"))
    for raw in candidates:
        if not (isinstance(raw, str) and raw):
            continue
        absolute = normalize_url(raw)
        if "list=" in absolute or "/playlist" in absolute:
            return absolute

    fallback_id = entry.get("id")
    if isinstance(fallback_id, str) and fallback_id:
        return f"https://www.youtube.com/playlist?list={fallback_id}"
    return None


def iter_podcast_playlists(podcasts_tab_url: str) -> Iterable[Dict[str, Any]]:
    """
    Yield one playlist descriptor per usable entry of a channel's /podcasts tab.

    Falsy entries and entries for which no playlist URL can be derived are skipped.
    """
    tab_info = ydl_extract_flat(podcasts_tab_url)
    entries = tab_info.get("entries") or []
    for entry in filter(None, entries):
        url = playlist_url_from_entry(entry)
        if url:
            yield {
                "playlist_title": entry.get("title") or "podcast",
                "playlist_url": url,
                "playlist_id": entry.get("id"),
            }


def iter_videos_from_playlist(playlist_url: str) -> Iterable[Dict[str, Any]]:
    """
    Yield a video_meta dict for every playlist entry that has a non-empty string id.

    Each dict carries video_id, video_title, a canonical watch URL, and whatever
    "upload_date" / "timestamp" the flat entry provided (either may be None).
    """
    listing = ydl_extract_flat(playlist_url)
    for entry in listing.get("entries") or []:
        if not entry:
            continue
        video_id = entry.get("id")
        if isinstance(video_id, str) and video_id:
            yield dict(
                video_id=video_id,
                video_title=entry.get("title") or "",
                video_url=f"https://www.youtube.com/watch?v={video_id}",
                upload_date=entry.get("upload_date"),  # YYYYMMDD sometimes
                timestamp=entry.get("timestamp"),      # unix seconds sometimes
            )


def load_processed_video_ids(manifest_path: Path) -> set[str]:
    """
    Collect the video ids that the JSONL manifest records with status "ok".

    A missing manifest gives an empty set. Lines that cannot be parsed or do not have
    the expected shape are skipped.
    """
    done: set[str] = set()
    if not manifest_path.exists():
        return done

    with manifest_path.open("r", encoding="utf-8") as handle:
        for raw_line in handle:
            try:
                record = json.loads(raw_line)
                if record.get("status") != "ok":
                    continue
                done.add(record["video_id"])
            except Exception:
                # Best-effort reader: a bad line must not hide the good ones.
                pass
    return done


def append_manifest(manifest_path: Path, record: Dict[str, Any]) -> None:
    """Append `record` as one JSON line to the manifest, creating parent folders if needed."""
    folder = manifest_path.parent
    folder.mkdir(parents=True, exist_ok=True)
    with manifest_path.open("a", encoding="utf-8") as handle:
        serialized = json.dumps(record, ensure_ascii=False)
        handle.write(f"{serialized}\n")


def transcript_to_text(segments: List[Dict[str, Any]]) -> str:
    """
    Flatten transcript segments into plain text, one non-empty segment per line.

    Segments look like {"text": "...", "start": 12.34, "duration": 3.21}. Newlines
    inside a segment become spaces; the result always ends with a newline.
    """
    cleaned = []
    for segment in segments:
        text = (segment.get("text") or "").replace("\n", " ").strip()
        if text:
            cleaned.append(text)
    return "\n".join(cleaned) + "\n"


def fetch_and_save_transcript(
    out_dir: Path,
    channel: str,
    playlist_meta: Dict[str, Any],
    video_meta: Dict[str, Any],
    ytt_api: YouTubeTranscriptApi,
    languages: List[str],
    sleep_s: float = 0.25,
) -> None:
    """
    Fetch one video's transcript and persist it, recording the outcome in the manifest.

    On success writes:
      - <out_dir>/json/<channel>/<video_id>.json  (metadata + transcript segments)
      - <out_dir>/txt/<channel>/<video_id>.txt    (plain text transcript)
    and appends an "ok" line to <out_dir>/manifest.jsonl.

    Failures are appended to the manifest as "no_transcript", "blocked" or "error".
    Only the "blocked" family (RequestBlocked / IpBlocked / TooManyRequests) is
    re-raised to the caller. The function always sleeps `sleep_s` seconds before
    returning or propagating.
    """
    manifest_file = out_dir / "manifest.jsonl"
    json_folder = out_dir / "json" / channel
    txt_folder = out_dir / "txt" / channel

    video_id = video_meta["video_id"]
    now = datetime.now(timezone.utc).isoformat()

    def log_outcome(status, error=None):
        # One manifest line per attempt; "error" is only present on failures and sits
        # between "status" and "fetched_at".
        entry = {
            "video_id": video_id,
            "channel": channel,
            "playlist_url": playlist_meta.get("playlist_url"),
            "status": status,
        }
        if error is not None:
            entry["error"] = error
        entry["fetched_at"] = now
        append_manifest(manifest_file, entry)

    try:
        # fetch() + to_raw_data() stands in for the earlier
        # ytt_api.get_transcript(video_id, languages=languages) call.
        segments = ytt_api.fetch(video_id, languages=languages).to_raw_data()

        payload = {
            "source": "youtube",
            "channel": channel,
            "playlist_title": playlist_meta.get("playlist_title"),
            "playlist_url": playlist_meta.get("playlist_url"),
            "playlist_id": playlist_meta.get("playlist_id"),
            "video_id": video_id,
            "video_title": video_meta.get("video_title"),
            "video_url": video_meta.get("video_url"),
            "fetched_at": now,
            "transcript": segments,  # list[{"text","start","duration"}]
        }

        for folder in (json_folder, txt_folder):
            folder.mkdir(parents=True, exist_ok=True)

        json_file = json_folder / f"{video_id}.json"
        json_file.write_text(
            json.dumps(payload, ensure_ascii=False, indent=2),
            encoding="utf-8",
        )
        txt_file = txt_folder / f"{video_id}.txt"
        txt_file.write_text(transcript_to_text(segments), encoding="utf-8")

        log_outcome("ok")

    except (TranscriptsDisabled, NoTranscriptFound, VideoUnavailable) as exc:
        log_outcome("no_transcript", str(exc))
    except (RequestBlocked, IpBlocked, TooManyRequests) as exc:
        log_outcome("blocked", str(exc))
        raise
    except Exception as exc:
        log_outcome("error", repr(exc))
    finally:
        time.sleep(sleep_s)

In [6]:
def video_id_from_input(s: str) -> Optional[str]:
    """
    Extract a YouTube video id from a raw id or a video URL.

    Accepts:
      - raw video id: "l0qFvnlCHI0"
      - watch url: "https://www.youtube.com/watch?v=l0qFvnlCHI0" (the `v` parameter
        may sit anywhere in the query string, e.g. "...watch?feature=share&v=<id>")
      - youtu.be short url: "https://youtu.be/l0qFvnlCHI0"
      - path-style urls: ".../shorts/<id>", ".../embed/<id>", ".../live/<id>"
    A trailing "#fragment" on a URL is ignored.
    Returns video_id or None.
    """
    from urllib.parse import parse_qs  # stdlib only; kept local to this function

    s = s.strip()
    if not s:
        return None

    # raw ID (YouTube IDs are typically 11 chars; be permissive but safe-ish)
    if not any(token in s for token in ("http", "/", "?", "&")):
        return s

    # Drop the fragment first, then separate the path from the query string.
    location, _, query = s.split("#", 1)[0].partition("?")

    # Query form: take `v` wherever it appears (blank values are ignored by parse_qs).
    v_values = parse_qs(query).get("v")
    if v_values:
        return v_values[0]

    # Path forms: the id is the path segment right after the marker.
    for marker in ("youtu.be/", "/shorts/", "/embed/", "/live/"):
        if marker in location:
            tail = location.split(marker, 1)[1]
            vid = tail.split("&", 1)[0].split("/", 1)[0]
            return vid or None

    return None


def iter_videos_from_list(inputs: Iterable[str]) -> Iterable[Dict[str, Any]]:
    """
    Turn raw ids / video URLs into video_meta dicts usable by fetch_and_save_transcript().

    Inputs from which no video id can be extracted are dropped. Title and date fields
    start out empty/None; the caller fills them later (via yt-dlp fallback).
    """
    parsed_ids = map(video_id_from_input, inputs)
    for video_id in parsed_ids:
        if video_id:
            yield dict(
                video_id=video_id,
                video_title="",  # will be populated via yt-dlp if desired
                video_url=f"https://www.youtube.com/watch?v={video_id}",
                upload_date=None,
                timestamp=None,
            )


In [7]:
def download_podcast_tab_transcripts(
    channels: List[Tuple[str, str]],
    out_dir: Path = Path("data/youtube"),
    languages: Optional[List[str]] = None,
    sleep_s: float = 0.25,
    min_upload_date: datetime = datetime(2024, 1, 1, tzinfo=timezone.utc),
    videos: Optional[List[str]] = None,
) -> None:
    """
    Download transcripts for recent videos and log every attempt to the manifest.

    Two modes:
      - Direct mode (``videos`` non-empty): only the listed ids/URLs are processed.
        ``channels`` is NOT used in this mode and output is filed under the fixed
        channel name "direct_videos".
      - Channel mode (otherwise): for each ``(channel_name, source_url)`` pair, every
        playlist found for the source is walked and its videos processed.

    In both modes a video is skipped when the manifest already records it with status
    "ok", when its upload date cannot be determined, or when it is older than
    ``min_upload_date``.

    Args:
        channels: (channel_name, playlist-or-/podcasts-tab URL) pairs.
        out_dir: root folder for json/, txt/ and manifest.jsonl.
        languages: transcript language preference; defaults to ["en"].
        sleep_s: pause after each transcript attempt.
        min_upload_date: cutoff; a naive datetime is interpreted as UTC.
        videos: optional raw video ids / URLs enabling direct mode.

    Raises:
        Whatever fetch_and_save_transcript re-raises (its "blocked" error family),
        which aborts the run.
    """
    if languages is None:
        languages = ["en"]

    # Upload dates computed by the date helpers are timezone-aware (UTC). Comparing
    # them with a naive cutoff raises TypeError, so read a naive cutoff as UTC.
    if min_upload_date.tzinfo is None:
        min_upload_date = min_upload_date.replace(tzinfo=timezone.utc)

    manifest_path = out_dir / "manifest.jsonl"
    processed = load_processed_video_ids(manifest_path)
    ytt_api = YouTubeTranscriptApi()

    def process_video(v, channel, pl, fill_missing_title=False):
        """Apply the skip rules to one video_meta dict and fetch its transcript."""
        if v["video_id"] in processed:
            return

        # Date filter: prefer dates already on the entry, fall back to a yt-dlp lookup.
        upload_dt = upload_dt_from_meta(v)
        if upload_dt is None:
            upload_dt = ydl_extract_video_date(v["video_url"])
        if upload_dt is None or upload_dt < min_upload_date:
            # skip if unknown or too old
            return

        if fill_missing_title and not v.get("video_title"):
            # Best effort only: a failed title lookup must not block the transcript.
            try:
                info = ydl_extract_flat(v["video_url"])
                v["video_title"] = info.get("title") or ""
            except Exception as exc:
                print(f"    (title lookup failed for {v['video_id']}: {exc!r})")

        print(f"    Fetching: {v.get('video_title','')} [{v['video_id']}] ({upload_dt.date()})")
        fetch_and_save_transcript(
            out_dir=out_dir,
            channel=channel,
            playlist_meta=pl,
            video_meta=v,
            ytt_api=ytt_api,
            languages=languages,
            sleep_s=sleep_s,
        )
        # Remember every attempted id so it is not retried within this run.
        processed.add(v["video_id"])

    # Direct videos/urls mode (still uses manifest + date filter).
    if videos:
        direct_playlist = {
            "playlist_title": "direct_videos",
            "playlist_url": None,
            "playlist_id": None,
        }
        for v in iter_videos_from_list(videos):
            process_video(v, "direct_videos", direct_playlist, fill_missing_title=True)
        return

    for channel, podcasts_url in channels:
        print(f"\n== Channel: {channel} | {podcasts_url}")
        playlists = list(iter_playlists_from_source(podcasts_url))

        for pl in playlists:
            print(f"  - Playlist: {pl['playlist_title']}")

            for v in iter_videos_from_playlist(pl["playlist_url"]):
                process_video(v, channel, pl)

In [8]:
# From the Regulating AI podcast: 
# https://www.youtube.com/watch?v=ldBrCJENOJ8 

# ReRun
# NOTE: `videos` is non-empty, so download_podcast_tab_transcripts takes its
# direct-videos branch: `channels` is not used and output is filed under the
# "direct_videos" channel name.
download_podcast_tab_transcripts(
    channels=[("Regulating_AI_Sanjay_Puri", "https://www.youtube.com/playlist?list=PLGBFGI3ApzqdS70H84yli1Q-j_cqqiJbL")],
    videos=[
        "https://www.youtube.com/watch?v=ldBrCJENOJ8", # EU vs US vs China 
        "https://www.youtube.com/watch?v=f62JIPXT8Cs", # EU AI Act
        "https://www.youtube.com/watch?v=NnRB_KEDjxs", # EU AI Act
        "https://www.youtube.com/watch?v=PnxM30ezvmQ", # AI innovation versus regulation
        "https://www.youtube.com/watch?v=mMK_YeRrYrI", # UNESCO’s AI Ethics Framework 
        "https://www.youtube.com/watch?v=glkmS74l_o0", # Francesca Rossi from IBM
        "https://www.youtube.com/watch?v=OxRk-YKbZyg", # Open-source AI
    ],
)



    Fetching: International AI Governance Explained: EU vs US vs China — The Future of AI Regulation [ldBrCJENOJ8] (2025-10-10)




    Fetching: Balancing AI Governance & Innovation: Lessons from the EU Ft. Lucilla Sioli | RegulatingAI Podcast [NnRB_KEDjxs] (2025-03-07)




    Fetching: Discussing rapid advancements of AI with Nick Begich | The RegulatingAI Podcast [PnxM30ezvmQ] (2025-04-16)




    Fetching: UNESCO’s AI Ethics Framework Ft. Prof. Emma Ruttkamp-Bloem | The RegulatingAI Podcast [mMK_YeRrYrI] (2025-03-07)




    Fetching: Exploring AI Ethics with Francesca Rossi: Insights from IBM's Global Leader | RegulatingAI Podcast [glkmS74l_o0] (2025-01-30)




    Fetching: Patrik Gayer on Open-Source AI and Global Policy | The RegulatingAI Podcast [OxRk-YKbZyg] (2025-01-22)


In [9]:
# rerun
# Pass podcast URLs as function inputs
# Channel mode: no `videos` argument is given, so every playlist found for each
# active (name, url) pair below is walked. Commented-out pairs are skipped.
channels = [
#    ("TWIML_AI_Sam_Charrington", "https://www.youtube.com/playlist?list=PLILZm3MRkvH83C46bZ4rPmB-jKWBltWkP"), # partially done
    ("Regulating_AI_Sanjay_Puri", "https://www.youtube.com/playlist?list=PLGBFGI3ApzqdS70H84yli1Q-j_cqqiJbL"),
#    ("AI_Governance_Luiza_Jarovsky", "https://www.youtube.com/@LuizaJarovsky/podcasts"), # done
#    ("CEPS_Think_Tank", "https://www.youtube.com/@CEPSThinkTank/podcasts"),
#    ("AI_Policy_CSIS", "https://www.youtube.com/@Legal4Tech/podcasts"), 
#    ("Legal4Tech", "https://www.youtube.com/@Legal4Tech/podcasts"),
#    ("AI_Policy_CSIS", "https://www.youtube.com/playlist?list=PLnArnDQHeUqeErR8mbkEGUqzGD2b5O3Cc"),
]
download_podcast_tab_transcripts(channels, out_dir=Path("data/youtube"), languages=["en"], sleep_s=0.5)


== Channel: Regulating_AI_Sanjay_Puri | https://www.youtube.com/playlist?list=PLGBFGI3ApzqdS70H84yli1Q-j_cqqiJbL
  - Playlist: RegulatingAI Podcast: Innovate Responsibly
    Fetching: AI Governance & Global Policy at ASEAN | Sanjay Puri in Conversation with Congressman Jay Obernolte [PYxvTZKBAgk] (2026-01-02)


ERROR: [youtube] Dyp6pOOUZbM: Video unavailable. This video has been removed by the uploader


    Fetching: AI in Everyday Life and Cognitive Assistance | Prof. Antonio Krüger | RegulatingAI Podcast [q9cYwaHg5_g] (2025-05-02)
    Fetching: RegulatingAI Podcast: How AI Impacts Civil Rights – A Conversation with Koustubh "K.J." Bagchi [VgMEU8x7sMw] (2025-05-02)
    Fetching: Empowering Africa Through AI: Dr. Shikoh Gitau's Vision for Equitable AI Development [CrTi5ZFrlXg] (2025-05-02)
    Fetching: Why the US Needs to Lead AI Innovation – Congressman Jake Auchincloss Speaks Out [77xencIffAg] (2025-05-02)
    Fetching: Congressman Gabe Amo on AI Policy and the Future of Responsible Regulation | RegulatingAI Podcast [AcMPFkOosKY] (2025-04-02)
    Fetching: Bipartisan AI Regulation and the Future of AI with Congressman Ted Lieu | Regulating AI Podcast [G8ja7UVN8AE] (2025-04-02)
    Fetching: The Intersection of AI, Regulation and Economic Growth with David Schweikert | Regulating AI Podcast [n9qMEKuf1DI] (2025-04-02)
    Fetching: The Future of Responsible AI with Dr. Richard Benjam

KeyboardInterrupt: 

In [None]:
# rerun
# NOTE: with a non-empty `videos` list the function ignores `channels` and files
# output under the "direct_videos" channel name.
download_podcast_tab_transcripts(
    channels=[("Legal4Tech", "https://www.youtube.com/playlist?list=PLj1oI1joplmggONYVxg_pa3LHZa0y2B84")],
    videos=[
        "https://www.youtube.com/watch?v=6tqsftznuN0", # EU AI Act enforcement
        "https://www.youtube.com/watch?v=0nWoQTqS8xQ", # EU AI Act enforcement
    ],
)

In [None]:
#rerun
# NOTE: with a non-empty `videos` list the function ignores `channels` and files
# output under the "direct_videos" channel name.
download_podcast_tab_transcripts(
    channels=[("CEPS_Think_Tank", "https://www.youtube.com/@CEPSThinkTank/podcasts")],
    videos=[
        "https://www.youtube.com/watch?v=yWyLDQN2Rfc",
        "https://www.youtube.com/watch?v=n7bDGczA5oE", 
        "https://www.youtube.com/watch?v=ky_ESWrvHwo", 
        "https://www.youtube.com/watch?v=StcBR-s8_E4", 
        "https://www.youtube.com/watch?v=IseK-FxpUOs", 
        "https://www.youtube.com/watch?v=zVa8HiLfmp8"
    ],
)

In [None]:
# TWIML episodes fetched individually.
# NOTE: with a non-empty `videos` list the function ignores `channels` and files
# output under the "direct_videos" channel name.
download_podcast_tab_transcripts(
    channels=[("TWIML_AI_Sam_Charrington", "https://www.youtube.com/watch?v=YgQxlKPeC-g&list=PLILZm3MRkvH83C46bZ4rPmB-jKWBltWkP")],
    videos=[
        "https://www.youtube.com/watch?v=l0qFvnlCHI0",
        "https://www.youtube.com/watch?v=fS34g5gdPsQ",
        "https://www.youtube.com/watch?v=nEbMiczJ_gc",
        "https://www.youtube.com/watch?v=HScABWB98Kw",
        #"https://www.youtube.com/watch?v=GmypOIq1LV8", #done
        "https://www.youtube.com/watch?v=zisE1p2plxk", 
    ],
)

In [None]:
# CSIS AI policy episodes fetched individually.
# NOTE: with a non-empty `videos` list the function ignores `channels` and files
# output under the "direct_videos" channel name.
download_podcast_tab_transcripts(
    channels=[("AI_Policy_CSIS", "https://www.youtube.com/playlist?list=PLnArnDQHeUqeErR8mbkEGUqzGD2b5O3Cc")],
    videos=[
        "https://www.youtube.com/watch?v=vTf4Skp2_bA", # AI regulation in China
        "https://www.youtube.com/watch?v=sC4_4twgWI4", # AI bubble concerns
        "https://www.youtube.com/watch?v=4DG-f-3b2rI", # EU AI Code of Practice
        "https://www.youtube.com/watch?v=DlJ1RRCRMds", # AI regulation in California 
        "https://www.youtube.com/watch?v=MAPqErwZzv0", # Trump against AI regulation
        "https://www.youtube.com/watch?v=uzSF29l5Lf0", # EU AI Act delays
        "https://www.youtube.com/watch?v=WkX_Xc60qYE", # future of American AI regulation
        "https://www.youtube.com/watch?v=frZcvn11uco", # AI's impact on the labor market
        "https://www.youtube.com/watch?v=GeJ0lWMkV1w", # Trump against AI regulation
        "https://www.youtube.com/watch?v=-pf5XlFYhrQ", # AI regulation in China
        "https://www.youtube.com/watch?v=P792YGrvTNs", # American and European AI policy
    ],
)

In [10]:
# Legal4Tech episodes fetched individually.
# NOTE: with a non-empty `videos` list the function ignores `channels` and files
# output under the "direct_videos" channel name.
download_podcast_tab_transcripts(
    channels=[("Legal4Tech", "https://www.youtube.com/playlist?list=PLj1oI1joplmggONYVxg_pa3LHZa0y2B84")],
    videos=[
        "https://www.youtube.com/watch?v=4cw-fXPOedo", # Digital Omnibus
        "https://www.youtube.com/watch?v=1JiPzmZ97kU", # Digital Omnibus
        "https://www.youtube.com/watch?v=sdJDwiXqGAI", # GenAI and copyright laws
        "https://www.youtube.com/watch?v=0srLa3fC0Lw", # Copyright laws & AI bubble concerns
        "https://www.youtube.com/watch?v=8KQAeken94w", # Digital regulation & judicial enforcement
        "https://www.youtube.com/watch?v=ZOwk3wrzSHs", # from EU law to national law
        "https://www.youtube.com/watch?v=zjtoyn06zxE", # EU Data Act 
        "https://www.youtube.com/watch?v=Sfza4J-YjGA", # AI regulation in the UK
        "https://www.youtube.com/watch?v=PQQf8gpUa0c", # intellectual property
        "https://www.youtube.com/watch?v=1U9FaQBzwJA", # synthetic data
        "https://www.youtube.com/watch?v=vVAccNCrUNA", # fighting big tech
        "https://www.youtube.com/watch?v=BdCa0INMiTo", # risk management for LLMs
        "https://www.youtube.com/watch?v=KUNKsybJPhY", # brand visibility
        "https://www.youtube.com/watch?v=6tqsftznuN0", # EU AI Act enforcement
        "https://www.youtube.com/watch?v=0nWoQTqS8xQ", # EU AI Act enforcement
    ],
)

In [None]:
# rerun
# Not all podcast episodes are accessible on GitHub
# NOTE: with a non-empty `videos` list the function ignores `channels` and files
# output under the "direct_videos" channel name.
download_podcast_tab_transcripts(
    channels=[("DataTalksClub", "https://www.youtube.com/playlist?list=PL3MmuxUbc_hK60wsCyvrEK2RjQsUi4Oa_")],
    videos=[
        "https://www.youtube.com/watch?v=LBuGzyOkx7c", # Explainability
        "https://www.youtube.com/watch?v=YncdlUscUOo", # Knowledge graphs & LLMs
        "https://www.youtube.com/watch?v=HwCR59VuYn4&t=7s", # Agentic AI
        "https://www.youtube.com/watch?v=1aMuynlLM3o", # AI infrastructure
        "https://www.youtube.com/watch?v=AlCFKbFIEM8", # Data engineering
        "https://www.youtube.com/watch?v=eC3RNuI6ow0", # LLM evals
    ],
)

In [None]:
# Process these separately: "DataTalksClub_Alexey_Grigorev", "https://www.youtube.com/@DataTalksClub/podcasts" 

In [None]:
# Correctly processed TWIML AI podcast videos
# https://www.youtube.com/watch?v=<video_id>
#3Zu8Zsqw26s # done
#GmypOIq1LV8 # done Assessing the Risks of Open AI Models (Sayash Kapoor)

# To download: 
# AI Regulation and Automated Decisioning with Peter van der Putten: https://www.youtube.com/watch?v=l0qFvnlCHI0
# RAG Risks: Why Retrieval-Augmented LLMs Are Not Safer with Sebastian Gehrmann: https://www.youtube.com/watch?v=fS34g5gdPsQ
# Ensuring Privacy for Any LLM with Patricia Thaine: https://www.youtube.com/watch?v=nEbMiczJ_gc
# AI Agents: Substance or Snake Oil with Arvind Narayanan: https://www.youtube.com/watch?v=HScABWB98Kw
# Assessing the Risks of Open AI Models with Sayash Kapoor: https://www.youtube.com/watch?v=GmypOIq1LV8 # done
# The Decentralized Future of Private AI with Illia Polosukhin: https://www.youtube.com/watch?v=zisE1p2plxk

# Successfully processed videos from The RegulatingAI Podcast
#iKg00zm99xk
#XNRvEwqrlfw
#m1Ipy31auzg
#dI8cF4Xy0dI
#2YvNfV5iZFo
#9X0u3jZJfX8
#cE2fOZx2wFY
#mQ1G3Kj5N8U
#zHq9Q2c8cF4
#rQm4QyJ6s7g
#xUe1Kk4n5jA
#4fX9m7PZq1s
#s0kKZk9f0Lw
#3V4sR8nF2zY
#0mC6Y7kqfRA
#ZK1Bv5xYpF4
#7rJ0qKX2mW8
#JXz4F8RZkqE
#6R3vFZK2H9q
#D2mF4Kp8q7E
#K9pZx5L4F2r
#M4xZp9q2L5E
#8FZK3m4q9xR
#5pFZ4L9Kx2E
#H4qZpF9m5xL
#m8ZKq5F9pL4
#Zp9Fq5K4xLm
#4FZqKp9m5xL
#9mZK4Fq5pLx
#q5ZK4F9mLpX
#L4ZK5F9pqmX
#pF9ZK4q5LmX
#9P3qC-21scQ
#CYR2Dwvqqfs
#57oNlwKiUVM

In [None]:
# I need the following from TWIML: 
# AI Regulation and Automated Decisioning with Peter van der Putten: https://www.youtube.com/watch?v=l0qFvnlCHI0
# RAG Risks: Why Retrieval-Augmented LLMs Are Not Safer with Sebastian Gehrmann: https://www.youtube.com/watch?v=fS34g5gdPsQ
# Ensuring Privacy for Any LLM with Patricia Thaine: https://www.youtube.com/watch?v=nEbMiczJ_gc
# AI Agents: Substance or Snake Oil with Arvind Narayanan: https://www.youtube.com/watch?v=HScABWB98Kw
# Assessing the Risks of Open AI Models with Sayash Kapoor: https://www.youtube.com/watch?v=GmypOIq1LV8 # done
# The Decentralized Future of Private AI with Illia Polosukhin: https://www.youtube.com/watch?v=zisE1p2plxk