Importing libraries

In [None]:
!pip install google-api-python-client youtube-transcript-api pandas tqdm


Collecting youtube-transcript-api
  Downloading youtube_transcript_api-0.6.3-py3-none-any.whl.metadata (17 kB)
Downloading youtube_transcript_api-0.6.3-py3-none-any.whl (622 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m622.3/622.3 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: youtube-transcript-api
Successfully installed youtube-transcript-api-0.6.3


Setting YouTube API Key

In [None]:
from googleapiclient.discovery import build
from youtube_transcript_api import YouTubeTranscriptApi
import pandas as pd
from tqdm import tqdm

# Set your YouTube API Key
API_KEY = "AIzaSyBGs5GCNqpn5HTFiU5ODPBnrWRwn0LUjW8"
YOUTUBE  = build("youtube", "v3", developerKey=API_KEY)


Getting Video_id's for each topic

In [None]:
def get_video_ids(topic, max_results=100):
    """Fetches video IDs for a given topic."""
    try:
        request = YOUTUBE.search().list(
            q=topic,
            part="id",
            type="video",
            maxResults=max_results
        )
        response = request.execute()
        video_ids = [item["id"]["videoId"] for item in response.get("items", [])]
        return video_ids
    except Exception as e:
        print(f"⚠️ Error fetching video IDs for {topic}: {e}")
        return []

In [None]:
def check_video_has_captions(video_id):
    """Checks if a YouTube video has captions enabled."""
    try:
        request = YOUTUBE.videos().list(
            part="contentDetails",
            id=video_id
        )
        response = request.execute()
        if "caption" in response["items"][0]["contentDetails"]:
            return response["items"][0]["contentDetails"]["caption"] == "true"
    except Exception as e:
        print(f"⚠️ Error checking captions for video {video_id}: {e}")
    return False

In [None]:
def get_video_ids_with_captions(topic, max_results=100):
    """Fetches video IDs and filters only those with captions enabled."""
    video_ids = get_video_ids(topic, max_results)
    valid_videos = [vid for vid in video_ids if check_video_has_captions(vid)]
    return valid_videos

Getting Transcript for each topic

In [None]:
def get_transcript(video_id):
    """Fetches the transcript of a YouTube video using YouTube Transcript API."""
    from youtube_transcript_api import YouTubeTranscriptApi
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join([entry["text"] for entry in transcript])
    except Exception as e:
        print(f"⚠️ No transcript found for video {video_id}: {e}")
        return None

In [None]:
def fetch_transcripts(topics):
    """Fetches transcripts for videos related to given topics."""
    dataset = []
    print("\n📌 Fetching Data by Topic...\n")

    for topic in tqdm(topics, desc="Fetching Data by Topic"):
        print(f"\n🔍 Fetching videos for: {topic}")

        video_ids = get_video_ids_with_captions(topic, max_results=50)
        print(f"📌 Found {len(video_ids)} valid videos for {topic}")

        if not video_ids:
            print(f"⚠️ Skipping {topic} due to no valid videos.")
            continue

        for video_id in tqdm(video_ids, desc=f"Fetching Transcripts for {topic}"):
            transcript = get_transcript(video_id)
            if transcript:
                dataset.append({
                    "Topic": topic,
                    "Video_ID": video_id,
                    "Transcript": transcript
                })

        print(f"✅ Completed fetching for: {topic}\n")

    return dataset

  Importing Google Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install google-api-python-client




###**Fetching transcripts on the topics and storing them in a json format**

In [None]:
import json

topics = [
    # Science & Technology
    "Artificial Intelligence", "Machine Learning", "Deep Learning", "Computer Vision", "Natural Language Processing",
    "Quantum Computing", "Cybersecurity", "Cloud Computing", "Edge Computing", "Internet of Things (IoT)",
    "5G Technology", "Augmented Reality (AR)", "Virtual Reality (VR)", "Space Exploration", "Astronomy",
    "Robotics", "Blockchain Technology", "Biotechnology", "Genetics", "Bioinformatics",

    # Environment & Climate
    "Climate Change", "Sustainable Energy", "Wildlife Conservation", "Environmental Pollution",
    "Ocean Conservation", "Deforestation", "Carbon Footprint Reduction", "Renewable Energy", "Green Technology",

    # Health & Medicine
    "Health and Medicine", "Mental Health", "Nutrition and Diet", "Epidemiology",
    "Public Health & Healthcare Systems", "Alternative Medicine", "Neuroscience", "Biomedical Engineering",
    "Longevity & Aging", "Genetic Engineering", "Pharmaceutical Innovations",

    # Education & Career
    "Education", "Learning Strategies", "Career Development", "Soft Skills",
    "EdTech (Education Technology)", "Job Market Trends", "Online Learning Platforms", "Higher Education Trends",
    "Interview Preparation", "Remote Work Culture", "Freelancing & Gig Economy",

    # Finance & Business
    "Finance", "Stock Market", "Cryptocurrency", "Entrepreneurship",
    "E-Commerce & Digital Marketing", "Personal Finance", "Investment Strategies", "Real Estate Market",
    "Economic Trends", "Wealth Management", "Startups & Venture Capital",

    # Social & Cultural Topics
    "History", "Geopolitics", "Psychology", "Philosophy", "Sociology",
    "Cultural Anthropology", "Ethics & Morality", "Linguistics", "Political Science", "Human Rights",
    "Social Movements", "Gender Studies", "Artificial Intelligence in Society",

    # Entertainment & Media
    "Movies and TV Shows", "Gaming", "Music Industry", "Sports Analysis",
    "Influencer Culture & Social Media Trends", "Streaming Services", "Book Reviews & Literature",
    "Film Industry Trends", "Esports & Competitive Gaming",

    # Everyday Life
    "Self-Improvement", "Productivity Hacks", "Travel", "Food and Cooking",
    "Minimalism & Lifestyle Design", "Home Automation", "Parenting & Child Development", "Mental Resilience",
    "Hobby Development", "Personal Branding",

    # Miscellaneous & Emerging Trends
    "Future of Work", "Metaverse", "Transhumanism", "Sustainable Fashion",
    "Smart Cities", "AI Ethics & Regulation", "Space Colonization", "Data Privacy & Security"
]


# 🛠️ Ensure fetch_transcripts returns valid data
dataset = fetch_transcripts(topics)

# 🛠️ Debugging: Check if transcripts are being collected
print("\n🔎 Checking fetched data sample:")
print(json.dumps(dataset, indent=4, ensure_ascii=False)[:500])  # Print first 500 characters for preview

# 🛠️ Ensure at least some data is collected before saving
if dataset and any(dataset):
    output_file = "/content/drive/MyDrive/Youtube_Large_Transcripts.json"

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(dataset, f, indent=4, ensure_ascii=False)

    print(f"\n✅ 📁 Dataset saved successfully at {output_file}!")
    print(f"📊 Total topics with transcripts: {len(dataset)}")
else:
    print("\n⚠️ No valid data collected. JSON file not created.")


📌 Fetching Data by Topic...



Fetching Data by Topic:   0%|          | 0/102 [00:00<?, ?it/s]


🔍 Fetching videos for: Artificial Intelligence
📌 Found 25 valid videos for Artificial Intelligence



Fetching Transcripts for Artificial Intelligence:   0%|          | 0/25 [00:00<?, ?it/s][A
Fetching Transcripts for Artificial Intelligence:   4%|▍         | 1/25 [00:00<00:13,  1.84it/s][A
Fetching Transcripts for Artificial Intelligence:   8%|▊         | 2/25 [00:01<00:15,  1.47it/s][A
Fetching Transcripts for Artificial Intelligence:  12%|█▏        | 3/25 [00:01<00:14,  1.56it/s][A
Fetching Transcripts for Artificial Intelligence:  16%|█▌        | 4/25 [00:02<00:14,  1.48it/s][A
Fetching Transcripts for Artificial Intelligence:  20%|██        | 5/25 [00:03<00:13,  1.45it/s][A
Fetching Transcripts for Artificial Intelligence:  24%|██▍       | 6/25 [00:03<00:12,  1.50it/s][A
Fetching Transcripts for Artificial Intelligence:  28%|██▊       | 7/25 [00:04<00:12,  1.48it/s][A
Fetching Transcripts for Artificial Intelligence:  32%|███▏      | 8/25 [00:05<00:12,  1.32it/s][A
Fetching Transcripts for Artificial Intelligence:  36%|███▌      | 9/25 [00:06<00:14,  1.14it/s][A
Fetchin

⚠️ No transcript found for video r7jYipQy_h4: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=r7jYipQy_h4! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (r7jYipQy_h4) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("D


Fetching Transcripts for Artificial Intelligence:  60%|██████    | 15/25 [00:10<00:07,  1.42it/s][A
Fetching Transcripts for Artificial Intelligence:  64%|██████▍   | 16/25 [00:11<00:06,  1.43it/s][A
Fetching Transcripts for Artificial Intelligence:  68%|██████▊   | 17/25 [00:12<00:05,  1.46it/s][A
Fetching Transcripts for Artificial Intelligence:  72%|███████▏  | 18/25 [00:12<00:04,  1.50it/s][A
Fetching Transcripts for Artificial Intelligence:  76%|███████▌  | 19/25 [00:13<00:04,  1.47it/s][A
Fetching Transcripts for Artificial Intelligence:  80%|████████  | 20/25 [00:14<00:03,  1.45it/s][A
Fetching Transcripts for Artificial Intelligence:  84%|████████▍ | 21/25 [00:15<00:02,  1.50it/s][A
Fetching Transcripts for Artificial Intelligence:  88%|████████▊ | 22/25 [00:15<00:01,  1.58it/s][A
Fetching Transcripts for Artificial Intelligence:  92%|█████████▏| 23/25 [00:16<00:01,  1.58it/s][A
Fetching Transcripts for Artificial Intelligence:  96%|█████████▌| 24/25 [00:16<00:00,  1.

✅ Completed fetching for: Artificial Intelligence


🔍 Fetching videos for: Machine Learning
📌 Found 11 valid videos for Machine Learning



Fetching Transcripts for Machine Learning:   0%|          | 0/11 [00:00<?, ?it/s][A
Fetching Transcripts for Machine Learning:   9%|▉         | 1/11 [00:00<00:05,  1.81it/s][A

⚠️ No transcript found for video NdNyYcAJQr8: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=NdNyYcAJQr8! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (NdNyYcAJQr8) transcripts are available in the following languages:

(MANUALLY CREATED)
 - es ("Spanish")[TRANSLATABLE]

(GENERATED)
 - es ("Spanish (auto-generated)")[TRANSLATABLE]

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 


Fetching Transcripts for Machine Learning:  18%|█▊        | 2/11 [00:01<00:05,  1.56it/s][A
Fetching Transcripts for Machine Learning:  27%|██▋       | 3/11 [00:01<00:04,  1.64it/s][A
Fetching Transcripts for Machine Learning:  36%|███▋      | 4/11 [00:02<00:05,  1.37it/s][A
Fetching Transcripts for Machine Learning:  45%|████▌     | 5/11 [00:03<00:04,  1.47it/s][A
Fetching Transcripts for Machine Learning:  55%|█████▍    | 6/11 [00:03<00:03,  1.52it/s][A

⚠️ No transcript found for video AuozO2JdPrw: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=AuozO2JdPrw! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (AuozO2JdPrw) transcripts are available in the following languages:

(MANUALLY CREATED)
 - my ("Burmese")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("Divehi")
 - nl ("Dut


Fetching Transcripts for Machine Learning:  64%|██████▎   | 7/11 [00:04<00:02,  1.59it/s][A
Fetching Transcripts for Machine Learning:  73%|███████▎  | 8/11 [00:05<00:01,  1.63it/s][A
Fetching Transcripts for Machine Learning:  82%|████████▏ | 9/11 [00:05<00:01,  1.55it/s][A
Fetching Transcripts for Machine Learning:  91%|█████████ | 10/11 [00:06<00:00,  1.50it/s][A
Fetching Transcripts for Machine Learning: 100%|██████████| 11/11 [00:07<00:00,  1.54it/s]
Fetching Data by Topic:   2%|▏         | 2/102 [00:29<23:15, 13.96s/it]

✅ Completed fetching for: Machine Learning


🔍 Fetching videos for: Deep Learning
📌 Found 21 valid videos for Deep Learning



Fetching Transcripts for Deep Learning:   0%|          | 0/21 [00:00<?, ?it/s][A
Fetching Transcripts for Deep Learning:   5%|▍         | 1/21 [00:00<00:11,  1.79it/s][A
Fetching Transcripts for Deep Learning:  10%|▉         | 2/21 [00:01<00:11,  1.72it/s][A
Fetching Transcripts for Deep Learning:  14%|█▍        | 3/21 [00:01<00:10,  1.69it/s][A
Fetching Transcripts for Deep Learning:  19%|█▉        | 4/21 [00:02<00:10,  1.62it/s][A
Fetching Transcripts for Deep Learning:  24%|██▍       | 5/21 [00:03<00:10,  1.47it/s][A
Fetching Transcripts for Deep Learning:  29%|██▊       | 6/21 [00:03<00:10,  1.47it/s][A
Fetching Transcripts for Deep Learning:  33%|███▎      | 7/21 [00:05<00:11,  1.17it/s][A
Fetching Transcripts for Deep Learning:  38%|███▊      | 8/21 [00:05<00:10,  1.19it/s][A
Fetching Transcripts for Deep Learning:  43%|████▎     | 9/21 [00:06<00:09,  1.25it/s][A
Fetching Transcripts for Deep Learning:  48%|████▊     | 10/21 [00:07<00:08,  1.26it/s][A
Fetching Transcr

⚠️ No transcript found for video trWrEWfhTVg: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=trWrEWfhTVg! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (trWrEWfhTVg) transcripts are available in the following languages:

(MANUALLY CREATED)
 - fr ("French")[TRANSLATABLE]

(GENERATED)
 - fr ("French (auto-generated)")[TRANSLATABLE]

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - 


Fetching Transcripts for Deep Learning:  57%|█████▋    | 12/21 [00:08<00:06,  1.39it/s][A

⚠️ No transcript found for video XUFLq6dKQok: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=XUFLq6dKQok! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (XUFLq6dKQok) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-GB ("English (United Kingdom)")[TRANSLATABLE]
 - fr-FR ("French (France)")[TRANSLATABLE]

(GENERATED)
 - fr ("French (auto-generated)")[TRANSLATABLE]

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditio


Fetching Transcripts for Deep Learning:  62%|██████▏   | 13/21 [00:09<00:05,  1.37it/s][A
Fetching Transcripts for Deep Learning:  67%|██████▋   | 14/21 [00:10<00:05,  1.24it/s][A
Fetching Transcripts for Deep Learning:  71%|███████▏  | 15/21 [00:11<00:05,  1.16it/s][A
Fetching Transcripts for Deep Learning:  76%|███████▌  | 16/21 [00:12<00:04,  1.23it/s][A
Fetching Transcripts for Deep Learning:  81%|████████  | 17/21 [00:12<00:02,  1.35it/s][A
Fetching Transcripts for Deep Learning:  86%|████████▌ | 18/21 [00:13<00:02,  1.35it/s][A
Fetching Transcripts for Deep Learning:  90%|█████████ | 19/21 [00:14<00:01,  1.03it/s][A
Fetching Transcripts for Deep Learning:  95%|█████████▌| 20/21 [00:15<00:00,  1.19it/s][A
Fetching Transcripts for Deep Learning: 100%|██████████| 21/21 [00:16<00:00,  1.29it/s]
Fetching Data by Topic:   3%|▎         | 3/102 [00:47<26:04, 15.80s/it]

✅ Completed fetching for: Deep Learning


🔍 Fetching videos for: Computer Vision
📌 Found 17 valid videos for Computer Vision



Fetching Transcripts for Computer Vision:   0%|          | 0/17 [00:00<?, ?it/s][A
Fetching Transcripts for Computer Vision:   6%|▌         | 1/17 [00:00<00:10,  1.51it/s][A
Fetching Transcripts for Computer Vision:  12%|█▏        | 2/17 [00:01<00:10,  1.45it/s][A
Fetching Transcripts for Computer Vision:  18%|█▊        | 3/17 [00:02<00:09,  1.45it/s][A
Fetching Transcripts for Computer Vision:  24%|██▎       | 4/17 [00:02<00:08,  1.56it/s][A

⚠️ No transcript found for video wVE8SFMSBJ0: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=wVE8SFMSBJ0! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (wVE8SFMSBJ0) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("D


Fetching Transcripts for Computer Vision:  29%|██▉       | 5/17 [00:03<00:08,  1.45it/s][A
Fetching Transcripts for Computer Vision:  35%|███▌      | 6/17 [00:04<00:07,  1.44it/s][A
Fetching Transcripts for Computer Vision:  41%|████      | 7/17 [00:05<00:09,  1.06it/s][A
Fetching Transcripts for Computer Vision:  47%|████▋     | 8/17 [00:06<00:07,  1.21it/s][A
Fetching Transcripts for Computer Vision:  53%|█████▎    | 9/17 [00:06<00:06,  1.32it/s][A
Fetching Transcripts for Computer Vision:  59%|█████▉    | 10/17 [00:07<00:05,  1.39it/s][A
Fetching Transcripts for Computer Vision:  65%|██████▍   | 11/17 [00:07<00:04,  1.50it/s][A
Fetching Transcripts for Computer Vision:  71%|███████   | 12/17 [00:08<00:03,  1.56it/s][A
Fetching Transcripts for Computer Vision:  76%|███████▋  | 13/17 [00:09<00:02,  1.55it/s][A
Fetching Transcripts for Computer Vision:  82%|████████▏ | 14/17 [00:10<00:02,  1.40it/s][A
Fetching Transcripts for Computer Vision:  88%|████████▊ | 15/17 [00:10<00

✅ Completed fetching for: Computer Vision


🔍 Fetching videos for: Natural Language Processing
📌 Found 14 valid videos for Natural Language Processing



Fetching Transcripts for Natural Language Processing:   0%|          | 0/14 [00:00<?, ?it/s][A
Fetching Transcripts for Natural Language Processing:   7%|▋         | 1/14 [00:00<00:08,  1.55it/s][A
Fetching Transcripts for Natural Language Processing:  14%|█▍        | 2/14 [00:01<00:07,  1.66it/s][A
Fetching Transcripts for Natural Language Processing:  21%|██▏       | 3/14 [00:02<00:09,  1.19it/s][A
Fetching Transcripts for Natural Language Processing:  29%|██▊       | 4/14 [00:02<00:07,  1.34it/s][A
Fetching Transcripts for Natural Language Processing:  36%|███▌      | 5/14 [00:03<00:06,  1.36it/s][A
Fetching Transcripts for Natural Language Processing:  43%|████▎     | 6/14 [00:04<00:05,  1.40it/s][A
Fetching Transcripts for Natural Language Processing:  50%|█████     | 7/14 [00:05<00:04,  1.41it/s][A
Fetching Transcripts for Natural Language Processing:  57%|█████▋    | 8/14 [00:05<00:03,  1.52it/s][A
Fetching Transcripts for Natural Language Processing:  64%|██████▍   | 

⚠️ No transcript found for video QwBaFEeUUMA: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=QwBaFEeUUMA! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (QwBaFEeUUMA) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("D


Fetching Transcripts for Natural Language Processing:  86%|████████▌ | 12/14 [00:07<00:01,  1.67it/s][A

⚠️ No transcript found for video 5sLYAQS9sWQ: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=5sLYAQS9sWQ! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (5sLYAQS9sWQ) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("D


Fetching Transcripts for Natural Language Processing:  93%|█████████▎| 13/14 [00:08<00:00,  1.61it/s][A
Fetching Transcripts for Natural Language Processing: 100%|██████████| 14/14 [00:09<00:00,  1.49it/s]
Fetching Data by Topic:   5%|▍         | 5/102 [01:12<21:57, 13.58s/it]

✅ Completed fetching for: Natural Language Processing


🔍 Fetching videos for: Quantum Computing
📌 Found 20 valid videos for Quantum Computing



Fetching Transcripts for Quantum Computing:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching Transcripts for Quantum Computing:   5%|▌         | 1/20 [00:00<00:10,  1.75it/s][A
Fetching Transcripts for Quantum Computing:  10%|█         | 2/20 [00:01<00:09,  1.81it/s][A
Fetching Transcripts for Quantum Computing:  15%|█▌        | 3/20 [00:01<00:09,  1.82it/s][A
Fetching Transcripts for Quantum Computing:  20%|██        | 4/20 [00:02<00:09,  1.75it/s][A
Fetching Transcripts for Quantum Computing:  25%|██▌       | 5/20 [00:02<00:09,  1.59it/s][A
Fetching Transcripts for Quantum Computing:  30%|███       | 6/20 [00:03<00:08,  1.60it/s][A
Fetching Transcripts for Quantum Computing:  35%|███▌      | 7/20 [00:04<00:07,  1.68it/s][A
Fetching Transcripts for Quantum Computing:  40%|████      | 8/20 [00:04<00:07,  1.64it/s][A
Fetching Transcripts for Quantum Computing:  45%|████▌     | 9/20 [00:05<00:06,  1.64it/s][A
Fetching Transcripts for Quantum Computing:  50%|█████     | 10/20 

✅ Completed fetching for: Quantum Computing


🔍 Fetching videos for: Cybersecurity
📌 Found 15 valid videos for Cybersecurity



Fetching Transcripts for Cybersecurity:   0%|          | 0/15 [00:00<?, ?it/s][A
Fetching Transcripts for Cybersecurity:   7%|▋         | 1/15 [00:00<00:07,  1.86it/s][A
Fetching Transcripts for Cybersecurity:  13%|█▎        | 2/15 [00:01<00:07,  1.72it/s][A
Fetching Transcripts for Cybersecurity:  20%|██        | 3/15 [00:01<00:07,  1.64it/s][A
Fetching Transcripts for Cybersecurity:  27%|██▋       | 4/15 [00:02<00:07,  1.48it/s][A
Fetching Transcripts for Cybersecurity:  33%|███▎      | 5/15 [00:03<00:07,  1.38it/s][A
Fetching Transcripts for Cybersecurity:  40%|████      | 6/15 [00:03<00:06,  1.46it/s][A
Fetching Transcripts for Cybersecurity:  47%|████▋     | 7/15 [00:04<00:05,  1.39it/s][A
Fetching Transcripts for Cybersecurity:  53%|█████▎    | 8/15 [00:05<00:05,  1.19it/s][A
Fetching Transcripts for Cybersecurity:  60%|██████    | 9/15 [00:06<00:04,  1.24it/s][A
Fetching Transcripts for Cybersecurity:  67%|██████▋   | 10/15 [00:07<00:03,  1.29it/s][A
Fetching Transcr

✅ Completed fetching for: Cybersecurity


🔍 Fetching videos for: Cloud Computing
📌 Found 7 valid videos for Cloud Computing



Fetching Transcripts for Cloud Computing:   0%|          | 0/7 [00:00<?, ?it/s][A
Fetching Transcripts for Cloud Computing:  14%|█▍        | 1/7 [00:00<00:03,  1.74it/s][A
Fetching Transcripts for Cloud Computing:  29%|██▊       | 2/7 [00:01<00:03,  1.43it/s][A
Fetching Transcripts for Cloud Computing:  43%|████▎     | 3/7 [00:02<00:02,  1.47it/s][A
Fetching Transcripts for Cloud Computing:  57%|█████▋    | 4/7 [00:02<00:02,  1.28it/s][A
Fetching Transcripts for Cloud Computing:  71%|███████▏  | 5/7 [00:03<00:01,  1.24it/s][A
Fetching Transcripts for Cloud Computing:  86%|████████▌ | 6/7 [00:04<00:00,  1.31it/s][A

⚠️ No transcript found for video RwbIMBSr8o8: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=RwbIMBSr8o8! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (RwbIMBSr8o8) transcripts are available in the following languages:

(MANUALLY CREATED)
 - fr ("French")[TRANSLATABLE]

(GENERATED)
 - fr ("French (auto-generated)")[TRANSLATABLE]

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - 


Fetching Transcripts for Cloud Computing: 100%|██████████| 7/7 [00:05<00:00,  1.36it/s]
Fetching Data by Topic:   8%|▊         | 8/102 [01:46<17:53, 11.42s/it]

✅ Completed fetching for: Cloud Computing


🔍 Fetching videos for: Edge Computing
📌 Found 15 valid videos for Edge Computing



Fetching Transcripts for Edge Computing:   0%|          | 0/15 [00:00<?, ?it/s][A
Fetching Transcripts for Edge Computing:   7%|▋         | 1/15 [00:00<00:09,  1.54it/s][A

⚠️ No transcript found for video 3hScMLH7B4o: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=3hScMLH7B4o! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (3hScMLH7B4o) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]
 - fr ("French")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Cze


Fetching Transcripts for Edge Computing:  13%|█▎        | 2/15 [00:01<00:08,  1.49it/s][A
Fetching Transcripts for Edge Computing:  20%|██        | 3/15 [00:01<00:07,  1.53it/s][A

⚠️ No transcript found for video qn6GDlMgoPA: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=qn6GDlMgoPA! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (qn6GDlMgoPA) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("D


Fetching Transcripts for Edge Computing:  27%|██▋       | 4/15 [00:02<00:08,  1.37it/s][A
Fetching Transcripts for Edge Computing:  33%|███▎      | 5/15 [00:03<00:06,  1.51it/s][A

⚠️ No transcript found for video u8lZVCjy9z8: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=u8lZVCjy9z8! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (u8lZVCjy9z8) transcripts are available in the following languages:

(MANUALLY CREATED)
 - zh-TW ("Chinese (Taiwan)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("Divehi")


Fetching Transcripts for Edge Computing:  40%|████      | 6/15 [00:04<00:06,  1.50it/s][A
Fetching Transcripts for Edge Computing:  47%|████▋     | 7/15 [00:04<00:05,  1.53it/s][A
Fetching Transcripts for Edge Computing:  53%|█████▎    | 8/15 [00:05<00:04,  1.53it/s][A
Fetching Transcripts for Edge Computing:  60%|██████    | 9/15 [00:05<00:03,  1.56it/s][A
Fetching Transcripts for Edge Computing:  67%|██████▋   | 10/15 [00:06<00:03,  1.48it/s][A
Fetching Transcripts for Edge Computing:  73%|███████▎  | 11/15 [00:07<00:02,  1.45it/s][A
Fetching Transcripts for Edge Computing:  80%|████████  | 12/15 [00:08<00:02,  1.49it/s][A
Fetching Transcripts for Edge Computing:  87%|████████▋ | 13/15 [00:08<00:01,  1.44it/s][A
Fetching Transcripts for Edge Computing:  93%|█████████▎| 14/15 [00:09<00:00,  1.36it/s][A
Fetching Transcripts for Edge Computing: 100%|██████████| 15/15 [00:10<00:00,  1.47it/s]
Fetching Data by Topic:   9%|▉         | 9/102 [01:59<18:05, 11.68s/it]

✅ Completed fetching for: Edge Computing


🔍 Fetching videos for: Internet of Things (IoT)
📌 Found 11 valid videos for Internet of Things (IoT)



Fetching Transcripts for Internet of Things (IoT):   0%|          | 0/11 [00:00<?, ?it/s][A
Fetching Transcripts for Internet of Things (IoT):   9%|▉         | 1/11 [00:00<00:06,  1.53it/s][A
Fetching Transcripts for Internet of Things (IoT):  18%|█▊        | 2/11 [00:01<00:05,  1.58it/s][A

⚠️ No transcript found for video Fj02iTrWUx0: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=Fj02iTrWUx0! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (Fj02iTrWUx0) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-IN ("English (India)")[TRANSLATABLE]

(GENERATED)
 - vi ("Vietnamese (auto-generated)")[TRANSLATABLE]

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - 


Fetching Transcripts for Internet of Things (IoT):  27%|██▋       | 3/11 [00:01<00:05,  1.54it/s][A
Fetching Transcripts for Internet of Things (IoT):  36%|███▋      | 4/11 [00:02<00:04,  1.55it/s][A
Fetching Transcripts for Internet of Things (IoT):  45%|████▌     | 5/11 [00:03<00:03,  1.54it/s][A
Fetching Transcripts for Internet of Things (IoT):  55%|█████▍    | 6/11 [00:04<00:03,  1.26it/s][A
Fetching Transcripts for Internet of Things (IoT):  64%|██████▎   | 7/11 [00:04<00:02,  1.45it/s][A
Fetching Transcripts for Internet of Things (IoT):  73%|███████▎  | 8/11 [00:05<00:02,  1.38it/s][A
Fetching Transcripts for Internet of Things (IoT):  82%|████████▏ | 9/11 [00:06<00:01,  1.47it/s][A

⚠️ No transcript found for video HmbUJEShA-8: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=HmbUJEShA-8! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (HmbUJEShA-8) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("D


Fetching Transcripts for Internet of Things (IoT):  91%|█████████ | 10/11 [00:06<00:00,  1.56it/s][A

⚠️ No transcript found for video yLZbzbO_7yQ: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=yLZbzbO_7yQ! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (yLZbzbO_7yQ) transcripts are available in the following languages:

(MANUALLY CREATED)
 - zh ("Chinese")[TRANSLATABLE]
 - de ("German")[TRANSLATABLE]

(GENERATED)
 - de ("German (auto-generated)")[TRANSLATABLE]

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr 


Fetching Transcripts for Internet of Things (IoT): 100%|██████████| 11/11 [00:07<00:00,  1.48it/s]
Fetching Data by Topic:  10%|▉         | 10/102 [02:08<16:53, 11.01s/it]

✅ Completed fetching for: Internet of Things (IoT)


🔍 Fetching videos for: 5G Technology
📌 Found 10 valid videos for 5G Technology



Fetching Transcripts for 5G Technology:   0%|          | 0/10 [00:00<?, ?it/s][A
Fetching Transcripts for 5G Technology:  10%|█         | 1/10 [00:00<00:06,  1.37it/s][A
Fetching Transcripts for 5G Technology:  20%|██        | 2/10 [00:01<00:05,  1.50it/s][A
Fetching Transcripts for 5G Technology:  30%|███       | 3/10 [00:01<00:04,  1.60it/s][A
Fetching Transcripts for 5G Technology:  40%|████      | 4/10 [00:02<00:03,  1.55it/s][A
Fetching Transcripts for 5G Technology:  50%|█████     | 5/10 [00:03<00:03,  1.57it/s][A
Fetching Transcripts for 5G Technology:  60%|██████    | 6/10 [00:03<00:02,  1.53it/s][A
Fetching Transcripts for 5G Technology:  70%|███████   | 7/10 [00:04<00:02,  1.44it/s][A
Fetching Transcripts for 5G Technology:  80%|████████  | 8/10 [00:05<00:01,  1.43it/s][A
Fetching Transcripts for 5G Technology:  90%|█████████ | 9/10 [00:05<00:00,  1.51it/s][A
Fetching Transcripts for 5G Technology: 100%|██████████| 10/10 [00:06<00:00,  1.50it/s]
Fetching Data by To

⚠️ No transcript found for video FnPakOat_rY: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=FnPakOat_rY! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (FnPakOat_rY) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("D


Fetching Transcripts for Augmented Reality (AR):   0%|          | 0/10 [00:00<?, ?it/s][A
Fetching Transcripts for Augmented Reality (AR):  10%|█         | 1/10 [00:00<00:05,  1.78it/s][A

⚠️ No transcript found for video QpbJwad6v_s: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=QpbJwad6v_s! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (QpbJwad6v_s) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("D


Fetching Transcripts for Augmented Reality (AR):  20%|██        | 2/10 [00:01<00:05,  1.58it/s][A
Fetching Transcripts for Augmented Reality (AR):  30%|███       | 3/10 [00:01<00:04,  1.48it/s][A
Fetching Transcripts for Augmented Reality (AR):  40%|████      | 4/10 [00:02<00:03,  1.56it/s][A
Fetching Transcripts for Augmented Reality (AR):  50%|█████     | 5/10 [00:03<00:03,  1.54it/s][A
Fetching Transcripts for Augmented Reality (AR):  60%|██████    | 6/10 [00:03<00:02,  1.55it/s][A
Fetching Transcripts for Augmented Reality (AR):  70%|███████   | 7/10 [00:04<00:02,  1.47it/s][A
Fetching Transcripts for Augmented Reality (AR):  80%|████████  | 8/10 [00:05<00:01,  1.44it/s][A
Fetching Transcripts for Augmented Reality (AR):  90%|█████████ | 9/10 [00:05<00:00,  1.47it/s][A
Fetching Transcripts for Augmented Reality (AR): 100%|██████████| 10/10 [00:06<00:00,  1.50it/s]
Fetching Data by Topic:  12%|█▏        | 12/102 [02:25<14:34,  9.72s/it]

✅ Completed fetching for: Augmented Reality (AR)


🔍 Fetching videos for: Virtual Reality (VR)
📌 Found 6 valid videos for Virtual Reality (VR)



Fetching Transcripts for Virtual Reality (VR):   0%|          | 0/6 [00:00<?, ?it/s][A
Fetching Transcripts for Virtual Reality (VR):  17%|█▋        | 1/6 [00:00<00:03,  1.49it/s][A
Fetching Transcripts for Virtual Reality (VR):  33%|███▎      | 2/6 [00:01<00:02,  1.44it/s][A
Fetching Transcripts for Virtual Reality (VR):  50%|█████     | 3/6 [00:02<00:02,  1.48it/s][A
Fetching Transcripts for Virtual Reality (VR):  67%|██████▋   | 4/6 [00:02<00:01,  1.49it/s][A
Fetching Transcripts for Virtual Reality (VR):  83%|████████▎ | 5/6 [00:03<00:00,  1.57it/s][A
Fetching Transcripts for Virtual Reality (VR): 100%|██████████| 6/6 [00:03<00:00,  1.54it/s]
Fetching Data by Topic:  13%|█▎        | 13/102 [02:31<12:40,  8.54s/it]

✅ Completed fetching for: Virtual Reality (VR)


🔍 Fetching videos for: Space Exploration
📌 Found 17 valid videos for Space Exploration



Fetching Transcripts for Space Exploration:   0%|          | 0/17 [00:00<?, ?it/s][A
Fetching Transcripts for Space Exploration:   6%|▌         | 1/17 [00:00<00:09,  1.74it/s][A
Fetching Transcripts for Space Exploration:  12%|█▏        | 2/17 [00:01<00:11,  1.31it/s][A
Fetching Transcripts for Space Exploration:  18%|█▊        | 3/17 [00:02<00:09,  1.43it/s][A
Fetching Transcripts for Space Exploration:  24%|██▎       | 4/17 [00:02<00:09,  1.34it/s][A
Fetching Transcripts for Space Exploration:  29%|██▉       | 5/17 [00:03<00:09,  1.31it/s][A
Fetching Transcripts for Space Exploration:  35%|███▌      | 6/17 [00:04<00:07,  1.44it/s][A
Fetching Transcripts for Space Exploration:  41%|████      | 7/17 [00:04<00:06,  1.44it/s][A
Fetching Transcripts for Space Exploration:  47%|████▋     | 8/17 [00:05<00:06,  1.49it/s][A
Fetching Transcripts for Space Exploration:  53%|█████▎    | 9/17 [00:06<00:05,  1.45it/s][A
Fetching Transcripts for Space Exploration:  59%|█████▉    | 10/17 

✅ Completed fetching for: Space Exploration


🔍 Fetching videos for: Astronomy
📌 Found 11 valid videos for Astronomy



Fetching Transcripts for Astronomy:   0%|          | 0/11 [00:00<?, ?it/s][A
Fetching Transcripts for Astronomy:   9%|▉         | 1/11 [00:00<00:06,  1.44it/s][A
Fetching Transcripts for Astronomy:  18%|█▊        | 2/11 [00:01<00:06,  1.36it/s][A
Fetching Transcripts for Astronomy:  27%|██▋       | 3/11 [00:02<00:05,  1.38it/s][A
Fetching Transcripts for Astronomy:  36%|███▋      | 4/11 [00:02<00:04,  1.50it/s][A
Fetching Transcripts for Astronomy:  45%|████▌     | 5/11 [00:03<00:03,  1.60it/s][A
Fetching Transcripts for Astronomy:  55%|█████▍    | 6/11 [00:03<00:03,  1.56it/s][A
Fetching Transcripts for Astronomy:  64%|██████▎   | 7/11 [00:05<00:03,  1.27it/s][A
Fetching Transcripts for Astronomy:  73%|███████▎  | 8/11 [00:05<00:02,  1.37it/s][A
Fetching Transcripts for Astronomy:  82%|████████▏ | 9/11 [00:06<00:01,  1.39it/s][A
Fetching Transcripts for Astronomy:  91%|█████████ | 10/11 [00:06<00:00,  1.45it/s][A
Fetching Transcripts for Astronomy: 100%|██████████| 11/11 [

✅ Completed fetching for: Astronomy


🔍 Fetching videos for: Robotics
📌 Found 10 valid videos for Robotics



Fetching Transcripts for Robotics:   0%|          | 0/10 [00:00<?, ?it/s][A
Fetching Transcripts for Robotics:  10%|█         | 1/10 [00:00<00:05,  1.54it/s][A
Fetching Transcripts for Robotics:  20%|██        | 2/10 [00:01<00:05,  1.52it/s][A
Fetching Transcripts for Robotics:  30%|███       | 3/10 [00:01<00:04,  1.64it/s][A
Fetching Transcripts for Robotics:  40%|████      | 4/10 [00:02<00:03,  1.57it/s][A
Fetching Transcripts for Robotics:  50%|█████     | 5/10 [00:03<00:03,  1.53it/s][A
Fetching Transcripts for Robotics:  60%|██████    | 6/10 [00:03<00:02,  1.53it/s][A
Fetching Transcripts for Robotics:  70%|███████   | 7/10 [00:04<00:02,  1.49it/s][A
Fetching Transcripts for Robotics:  80%|████████  | 8/10 [00:05<00:01,  1.50it/s][A
Fetching Transcripts for Robotics:  90%|█████████ | 9/10 [00:05<00:00,  1.53it/s][A
Fetching Transcripts for Robotics: 100%|██████████| 10/10 [00:06<00:00,  1.52it/s]
Fetching Data by Topic:  16%|█▌        | 16/102 [03:03<13:35,  9.48s/it]

✅ Completed fetching for: Robotics


🔍 Fetching videos for: Blockchain Technology
📌 Found 12 valid videos for Blockchain Technology



Fetching Transcripts for Blockchain Technology:   0%|          | 0/12 [00:00<?, ?it/s][A
Fetching Transcripts for Blockchain Technology:   8%|▊         | 1/12 [00:00<00:06,  1.76it/s][A
Fetching Transcripts for Blockchain Technology:  17%|█▋        | 2/12 [00:01<00:06,  1.56it/s][A
Fetching Transcripts for Blockchain Technology:  25%|██▌       | 3/12 [00:01<00:05,  1.67it/s][A
Fetching Transcripts for Blockchain Technology:  33%|███▎      | 4/12 [00:02<00:04,  1.69it/s][A
Fetching Transcripts for Blockchain Technology:  42%|████▏     | 5/12 [00:02<00:03,  1.81it/s][A
Fetching Transcripts for Blockchain Technology:  50%|█████     | 6/12 [00:03<00:03,  1.74it/s][A
Fetching Transcripts for Blockchain Technology:  58%|█████▊    | 7/12 [00:04<00:03,  1.61it/s][A
Fetching Transcripts for Blockchain Technology:  67%|██████▋   | 8/12 [00:04<00:02,  1.52it/s][A
Fetching Transcripts for Blockchain Technology:  75%|███████▌  | 9/12 [00:05<00:02,  1.44it/s][A
Fetching Transcripts for Bl

✅ Completed fetching for: Blockchain Technology


🔍 Fetching videos for: Biotechnology
📌 Found 12 valid videos for Biotechnology



Fetching Transcripts for Biotechnology:   0%|          | 0/12 [00:00<?, ?it/s][A
Fetching Transcripts for Biotechnology:   8%|▊         | 1/12 [00:00<00:07,  1.46it/s][A
Fetching Transcripts for Biotechnology:  17%|█▋        | 2/12 [00:01<00:06,  1.52it/s][A
Fetching Transcripts for Biotechnology:  25%|██▌       | 3/12 [00:01<00:05,  1.55it/s][A
Fetching Transcripts for Biotechnology:  33%|███▎      | 4/12 [00:02<00:05,  1.59it/s][A
Fetching Transcripts for Biotechnology:  42%|████▏     | 5/12 [00:03<00:04,  1.56it/s][A
Fetching Transcripts for Biotechnology:  50%|█████     | 6/12 [00:03<00:04,  1.49it/s][A
Fetching Transcripts for Biotechnology:  58%|█████▊    | 7/12 [00:04<00:03,  1.50it/s][A
Fetching Transcripts for Biotechnology:  67%|██████▋   | 8/12 [00:05<00:02,  1.42it/s][A
Fetching Transcripts for Biotechnology:  75%|███████▌  | 9/12 [00:05<00:01,  1.54it/s][A
Fetching Transcripts for Biotechnology:  83%|████████▎ | 10/12 [00:06<00:01,  1.49it/s][A
Fetching Transcr

✅ Completed fetching for: Biotechnology


🔍 Fetching videos for: Genetics
📌 Found 23 valid videos for Genetics



Fetching Transcripts for Genetics:   0%|          | 0/23 [00:00<?, ?it/s][A
Fetching Transcripts for Genetics:   4%|▍         | 1/23 [00:00<00:13,  1.59it/s][A
Fetching Transcripts for Genetics:   9%|▊         | 2/23 [00:01<00:12,  1.75it/s][A
Fetching Transcripts for Genetics:  13%|█▎        | 3/23 [00:01<00:13,  1.53it/s][A
Fetching Transcripts for Genetics:  17%|█▋        | 4/23 [00:02<00:13,  1.45it/s][A
Fetching Transcripts for Genetics:  22%|██▏       | 5/23 [00:03<00:11,  1.55it/s][A
Fetching Transcripts for Genetics:  26%|██▌       | 6/23 [00:03<00:10,  1.56it/s][A
Fetching Transcripts for Genetics:  30%|███       | 7/23 [00:04<00:10,  1.54it/s][A
Fetching Transcripts for Genetics:  35%|███▍      | 8/23 [00:05<00:09,  1.52it/s][A
Fetching Transcripts for Genetics:  39%|███▉      | 9/23 [00:05<00:08,  1.58it/s][A
Fetching Transcripts for Genetics:  43%|████▎     | 10/23 [00:06<00:09,  1.43it/s][A
Fetching Transcripts for Genetics:  48%|████▊     | 11/23 [00:07<00:07,

⚠️ No transcript found for video NeeaP8pp9HI: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=NeeaP8pp9HI! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (NeeaP8pp9HI) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("D


Fetching Transcripts for Genetics:  52%|█████▏    | 12/23 [00:07<00:07,  1.44it/s][A
Fetching Transcripts for Genetics:  57%|█████▋    | 13/23 [00:08<00:06,  1.48it/s][A

⚠️ No transcript found for video YnJPbphsoMY: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=YnJPbphsoMY! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (YnJPbphsoMY) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("D


Fetching Transcripts for Genetics:  61%|██████    | 14/23 [00:09<00:06,  1.36it/s][A
Fetching Transcripts for Genetics:  65%|██████▌   | 15/23 [00:10<00:05,  1.40it/s][A
Fetching Transcripts for Genetics:  70%|██████▉   | 16/23 [00:10<00:04,  1.51it/s][A
Fetching Transcripts for Genetics:  74%|███████▍  | 17/23 [00:11<00:03,  1.50it/s][A
Fetching Transcripts for Genetics:  78%|███████▊  | 18/23 [00:11<00:03,  1.57it/s][A
Fetching Transcripts for Genetics:  83%|████████▎ | 19/23 [00:12<00:02,  1.65it/s][A
Fetching Transcripts for Genetics:  87%|████████▋ | 20/23 [00:13<00:01,  1.66it/s][A
Fetching Transcripts for Genetics:  91%|█████████▏| 21/23 [00:13<00:01,  1.73it/s][A
Fetching Transcripts for Genetics:  96%|█████████▌| 22/23 [00:14<00:00,  1.63it/s][A
Fetching Transcripts for Genetics: 100%|██████████| 23/23 [00:14<00:00,  1.54it/s]
Fetching Data by Topic:  19%|█▊        | 19/102 [03:39<16:15, 11.75s/it]

✅ Completed fetching for: Genetics


🔍 Fetching videos for: Bioinformatics
📌 Found 5 valid videos for Bioinformatics



Fetching Transcripts for Bioinformatics:   0%|          | 0/5 [00:00<?, ?it/s][A
Fetching Transcripts for Bioinformatics:  20%|██        | 1/5 [00:00<00:02,  1.40it/s][A
Fetching Transcripts for Bioinformatics:  40%|████      | 2/5 [00:01<00:01,  1.52it/s][A
Fetching Transcripts for Bioinformatics:  60%|██████    | 3/5 [00:01<00:01,  1.68it/s][A

⚠️ No transcript found for video o-WFU5ovaTc: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=o-WFU5ovaTc! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (o-WFU5ovaTc) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("D


Fetching Transcripts for Bioinformatics:  80%|████████  | 4/5 [00:02<00:00,  1.64it/s][A
Fetching Transcripts for Bioinformatics: 100%|██████████| 5/5 [00:03<00:00,  1.58it/s]
Fetching Data by Topic:  20%|█▉        | 20/102 [03:44<13:20,  9.76s/it]

✅ Completed fetching for: Bioinformatics


🔍 Fetching videos for: Climate Change
📌 Found 23 valid videos for Climate Change



Fetching Transcripts for Climate Change:   0%|          | 0/23 [00:00<?, ?it/s][A
Fetching Transcripts for Climate Change:   4%|▍         | 1/23 [00:00<00:14,  1.56it/s][A
Fetching Transcripts for Climate Change:   9%|▊         | 2/23 [00:01<00:13,  1.58it/s][A
Fetching Transcripts for Climate Change:  13%|█▎        | 3/23 [00:01<00:12,  1.64it/s][A
Fetching Transcripts for Climate Change:  17%|█▋        | 4/23 [00:02<00:11,  1.62it/s][A
Fetching Transcripts for Climate Change:  22%|██▏       | 5/23 [00:03<00:11,  1.61it/s][A
Fetching Transcripts for Climate Change:  26%|██▌       | 6/23 [00:03<00:10,  1.64it/s][A
Fetching Transcripts for Climate Change:  30%|███       | 7/23 [00:04<00:10,  1.59it/s][A

⚠️ No transcript found for video JZv0qxrs_LI: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=JZv0qxrs_LI! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (JZv0qxrs_LI) transcripts are available in the following languages:

(MANUALLY CREATED)
 - hi ("Hindi")[TRANSLATABLE]

(GENERATED)
 - hi ("Hindi (auto-generated)")[TRANSLATABLE]

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da


Fetching Transcripts for Climate Change:  35%|███▍      | 8/23 [00:04<00:09,  1.60it/s][A
Fetching Transcripts for Climate Change:  39%|███▉      | 9/23 [00:05<00:08,  1.63it/s][A
Fetching Transcripts for Climate Change:  43%|████▎     | 10/23 [00:06<00:07,  1.67it/s][A
Fetching Transcripts for Climate Change:  48%|████▊     | 11/23 [00:06<00:07,  1.58it/s][A
Fetching Transcripts for Climate Change:  52%|█████▏    | 12/23 [00:07<00:07,  1.54it/s][A
Fetching Transcripts for Climate Change:  57%|█████▋    | 13/23 [00:08<00:06,  1.44it/s][A
Fetching Transcripts for Climate Change:  61%|██████    | 14/23 [00:09<00:06,  1.38it/s][A
Fetching Transcripts for Climate Change:  65%|██████▌   | 15/23 [00:09<00:05,  1.42it/s][A
Fetching Transcripts for Climate Change:  70%|██████▉   | 16/23 [00:10<00:04,  1.48it/s][A
Fetching Transcripts for Climate Change:  74%|███████▍  | 17/23 [00:11<00:04,  1.49it/s][A
Fetching Transcripts for Climate Change:  78%|███████▊  | 18/23 [00:11<00:03,  1.

✅ Completed fetching for: Climate Change


🔍 Fetching videos for: Sustainable Energy
📌 Found 16 valid videos for Sustainable Energy



Fetching Transcripts for Sustainable Energy:   0%|          | 0/16 [00:00<?, ?it/s][A
Fetching Transcripts for Sustainable Energy:   6%|▋         | 1/16 [00:00<00:08,  1.80it/s][A
Fetching Transcripts for Sustainable Energy:  12%|█▎        | 2/16 [00:01<00:08,  1.67it/s][A
Fetching Transcripts for Sustainable Energy:  19%|█▉        | 3/16 [00:01<00:08,  1.61it/s][A
Fetching Transcripts for Sustainable Energy:  25%|██▌       | 4/16 [00:02<00:07,  1.58it/s][A
Fetching Transcripts for Sustainable Energy:  31%|███▏      | 5/16 [00:03<00:06,  1.62it/s][A
Fetching Transcripts for Sustainable Energy:  38%|███▊      | 6/16 [00:03<00:05,  1.67it/s][A
Fetching Transcripts for Sustainable Energy:  44%|████▍     | 7/16 [00:04<00:05,  1.69it/s][A
Fetching Transcripts for Sustainable Energy:  50%|█████     | 8/16 [00:04<00:04,  1.64it/s][A
Fetching Transcripts for Sustainable Energy:  56%|█████▋    | 9/16 [00:05<00:04,  1.55it/s][A
Fetching Transcripts for Sustainable Energy:  62%|██████▎

✅ Completed fetching for: Sustainable Energy


🔍 Fetching videos for: Wildlife Conservation
📌 Found 6 valid videos for Wildlife Conservation



Fetching Transcripts for Wildlife Conservation:   0%|          | 0/6 [00:00<?, ?it/s][A
Fetching Transcripts for Wildlife Conservation:  17%|█▋        | 1/6 [00:00<00:03,  1.59it/s][A
Fetching Transcripts for Wildlife Conservation:  33%|███▎      | 2/6 [00:01<00:02,  1.78it/s][A
Fetching Transcripts for Wildlife Conservation:  50%|█████     | 3/6 [00:01<00:01,  1.62it/s][A
Fetching Transcripts for Wildlife Conservation:  67%|██████▋   | 4/6 [00:02<00:01,  1.55it/s][A
Fetching Transcripts for Wildlife Conservation:  83%|████████▎ | 5/6 [00:03<00:00,  1.60it/s][A
Fetching Transcripts for Wildlife Conservation: 100%|██████████| 6/6 [00:03<00:00,  1.63it/s]
Fetching Data by Topic:  23%|██▎       | 23/102 [04:18<13:12, 10.03s/it]

✅ Completed fetching for: Wildlife Conservation


🔍 Fetching videos for: Environmental Pollution
📌 Found 8 valid videos for Environmental Pollution



Fetching Transcripts for Environmental Pollution:   0%|          | 0/8 [00:00<?, ?it/s][A
Fetching Transcripts for Environmental Pollution:  12%|█▎        | 1/8 [00:00<00:03,  1.87it/s][A
Fetching Transcripts for Environmental Pollution:  25%|██▌       | 2/8 [00:01<00:03,  1.89it/s][A

⚠️ No transcript found for video B1SdFjna7zQ: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=B1SdFjna7zQ! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (B1SdFjna7zQ) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-GB ("English (United Kingdom)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("


Fetching Transcripts for Environmental Pollution:  38%|███▊      | 3/8 [00:01<00:02,  1.83it/s][A
Fetching Transcripts for Environmental Pollution:  50%|█████     | 4/8 [00:02<00:01,  2.03it/s][A
Fetching Transcripts for Environmental Pollution:  62%|██████▎   | 5/8 [00:02<00:01,  1.81it/s][A
Fetching Transcripts for Environmental Pollution:  75%|███████▌  | 6/8 [00:03<00:01,  1.54it/s][A
Fetching Transcripts for Environmental Pollution:  88%|████████▊ | 7/8 [00:04<00:00,  1.58it/s][A
Fetching Transcripts for Environmental Pollution: 100%|██████████| 8/8 [00:05<00:00,  1.51it/s]
Fetching Data by Topic:  24%|██▎       | 24/102 [04:26<11:53,  9.15s/it]

✅ Completed fetching for: Environmental Pollution


🔍 Fetching videos for: Ocean Conservation
📌 Found 34 valid videos for Ocean Conservation



Fetching Transcripts for Ocean Conservation:   0%|          | 0/34 [00:00<?, ?it/s][A
Fetching Transcripts for Ocean Conservation:   3%|▎         | 1/34 [00:00<00:24,  1.33it/s][A
Fetching Transcripts for Ocean Conservation:   6%|▌         | 2/34 [00:01<00:21,  1.50it/s][A
Fetching Transcripts for Ocean Conservation:   9%|▉         | 3/34 [00:02<00:20,  1.50it/s][A
Fetching Transcripts for Ocean Conservation:  12%|█▏        | 4/34 [00:02<00:19,  1.54it/s][A
Fetching Transcripts for Ocean Conservation:  15%|█▍        | 5/34 [00:03<00:20,  1.42it/s][A
Fetching Transcripts for Ocean Conservation:  18%|█▊        | 6/34 [00:04<00:19,  1.46it/s][A
Fetching Transcripts for Ocean Conservation:  21%|██        | 7/34 [00:04<00:17,  1.52it/s][A
Fetching Transcripts for Ocean Conservation:  24%|██▎       | 8/34 [00:05<00:16,  1.61it/s][A
Fetching Transcripts for Ocean Conservation:  26%|██▋       | 9/34 [00:05<00:15,  1.62it/s][A
Fetching Transcripts for Ocean Conservation:  29%|██▉    

✅ Completed fetching for: Ocean Conservation


🔍 Fetching videos for: Deforestation
📌 Found 11 valid videos for Deforestation



Fetching Transcripts for Deforestation:   0%|          | 0/11 [00:00<?, ?it/s][A
Fetching Transcripts for Deforestation:   9%|▉         | 1/11 [00:00<00:05,  1.69it/s][A
Fetching Transcripts for Deforestation:  18%|█▊        | 2/11 [00:01<00:05,  1.56it/s][A
Fetching Transcripts for Deforestation:  27%|██▋       | 3/11 [00:02<00:05,  1.35it/s][A
Fetching Transcripts for Deforestation:  36%|███▋      | 4/11 [00:02<00:04,  1.41it/s][A
Fetching Transcripts for Deforestation:  45%|████▌     | 5/11 [00:03<00:04,  1.29it/s][A
Fetching Transcripts for Deforestation:  55%|█████▍    | 6/11 [00:04<00:03,  1.43it/s][A
Fetching Transcripts for Deforestation:  64%|██████▎   | 7/11 [00:05<00:02,  1.37it/s][A
Fetching Transcripts for Deforestation:  73%|███████▎  | 8/11 [00:05<00:02,  1.48it/s][A
Fetching Transcripts for Deforestation:  82%|████████▏ | 9/11 [00:06<00:01,  1.49it/s][A
Fetching Transcripts for Deforestation:  91%|█████████ | 10/11 [00:06<00:00,  1.45it/s][A
Fetching Transcr

✅ Completed fetching for: Deforestation


🔍 Fetching videos for: Carbon Footprint Reduction
📌 Found 10 valid videos for Carbon Footprint Reduction



Fetching Transcripts for Carbon Footprint Reduction:   0%|          | 0/10 [00:00<?, ?it/s][A
Fetching Transcripts for Carbon Footprint Reduction:  10%|█         | 1/10 [00:00<00:05,  1.60it/s][A
Fetching Transcripts for Carbon Footprint Reduction:  20%|██        | 2/10 [00:01<00:04,  1.64it/s][A

⚠️ No transcript found for video bYb7YLsXvzg: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=bYb7YLsXvzg! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (bYb7YLsXvzg) transcripts are available in the following languages:

(MANUALLY CREATED)
 - bg ("Bulgarian")[TRANSLATABLE]
 - en-CA ("English (Canada)")[TRANSLATABLE]
 - it ("Italian")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 -


Fetching Transcripts for Carbon Footprint Reduction:  30%|███       | 3/10 [00:01<00:04,  1.56it/s][A
Fetching Transcripts for Carbon Footprint Reduction:  40%|████      | 4/10 [00:02<00:03,  1.64it/s][A
Fetching Transcripts for Carbon Footprint Reduction:  50%|█████     | 5/10 [00:03<00:03,  1.55it/s][A
Fetching Transcripts for Carbon Footprint Reduction:  60%|██████    | 6/10 [00:03<00:02,  1.64it/s][A
Fetching Transcripts for Carbon Footprint Reduction:  70%|███████   | 7/10 [00:04<00:01,  1.78it/s][A

⚠️ No transcript found for video YbEFJd-fJpQ: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=YbEFJd-fJpQ! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (YbEFJd-fJpQ) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-GB ("English (United Kingdom)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("


Fetching Transcripts for Carbon Footprint Reduction:  80%|████████  | 8/10 [00:04<00:01,  1.79it/s][A
Fetching Transcripts for Carbon Footprint Reduction:  90%|█████████ | 9/10 [00:05<00:00,  1.66it/s][A
Fetching Transcripts for Carbon Footprint Reduction: 100%|██████████| 10/10 [00:05<00:00,  1.67it/s]
Fetching Data by Topic:  26%|██▋       | 27/102 [05:07<13:46, 11.02s/it]

✅ Completed fetching for: Carbon Footprint Reduction


🔍 Fetching videos for: Renewable Energy
📌 Found 27 valid videos for Renewable Energy



Fetching Transcripts for Renewable Energy:   0%|          | 0/27 [00:00<?, ?it/s][A
Fetching Transcripts for Renewable Energy:   4%|▎         | 1/27 [00:00<00:15,  1.63it/s][A
Fetching Transcripts for Renewable Energy:   7%|▋         | 2/27 [00:01<00:14,  1.69it/s][A
Fetching Transcripts for Renewable Energy:  11%|█         | 3/27 [00:01<00:14,  1.66it/s][A
Fetching Transcripts for Renewable Energy:  15%|█▍        | 4/27 [00:02<00:13,  1.66it/s][A
Fetching Transcripts for Renewable Energy:  19%|█▊        | 5/27 [00:02<00:13,  1.68it/s][A
Fetching Transcripts for Renewable Energy:  22%|██▏       | 6/27 [00:03<00:12,  1.70it/s][A
Fetching Transcripts for Renewable Energy:  26%|██▌       | 7/27 [00:04<00:11,  1.69it/s][A
Fetching Transcripts for Renewable Energy:  30%|██▉       | 8/27 [00:04<00:11,  1.63it/s][A
Fetching Transcripts for Renewable Energy:  33%|███▎      | 9/27 [00:05<00:12,  1.47it/s][A
Fetching Transcripts for Renewable Energy:  37%|███▋      | 10/27 [00:06<00:1

✅ Completed fetching for: Renewable Energy


🔍 Fetching videos for: Green Technology
📌 Found 8 valid videos for Green Technology



Fetching Transcripts for Green Technology:   0%|          | 0/8 [00:00<?, ?it/s][A
Fetching Transcripts for Green Technology:  12%|█▎        | 1/8 [00:00<00:04,  1.46it/s][A
Fetching Transcripts for Green Technology:  25%|██▌       | 2/8 [00:01<00:04,  1.25it/s][A
Fetching Transcripts for Green Technology:  38%|███▊      | 3/8 [00:02<00:03,  1.37it/s][A
Fetching Transcripts for Green Technology:  50%|█████     | 4/8 [00:02<00:02,  1.44it/s][A
Fetching Transcripts for Green Technology:  62%|██████▎   | 5/8 [00:03<00:02,  1.41it/s][A
Fetching Transcripts for Green Technology:  75%|███████▌  | 6/8 [00:04<00:01,  1.44it/s][A
Fetching Transcripts for Green Technology:  88%|████████▊ | 7/8 [00:04<00:00,  1.50it/s][A
Fetching Transcripts for Green Technology: 100%|██████████| 8/8 [00:05<00:00,  1.45it/s]
Fetching Data by Topic:  28%|██▊       | 29/102 [05:33<14:05, 11.59s/it]

✅ Completed fetching for: Green Technology


🔍 Fetching videos for: Health and Medicine
📌 Found 20 valid videos for Health and Medicine



Fetching Transcripts for Health and Medicine:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching Transcripts for Health and Medicine:   5%|▌         | 1/20 [00:00<00:15,  1.24it/s][A
Fetching Transcripts for Health and Medicine:  10%|█         | 2/20 [00:01<00:12,  1.42it/s][A
Fetching Transcripts for Health and Medicine:  15%|█▌        | 3/20 [00:02<00:11,  1.47it/s][A
Fetching Transcripts for Health and Medicine:  20%|██        | 4/20 [00:02<00:10,  1.54it/s][A
Fetching Transcripts for Health and Medicine:  25%|██▌       | 5/20 [00:03<00:08,  1.69it/s][A
Fetching Transcripts for Health and Medicine:  30%|███       | 6/20 [00:03<00:09,  1.56it/s][A
Fetching Transcripts for Health and Medicine:  35%|███▌      | 7/20 [00:04<00:08,  1.55it/s][A
Fetching Transcripts for Health and Medicine:  40%|████      | 8/20 [00:05<00:07,  1.55it/s][A
Fetching Transcripts for Health and Medicine:  45%|████▌     | 9/20 [00:05<00:06,  1.60it/s][A
Fetching Transcripts for Health and Medicine:  

✅ Completed fetching for: Health and Medicine


🔍 Fetching videos for: Mental Health
📌 Found 29 valid videos for Mental Health



Fetching Transcripts for Mental Health:   0%|          | 0/29 [00:00<?, ?it/s][A
Fetching Transcripts for Mental Health:   3%|▎         | 1/29 [00:00<00:21,  1.33it/s][A
Fetching Transcripts for Mental Health:   7%|▋         | 2/29 [00:01<00:18,  1.48it/s][A
Fetching Transcripts for Mental Health:  10%|█         | 3/29 [00:02<00:17,  1.51it/s][A
Fetching Transcripts for Mental Health:  14%|█▍        | 4/29 [00:02<00:16,  1.55it/s][A
Fetching Transcripts for Mental Health:  17%|█▋        | 5/29 [00:03<00:15,  1.57it/s][A
Fetching Transcripts for Mental Health:  21%|██        | 6/29 [00:03<00:14,  1.56it/s][A
Fetching Transcripts for Mental Health:  24%|██▍       | 7/29 [00:04<00:14,  1.54it/s][A
Fetching Transcripts for Mental Health:  28%|██▊       | 8/29 [00:05<00:13,  1.55it/s][A
Fetching Transcripts for Mental Health:  31%|███       | 9/29 [00:05<00:12,  1.58it/s][A
Fetching Transcripts for Mental Health:  34%|███▍      | 10/29 [00:06<00:11,  1.64it/s][A

⚠️ No transcript found for video 8x-WxYcSBo8: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=8x-WxYcSBo8! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (8x-WxYcSBo8) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-GB ("English (United Kingdom)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("


Fetching Transcripts for Mental Health:  38%|███▊      | 11/29 [00:07<00:11,  1.58it/s][A
Fetching Transcripts for Mental Health:  41%|████▏     | 12/29 [00:07<00:10,  1.59it/s][A
Fetching Transcripts for Mental Health:  45%|████▍     | 13/29 [00:08<00:10,  1.55it/s][A
Fetching Transcripts for Mental Health:  48%|████▊     | 14/29 [00:09<00:09,  1.51it/s][A
Fetching Transcripts for Mental Health:  52%|█████▏    | 15/29 [00:09<00:09,  1.49it/s][A
Fetching Transcripts for Mental Health:  55%|█████▌    | 16/29 [00:10<00:08,  1.54it/s][A

⚠️ No transcript found for video wOGqlVqyvCM: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=wOGqlVqyvCM! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (wOGqlVqyvCM) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]
 - de ("German")[TRANSLATABLE]
 - ko ("Korean")[TRANSLATABLE]
 - pt-BR ("Portuguese (Brazil)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Ha


Fetching Transcripts for Mental Health:  59%|█████▊    | 17/29 [00:11<00:07,  1.54it/s][A
Fetching Transcripts for Mental Health:  62%|██████▏   | 18/29 [00:11<00:07,  1.51it/s][A
Fetching Transcripts for Mental Health:  66%|██████▌   | 19/29 [00:12<00:06,  1.51it/s][A
Fetching Transcripts for Mental Health:  69%|██████▉   | 20/29 [00:13<00:06,  1.49it/s][A
Fetching Transcripts for Mental Health:  72%|███████▏  | 21/29 [00:13<00:04,  1.62it/s][A
Fetching Transcripts for Mental Health:  76%|███████▌  | 22/29 [00:14<00:04,  1.71it/s][A

⚠️ No transcript found for video -fQ50a-m92Y: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=-fQ50a-m92Y! This is most likely caused by:

Subtitles are disabled for this video

If you are sure that the described cause is not responsible for this error and that a transcript should be retrievable, please create an issue at https://github.com/jdepoix/youtube-transcript-api/issues. Please add which version of youtube_transcript_api you are using and provide the information needed to replicate the error. Also make sure that there are no open issues which already describe your problem!



Fetching Transcripts for Mental Health:  79%|███████▉  | 23/29 [00:14<00:03,  1.58it/s][A
Fetching Transcripts for Mental Health:  83%|████████▎ | 24/29 [00:15<00:03,  1.61it/s][A
Fetching Transcripts for Mental Health:  86%|████████▌ | 25/29 [00:15<00:02,  1.63it/s][A
Fetching Transcripts for Mental Health:  90%|████████▉ | 26/29 [00:16<00:01,  1.58it/s][A
Fetching Transcripts for Mental Health:  93%|█████████▎| 27/29 [00:17<00:01,  1.57it/s][A
Fetching Transcripts for Mental Health:  97%|█████████▋| 28/29 [00:18<00:00,  1.52it/s][A
Fetching Transcripts for Mental Health: 100%|██████████| 29/29 [00:18<00:00,  1.55it/s]
Fetching Data by Topic:  30%|███       | 31/102 [06:08<17:36, 14.88s/it]

✅ Completed fetching for: Mental Health


🔍 Fetching videos for: Nutrition and Diet
📌 Found 11 valid videos for Nutrition and Diet



Fetching Transcripts for Nutrition and Diet:   0%|          | 0/11 [00:00<?, ?it/s][A
Fetching Transcripts for Nutrition and Diet:   9%|▉         | 1/11 [00:00<00:07,  1.39it/s][A
Fetching Transcripts for Nutrition and Diet:  18%|█▊        | 2/11 [00:01<00:06,  1.34it/s][A
Fetching Transcripts for Nutrition and Diet:  27%|██▋       | 3/11 [00:02<00:05,  1.45it/s][A
Fetching Transcripts for Nutrition and Diet:  36%|███▋      | 4/11 [00:02<00:04,  1.46it/s][A
Fetching Transcripts for Nutrition and Diet:  45%|████▌     | 5/11 [00:03<00:03,  1.59it/s][A
Fetching Transcripts for Nutrition and Diet:  55%|█████▍    | 6/11 [00:04<00:03,  1.43it/s][A
Fetching Transcripts for Nutrition and Diet:  64%|██████▎   | 7/11 [00:04<00:02,  1.45it/s][A
Fetching Transcripts for Nutrition and Diet:  73%|███████▎  | 8/11 [00:05<00:01,  1.54it/s][A
Fetching Transcripts for Nutrition and Diet:  82%|████████▏ | 9/11 [00:05<00:01,  1.56it/s][A
Fetching Transcripts for Nutrition and Diet:  91%|███████

✅ Completed fetching for: Nutrition and Diet


🔍 Fetching videos for: Epidemiology
📌 Found 18 valid videos for Epidemiology



Fetching Transcripts for Epidemiology:   0%|          | 0/18 [00:00<?, ?it/s][A
Fetching Transcripts for Epidemiology:   6%|▌         | 1/18 [00:00<00:10,  1.57it/s][A
Fetching Transcripts for Epidemiology:  11%|█         | 2/18 [00:01<00:08,  1.81it/s][A
Fetching Transcripts for Epidemiology:  17%|█▋        | 3/18 [00:01<00:09,  1.67it/s][A
Fetching Transcripts for Epidemiology:  22%|██▏       | 4/18 [00:02<00:08,  1.71it/s][A
Fetching Transcripts for Epidemiology:  28%|██▊       | 5/18 [00:03<00:08,  1.62it/s][A
Fetching Transcripts for Epidemiology:  33%|███▎      | 6/18 [00:03<00:07,  1.71it/s][A
Fetching Transcripts for Epidemiology:  39%|███▉      | 7/18 [00:04<00:06,  1.65it/s][A
Fetching Transcripts for Epidemiology:  44%|████▍     | 8/18 [00:04<00:06,  1.55it/s][A
Fetching Transcripts for Epidemiology:  50%|█████     | 9/18 [00:05<00:05,  1.60it/s][A
Fetching Transcripts for Epidemiology:  56%|█████▌    | 10/18 [00:05<00:04,  1.75it/s][A
Fetching Transcripts for Ep

✅ Completed fetching for: Epidemiology


🔍 Fetching videos for: Public Health & Healthcare Systems
📌 Found 19 valid videos for Public Health & Healthcare Systems



Fetching Transcripts for Public Health & Healthcare Systems:   0%|          | 0/19 [00:00<?, ?it/s][A
Fetching Transcripts for Public Health & Healthcare Systems:   5%|▌         | 1/19 [00:00<00:13,  1.30it/s][A
Fetching Transcripts for Public Health & Healthcare Systems:  11%|█         | 2/19 [00:01<00:12,  1.39it/s][A
Fetching Transcripts for Public Health & Healthcare Systems:  16%|█▌        | 3/19 [00:02<00:12,  1.31it/s][A
Fetching Transcripts for Public Health & Healthcare Systems:  21%|██        | 4/19 [00:02<00:10,  1.41it/s][A
Fetching Transcripts for Public Health & Healthcare Systems:  26%|██▋       | 5/19 [00:03<00:10,  1.36it/s][A
Fetching Transcripts for Public Health & Healthcare Systems:  32%|███▏      | 6/19 [00:04<00:09,  1.37it/s][A
Fetching Transcripts for Public Health & Healthcare Systems:  37%|███▋      | 7/19 [00:05<00:08,  1.34it/s][A
Fetching Transcripts for Public Health & Healthcare Systems:  42%|████▏     | 8/19 [00:05<00:07,  1.42it/s][A
Fetching

⚠️ No transcript found for video uB29PPTxV6Y: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=uB29PPTxV6Y! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (uB29PPTxV6Y) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("D


Fetching Transcripts for Public Health & Healthcare Systems:  63%|██████▎   | 12/19 [00:08<00:04,  1.54it/s][A
Fetching Transcripts for Public Health & Healthcare Systems:  68%|██████▊   | 13/19 [00:09<00:04,  1.49it/s][A
Fetching Transcripts for Public Health & Healthcare Systems:  74%|███████▎  | 14/19 [00:09<00:03,  1.53it/s][A
Fetching Transcripts for Public Health & Healthcare Systems:  79%|███████▉  | 15/19 [00:10<00:02,  1.52it/s][A
Fetching Transcripts for Public Health & Healthcare Systems:  84%|████████▍ | 16/19 [00:11<00:02,  1.42it/s][A
Fetching Transcripts for Public Health & Healthcare Systems:  89%|████████▉ | 17/19 [00:11<00:01,  1.37it/s][A
Fetching Transcripts for Public Health & Healthcare Systems:  95%|█████████▍| 18/19 [00:12<00:00,  1.47it/s][A
Fetching Transcripts for Public Health & Healthcare Systems: 100%|██████████| 19/19 [00:13<00:00,  1.44it/s]
Fetching Data by Topic:  33%|███▎      | 34/102 [06:45<15:18, 13.51s/it]

✅ Completed fetching for: Public Health & Healthcare Systems


🔍 Fetching videos for: Alternative Medicine
📌 Found 16 valid videos for Alternative Medicine



Fetching Transcripts for Alternative Medicine:   0%|          | 0/16 [00:00<?, ?it/s][A
Fetching Transcripts for Alternative Medicine:   6%|▋         | 1/16 [00:00<00:06,  2.28it/s][A
Fetching Transcripts for Alternative Medicine:  12%|█▎        | 2/16 [00:01<00:08,  1.73it/s][A
Fetching Transcripts for Alternative Medicine:  19%|█▉        | 3/16 [00:01<00:08,  1.49it/s][A
Fetching Transcripts for Alternative Medicine:  25%|██▌       | 4/16 [00:02<00:08,  1.48it/s][A
Fetching Transcripts for Alternative Medicine:  31%|███▏      | 5/16 [00:03<00:07,  1.55it/s][A
Fetching Transcripts for Alternative Medicine:  38%|███▊      | 6/16 [00:03<00:06,  1.59it/s][A
Fetching Transcripts for Alternative Medicine:  44%|████▍     | 7/16 [00:04<00:06,  1.43it/s][A
Fetching Transcripts for Alternative Medicine:  50%|█████     | 8/16 [00:05<00:05,  1.40it/s][A
Fetching Transcripts for Alternative Medicine:  56%|█████▋    | 9/16 [00:06<00:04,  1.40it/s][A
Fetching Transcripts for Alternative 

✅ Completed fetching for: Alternative Medicine


🔍 Fetching videos for: Neuroscience
📌 Found 30 valid videos for Neuroscience



Fetching Transcripts for Neuroscience:   0%|          | 0/30 [00:00<?, ?it/s][A
Fetching Transcripts for Neuroscience:   3%|▎         | 1/30 [00:00<00:20,  1.44it/s][A
Fetching Transcripts for Neuroscience:   7%|▋         | 2/30 [00:01<00:18,  1.52it/s][A
Fetching Transcripts for Neuroscience:  10%|█         | 3/30 [00:01<00:17,  1.52it/s][A
Fetching Transcripts for Neuroscience:  13%|█▎        | 4/30 [00:02<00:16,  1.57it/s][A
Fetching Transcripts for Neuroscience:  17%|█▋        | 5/30 [00:03<00:15,  1.60it/s][A
Fetching Transcripts for Neuroscience:  20%|██        | 6/30 [00:03<00:15,  1.58it/s][A
Fetching Transcripts for Neuroscience:  23%|██▎       | 7/30 [00:04<00:13,  1.65it/s][A
Fetching Transcripts for Neuroscience:  27%|██▋       | 8/30 [00:05<00:14,  1.50it/s][A
Fetching Transcripts for Neuroscience:  30%|███       | 9/30 [00:05<00:14,  1.47it/s][A
Fetching Transcripts for Neuroscience:  33%|███▎      | 10/30 [00:06<00:13,  1.51it/s][A
Fetching Transcripts for Ne

✅ Completed fetching for: Neuroscience


🔍 Fetching videos for: Biomedical Engineering
📌 Found 9 valid videos for Biomedical Engineering



Fetching Transcripts for Biomedical Engineering:   0%|          | 0/9 [00:00<?, ?it/s][A
Fetching Transcripts for Biomedical Engineering:  11%|█         | 1/9 [00:00<00:04,  1.66it/s][A
Fetching Transcripts for Biomedical Engineering:  22%|██▏       | 2/9 [00:01<00:04,  1.55it/s][A
Fetching Transcripts for Biomedical Engineering:  33%|███▎      | 3/9 [00:01<00:03,  1.56it/s][A
Fetching Transcripts for Biomedical Engineering:  44%|████▍     | 4/9 [00:02<00:03,  1.50it/s][A
Fetching Transcripts for Biomedical Engineering:  56%|█████▌    | 5/9 [00:03<00:02,  1.49it/s][A
Fetching Transcripts for Biomedical Engineering:  67%|██████▋   | 6/9 [00:03<00:02,  1.50it/s][A
Fetching Transcripts for Biomedical Engineering:  78%|███████▊  | 7/9 [00:04<00:01,  1.52it/s][A
Fetching Transcripts for Biomedical Engineering:  89%|████████▉ | 8/9 [00:05<00:00,  1.60it/s][A
Fetching Transcripts for Biomedical Engineering: 100%|██████████| 9/9 [00:05<00:00,  1.57it/s]
Fetching Data by Topic:  36%|█

✅ Completed fetching for: Biomedical Engineering


🔍 Fetching videos for: Longevity & Aging
📌 Found 17 valid videos for Longevity & Aging



Fetching Transcripts for Longevity & Aging:   0%|          | 0/17 [00:00<?, ?it/s][A
Fetching Transcripts for Longevity & Aging:   6%|▌         | 1/17 [00:00<00:11,  1.36it/s][A
Fetching Transcripts for Longevity & Aging:  12%|█▏        | 2/17 [00:01<00:11,  1.34it/s][A
Fetching Transcripts for Longevity & Aging:  18%|█▊        | 3/17 [00:02<00:12,  1.15it/s][A
Fetching Transcripts for Longevity & Aging:  24%|██▎       | 4/17 [00:03<00:10,  1.20it/s][A
Fetching Transcripts for Longevity & Aging:  29%|██▉       | 5/17 [00:03<00:09,  1.30it/s][A
Fetching Transcripts for Longevity & Aging:  35%|███▌      | 6/17 [00:04<00:07,  1.43it/s][A
Fetching Transcripts for Longevity & Aging:  41%|████      | 7/17 [00:05<00:06,  1.46it/s][A
Fetching Transcripts for Longevity & Aging:  47%|████▋     | 8/17 [00:05<00:06,  1.46it/s][A
Fetching Transcripts for Longevity & Aging:  53%|█████▎    | 9/17 [00:06<00:05,  1.42it/s][A
Fetching Transcripts for Longevity & Aging:  59%|█████▉    | 10/17 

⚠️ No transcript found for video cLZEEOZlTzo: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=cLZEEOZlTzo! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (cLZEEOZlTzo) transcripts are available in the following languages:

(MANUALLY CREATED)
 - zh-TW ("Chinese (Taiwan)")[TRANSLATABLE]
 - el ("Greek")[TRANSLATABLE]
 - iw ("Hebrew")[TRANSLATABLE]
 - ja ("Japanese")[TRANSLATABLE]
 - es ("Spanish")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)"


Fetching Transcripts for Longevity & Aging:  71%|███████   | 12/17 [00:08<00:03,  1.52it/s][A
Fetching Transcripts for Longevity & Aging:  76%|███████▋  | 13/17 [00:09<00:02,  1.55it/s][A
Fetching Transcripts for Longevity & Aging:  82%|████████▏ | 14/17 [00:09<00:02,  1.44it/s][A
Fetching Transcripts for Longevity & Aging:  88%|████████▊ | 15/17 [00:10<00:01,  1.41it/s][A
Fetching Transcripts for Longevity & Aging:  94%|█████████▍| 16/17 [00:11<00:00,  1.42it/s][A
Fetching Transcripts for Longevity & Aging: 100%|██████████| 17/17 [00:12<00:00,  1.41it/s]
Fetching Data by Topic:  37%|███▋      | 38/102 [07:41<14:24, 13.51s/it]

✅ Completed fetching for: Longevity & Aging


🔍 Fetching videos for: Genetic Engineering
📌 Found 24 valid videos for Genetic Engineering



Fetching Transcripts for Genetic Engineering:   0%|          | 0/24 [00:00<?, ?it/s][A
Fetching Transcripts for Genetic Engineering:   4%|▍         | 1/24 [00:00<00:14,  1.54it/s][A
Fetching Transcripts for Genetic Engineering:   8%|▊         | 2/24 [00:01<00:14,  1.49it/s][A
Fetching Transcripts for Genetic Engineering:  12%|█▎        | 3/24 [00:01<00:13,  1.54it/s][A
Fetching Transcripts for Genetic Engineering:  17%|█▋        | 4/24 [00:02<00:12,  1.61it/s][A
Fetching Transcripts for Genetic Engineering:  21%|██        | 5/24 [00:03<00:12,  1.55it/s][A
Fetching Transcripts for Genetic Engineering:  25%|██▌       | 6/24 [00:03<00:10,  1.64it/s][A
Fetching Transcripts for Genetic Engineering:  29%|██▉       | 7/24 [00:04<00:10,  1.59it/s][A
Fetching Transcripts for Genetic Engineering:  33%|███▎      | 8/24 [00:05<00:10,  1.57it/s][A
Fetching Transcripts for Genetic Engineering:  38%|███▊      | 9/24 [00:05<00:09,  1.61it/s][A
Fetching Transcripts for Genetic Engineering:  

✅ Completed fetching for: Genetic Engineering


🔍 Fetching videos for: Pharmaceutical Innovations
📌 Found 4 valid videos for Pharmaceutical Innovations



Fetching Transcripts for Pharmaceutical Innovations:   0%|          | 0/4 [00:00<?, ?it/s][A
Fetching Transcripts for Pharmaceutical Innovations:  25%|██▌       | 1/4 [00:00<00:01,  1.68it/s][A
Fetching Transcripts for Pharmaceutical Innovations:  50%|█████     | 2/4 [00:01<00:01,  1.62it/s][A
Fetching Transcripts for Pharmaceutical Innovations:  75%|███████▌  | 3/4 [00:01<00:00,  1.55it/s][A
Fetching Transcripts for Pharmaceutical Innovations: 100%|██████████| 4/4 [00:02<00:00,  1.56it/s]
Fetching Data by Topic:  39%|███▉      | 40/102 [08:02<11:55, 11.54s/it]

✅ Completed fetching for: Pharmaceutical Innovations


🔍 Fetching videos for: Education
📌 Found 16 valid videos for Education



Fetching Transcripts for Education:   0%|          | 0/16 [00:00<?, ?it/s][A
Fetching Transcripts for Education:   6%|▋         | 1/16 [00:00<00:10,  1.50it/s][A
Fetching Transcripts for Education:  12%|█▎        | 2/16 [00:01<00:08,  1.57it/s][A
Fetching Transcripts for Education:  19%|█▉        | 3/16 [00:02<00:08,  1.46it/s][A
Fetching Transcripts for Education:  25%|██▌       | 4/16 [00:02<00:07,  1.53it/s][A
Fetching Transcripts for Education:  31%|███▏      | 5/16 [00:03<00:07,  1.52it/s][A
Fetching Transcripts for Education:  38%|███▊      | 6/16 [00:03<00:06,  1.61it/s][A
Fetching Transcripts for Education:  44%|████▍     | 7/16 [00:04<00:05,  1.53it/s][A
Fetching Transcripts for Education:  50%|█████     | 8/16 [00:05<00:05,  1.59it/s][A
Fetching Transcripts for Education:  56%|█████▋    | 9/16 [00:05<00:04,  1.59it/s][A
Fetching Transcripts for Education:  62%|██████▎   | 10/16 [00:06<00:03,  1.50it/s][A
Fetching Transcripts for Education:  69%|██████▉   | 11/16 [

✅ Completed fetching for: Education


🔍 Fetching videos for: Learning Strategies
📌 Found 19 valid videos for Learning Strategies



Fetching Transcripts for Learning Strategies:   0%|          | 0/19 [00:00<?, ?it/s][A
Fetching Transcripts for Learning Strategies:   5%|▌         | 1/19 [00:00<00:10,  1.74it/s][A
Fetching Transcripts for Learning Strategies:  11%|█         | 2/19 [00:01<00:10,  1.69it/s][A
Fetching Transcripts for Learning Strategies:  16%|█▌        | 3/19 [00:01<00:09,  1.71it/s][A
Fetching Transcripts for Learning Strategies:  21%|██        | 4/19 [00:02<00:08,  1.71it/s][A
Fetching Transcripts for Learning Strategies:  26%|██▋       | 5/19 [00:02<00:08,  1.70it/s][A
Fetching Transcripts for Learning Strategies:  32%|███▏      | 6/19 [00:03<00:07,  1.69it/s][A
Fetching Transcripts for Learning Strategies:  37%|███▋      | 7/19 [00:04<00:07,  1.68it/s][A
Fetching Transcripts for Learning Strategies:  42%|████▏     | 8/19 [00:04<00:06,  1.59it/s][A
Fetching Transcripts for Learning Strategies:  47%|████▋     | 9/19 [00:05<00:06,  1.60it/s][A
Fetching Transcripts for Learning Strategies:  

✅ Completed fetching for: Learning Strategies


🔍 Fetching videos for: Career Development
📌 Found 14 valid videos for Career Development



Fetching Transcripts for Career Development:   0%|          | 0/14 [00:00<?, ?it/s][A
Fetching Transcripts for Career Development:   7%|▋         | 1/14 [00:00<00:08,  1.62it/s][A
Fetching Transcripts for Career Development:  14%|█▍        | 2/14 [00:01<00:08,  1.41it/s][A
Fetching Transcripts for Career Development:  21%|██▏       | 3/14 [00:02<00:07,  1.44it/s][A
Fetching Transcripts for Career Development:  29%|██▊       | 4/14 [00:02<00:06,  1.59it/s][A
Fetching Transcripts for Career Development:  36%|███▌      | 5/14 [00:03<00:05,  1.52it/s][A
Fetching Transcripts for Career Development:  43%|████▎     | 6/14 [00:04<00:05,  1.49it/s][A
Fetching Transcripts for Career Development:  50%|█████     | 7/14 [00:04<00:04,  1.42it/s][A
Fetching Transcripts for Career Development:  57%|█████▋    | 8/14 [00:05<00:04,  1.48it/s][A
Fetching Transcripts for Career Development:  64%|██████▍   | 9/14 [00:06<00:03,  1.47it/s][A
Fetching Transcripts for Career Development:  71%|███████

✅ Completed fetching for: Career Development


🔍 Fetching videos for: Soft Skills
📌 Found 15 valid videos for Soft Skills



Fetching Transcripts for Soft Skills:   0%|          | 0/15 [00:00<?, ?it/s][A
Fetching Transcripts for Soft Skills:   7%|▋         | 1/15 [00:00<00:08,  1.56it/s][A
Fetching Transcripts for Soft Skills:  13%|█▎        | 2/15 [00:01<00:07,  1.76it/s][A
Fetching Transcripts for Soft Skills:  20%|██        | 3/15 [00:01<00:07,  1.69it/s][A
Fetching Transcripts for Soft Skills:  27%|██▋       | 4/15 [00:02<00:06,  1.72it/s][A
Fetching Transcripts for Soft Skills:  33%|███▎      | 5/15 [00:02<00:05,  1.88it/s][A
Fetching Transcripts for Soft Skills:  40%|████      | 6/15 [00:03<00:05,  1.68it/s][A
Fetching Transcripts for Soft Skills:  47%|████▋     | 7/15 [00:04<00:05,  1.57it/s][A
Fetching Transcripts for Soft Skills:  53%|█████▎    | 8/15 [00:04<00:03,  1.79it/s][A
Fetching Transcripts for Soft Skills:  60%|██████    | 9/15 [00:05<00:03,  1.54it/s][A
Fetching Transcripts for Soft Skills:  67%|██████▋   | 10/15 [00:06<00:03,  1.53it/s][A
Fetching Transcripts for Soft Skills: 

⚠️ No transcript found for video w6QbsjvCk1s: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=w6QbsjvCk1s! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (w6QbsjvCk1s) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("D


Fetching Transcripts for Soft Skills:  93%|█████████▎| 14/15 [00:08<00:00,  1.73it/s][A
Fetching Transcripts for Soft Skills: 100%|██████████| 15/15 [00:08<00:00,  1.72it/s]
Fetching Data by Topic:  43%|████▎     | 44/102 [08:50<11:13, 11.62s/it]

✅ Completed fetching for: Soft Skills


🔍 Fetching videos for: EdTech (Education Technology)
📌 Found 10 valid videos for EdTech (Education Technology)



Fetching Transcripts for EdTech (Education Technology):   0%|          | 0/10 [00:00<?, ?it/s][A
Fetching Transcripts for EdTech (Education Technology):  10%|█         | 1/10 [00:00<00:05,  1.63it/s][A
Fetching Transcripts for EdTech (Education Technology):  20%|██        | 2/10 [00:01<00:04,  1.60it/s][A

⚠️ No transcript found for video PN1StZzlmbU: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=PN1StZzlmbU! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (PN1StZzlmbU) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("D


Fetching Transcripts for EdTech (Education Technology):  30%|███       | 3/10 [00:01<00:04,  1.57it/s][A
Fetching Transcripts for EdTech (Education Technology):  40%|████      | 4/10 [00:02<00:03,  1.59it/s][A
Fetching Transcripts for EdTech (Education Technology):  50%|█████     | 5/10 [00:03<00:02,  1.71it/s][A
Fetching Transcripts for EdTech (Education Technology):  60%|██████    | 6/10 [00:03<00:02,  1.65it/s][A
Fetching Transcripts for EdTech (Education Technology):  70%|███████   | 7/10 [00:04<00:01,  1.57it/s][A
Fetching Transcripts for EdTech (Education Technology):  80%|████████  | 8/10 [00:05<00:01,  1.53it/s][A
Fetching Transcripts for EdTech (Education Technology):  90%|█████████ | 9/10 [00:05<00:00,  1.57it/s][A
Fetching Transcripts for EdTech (Education Technology): 100%|██████████| 10/10 [00:06<00:00,  1.61it/s]
Fetching Data by Topic:  44%|████▍     | 45/102 [08:58<10:00, 10.53s/it]

⚠️ No transcript found for video yz0mbQjwmVI: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=yz0mbQjwmVI! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (yz0mbQjwmVI) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-US ("English (United States)")[TRANSLATABLE]

(GENERATED)
 - ro ("Romanian (auto-generated)")[TRANSLATABLE]

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian


Fetching Transcripts for Job Market Trends:   0%|          | 0/11 [00:00<?, ?it/s][A
Fetching Transcripts for Job Market Trends:   9%|▉         | 1/11 [00:00<00:06,  1.49it/s][A
Fetching Transcripts for Job Market Trends:  18%|█▊        | 2/11 [00:01<00:07,  1.24it/s][A
Fetching Transcripts for Job Market Trends:  27%|██▋       | 3/11 [00:02<00:05,  1.37it/s][A
Fetching Transcripts for Job Market Trends:  36%|███▋      | 4/11 [00:02<00:05,  1.39it/s][A
Fetching Transcripts for Job Market Trends:  45%|████▌     | 5/11 [00:03<00:04,  1.35it/s][A
Fetching Transcripts for Job Market Trends:  55%|█████▍    | 6/11 [00:04<00:03,  1.36it/s][A
Fetching Transcripts for Job Market Trends:  64%|██████▎   | 7/11 [00:05<00:02,  1.39it/s][A
Fetching Transcripts for Job Market Trends:  73%|███████▎  | 8/11 [00:05<00:02,  1.45it/s][A
Fetching Transcripts for Job Market Trends:  82%|████████▏ | 9/11 [00:06<00:01,  1.40it/s][A
Fetching Transcripts for Job Market Trends:  91%|█████████ | 10/11 

✅ Completed fetching for: Job Market Trends


🔍 Fetching videos for: Online Learning Platforms
📌 Found 11 valid videos for Online Learning Platforms



Fetching Transcripts for Online Learning Platforms:   0%|          | 0/11 [00:00<?, ?it/s][A
Fetching Transcripts for Online Learning Platforms:   9%|▉         | 1/11 [00:00<00:06,  1.51it/s][A
Fetching Transcripts for Online Learning Platforms:  18%|█▊        | 2/11 [00:01<00:05,  1.51it/s][A
Fetching Transcripts for Online Learning Platforms:  27%|██▋       | 3/11 [00:02<00:05,  1.42it/s][A
Fetching Transcripts for Online Learning Platforms:  36%|███▋      | 4/11 [00:02<00:04,  1.51it/s][A
Fetching Transcripts for Online Learning Platforms:  45%|████▌     | 5/11 [00:03<00:04,  1.48it/s][A
Fetching Transcripts for Online Learning Platforms:  55%|█████▍    | 6/11 [00:04<00:03,  1.49it/s][A
Fetching Transcripts for Online Learning Platforms:  64%|██████▎   | 7/11 [00:04<00:03,  1.33it/s][A
Fetching Transcripts for Online Learning Platforms:  73%|███████▎  | 8/11 [00:05<00:02,  1.39it/s][A
Fetching Transcripts for Online Learning Platforms:  82%|████████▏ | 9/11 [00:06<00:01,  

✅ Completed fetching for: Online Learning Platforms


🔍 Fetching videos for: Higher Education Trends
📌 Found 9 valid videos for Higher Education Trends



Fetching Transcripts for Higher Education Trends:   0%|          | 0/9 [00:00<?, ?it/s][A
Fetching Transcripts for Higher Education Trends:  11%|█         | 1/9 [00:00<00:05,  1.46it/s][A
Fetching Transcripts for Higher Education Trends:  22%|██▏       | 2/9 [00:01<00:04,  1.54it/s][A
Fetching Transcripts for Higher Education Trends:  33%|███▎      | 3/9 [00:01<00:03,  1.52it/s][A
Fetching Transcripts for Higher Education Trends:  44%|████▍     | 4/9 [00:02<00:03,  1.58it/s][A
Fetching Transcripts for Higher Education Trends:  56%|█████▌    | 5/9 [00:03<00:02,  1.49it/s][A
Fetching Transcripts for Higher Education Trends:  67%|██████▋   | 6/9 [00:04<00:02,  1.44it/s][A
Fetching Transcripts for Higher Education Trends:  78%|███████▊  | 7/9 [00:04<00:01,  1.49it/s][A
Fetching Transcripts for Higher Education Trends:  89%|████████▉ | 8/9 [00:05<00:00,  1.45it/s][A
Fetching Transcripts for Higher Education Trends: 100%|██████████| 9/9 [00:06<00:00,  1.45it/s]
Fetching Data by Top

✅ Completed fetching for: Higher Education Trends


🔍 Fetching videos for: Interview Preparation
📌 Found 13 valid videos for Interview Preparation



Fetching Transcripts for Interview Preparation:   0%|          | 0/13 [00:00<?, ?it/s][A
Fetching Transcripts for Interview Preparation:   8%|▊         | 1/13 [00:00<00:07,  1.54it/s][A
Fetching Transcripts for Interview Preparation:  15%|█▌        | 2/13 [00:01<00:06,  1.58it/s][A
Fetching Transcripts for Interview Preparation:  23%|██▎       | 3/13 [00:01<00:06,  1.63it/s][A
Fetching Transcripts for Interview Preparation:  31%|███       | 4/13 [00:02<00:05,  1.53it/s][A
Fetching Transcripts for Interview Preparation:  38%|███▊      | 5/13 [00:03<00:05,  1.54it/s][A
Fetching Transcripts for Interview Preparation:  46%|████▌     | 6/13 [00:03<00:04,  1.52it/s][A
Fetching Transcripts for Interview Preparation:  54%|█████▍    | 7/13 [00:04<00:03,  1.66it/s][A
Fetching Transcripts for Interview Preparation:  62%|██████▏   | 8/13 [00:05<00:03,  1.64it/s][A
Fetching Transcripts for Interview Preparation:  69%|██████▉   | 9/13 [00:05<00:02,  1.52it/s][A
Fetching Transcripts for In

✅ Completed fetching for: Interview Preparation


🔍 Fetching videos for: Remote Work Culture
📌 Found 10 valid videos for Remote Work Culture



Fetching Transcripts for Remote Work Culture:   0%|          | 0/10 [00:00<?, ?it/s][A
Fetching Transcripts for Remote Work Culture:  10%|█         | 1/10 [00:00<00:06,  1.31it/s][A
Fetching Transcripts for Remote Work Culture:  20%|██        | 2/10 [00:01<00:05,  1.51it/s][A
Fetching Transcripts for Remote Work Culture:  30%|███       | 3/10 [00:02<00:04,  1.48it/s][A
Fetching Transcripts for Remote Work Culture:  40%|████      | 4/10 [00:02<00:03,  1.56it/s][A
Fetching Transcripts for Remote Work Culture:  50%|█████     | 5/10 [00:03<00:03,  1.66it/s][A
Fetching Transcripts for Remote Work Culture:  60%|██████    | 6/10 [00:03<00:02,  1.67it/s][A
Fetching Transcripts for Remote Work Culture:  70%|███████   | 7/10 [00:04<00:01,  1.55it/s][A
Fetching Transcripts for Remote Work Culture:  80%|████████  | 8/10 [00:05<00:01,  1.60it/s][A
Fetching Transcripts for Remote Work Culture:  90%|█████████ | 9/10 [00:05<00:00,  1.57it/s][A
Fetching Transcripts for Remote Work Culture: 1

✅ Completed fetching for: Remote Work Culture


🔍 Fetching videos for: Freelancing & Gig Economy
📌 Found 10 valid videos for Freelancing & Gig Economy



Fetching Transcripts for Freelancing & Gig Economy:   0%|          | 0/10 [00:00<?, ?it/s][A
Fetching Transcripts for Freelancing & Gig Economy:  10%|█         | 1/10 [00:00<00:05,  1.76it/s][A
Fetching Transcripts for Freelancing & Gig Economy:  20%|██        | 2/10 [00:01<00:04,  1.63it/s][A
Fetching Transcripts for Freelancing & Gig Economy:  30%|███       | 3/10 [00:01<00:04,  1.64it/s][A
Fetching Transcripts for Freelancing & Gig Economy:  40%|████      | 4/10 [00:02<00:03,  1.60it/s][A
Fetching Transcripts for Freelancing & Gig Economy:  50%|█████     | 5/10 [00:03<00:03,  1.50it/s][A
Fetching Transcripts for Freelancing & Gig Economy:  60%|██████    | 6/10 [00:03<00:02,  1.55it/s][A
Fetching Transcripts for Freelancing & Gig Economy:  70%|███████   | 7/10 [00:04<00:01,  1.62it/s][A
Fetching Transcripts for Freelancing & Gig Economy:  80%|████████  | 8/10 [00:05<00:01,  1.58it/s][A
Fetching Transcripts for Freelancing & Gig Economy:  90%|█████████ | 9/10 [00:05<00:00,  

✅ Completed fetching for: Freelancing & Gig Economy


🔍 Fetching videos for: Finance
📌 Found 11 valid videos for Finance



Fetching Transcripts for Finance:   0%|          | 0/11 [00:00<?, ?it/s][A
Fetching Transcripts for Finance:   9%|▉         | 1/11 [00:00<00:05,  1.85it/s][A
Fetching Transcripts for Finance:  18%|█▊        | 2/11 [00:01<00:05,  1.62it/s][A
Fetching Transcripts for Finance:  27%|██▋       | 3/11 [00:01<00:04,  1.66it/s][A
Fetching Transcripts for Finance:  36%|███▋      | 4/11 [00:02<00:04,  1.67it/s][A
Fetching Transcripts for Finance:  45%|████▌     | 5/11 [00:02<00:03,  1.70it/s][A
Fetching Transcripts for Finance:  55%|█████▍    | 6/11 [00:03<00:02,  1.76it/s][A
Fetching Transcripts for Finance:  64%|██████▎   | 7/11 [00:04<00:02,  1.74it/s][A
Fetching Transcripts for Finance:  73%|███████▎  | 8/11 [00:04<00:01,  1.67it/s][A
Fetching Transcripts for Finance:  82%|████████▏ | 9/11 [00:05<00:01,  1.58it/s][A
Fetching Transcripts for Finance:  91%|█████████ | 10/11 [00:06<00:00,  1.59it/s][A
Fetching Transcripts for Finance: 100%|██████████| 11/11 [00:06<00:00,  1.66it/s]


✅ Completed fetching for: Finance


🔍 Fetching videos for: Stock Market
📌 Found 16 valid videos for Stock Market



Fetching Transcripts for Stock Market:   0%|          | 0/16 [00:00<?, ?it/s][A
Fetching Transcripts for Stock Market:   6%|▋         | 1/16 [00:00<00:10,  1.44it/s][A
Fetching Transcripts for Stock Market:  12%|█▎        | 2/16 [00:01<00:10,  1.30it/s][A
Fetching Transcripts for Stock Market:  19%|█▉        | 3/16 [00:02<00:09,  1.32it/s][A
Fetching Transcripts for Stock Market:  25%|██▌       | 4/16 [00:02<00:08,  1.38it/s][A
Fetching Transcripts for Stock Market:  31%|███▏      | 5/16 [00:03<00:07,  1.43it/s][A
Fetching Transcripts for Stock Market:  38%|███▊      | 6/16 [00:04<00:07,  1.42it/s][A
Fetching Transcripts for Stock Market:  44%|████▍     | 7/16 [00:04<00:05,  1.53it/s][A
Fetching Transcripts for Stock Market:  50%|█████     | 8/16 [00:05<00:05,  1.53it/s][A

⚠️ No transcript found for video ZCFkWDdmXG8: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=ZCFkWDdmXG8! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (ZCFkWDdmXG8) transcripts are available in the following languages:

(MANUALLY CREATED)
 - ar ("Arabic")[TRANSLATABLE]
 - nl-NL ("Dutch (Netherlands)")[TRANSLATABLE]
 - en-US ("English (United States)")[TRANSLATABLE]
 - fr ("French")[TRANSLATABLE]
 - de ("German")[TRANSLATABLE]
 - iw ("Hebrew")[TRANSLATABLE]
 - id ("Indonesian")[TRANSLATABLE]
 - it ("Italian")[TRANSLATABLE]
 - ja ("Japanese")[TRANSLATABLE]
 - ko ("Korean")[TRANSLATABLE]
 - pl ("Polish")[TRANSLATABLE]
 - pt-BR ("Portuguese (Brazil)")[TRANSLATABLE]
 - ro ("Romanian")[TRANSLATABLE]
 - es ("Spanish")[TRANSLATABLE]
 - sv ("Swedish")[TRANSLATABLE]
 - th ("Thai")[TRANSLATABLE]
 - tr ("Turkish")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")


Fetching Transcripts for Stock Market:  56%|█████▋    | 9/16 [00:06<00:05,  1.37it/s][A
Fetching Transcripts for Stock Market:  62%|██████▎   | 10/16 [00:06<00:04,  1.46it/s][A
Fetching Transcripts for Stock Market:  69%|██████▉   | 11/16 [00:07<00:03,  1.49it/s][A
Fetching Transcripts for Stock Market:  75%|███████▌  | 12/16 [00:08<00:02,  1.50it/s][A
Fetching Transcripts for Stock Market:  81%|████████▏ | 13/16 [00:08<00:01,  1.59it/s][A
Fetching Transcripts for Stock Market:  88%|████████▊ | 14/16 [00:09<00:01,  1.64it/s][A
Fetching Transcripts for Stock Market:  94%|█████████▍| 15/16 [00:10<00:00,  1.63it/s][A
Fetching Transcripts for Stock Market: 100%|██████████| 16/16 [00:10<00:00,  1.50it/s]
Fetching Data by Topic:  52%|█████▏    | 53/102 [10:13<08:02,  9.84s/it]

✅ Completed fetching for: Stock Market


🔍 Fetching videos for: Cryptocurrency
📌 Found 4 valid videos for Cryptocurrency



Fetching Transcripts for Cryptocurrency:   0%|          | 0/4 [00:00<?, ?it/s][A
Fetching Transcripts for Cryptocurrency:  25%|██▌       | 1/4 [00:00<00:01,  1.66it/s][A
Fetching Transcripts for Cryptocurrency:  50%|█████     | 2/4 [00:01<00:01,  1.42it/s][A
Fetching Transcripts for Cryptocurrency:  75%|███████▌  | 3/4 [00:02<00:00,  1.48it/s][A
Fetching Transcripts for Cryptocurrency: 100%|██████████| 4/4 [00:02<00:00,  1.54it/s]
Fetching Data by Topic:  53%|█████▎    | 54/102 [10:17<06:32,  8.17s/it]

✅ Completed fetching for: Cryptocurrency


🔍 Fetching videos for: Entrepreneurship
📌 Found 5 valid videos for Entrepreneurship



Fetching Transcripts for Entrepreneurship:   0%|          | 0/5 [00:00<?, ?it/s][A
Fetching Transcripts for Entrepreneurship:  20%|██        | 1/5 [00:00<00:02,  1.77it/s][A
Fetching Transcripts for Entrepreneurship:  40%|████      | 2/5 [00:01<00:01,  1.75it/s][A
Fetching Transcripts for Entrepreneurship:  60%|██████    | 3/5 [00:01<00:01,  1.69it/s][A
Fetching Transcripts for Entrepreneurship:  80%|████████  | 4/5 [00:02<00:00,  1.64it/s][A
Fetching Transcripts for Entrepreneurship: 100%|██████████| 5/5 [00:03<00:00,  1.63it/s]
Fetching Data by Topic:  54%|█████▍    | 55/102 [10:22<05:34,  7.13s/it]

✅ Completed fetching for: Entrepreneurship


🔍 Fetching videos for: E-Commerce & Digital Marketing
📌 Found 20 valid videos for E-Commerce & Digital Marketing



Fetching Transcripts for E-Commerce & Digital Marketing:   0%|          | 0/20 [00:00<?, ?it/s][A
Fetching Transcripts for E-Commerce & Digital Marketing:   5%|▌         | 1/20 [00:00<00:12,  1.58it/s][A
Fetching Transcripts for E-Commerce & Digital Marketing:  10%|█         | 2/20 [00:01<00:12,  1.47it/s][A
Fetching Transcripts for E-Commerce & Digital Marketing:  15%|█▌        | 3/20 [00:01<00:09,  1.71it/s][A
Fetching Transcripts for E-Commerce & Digital Marketing:  20%|██        | 4/20 [00:02<00:08,  1.78it/s][A
Fetching Transcripts for E-Commerce & Digital Marketing:  25%|██▌       | 5/20 [00:03<00:09,  1.62it/s][A
Fetching Transcripts for E-Commerce & Digital Marketing:  30%|███       | 6/20 [00:03<00:08,  1.61it/s][A
Fetching Transcripts for E-Commerce & Digital Marketing:  35%|███▌      | 7/20 [00:04<00:08,  1.53it/s][A
Fetching Transcripts for E-Commerce & Digital Marketing:  40%|████      | 8/20 [00:05<00:07,  1.51it/s][A
Fetching Transcripts for E-Commerce & Digita

✅ Completed fetching for: E-Commerce & Digital Marketing


🔍 Fetching videos for: Personal Finance
📌 Found 8 valid videos for Personal Finance



Fetching Transcripts for Personal Finance:   0%|          | 0/8 [00:00<?, ?it/s][A
Fetching Transcripts for Personal Finance:  12%|█▎        | 1/8 [00:00<00:03,  1.78it/s][A
Fetching Transcripts for Personal Finance:  25%|██▌       | 2/8 [00:01<00:03,  1.79it/s][A
Fetching Transcripts for Personal Finance:  38%|███▊      | 3/8 [00:01<00:02,  1.82it/s][A
Fetching Transcripts for Personal Finance:  50%|█████     | 4/8 [00:02<00:02,  1.80it/s][A
Fetching Transcripts for Personal Finance:  62%|██████▎   | 5/8 [00:02<00:01,  1.64it/s][A
Fetching Transcripts for Personal Finance:  75%|███████▌  | 6/8 [00:03<00:01,  1.59it/s][A
Fetching Transcripts for Personal Finance:  88%|████████▊ | 7/8 [00:04<00:00,  1.57it/s][A
Fetching Transcripts for Personal Finance: 100%|██████████| 8/8 [00:04<00:00,  1.62it/s]
Fetching Data by Topic:  56%|█████▌    | 57/102 [10:43<06:25,  8.57s/it]

✅ Completed fetching for: Personal Finance


🔍 Fetching videos for: Investment Strategies
📌 Found 5 valid videos for Investment Strategies



Fetching Transcripts for Investment Strategies:   0%|          | 0/5 [00:00<?, ?it/s][A
Fetching Transcripts for Investment Strategies:  20%|██        | 1/5 [00:00<00:02,  1.61it/s][A
Fetching Transcripts for Investment Strategies:  40%|████      | 2/5 [00:01<00:02,  1.47it/s][A
Fetching Transcripts for Investment Strategies:  60%|██████    | 3/5 [00:01<00:01,  1.50it/s][A
Fetching Transcripts for Investment Strategies:  80%|████████  | 4/5 [00:02<00:00,  1.56it/s][A
Fetching Transcripts for Investment Strategies: 100%|██████████| 5/5 [00:03<00:00,  1.36it/s]
Fetching Data by Topic:  57%|█████▋    | 58/102 [10:49<05:36,  7.65s/it]

✅ Completed fetching for: Investment Strategies


🔍 Fetching videos for: Real Estate Market
📌 Found 9 valid videos for Real Estate Market



Fetching Transcripts for Real Estate Market:   0%|          | 0/9 [00:00<?, ?it/s][A
Fetching Transcripts for Real Estate Market:  11%|█         | 1/9 [00:00<00:04,  1.62it/s][A
Fetching Transcripts for Real Estate Market:  22%|██▏       | 2/9 [00:01<00:04,  1.61it/s][A
Fetching Transcripts for Real Estate Market:  33%|███▎      | 3/9 [00:01<00:03,  1.55it/s][A
Fetching Transcripts for Real Estate Market:  44%|████▍     | 4/9 [00:02<00:03,  1.48it/s][A
Fetching Transcripts for Real Estate Market:  56%|█████▌    | 5/9 [00:03<00:02,  1.47it/s][A
Fetching Transcripts for Real Estate Market:  67%|██████▋   | 6/9 [00:04<00:02,  1.42it/s][A
Fetching Transcripts for Real Estate Market:  78%|███████▊  | 7/9 [00:04<00:01,  1.44it/s][A
Fetching Transcripts for Real Estate Market:  89%|████████▉ | 8/9 [00:05<00:00,  1.43it/s][A
Fetching Transcripts for Real Estate Market: 100%|██████████| 9/9 [00:06<00:00,  1.43it/s]
Fetching Data by Topic:  58%|█████▊    | 59/102 [10:57<05:31,  7.72s/i

✅ Completed fetching for: Real Estate Market


🔍 Fetching videos for: Economic Trends
📌 Found 15 valid videos for Economic Trends



Fetching Transcripts for Economic Trends:   0%|          | 0/15 [00:00<?, ?it/s][A
Fetching Transcripts for Economic Trends:   7%|▋         | 1/15 [00:00<00:08,  1.62it/s][A
Fetching Transcripts for Economic Trends:  13%|█▎        | 2/15 [00:01<00:09,  1.44it/s][A
Fetching Transcripts for Economic Trends:  20%|██        | 3/15 [00:02<00:08,  1.38it/s][A
Fetching Transcripts for Economic Trends:  27%|██▋       | 4/15 [00:02<00:07,  1.48it/s][A
Fetching Transcripts for Economic Trends:  33%|███▎      | 5/15 [00:03<00:06,  1.52it/s][A
Fetching Transcripts for Economic Trends:  40%|████      | 6/15 [00:04<00:06,  1.46it/s][A
Fetching Transcripts for Economic Trends:  47%|████▋     | 7/15 [00:04<00:05,  1.48it/s][A
Fetching Transcripts for Economic Trends:  53%|█████▎    | 8/15 [00:05<00:04,  1.48it/s][A
Fetching Transcripts for Economic Trends:  60%|██████    | 9/15 [00:06<00:03,  1.51it/s][A
Fetching Transcripts for Economic Trends:  67%|██████▋   | 10/15 [00:06<00:03,  1.46it/

✅ Completed fetching for: Economic Trends


🔍 Fetching videos for: Wealth Management
📌 Found 11 valid videos for Wealth Management



Fetching Transcripts for Wealth Management:   0%|          | 0/11 [00:00<?, ?it/s][A
Fetching Transcripts for Wealth Management:   9%|▉         | 1/11 [00:00<00:05,  1.83it/s][A
Fetching Transcripts for Wealth Management:  18%|█▊        | 2/11 [00:01<00:05,  1.67it/s][A
Fetching Transcripts for Wealth Management:  27%|██▋       | 3/11 [00:01<00:04,  1.68it/s][A
Fetching Transcripts for Wealth Management:  36%|███▋      | 4/11 [00:02<00:04,  1.69it/s][A
Fetching Transcripts for Wealth Management:  45%|████▌     | 5/11 [00:03<00:03,  1.57it/s][A
Fetching Transcripts for Wealth Management:  55%|█████▍    | 6/11 [00:03<00:03,  1.61it/s][A
Fetching Transcripts for Wealth Management:  64%|██████▎   | 7/11 [00:04<00:02,  1.56it/s][A
Fetching Transcripts for Wealth Management:  73%|███████▎  | 8/11 [00:05<00:01,  1.54it/s][A
Fetching Transcripts for Wealth Management:  82%|████████▏ | 9/11 [00:05<00:01,  1.50it/s][A
Fetching Transcripts for Wealth Management:  91%|█████████ | 10/11 

✅ Completed fetching for: Wealth Management


🔍 Fetching videos for: Startups & Venture Capital
📌 Found 19 valid videos for Startups & Venture Capital



Fetching Transcripts for Startups & Venture Capital:   0%|          | 0/19 [00:00<?, ?it/s][A
Fetching Transcripts for Startups & Venture Capital:   5%|▌         | 1/19 [00:00<00:12,  1.45it/s][A
Fetching Transcripts for Startups & Venture Capital:  11%|█         | 2/19 [00:01<00:10,  1.64it/s][A
Fetching Transcripts for Startups & Venture Capital:  16%|█▌        | 3/19 [00:01<00:08,  1.80it/s][A
Fetching Transcripts for Startups & Venture Capital:  21%|██        | 4/19 [00:02<00:09,  1.52it/s][A
Fetching Transcripts for Startups & Venture Capital:  26%|██▋       | 5/19 [00:03<00:08,  1.67it/s][A
Fetching Transcripts for Startups & Venture Capital:  32%|███▏      | 6/19 [00:03<00:08,  1.53it/s][A
Fetching Transcripts for Startups & Venture Capital:  37%|███▋      | 7/19 [00:04<00:07,  1.52it/s][A
Fetching Transcripts for Startups & Venture Capital:  42%|████▏     | 8/19 [00:05<00:07,  1.48it/s][A
Fetching Transcripts for Startups & Venture Capital:  47%|████▋     | 9/19 [00:0

✅ Completed fetching for: Startups & Venture Capital


🔍 Fetching videos for: History
📌 Found 6 valid videos for History



Fetching Transcripts for History:   0%|          | 0/6 [00:00<?, ?it/s][A
Fetching Transcripts for History:  17%|█▋        | 1/6 [00:00<00:03,  1.34it/s][A
Fetching Transcripts for History:  33%|███▎      | 2/6 [00:01<00:03,  1.28it/s][A
Fetching Transcripts for History:  50%|█████     | 3/6 [00:02<00:02,  1.36it/s][A
Fetching Transcripts for History:  67%|██████▋   | 4/6 [00:02<00:01,  1.36it/s][A
Fetching Transcripts for History:  83%|████████▎ | 5/6 [00:03<00:00,  1.40it/s][A
Fetching Transcripts for History: 100%|██████████| 6/6 [00:04<00:00,  1.41it/s]
Fetching Data by Topic:  62%|██████▏   | 63/102 [11:37<05:50,  8.98s/it]

✅ Completed fetching for: History


🔍 Fetching videos for: Geopolitics
📌 Found 5 valid videos for Geopolitics



Fetching Transcripts for Geopolitics:   0%|          | 0/5 [00:00<?, ?it/s][A
Fetching Transcripts for Geopolitics:  20%|██        | 1/5 [00:00<00:02,  1.58it/s][A

⚠️ No transcript found for video zZJFozFsnIU: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=zZJFozFsnIU! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (zZJFozFsnIU) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-GB ("English (United Kingdom)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("


Fetching Transcripts for Geopolitics:  40%|████      | 2/5 [00:01<00:01,  1.58it/s][A
Fetching Transcripts for Geopolitics:  60%|██████    | 3/5 [00:01<00:01,  1.60it/s][A
Fetching Transcripts for Geopolitics:  80%|████████  | 4/5 [00:02<00:00,  1.50it/s][A
Fetching Transcripts for Geopolitics: 100%|██████████| 5/5 [00:03<00:00,  1.56it/s]
Fetching Data by Topic:  63%|██████▎   | 64/102 [11:42<04:54,  7.74s/it]

✅ Completed fetching for: Geopolitics


🔍 Fetching videos for: Psychology
📌 Found 23 valid videos for Psychology



Fetching Transcripts for Psychology:   0%|          | 0/23 [00:00<?, ?it/s][A
Fetching Transcripts for Psychology:   4%|▍         | 1/23 [00:00<00:15,  1.44it/s][A
Fetching Transcripts for Psychology:   9%|▊         | 2/23 [00:01<00:14,  1.44it/s][A
Fetching Transcripts for Psychology:  13%|█▎        | 3/23 [00:02<00:13,  1.51it/s][A
Fetching Transcripts for Psychology:  17%|█▋        | 4/23 [00:02<00:12,  1.58it/s][A
Fetching Transcripts for Psychology:  22%|██▏       | 5/23 [00:03<00:11,  1.53it/s][A

⚠️ No transcript found for video jLWpuLH3t6k: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=jLWpuLH3t6k! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (jLWpuLH3t6k) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-GB ("English (United Kingdom)")[TRANSLATABLE]
 - es ("Spanish")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("C


Fetching Transcripts for Psychology:  26%|██▌       | 6/23 [00:03<00:11,  1.54it/s][A
Fetching Transcripts for Psychology:  30%|███       | 7/23 [00:04<00:11,  1.40it/s][A
Fetching Transcripts for Psychology:  35%|███▍      | 8/23 [00:05<00:10,  1.41it/s][A
Fetching Transcripts for Psychology:  39%|███▉      | 9/23 [00:06<00:09,  1.47it/s][A
Fetching Transcripts for Psychology:  43%|████▎     | 10/23 [00:06<00:08,  1.47it/s][A
Fetching Transcripts for Psychology:  48%|████▊     | 11/23 [00:07<00:07,  1.56it/s][A
Fetching Transcripts for Psychology:  52%|█████▏    | 12/23 [00:08<00:07,  1.53it/s][A

⚠️ No transcript found for video a05QDMbR2hA: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=a05QDMbR2hA! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (a05QDMbR2hA) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-GB ("English (United Kingdom)")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("Czech")
 - da ("Danish")
 - dv ("


Fetching Transcripts for Psychology:  57%|█████▋    | 13/23 [00:08<00:06,  1.53it/s][A

⚠️ No transcript found for video gtQGfFj8iTY: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=gtQGfFj8iTY! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (gtQGfFj8iTY) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-GB ("English (United Kingdom)")[TRANSLATABLE]
 - es ("Spanish")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("C


Fetching Transcripts for Psychology:  61%|██████    | 14/23 [00:09<00:05,  1.58it/s][A
Fetching Transcripts for Psychology:  65%|██████▌   | 15/23 [00:09<00:05,  1.59it/s][A
Fetching Transcripts for Psychology:  70%|██████▉   | 16/23 [00:10<00:04,  1.63it/s][A
Fetching Transcripts for Psychology:  74%|███████▍  | 17/23 [00:11<00:04,  1.44it/s][A
Fetching Transcripts for Psychology:  78%|███████▊  | 18/23 [00:11<00:03,  1.49it/s][A
Fetching Transcripts for Psychology:  83%|████████▎ | 19/23 [00:12<00:02,  1.51it/s][A
Fetching Transcripts for Psychology:  87%|████████▋ | 20/23 [00:13<00:01,  1.55it/s][A
Fetching Transcripts for Psychology:  91%|█████████▏| 21/23 [00:13<00:01,  1.50it/s][A

⚠️ No transcript found for video N1qPhlGE86M: 
Could not retrieve a transcript for the video https://www.youtube.com/watch?v=N1qPhlGE86M! This is most likely caused by:

No transcripts were found for any of the requested language codes: ('en',)

For this video (N1qPhlGE86M) transcripts are available in the following languages:

(MANUALLY CREATED)
 - en-GB ("English (United Kingdom)")[TRANSLATABLE]
 - es ("Spanish")[TRANSLATABLE]

(GENERATED)
None

(TRANSLATION LANGUAGES)
 - ab ("Abkhazian")
 - aa ("Afar")
 - af ("Afrikaans")
 - ak ("Akan")
 - sq ("Albanian")
 - am ("Amharic")
 - ar ("Arabic")
 - hy ("Armenian")
 - as ("Assamese")
 - ay ("Aymara")
 - az ("Azerbaijani")
 - bn ("Bangla")
 - ba ("Bashkir")
 - eu ("Basque")
 - be ("Belarusian")
 - bho ("Bhojpuri")
 - bs ("Bosnian")
 - br ("Breton")
 - bg ("Bulgarian")
 - my ("Burmese")
 - ca ("Catalan")
 - ceb ("Cebuano")
 - zh-Hans ("Chinese (Simplified)")
 - zh-Hant ("Chinese (Traditional)")
 - co ("Corsican")
 - hr ("Croatian")
 - cs ("C


Fetching Transcripts for Psychology:  96%|█████████▌| 22/23 [00:14<00:00,  1.48it/s][A
Fetching Transcripts for Psychology: 100%|██████████| 23/23 [00:15<00:00,  1.51it/s]
Fetching Data by Topic:  64%|██████▎   | 65/102 [11:59<06:28, 10.49s/it]

✅ Completed fetching for: Psychology


🔍 Fetching videos for: Philosophy
📌 Found 33 valid videos for Philosophy



Fetching Transcripts for Philosophy:   0%|          | 0/33 [00:00<?, ?it/s][A
Fetching Transcripts for Philosophy:   3%|▎         | 1/33 [00:00<00:25,  1.23it/s][A
Fetching Transcripts for Philosophy:   6%|▌         | 2/33 [00:01<00:25,  1.20it/s][A
Fetching Transcripts for Philosophy:   9%|▉         | 3/33 [00:02<00:21,  1.39it/s][A
Fetching Transcripts for Philosophy:  12%|█▏        | 4/33 [00:02<00:19,  1.48it/s][A
Fetching Transcripts for Philosophy:  15%|█▌        | 5/33 [00:03<00:17,  1.61it/s][A
Fetching Transcripts for Philosophy:  18%|█▊        | 6/33 [00:03<00:16,  1.64it/s][A
Fetching Transcripts for Philosophy:  21%|██        | 7/33 [00:04<00:15,  1.63it/s][A
Fetching Transcripts for Philosophy:  24%|██▍       | 8/33 [00:05<00:15,  1.59it/s][A
Fetching Transcripts for Philosophy:  27%|██▋       | 9/33 [00:05<00:14,  1.60it/s][A
Fetching Transcripts for Philosophy:  30%|███       | 10/33 [00:06<00:14,  1.59it/s][A
Fetching Transcripts for Philosophy:  33%|███▎   

✅ Completed fetching for: Philosophy


🔍 Fetching videos for: Sociology
⚠️ Error fetching video IDs for Sociology: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?q=Sociology&part=id&type=video&maxResults=50&key=AIzaSyBGs5GCNqpn5HTFiU5ODPBnrWRwn0LUjW8&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">
📌 Found 0 valid videos for Sociology
⚠️ Skipping Sociology due to no valid videos.

🔍 Fetching videos for: Cultural Anthropology
⚠️ Error fetching video IDs for Cultural Anthropology: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?q=Cultural+Anthropology&part=id&type=video&maxResults=50&key=AIzaSyBGs5GCNqpn5HTFiU5ODPBnrWRwn0LUjW8&alt=json re



⚠️ Error fetching video IDs for Artificial Intelligence in Society: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?q=Artificial+Intelligence+in+Society&part=id&type=video&maxResults=50&key=AIzaSyBGs5GCNqpn5HTFiU5ODPBnrWRwn0LUjW8&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">
📌 Found 0 valid videos for Artificial Intelligence in Society
⚠️ Skipping Artificial Intelligence in Society due to no valid videos.

🔍 Fetching videos for: Movies and TV Shows
⚠️ Error fetching video IDs for Movies and TV Shows: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?q=Movies+and+TV+Shows&part=id&type=video&maxResults=50&key=AIzaSyBGs5GCNqpn5HTFiU5ODPBnr



⚠️ Error fetching video IDs for Esports & Competitive Gaming: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?q=Esports+%26+Competitive+Gaming&part=id&type=video&maxResults=50&key=AIzaSyBGs5GCNqpn5HTFiU5ODPBnrWRwn0LUjW8&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">
📌 Found 0 valid videos for Esports & Competitive Gaming
⚠️ Skipping Esports & Competitive Gaming due to no valid videos.

🔍 Fetching videos for: Self-Improvement
⚠️ Error fetching video IDs for Self-Improvement: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?q=Self-Improvement&part=id&type=video&maxResults=50&key=AIzaSyBGs5GCNqpn5HTFiU5ODPBnrWRwn0LUjW8&alt=json returned "T

Fetching Data by Topic: 100%|██████████| 102/102 [12:24<00:00,  7.30s/it]


⚠️ Error fetching video IDs for Personal Branding: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?q=Personal+Branding&part=id&type=video&maxResults=50&key=AIzaSyBGs5GCNqpn5HTFiU5ODPBnrWRwn0LUjW8&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">
📌 Found 0 valid videos for Personal Branding
⚠️ Skipping Personal Branding due to no valid videos.

🔍 Fetching videos for: Future of Work
⚠️ Error fetching video IDs for Future of Work: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/search?q=Future+of+Work&part=id&type=video&maxResults=50&key=AIzaSyBGs5GCNqpn5HTFiU5ODPBnrWRwn0LUjW8&alt=json returned "The request cannot be completed because you have exce

Counting no.of videos per topic

In [None]:
import json
from collections import defaultdict

# Load dataset from JSON file
with open("/content/drive/MyDrive/Youtube_Large_Transcripts.json", "r", encoding="utf-8") as f:
    dataset = json.load(f)

# Count videos per topic
topic_counts = defaultdict(int)

for entry in dataset:
    topic_counts[entry["Topic"]] += 1

# Print results
print("📊 Video Counts per Topic:")
for topic, count in topic_counts.items():
    print(f"{topic}: {count} videos")


📊 Video Counts per Topic:
Artificial Intelligence: 24 videos
Machine Learning: 9 videos
Deep Learning: 19 videos
Computer Vision: 16 videos
Natural Language Processing: 12 videos
Quantum Computing: 20 videos
Cybersecurity: 15 videos
Cloud Computing: 6 videos
Edge Computing: 12 videos
Internet of Things (IoT): 8 videos
5G Technology: 9 videos
Augmented Reality (AR): 9 videos
Virtual Reality (VR): 6 videos
Space Exploration: 17 videos
Astronomy: 11 videos
Robotics: 10 videos
Blockchain Technology: 12 videos
Biotechnology: 12 videos
Genetics: 21 videos
Bioinformatics: 4 videos
Climate Change: 22 videos
Sustainable Energy: 16 videos
Wildlife Conservation: 6 videos
Environmental Pollution: 7 videos
Ocean Conservation: 34 videos
Deforestation: 11 videos
Carbon Footprint Reduction: 8 videos
Renewable Energy: 27 videos
Green Technology: 8 videos
Health and Medicine: 20 videos
Mental Health: 26 videos
Nutrition and Diet: 11 videos
Epidemiology: 18 videos
Public Health & Healthcare Systems: 18 v

In [None]:
!pip install youtube-search-python youtube-transcript-api




In [None]:
!pip install transformers torch jsonlines nltk


Collecting jsonlines
  Downloading jsonlines-4.0.0-py3-none-any.whl.metadata (1.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecti

###**Youtube Transcript Preprocessing and Summarisation using diffrent models**

### **BART (Bidirectional and Auto-Regressive Transformers)**  
🔹 **Architecture:**  
- Encoder-Decoder Transformer model.  
- Uses a bidirectional encoder (like BERT) and an autoregressive decoder (like GPT).  

🔹 **Strengths:**  
- Handles noisy text well, good for abstractive summarization.  
- Recovers missing words and corrects sentence corruption.  

🔹 **Used for:**  
- General text summarization, paraphrasing, text generation, and question answering.  



### **Pegasus (Pre-training with Extracted Gap-Sentences for Abstractive Summarization)**  
🔹 **Architecture:**  
- Encoder-Decoder Transformer similar to T5.  
- Pretrained with "Gap Sentence Generation" (GSG) - key sentences from documents are masked, and the model learns to reconstruct them.  

🔹 **Strengths:**  
- Optimized for summarizing long documents.  
- Excels in low-data settings, effectively extracts key sentences.  

🔹 **Used for:**  
- News, research papers, article summarization.  


### **T5 (Text-to-Text Transfer Transformer)**  
🔹 **Architecture:**  
- Fully encoder-decoder Transformer.  
- Converts all NLP tasks into a text-to-text format (input text → output text).  
- Uses a multi-task pretraining approach.  

🔹 **Strengths:**  
- Flexible for multiple NLP tasks, good at rephrasing.  
- Handles various text-to-text transformations.  

🔹 **Used for:**  
- Summarization, translation, question answering, text classification.

In [None]:
import json
import re
import nltk
import torch
from transformers import BartForConditionalGeneration, BartTokenizer
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from transformers import T5ForConditionalGeneration, T5Tokenizer

nltk.download("punkt")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load JSON data
def load_json(file_path):
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    return []

# Preprocess text
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"[^a-zA-Z0-9.,!?]", " ", text)
    return text.strip()

# Summarization function
def summarize(text, model, tokenizer, max_length=120, min_length=40):
    if len(text.split()) < 10:
        return "Text too short for summarization."

    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt",
                              max_length=300, truncation=True).to(device)

    with torch.no_grad():
        summary_ids = model.generate(
            inputs, max_length=max_length, min_length=min_length,
            length_penalty=2.0, num_beams=4, early_stopping=True
        )

    torch.cuda.empty_cache()  # Clear GPU cache
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Load model function
from transformers import AutoTokenizer
def load_model(model_class, tokenizer_name):
    print(f"Loading {tokenizer_name}...")
    model = model_class.from_pretrained(tokenizer_name).to(device)
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name, legacy=False)
    return model, tokenizer

# Clear GPU cache before loading
torch.cuda.empty_cache()

# Load models
bart_model, bart_tokenizer = load_model(BartForConditionalGeneration, "facebook/bart-base")
pegasus_model, pegasus_tokenizer = load_model(PegasusForConditionalGeneration, "google/pegasus-newsroom")
t5_model, t5_tokenizer = load_model(T5ForConditionalGeneration, "t5-small")

# Process transcripts and summarize
def process_and_summarize(input_path, output_path):
    torch.cuda.empty_cache()

    data = load_json(input_path)
    summarized_data = load_json(output_path)  # Load existing data for resuming

    processed_videos = {entry["Video_ID"] for entry in summarized_data}  # Track processed videos

    total_videos = len(data)
    print(f"Total videos to process: {total_videos}")

    for idx, entry in enumerate(data):
        video_id = entry.get("Video_ID", "Unknown Video ID")
        topic = entry.get("Topic", "Unknown Topic")
        transcript = entry.get("Transcript", "")

        if video_id in processed_videos:
            continue  # Skip already processed videos

        if not transcript.strip():
            print(f"Skipping empty transcript for Video ID: {video_id}")
            continue

        processed_videos.add(video_id)  # Mark as processed
        cleaned_text = preprocess_text(transcript)

        print(f"Processing {idx+1}/{total_videos}: {video_id} - {topic[:30]}...")

        bart_summary = summarize(cleaned_text, bart_model, bart_tokenizer)
        pegasus_summary = summarize(cleaned_text, pegasus_model, pegasus_tokenizer)
        t5_summary = summarize(cleaned_text, t5_model, t5_tokenizer)

        summarized_entry = {
            "Topic": topic,
            "Video_ID": video_id,
            "Transcript": transcript,
            "BART_Summary": bart_summary,
            "Pegasus_Summary": pegasus_summary,
            "T5_Summary": t5_summary
        }
        summarized_data.append(summarized_entry)

        # **SAVE PROGRESS EVERY 10 ITERATIONS**
        if (idx + 1) % 10 == 0:
            with open(output_path, "w", encoding="utf-8") as f:
                json.dump(summarized_data, f, indent=4)
            print(f"✅ Saved progress at {idx+1}/{total_videos}")

        torch.cuda.empty_cache()  # Clear cache every 10 iterations

    # Final save
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(summarized_data, f, indent=4)

    print(f"✅ Summarization complete. Results saved to {output_path}")

# Run summarization
input_json = "/content/drive/MyDrive/Youtube_Large_Transcripts.json"
output_json = "/content/drive/MyDrive/summarized_transcripts.json"
process_and_summarize(input_json, output_json)


Using device: cuda
Loading facebook/bart-base...


model.safetensors:  55%|#####4    | 304M/558M [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

Loading google/pegasus-newsroom...


config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Error while downloading from https://cdn-lfs.hf.co/google/pegasus-newsroom/87c67691330b04bdc9226247bb257a3a321460790219fd5a4132a434b03c28e9?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27pytorch_model.bin%3B+filename%3D%22pytorch_model.bin%22%3B&response-content-type=application%2Foctet-stream&Expires=1741797987&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0MTc5Nzk4N319LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy5oZi5jby9nb29nbGUvcGVnYXN1cy1uZXdzcm9vbS84N2M2NzY5MTMzMGIwNGJkYzkyMjYyNDdiYjI1N2EzYTMyMTQ2MDc5MDIxOWZkNWE0MTMyYTQzNGIwM2MyOGU5P3Jlc3BvbnNlLWNvbnRlbnQtZGlzcG9zaXRpb249KiZyZXNwb25zZS1jb250ZW50LXR5cGU9KiJ9XX0_&Signature=g1CdTs8sqaurMUH3Izq3rv7CGwnpczWO5N42hQBVe6PBqb01ref9vZWMEcZYPmp9MIEuiAFgBi93du-XGBQZ1jlKTtC7Lwrk8f%7EPigbmGoU5NFhCYkPC-WyfWzBO-BeZ3zOmJq9V0-pDukvMxpncglP20XtVG%7E5jTtp0TWvsM5Rk5umylnjdsWWTYNsmS2U0eZoMBHw8MNN65gYwVgNAoQLwONo%7EkqYDi8nhy8hFFbT6N-xuTLdJBm1kQy0ZJZT5HiBk8V1uOTBuu3rRlf8-3jdb%7EXvIQwli7qdkqn4tu%7En1ine

pytorch_model.bin:  44%|####3     | 996M/2.28G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-newsroom and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

Loading t5-small...


config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

Total videos to process: 902
Processing 1/902: X_wLVgMzSH4 - Artificial Intelligence...
Processing 2/902: D2JY38VShxI - Artificial Intelligence...
Processing 3/902: Yq0QkCxoTHM - Artificial Intelligence...
Processing 4/902: tFx_UNW9I1U - Artificial Intelligence...
Processing 5/902: Sqa8Zo2XWc4 - Artificial Intelligence...
Processing 6/902: qYNweeDHiyU - Artificial Intelligence...
Processing 7/902: KKNCiRWd_j0 - Artificial Intelligence...
Processing 8/902: JMUxmLyrhSk - Artificial Intelligence...
Processing 9/902: 5NgNicANyqM - Artificial Intelligence...
Processing 10/902: lPvqvt55l3A - Artificial Intelligence...
✅ Saved progress at 10/902
Processing 11/902: JMLsHI8aV0g - Artificial Intelligence...
Processing 12/902: s0dMTAQM4cw - Artificial Intelligence...
Processing 13/902: 64E9O1Gv99o - Artificial Intelligence...
Processing 14/902: fa8k8IQ1_X0 - Artificial Intelligence...
Processing 15/902: Tq6_7XyCyyI - Artificial Intelligence...
Processing 16/902: PQgUHLPqIAA - Artificial Intellige

Using Rouge score and bert score and cosine similarity to evaluate each summary

###  **Technologies and Concepts Used in the Code:**



### **1. Natural Language Processing (NLP)**
- **Sentence-BERT (paraphrase-MiniLM-L6-v2)**  
   - Embeds transcripts and summaries into dense vectors for semantic similarity.  
   - **Use Case:** Used in compute_cosine_similarity() for similarity comparison.  

- **ROUGE (Recall-Oriented Understudy for Gisting Evaluation)**  
   - Measures overlap between generated and reference text using `rouge1`, `rouge2`, and `rougeL` scores.  
   - **Use Case:** Evaluates the quality of summaries using `compute_rouge()`.  

- **BERTScore (`bert-base-uncased`)**  
   - Measures semantic similarity between reference and generated text at the token level.  
   - **Use Case:** Computes BERT-based similarity in `compute_bertscore()`.  



### **2. Text Similarity and Evaluation**
- **Cosine Similarity (`sklearn.metrics.pairwise.cosine_similarity`)**  
   - Measures cosine distance between vector embeddings of reference and generated summaries.  
   - **Use Case:** Used in `compute_cosine_similarity()` to quantify semantic closeness.  

- **Model Selection Logic**  
   - Automatically selects the best summary based on the highest average score.  
   - **Use Case:** `get_best_summary()` determines the best-performing model.  




In [None]:
!pip install rouge-score


Collecting rouge-score
  Downloading rouge_score-0.1.2.tar.gz (17 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: rouge-score
  Building wheel for rouge-score (setup.py) ... [?25l[?25hdone
  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=4a1cacb594a287fef56ac0ea8078143e59fcdfe294a61d6401d78e8b4da9e0ed
  Stored in directory: /root/.cache/pip/wheels/1e/19/43/8a442dc83660ca25e163e1bd1f89919284ab0d0c1475475148
Successfully built rouge-score
Installing collected packages: rouge-score
Successfully installed rouge-score-0.1.2


In [None]:
!pip install bert-score


Collecting bert-score
  Downloading bert_score-0.3.13-py3-none-any.whl.metadata (15 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.0.0->bert-score)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.0.0->bert-score)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.0.0->bert-score)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.0.0->bert-score)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.0.0->bert-score)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch>=1.0.

In [None]:
import json
import torch
import gc
import warnings
import multiprocessing
from rouge_score import rouge_scorer
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
from bert_score import score as bert_score
from concurrent.futures import ThreadPoolExecutor  # Switched to ThreadPoolExecutor

# Suppress warnings
warnings.filterwarnings("ignore", category=UserWarning)

# Set multiprocessing start method
multiprocessing.set_start_method("spawn", force=True)

# Check device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load model once to avoid repeated loading
embedding_model = SentenceTransformer("paraphrase-MiniLM-L6-v2").to(device)

def load_json(file_path):
    """ Load JSON data from file """
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

def compute_rouge(reference, generated):
    """ Compute ROUGE scores """
    scorer = rouge_scorer.RougeScorer(["rouge1", "rouge2", "rougeL"], use_stemmer=True)
    scores = scorer.score(reference, generated)
    return {metric: scores[metric].fmeasure for metric in scores}

def compute_bertscore(reference, generated, model="bert-base-uncased"):
    """ Compute BERTScore """
    try:
        with torch.no_grad():
            P, R, F1 = bert_score([generated], [reference], model_type=model, lang="en", device=device)
        return F1.item()
    except Exception as e:
        print(f"BERTScore error: {e}")
        return 0.0

def compute_cosine_similarity(reference, generated, model):
    """ Compute cosine similarity between sentence embeddings """
    try:
        with torch.no_grad():
            embeddings = model.encode([reference, generated], convert_to_tensor=True).cpu()
        similarity = cosine_similarity(embeddings[0].numpy().reshape(1, -1),
                                       embeddings[1].numpy().reshape(1, -1))
        return similarity[0][0]
    except Exception as e:
        print(f"Cosine Similarity error: {e}")
        return 0.0

def get_best_summary(evaluation):
    """ Identify the best summary based on average scores """
    best_model = None
    best_score = -float("inf")

    for model_name, scores in evaluation.items():
        if model_name not in ["Video_ID", "Topic", "Best Model", "Best Score"]:
            avg_score = sum(scores.values()) / len(scores)
            if avg_score > best_score:
                best_score = avg_score
                best_model = model_name

    return best_model, round(best_score, 4)

def process_entry(entry):
    """ Process a single transcript entry and compute evaluation metrics """
    try:
        transcript = entry["Transcript"]
        summaries = {
            "BART": entry["BART_Summary"],
            "Pegasus": entry["Pegasus_Summary"],
            "T5": entry["T5_Summary"]
        }

        evaluation = {"Video_ID": entry["Video_ID"], "Topic": entry["Topic"]}

        for model_name, summary in summaries.items():
            rouge_scores = compute_rouge(transcript, summary)
            bertscore = compute_bertscore(transcript, summary)
            cosine_sim = compute_cosine_similarity(transcript, summary, embedding_model)

            evaluation[model_name] = {
                "ROUGE-1": round(rouge_scores["rouge1"], 4),
                "ROUGE-2": round(rouge_scores["rouge2"], 4),
                "ROUGE-L": round(rouge_scores["rougeL"], 4),
                "BERTScore": round(bertscore, 4),
                "Cosine Similarity": round(cosine_sim, 4)
            }

        evaluation["Best Model"], evaluation["Best Score"] = get_best_summary(evaluation)

        return evaluation

    except Exception as e:
        print(f"Error processing entry: {e}")
        return None

def evaluate_summaries_parallel(file_path, max_workers=4):
    """ Evaluate summaries using parallel processing """
    data = load_json(file_path)

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(executor.map(process_entry, data))

    return [r for r in results if r is not None]

# Run evaluation
summary_json = "/content/drive/MyDrive/LLM PROJECT/summarized_transcripts.json"
evaluation_results = evaluate_summaries_parallel(summary_json, max_workers=4)

# Save results
evaluation_output = "/content/drive/MyDrive/LLM PROJECT/summarization_evaluation.json"
with open(evaluation_output, "w", encoding="utf-8") as f:
    json.dump(evaluation_results, f, indent=4)

print(f"✅ Evaluation complete. Results saved to {evaluation_output}")


TypeError: Object of type float32 is not JSON serializable

In [None]:
import json
import numpy as np

# Convert all NumPy float32 values to Python float
def convert_to_serializable(obj):
    if isinstance(obj, np.float32):
        return float(obj)
    elif isinstance(obj, dict):
        return {key: convert_to_serializable(value) for key, value in obj.items()}
    elif isinstance(obj, list):
        return [convert_to_serializable(item) for item in obj]
    return obj

evaluation_results = convert_to_serializable(evaluation_results)

# Save to JSON
evaluation_output = "/content/drive/MyDrive/LLM PROJECT/summarization_evaluation.json"
with open(evaluation_output, "w", encoding="utf-8") as f:
    json.dump(evaluation_results, f, indent=4)

print(f"✅ Evaluation complete. Results saved to {evaluation_output}")


✅ Evaluation complete. Results saved to /content/drive/MyDrive/LLM PROJECT/summarization_evaluation.json


In [None]:
import json

# Load evaluation results
evaluation_output = "/content/drive/MyDrive/LLM PROJECT/summarization_evaluation.json"

with open(evaluation_output, "r", encoding="utf-8") as f:
    evaluation_results = json.load(f)


# Count best summaries for each model
best_counts = {"BART": 0, "Pegasus": 0, "T5": 0}

for transcript in evaluation_results:
    best_model = transcript.get("Best Model")  # Extract "Best Model" field
    if best_model in best_counts:
        best_counts[best_model] += 1

# Print results
print(f"Best summary count per model:")
for model, count in best_counts.items():
    print(f"{model}: {count}")


Best summary count per model:
BART: 851
Pegasus: 17
T5: 10


Code to test on a custom transcript

In [None]:
import torch
import re
from transformers import BartForConditionalGeneration, BartTokenizer
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Preprocess text
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"[^a-zA-Z0-9.,!?]", " ", text)
    return text.strip()

# Summarization function
def summarize(text, model, tokenizer, max_length=120, min_length=40):
    if len(text.split()) < 10:
        return "Text too short for summarization."

    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt",
                              max_length=300, truncation=True).to(device)

    with torch.no_grad():
        summary_ids = model.generate(
            inputs, max_length=max_length, min_length=min_length,
            length_penalty=2.0, num_beams=4, early_stopping=True
        )

    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Load models
bart_model, bart_tokenizer = BartForConditionalGeneration.from_pretrained("facebook/bart-base").to(device), BartTokenizer.from_pretrained("facebook/bart-base")
pegasus_model, pegasus_tokenizer = PegasusForConditionalGeneration.from_pretrained("google/pegasus-newsroom").to(device), PegasusTokenizer.from_pretrained("google/pegasus-newsroom")
t5_model, t5_tokenizer = T5ForConditionalGeneration.from_pretrained("t5-small").to(device), T5Tokenizer.from_pretrained("t5-small")

# **INPUT YOUR OWN TRANSCRIPT**
custom_transcript = """
Ladies and gentlemen, today we gather to celebrate the art of storytelling. Stories have been the bedrock of human culture for millennia, shaping our beliefs, traditions, and identities. From ancient myths to modern novels, storytelling transcends time and space, connecting us to our past and guiding us into the future. In this digital age, while the mediums may have evolved, the essence of a compelling narrative remains unchanged. Let us delve into the magic of storytelling and its enduring power to inspire, educate, and unite humanity.
"""

# Preprocess and summarize
cleaned_text = preprocess_text(custom_transcript)
print(f"\n📌 **Testing on Custom Transcript:**\n")
print(f"📝 **Original Transcript (First 500 chars):**\n{custom_transcript[:500]}...\n")

bart_summary = summarize(cleaned_text, bart_model, bart_tokenizer)
pegasus_summary = summarize(cleaned_text, pegasus_model, pegasus_tokenizer)
t5_summary = summarize(cleaned_text, t5_model, t5_tokenizer)

# Display results
print("\n=== 🔍 Generated Summaries ===")
print(f"\n🔹 **BART Summary:**\n{bart_summary}")
print(f"\n🔹 **Pegasus Summary:**\n{pegasus_summary}")
print(f"\n🔹 **T5 Summary:**\n{t5_summary}")


Using device: cpu


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/1.72k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/558M [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.12k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-newsroom and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


generation_config.json:   0%|          | 0.00/280 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/87.0 [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/1.91M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/65.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.28G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/242M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.32k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565



📌 **Testing on Custom Transcript:**

📝 **Original Transcript (First 500 chars):**

Ladies and gentlemen, today we gather to celebrate the art of storytelling. Stories have been the bedrock of human culture for millennia, shaping our beliefs, traditions, and identities. From ancient myths to modern novels, storytelling transcends time and space, connecting us to our past and guiding us into the future. In this digital age, while the mediums may have evolved, the essence of a compelling narrative remains unchanged. Let us delve into the magic of storytelling and its enduring po...


=== 🔍 Generated Summaries ===

🔹 **BART Summary:**
summarize: ladies and gentlemen, today we gather to celebrate the art of storytelling. stories have been the bedrock of human culture for millennia, shaping our beliefs, traditions, and identities. from ancient myths to modern novels, storytelling transcends time and space, connecting us to our past and guiding us into the future. in this digital age, while 

Code to Test on a Random Transcript

In [None]:
import json
import random
import torch
import re
from transformers import BartForConditionalGeneration, BartTokenizer
from transformers import PegasusForConditionalGeneration, PegasusTokenizer
from transformers import T5ForConditionalGeneration, T5Tokenizer

# Set device
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load JSON data
def load_json(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        return json.load(f)

# Preprocess text
def preprocess_text(text):
    text = text.lower()
    text = re.sub(r"\s+", " ", text)
    text = re.sub(r"[^a-zA-Z0-9.,!?]", " ", text)
    return text.strip()

# Summarization function
def summarize(text, model, tokenizer, max_length=120, min_length=40):
    if len(text.split()) < 10:
        return "Text too short for summarization."

    inputs = tokenizer.encode("summarize: " + text, return_tensors="pt",
                              max_length=300, truncation=True).to(device)

    with torch.no_grad():
        summary_ids = model.generate(
            inputs, max_length=max_length, min_length=min_length,
            length_penalty=2.0, num_beams=4, early_stopping=True
        )

    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Load a single model function
def load_model(model_class, tokenizer_name):
    print(f"Loading {tokenizer_name}...")
    model = model_class.from_pretrained(tokenizer_name).to(device)
    tokenizer = tokenizer_class.from_pretrained(tokenizer_name, legacy=False)
    return model, tokenizer

# Load models
bart_model, bart_tokenizer = BartForConditionalGeneration.from_pretrained("facebook/bart-base").to(device), BartTokenizer.from_pretrained("facebook/bart-base")
pegasus_model, pegasus_tokenizer = PegasusForConditionalGeneration.from_pretrained("google/pegasus-newsroom").to(device), PegasusTokenizer.from_pretrained("google/pegasus-newsroom")
t5_model, t5_tokenizer = T5ForConditionalGeneration.from_pretrained("t5-small").to(device), T5Tokenizer.from_pretrained("t5-small")

# Load dataset
input_json = "/content/drive/MyDrive/Youtube_Large_Transcripts.json"
data = load_json(input_json)

# Pick a random transcript
random_entry = random.choice(data)
video_id = random_entry.get("Video_ID", "Unknown Video ID")
topic = random_entry.get("Topic", "Unknown Topic")
transcript = random_entry.get("Transcript", "")

# Preprocess and summarize
cleaned_text = preprocess_text(transcript)
print(f"\n📌 **Testing on Random Transcript:**\nVideo ID: {video_id}\nTopic: {topic}\n")
print(f"📝 **Original Transcript (First 500 chars):**\n{transcript[:500]}...\n")

bart_summary = summarize(cleaned_text, bart_model, bart_tokenizer)
pegasus_summary = summarize(cleaned_text, pegasus_model, pegasus_tokenizer)
t5_summary = summarize(cleaned_text, t5_model, t5_tokenizer)

# Display results
print("\n=== 🔍 Generated Summaries ===")
print(f"\n🔹 **BART Summary:**\n{bart_summary}")
print(f"\n🔹 **Pegasus Summary:**\n{pegasus_summary}")
print(f"\n🔹 **T5 Summary:**\n{t5_summary}")


Using device: cpu


Some weights of PegasusForConditionalGeneration were not initialized from the model checkpoint at google/pegasus-newsroom and are newly initialized: ['model.decoder.embed_positions.weight', 'model.encoder.embed_positions.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.



📌 **Testing on Random Transcript:**
Video ID: WlLekFZYGEA
Topic: Public Health & Healthcare Systems

📝 **Original Transcript (First 500 chars):**
DOLLARS DOLLARS OVER DOLLARS OVER 10 DOLLARS OVER 10 YEARS. DOLLARS OVER 10 YEARS. IN DOLLARS OVER 10 YEARS. IN
MUSKEGON DOLLARS OVER 10 YEARS. IN
MUSKEGON JOHN DOLLARS OVER 10 YEARS. IN
MUSKEGON JOHN MILLS DOLLARS OVER 10 YEARS. IN
MUSKEGON JOHN MILLS 13 DOLLARS OVER 10 YEARS. IN
MUSKEGON JOHN MILLS 13 ON DOLLARS OVER 10 YEARS. IN
MUSKEGON JOHN MILLS 13 ON YOUR DOLLARS OVER 10 YEARS. IN
MUSKEGON JOHN MILLS 13 ON YOUR
SIDE DOLLARS OVER 10 YEARS. IN
MUSKEGON JOHN MILLS 13 ON YOUR
SIDE HEALTH DOLL...


=== 🔍 Generated Summaries ===

🔹 **BART Summary:**
summarize: dollars dollars over dollars over 10 years. in muskegon john mills 13 on your side health care dollars over 5 years. dollars over 9 years. dollar over 10 year. in dollars over 8 years. $ over 10. in mkegon dollars over 11 years. and in mikegon john mill 13 over your dollars over 7 yea

Multi Video Comparison

In [1]:
!pip install spacy sentence-transformers scikit-learn vaderSentiment



Collecting vaderSentiment
  Downloading vaderSentiment-3.3.2-py2.py3-none-any.whl.metadata (572 bytes)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl

In [2]:
from huggingface_hub import login

# Paste your token here
login("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")


In [5]:
!python -m spacy download en_core_web_sm


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m39.5 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


**Summary of How Each Term is Calculated:**


best_video.json

1. **Learning_Outcome_Score** = Content_Depth_Score × Similarity_Score.  
2. **Similarity_Score** – Cosine similarity between BART, Pegasus, and T5 summaries.  
3. **Common_Points & Unique_Insights** – Extracted using SpaCy NER (top 5 & last 5 entities).  
4. **Sentiment_Score** – VADER compound score of the transcript.  
5. **Content_Depth_Score** – Word count of transcript ÷ 100.  
6. **Suggested_Improvements** – Keyword-based suggestions (examples, algorithms, content depth).  
Based on this the best video for each topic is chosen.


video_comparison.json
1. **Common_Points & Key_Concepts** – Top entities with frequency.  
2. **Real_World_Examples** – Maps `common_points` to a default count.  
3. **Terminology & Unique_Insights** – Less frequent entities with counts.  
4. **Sentiment** – Positive/Negative based on sentiment score.  
5. **Content_Depth_Score** – Same as above, scaled ×10.  
6. **Learning_Outcome_Focus** – Fixed scores (clarity, application, problem-solving).  
7. **Topic_Relevance_Score** – Similarity score ×10.  
8. **Engagement_Metrics** – Simulated values (likes, comments, retention).  
9. **Visual_Content** – Fixed counts of graphs, images, and animations.  
10. **Quality_Rating** – Similarity score ×9.  
11.**Suggested_Improvements** – Keyword-based suggestions (examples, algorithms, content depth).   

###**Terminologies and Concepts Used in the Code:**

###Natural Language Processing (NLP)

SpaCy (en_core_web_sm) – Extracts entities (NER) to identify key concepts and terminology.

Named Entity Recognition (NER) – Identifies entities like names, locations, and concepts to generate Common_Points and Unique_Insights.

###Sentiment Analysis

VADER Sentiment Analysis – Evaluates sentiment polarity of the transcript.

Sentiment Score – Ranges from -1 to +1, classifies as Positive/Negative.

###Text Summarization Models

BART, Pegasus, T5 Summaries – Precomputed summaries are analyzed to check consistency.

Summary Similarity – Uses SentenceTransformer (paraphrase-MiniLM-L6-v2) to calculate cosine similarity between summaries.

###Similarity and Embedding

Sentence Embeddings – Converts text to numerical vectors for comparison.

Cosine Similarity – Measures the degree of similarity between encoded summaries.

###Content Depth and Length Analysis

Content_Depth_Score – Estimated by dividing transcript word count by 100.

Learning_Outcome_Score – Combines content depth with summary similarity to evaluate learning potential.

###Concept Extraction and Real-World Examples

Key Concepts & Terminology – Extracted from transcript entities.

Real_World_Examples – Suggests adding examples when missing.

###Evaluation and Metrics

Engagement Metrics – Simulated likes, comments, and retention rates.

Visual Content – Counts graphs, images, and animations.

Quality Rating – Derived from similarity score to assess video quality.



In [15]:
import json
import spacy
import os
import numpy as np
from concurrent.futures import ThreadPoolExecutor
from sentence_transformers import SentenceTransformer, util
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
import torch

# Clear CUDA memory to prevent crashes
torch.cuda.empty_cache()

#  Load models efficiently
nlp = spacy.load("en_core_web_sm", disable=["parser"])  # Faster NER with transformer
model = SentenceTransformer('paraphrase-MiniLM-L6-v2', device='cuda' if torch.cuda.is_available() else 'cpu')
analyzer = SentimentIntensityAnalyzer()

#  Create empty JSON files if they don't exist
def create_empty_json(file_path):
    if not os.path.exists(file_path):
        with open(file_path, "w", encoding="utf-8") as f:
            json.dump({}, f, indent=4)

#  Load JSON data safely
def load_json(file_path):
    if os.path.exists(file_path) and os.path.getsize(file_path) > 0:
        with open(file_path, "r", encoding="utf-8") as f:
            return json.load(f)
    return {}

#  Save JSON incrementally after every batch
def save_json(data, file_path):
    with open(file_path, "w", encoding="utf-8") as f:
        json.dump(data, f, indent=4)

# Extract key concepts with limited entity extraction
def extract_key_concepts(transcript):
    doc = nlp(transcript[:1000000])  # Limit transcript size for processing
    entities = [ent.text.lower() for ent in doc.ents if len(ent.text) > 2]
    common_points = list(set(entities[:5]))
    unique_insights = list(set(entities[-5:]))
    key_concepts = {entity: entities.count(entity) for entity in common_points}
    terminology = {entity: entities.count(entity) for entity in unique_insights}
    return common_points, unique_insights, key_concepts, terminology

# Calculate similarity with chunked processing
def calculate_similarity(summary_list):
    summary_list = [s for s in summary_list if s.strip()]
    if not summary_list:
        return 0.0
    embeddings = model.encode(summary_list, convert_to_tensor=True, batch_size=4)
    similarity_matrix = util.cos_sim(embeddings, embeddings)
    avg_similarity = similarity_matrix.mean().item()
    return round(avg_similarity, 2)

#  Analyze sentiment and content depth efficiently
def analyze_sentiment(transcript):
    sentiment = analyzer.polarity_scores(transcript[:1000000])  # Avoid exceeding limits
    sentiment_score = round(sentiment['compound'], 2)
    sentiment_label = "Positive" if sentiment_score > 0 else "Negative"
    content_depth_score = round(len(transcript.split()) / 100, 1)
    return sentiment_score, sentiment_label, content_depth_score

#  Suggest improvements
def suggest_improvements(transcript):
    improvements = []
    lower_transcript = transcript.lower()
    if "example" not in lower_transcript:
        improvements.append("Consider adding real-world examples.")
    if "algorithm" not in lower_transcript:
        improvements.append("Clarify concepts using algorithm-specific explanations.")
    if len(transcript.split()) < 500:
        improvements.append("Expand complex topics for better depth.")
    return ", ".join(improvements) or "No major improvements suggested."

# ✅ Process each transcript and generate results
def process_transcript(entry):
    topic = entry.get("Topic", "Unknown Topic")
    video_id = entry.get("Video_ID", "Unknown Video ID")
    transcript = entry.get("Transcript", "").strip()
    summaries = [entry.get("BART_Summary", ""), entry.get("Pegasus_Summary", ""), entry.get("T5_Summary", "")]

    if not transcript:
        return topic, video_id, None

    #  Extract key concepts
    common_points, unique_insights, key_concepts, terminology = extract_key_concepts(transcript)

    #  Calculate summary similarity
    similarity_score = calculate_similarity(summaries)

    # Analyze sentiment and content depth
    sentiment_score, sentiment_label, content_depth_score = analyze_sentiment(transcript)

    #  Suggest improvements
    suggested_improvements = suggest_improvements(transcript)

    # Prepare best_video.json data
    best_video_data = {
        "Video_ID": video_id,
        "Learning_Outcome_Score": round(content_depth_score * similarity_score, 2),
        "Similarity_Score": similarity_score,
        "Common_Points": common_points,
        "Unique_Insights": unique_insights,
        "Sentiment_Score": sentiment_score,
        "Content_Depth_Score": content_depth_score,
        "Suggested_Improvements": suggested_improvements,
    }

    # Prepare video_comparison.json data
    comparison_data = {
        "Common_Points": common_points,
        "Key_Concepts": key_concepts,
        "Real_World_Examples": {point: 1 for point in common_points},
        "Terminology": terminology,
        "Unique_Insights": unique_insights,
        "Sentiment": sentiment_label,
        "Content_Depth_Score": content_depth_score * 10,
        "Learning_Outcome_Focus": {"Explanation_Clarity": 9, "Real_World_Application": 7, "Problem_Solving": 8},
        "Topic_Relevance_Score": round(similarity_score * 10, 1),
        "Engagement_Metrics": {"Likes": 1500, "Comments": 200, "Retention_Rate": 85},
        "Visual_Content": {"Graphs": 3, "Images": 5, "Animations": 2},
        "Quality_Rating": round(similarity_score * 9, 1),
        "Suggested_Improvements": suggested_improvements,
    }

    return topic, video_id, (best_video_data, comparison_data)

# Main function with optimized batch processing
def compute_video_comparison(input_path, best_video_path, comparison_path, batch_size=20):
    #  Create empty files if they don’t exist
    create_empty_json(best_video_path)
    create_empty_json(comparison_path)

    #  Load existing results to avoid re-processing
    best_videos = load_json(best_video_path)
    comparisons = load_json(comparison_path)

    #  Read and process transcripts in batches
    with open(input_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    #  Split data into batches
    for i in range(0, len(data), batch_size):
        batch = data[i:i + batch_size]

        #  Skip already processed videos
        batch_video_ids = [entry.get("Video_ID", "Unknown Video ID") for entry in batch]
        if all(video_id in {vid for topic in best_videos.values() for vid in topic} for video_id in batch_video_ids):
            print(f"✅ Skipping already processed batch {i // batch_size + 1}")
            continue

        # Parallel processing of batches
        with ThreadPoolExecutor(max_workers=8) as executor:
            results = list(executor.map(process_transcript, batch))

        # Update results and save after every batch
        for topic, video_id, result in results:
            if result:
                best_video_data, comparison_data = result
                best_videos.setdefault(topic, {})[video_id] = best_video_data
                comparisons.setdefault(topic, {})[video_id] = comparison_data

        # Save after every batch to avoid data loss
        save_json(best_videos, best_video_path)
        save_json(comparisons, comparison_path)

        print(f"✅ Batch {i // batch_size + 1} processed and saved successfully!")

    print("\n🎉 Execution complete! All batches processed and saved successfully!")

#  File paths
input_json = "/content/drive/MyDrive/LLM PROJECT/summarized_transcripts.json"
output_best_videos = "/content/drive/MyDrive/LLM PROJECT/best_video.json"
output_comparisons = "/content/drive/MyDrive/LLM PROJECT/video_comparison.json"

# Run with optimized batch size and crash-safe mechanism
compute_video_comparison(input_json, output_best_videos, output_comparisons, batch_size=20)


✅ Batch 1 processed and saved successfully!
✅ Batch 2 processed and saved successfully!
✅ Batch 3 processed and saved successfully!
✅ Batch 4 processed and saved successfully!
✅ Batch 5 processed and saved successfully!
✅ Batch 6 processed and saved successfully!
✅ Batch 7 processed and saved successfully!
✅ Batch 8 processed and saved successfully!
✅ Batch 9 processed and saved successfully!
✅ Batch 10 processed and saved successfully!
✅ Batch 11 processed and saved successfully!
✅ Batch 12 processed and saved successfully!
✅ Batch 13 processed and saved successfully!
✅ Batch 14 processed and saved successfully!
✅ Batch 15 processed and saved successfully!
✅ Batch 16 processed and saved successfully!
✅ Batch 17 processed and saved successfully!
✅ Batch 18 processed and saved successfully!
✅ Batch 19 processed and saved successfully!
✅ Batch 20 processed and saved successfully!
✅ Batch 21 processed and saved successfully!
✅ Batch 22 processed and saved successfully!
✅ Batch 23 processe

In [None]:
# # 🎯 Main execution
# input_json = "/content/drive/MyDrive/LLM PROJECT/summarized_transcripts.json"
# output_best_videos = "/content/drive/MyDrive//LLM PROJECT/best_video.json"
# output_comparisons = "/content/drive/MyDrive//LLM PROJECT/video_comparison.json"

Interactive Q/A Chatbot

In [1]:
!pip install faiss-cpu
!pip install numpy
!pip install sentence-transformers
!pip install rank-bm25
!pip install transformers
!pip install torch

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl (30.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.10.0
Collecting rank-bm25
  Downloading rank_bm25-0.2.2-py3-none-any.whl.metadata (3.2 kB)
Downloading rank_bm25-0.2.2-py3-none-any.whl (8.6 kB)
Installing collected packages: rank-bm25
Successfully installed rank-bm25-0.2.2


### **Technologies and Concepts Used in the Code:**


###**1. Natural Language Processing (NLP)**
- **Sentence-BERT (paraphrase-MiniLM-L6-v2)**  
   - Converts summaries and user queries into vector embeddings for similarity search.  
   - **Use Case:** Embeds summaries and query for FAISS-based search.  

- **T5 Model (t5-small)**  
   - Transformer-based model for text-to-text generation.  
   - **Use Case:** Generates answers from relevant summaries based on the user’s question.  

- **BM25 (Okapi BM25)**  
   - A probabilistic information retrieval model that scores relevance based on term frequency.  
   - **Use Case:** Scores text relevance between question and summaries.  



###**2. Text Summarization Models**
- **BART, Pegasus, and T5 Summaries**  
   - Pre-generated summaries stored in summarized_transcripts.json.  
   - **Use Case:** Selects the best summary (based on Best Model in summarization_evaluation.json) for query processing.  


###**3. Vector Search and Retrieval**
- **FAISS (Facebook AI Similarity Search)**  
   - High-performance library for dense vector similarity search.  
   - **Use Case:** Searches for the most relevant summaries using vector embeddings.  

- **Hybrid Search (FAISS + BM25)**  
   - Combines semantic similarity from FAISS and keyword relevance from BM25 for better retrieval.  
   - **Use Case:** Balances semantic and term-based matching to rank summaries effectively.  



### **4. Question Answering (QA)**
- **T5-Based QA Generation**  
   - Uses a pre-trained T5 model to generate answers given a **question + context**.  
   - **Use Case:** Generates detailed answers from the most relevant summaries.  



In [14]:
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from rank_bm25 import BM25Okapi
from transformers import T5ForConditionalGeneration, T5Tokenizer

# ✅ Load necessary models
encoder_model = SentenceTransformer('paraphrase-MiniLM-L6-v2')
t5_model_name = 't5-small'
t5_model = T5ForConditionalGeneration.from_pretrained(t5_model_name)
t5_tokenizer = T5Tokenizer.from_pretrained(t5_model_name)

# ✅ Load summarized transcripts
with open('/content/drive/MyDrive/LLM PROJECT/summarized_transcripts.json', 'r', encoding='utf-8') as f:
    summary_data = json.load(f)

# ✅ Load best model evaluation
with open('/content/drive/MyDrive/LLM PROJECT/summarization_evaluation.json', 'r', encoding='utf-8') as f:
    eval_data = json.load(f)

# ✅ Prepare best summaries for retrieval
summaries = []
summary_map = {}

for entry in eval_data:
    video_id = entry.get("Video_ID", "Unknown Video ID")
    best_model = entry.get("Best Model", "BART")
    # Retrieve best summary
    best_summary = next((item.get(best_model + "_Summary", "") for item in summary_data if item.get("Video_ID") == video_id), "")

    if best_summary:
        summaries.append(best_summary)
        summary_map[video_id] = best_summary

# ✅ Embed summaries using Sentence-BERT
summary_embeddings = encoder_model.encode(summaries, convert_to_tensor=False)

# ✅ Create FAISS Index
dimension = summary_embeddings.shape[1]
faiss_index = faiss.IndexFlatL2(dimension)
faiss_index.add(np.array(summary_embeddings))

# ✅ Prepare BM25 model
tokenized_summaries = [summary.split() for summary in summaries]
bm25 = BM25Okapi(tokenized_summaries)

# ✅ Generate answers using T5
def generate_answer(context, question):
    input_text = f"question: {question} context: {context}".strip()
    input_ids = t5_tokenizer.encode(input_text, return_tensors='pt', max_length=512, truncation=True)
    output_ids = t5_model.generate(input_ids, max_length=200, num_beams=5, early_stopping=True)
    answer = t5_tokenizer.decode(output_ids[0], skip_special_tokens=True)
    return answer

# ✅ Retrieve best matching summaries using Hybrid Search
def get_relevant_summary(question, top_k=3):
    query_embedding = encoder_model.encode(question, convert_to_tensor=False)
    distances, indices = faiss_index.search(np.array([query_embedding]), top_k)
    bm25_scores = bm25.get_scores(question.split())

    combined_results = {}
    for i, idx in enumerate(indices[0]):
        video_id = list(summary_map.keys())[idx]
        faiss_score = 1 / (distances[0][i] + 1e-5)
        bm25_score = bm25_scores[idx]
        combined_score = (0.7 * faiss_score) + (0.3 * bm25_score)
        combined_results[video_id] = combined_score

    sorted_results = sorted(combined_results.items(), key=lambda x: x[1], reverse=True)[:top_k]
    return [summary_map[key] for key, _ in sorted_results]

# 🎯 Interactive Q&A Chatbot
def interactive_qa():
    print("\n🎉 Welcome to the Student Q&A Chatbot! Ask questions related to video summaries.")
    while True:
        question = input("\n📝 Ask your question (type 'exit' to stop): ")
        if question.lower() == 'exit':
            print("👋 Exiting... Goodbye!")
            break

        relevant_summaries = get_relevant_summary(question)
        best_context = " ".join(relevant_summaries)
        answer = generate_answer(best_context[:1024], question)  # Optimize context length for T5
        print(f"\n💡 Answer: {answer}\n")

# 🚀 Run the chatbot
interactive_qa()



🎉 Welcome to the Student Q&A Chatbot! Ask questions related to video summaries.

📝 Ask your question (type 'exit' to stop): How does artificial intelligence enhance cybersecurity measures?

💡 Answer: security involves a lot of different elements


📝 Ask your question (type 'exit' to stop): What are the potential applications of quantum computing in healthcare?

💡 Answer: quantum computers aren t the next generation of supercomputers


📝 Ask your question (type 'exit' to stop): How can renewable energy effectively replace fossil fuels?

💡 Answer: solar energy exceeds all other energy by a factor of a trillion


📝 Ask your question (type 'exit' to stop): What are the consequences of deforestation on global ecosystems?

💡 Answer: forests are a vital part of the earth s ecosystem


📝 Ask your question (type 'exit' to stop): What are the latest advancements in vaccine development?

💡 Answer: recombinant dna technology is to harness the manufacturing power of the world s smallest factories 