In [1]:
!pip install google-api-python-client yt-dlp

Collecting yt-dlp
  Downloading yt_dlp-2025.5.22-py3-none-any.whl.metadata (174 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m174.3/174.3 kB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
Downloading yt_dlp-2025.5.22-py3-none-any.whl (3.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.3/3.3 MB[0m [31m62.2 MB/s[0m eta [36m0:00:00[0m:00:01[0m
[?25hInstalling collected packages: yt-dlp
Successfully installed yt-dlp-2025.5.22


In [43]:
import os
import json 
import shutil
import numpy as np
import re
from googleapiclient.discovery import build
from kaggle_secrets import UserSecretsClient
import yt_dlp
from IPython.display import FileLink
import warnings
warnings.filterwarnings("ignore")

In [3]:
# Configuration
user_secrets = UserSecretsClient()
API_KEY = user_secrets.get_secret("YOUTUBE_API_KEY")
ROOT_DIR = "/kaggle/working/youtube_data"
AUDIO_DIR = os.path.join(ROOT_DIR, "youtube_audio")
METADATA_DIR = os.path.join(ROOT_DIR, "youtube_metadata")
os.makedirs(AUDIO_DIR, exist_ok=True)
os.makedirs(METADATA_DIR, exist_ok=True)

In [4]:
SEARCH_QUERIES = [
    {"query": "Elon Musk TED talk", "speaker": "Elon Musk"},
    {"query": "Ryan Reynolds speech", "speaker": "Ryan Reynolds"},
    {"query": "Emma Watson speech", "speaker": "Emma Watson"},
    {"query": "Scarlett Johansson speech", "speaker": "Scarlett Johansson"},
    {"query": "Hrithik Roshan speech", "speaker": "Hrithik Roshan"},
    {"query": "Shah Rukh Khan speech interview", "speaker": "Shah Rukh Khan"},
    {"query": "Aishwarya Rai Cannes speech interview", "speaker": "Aishwarya Rai"},
    {"query": "Priyanka Chopra speech", "speaker": "Priyanka Chopra"}
]
MAX_RESULTS = 16

In [5]:
def initialize_youtube_api():
    try:
        return build('youtube', 'v3', developerKey=API_KEY, cache_discovery=False)
    except Exception as e:
        print(f"Error initializing YouTube API: {e}")
        return None

In [6]:
def sanitize_filename(title):
    return re.sub(r'[\\/|<>:\"*?]', '_', title)
#It replaces characters like \ / | < > : " * ? (which aren't allowed in filenames) with an underscore _

In [7]:
def is_likely_solo_content(title, description):
    """Check if video likely contains solo speech based on title and description."""
    title_lower = title.lower()
    description_lower = description.lower()
    solo_keywords = ["solo", "speech", "narration", "interview", "motivational", "talk", "address"]
    exclude_keywords = [
        "panel", "group", "cast", "multiple", "impersonation", "impressionist", "news", "report",
        "investigation", "concert", "performance", "music", "portuguese", "spanish", "italian"
    ]
    has_solo_content = any(keyword in title_lower or keyword in description_lower for keyword in solo_keywords)
    has_exclude_content = any(keyword in title_lower or keyword in description_lower for keyword in exclude_keywords)
    return has_solo_content and not has_exclude_content

In [8]:
#searches YouTube for videos based on a given query
def fetch_video_metadata(query, speaker, youtube):
    video_data = []
    try:
        request = youtube.search().list(
            part="snippet", q=query, type="video", videoDuration="medium", maxResults=MAX_RESULTS, relevanceLanguage="en"
        )
        response = request.execute()
        for item in response.get("items", []):
            title = sanitize_filename(item["snippet"]["title"])
            description = item["snippet"]["description"]
            video_id = item["id"]["videoId"]
            video_url = f"https://www.youtube.com/watch?v={video_id}"
            speaker_lower = speaker.lower()
            if (speaker_lower in title.lower() or speaker_lower in description.lower()) and \
               is_likely_solo_content(title, description):
                video_data.append({
                    "speaker": speaker, "title": title, "video_id": video_id,
                    "url": video_url, "description": description
                })
                print(f"Found: {title} ({speaker})")
    except Exception as e:
        print(f"Error searching for {query}: {e}")
    return video_data

In [9]:
#downloads the audio from a YouTube video using its video_id
def download_audio(video_id, speaker, title, audio_dir):
    url = f"https://www.youtube.com/watch?v={video_id}"
    speaker_audio_dir = os.path.join(audio_dir, speaker) #Creates a folder specifically for the speaker
    os.makedirs(speaker_audio_dir, exist_ok=True)
    filename_base = f"{speaker}_{title}"
    output_path = os.path.join(speaker_audio_dir, f"{filename_base}.wav")
    #If the audio file already exists and is not empty, skip downloading it again
    if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
        print(f"Skipping {filename_base}: already downloaded")
        return output_path
    ydl_opts = {
        'format': 'bestaudio', 'outtmpl': output_path.replace('.wav', ''),
        'postprocessors': [{'key': 'FFmpegExtractAudio', 'preferredcodec': 'wav', 'preferredquality': '192'}],
        'quiet': True,
    }
    try:
        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        if os.path.exists(output_path) and os.path.getsize(output_path) > 0:
            print(f"Downloaded: {output_path}")
            return output_path
        else:
            print(f"Error: {output_path} is empty or not created")
            return None
    except Exception as e:
        print(f"Error downloading {video_id}: {e}")
        return None

In [10]:
#saves video information (metadata) for each downloaded YouTube video into a .json file
def save_metadata(video_data, metadata_dir):
    for video in video_data:
        speaker = video["speaker"]
        video_id = video["video_id"]
        speaker_metadata_dir = os.path.join(metadata_dir, speaker)
        os.makedirs(speaker_metadata_dir, exist_ok=True)
        metadata_path = os.path.join(speaker_metadata_dir, f"{video_id}_metadata.json")
        if os.path.exists(metadata_path):
            print(f"Skipping metadata for {video_id}: already exists")
            continue
        try:
            with open(metadata_path, "w", encoding="utf-8") as f:
                json.dump(video, f, indent=4)
            print(f"Saved metadata: {metadata_path}")
        except Exception as e:
            print(f"Error saving metadata for {video_id}: {e}")

In [11]:
#controls the entire process: searching videos, downloading audio, saving metadata, and creating a zip file
def main():
    youtube = initialize_youtube_api() #YouTube API client
    if not youtube:
        return
    all_video_data = []
    for search in SEARCH_QUERIES:
        #Searches YouTube for videos using the query and speaker name
        videos = fetch_video_metadata(search["query"], search["speaker"], youtube) 
        for video in videos:
            #Downloads the audio in .wav format and stores it by speaker
            download_audio(video["video_id"], video["speaker"], video["title"], AUDIO_DIR)
        all_video_data.extend(videos)
    save_metadata(all_video_data, METADATA_DIR)

if __name__ == "__main__":
    main()

Found: Elon Musk&#39;s Speech Will Leave You SPEECHLESS _ One of the Most Eye Opening Speeches Ever 2022 (Elon Musk)
Found: Dare to Be Different_ Elon Musk _ Matthew Sotomey _ TEDxRiverHillHS (Elon Musk)
Found: ENGLISH SPEECH _ ELON MUSK_ Think Big &amp; Dream Even Bigger (English Subtitles) (Elon Musk)
Found: IT WILL GIVE YOU GOOSEBUMPS - Elon Musk Motivational Speech 2022 (Elon Musk)
Found: Elon Musk &amp; His Enterprises _ TJ Cheong _ TEDxYouth@IASA (Elon Musk)
Found: Elon Musk Monologue - SNL (Elon Musk)
Found: Elon Musk’s Incredible Speech on the Education System _ Eye Opening Video on Education (Elon Musk)
Downloaded: /kaggle/working/youtube_data/youtube_audio/Elon Musk/Elon Musk_Elon Musk&#39;s Speech Will Leave You SPEECHLESS _ One of the Most Eye Opening Speeches Ever 2022.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Elon Musk/Elon Musk_Dare to Be Different_ Elon Musk _ Matthew Sotomey _ TEDxRiverHillHS.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/E



Downloaded: /kaggle/working/youtube_data/youtube_audio/Elon Musk/Elon Musk_Elon Musk &amp; His Enterprises _ TJ Cheong _ TEDxYouth@IASA.wav


         player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         n = 6CLu2w71jKWuqlq ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = MCc2gQJdSiNAh2L ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = Vvfd7dt8Bfwi-fi ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Downloaded: /kaggle/working/youtube_data/youtube_audio/Elon Musk/Elon Musk_Elon Musk Monologue - SNL.wav




Downloaded: /kaggle/working/youtube_data/youtube_audio/Elon Musk/Elon Musk_Elon Musk’s Incredible Speech on the Education System _ Eye Opening Video on Education.wav
Found: Ryan Reynold’s Speech Will Leave You SPEECHLESS — Best Life Advice (Ryan Reynolds)
Found: Ryan Reynold’s Speech Will Leave You SPEECHLESS — Best Life Advice (Ryan Reynolds)
Found: Ryan Reynolds&#39; Speech NO ONE Wants To Hear — One Of The Most Eye-Opening Speeches (Ryan Reynolds)
Found: Ryan Reynolds Talks Wicked, Wizard Of Oz, Ariana Grande &amp; More At The National Board Of Review Gala (Ryan Reynolds)
Found: ENGLISH SPEECH _ RYAN REYNOLDS_ Deadpool Interview (English Subtitles) (Ryan Reynolds)
Found: Ryan Reynolds&#39; Speech at Green Day&#39;s Hollywood Walk of Fame Ceremony (Ryan Reynolds)
Found: The Best Life Advice From Ryan Reynolds&#39; Speech Will Leave You Speechless (Must Watch) (Ryan Reynolds)
Found: UNBELIEVABLE! Blake Lively &amp; Ryan Reynolds Give DELUSIONAL Speech At Time 100 Gala (Ryan Reynolds)


         player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         n = MaBAq4B8uPMMto3 ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = ddGGGbq0pBle7Xd ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = wH_McFoip7mlxiR ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Downloaded: /kaggle/working/youtube_data/youtube_audio/Ryan Reynolds/Ryan Reynolds_Ryan Reynolds Talks Wicked, Wizard Of Oz, Ariana Grande &amp; More At The National Board Of Review Gala.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Ryan Reynolds/Ryan Reynolds_ENGLISH SPEECH _ RYAN REYNOLDS_ Deadpool Interview (English Subtitles).wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Ryan Reynolds/Ryan Reynolds_Ryan Reynolds&#39; Speech at Green Day&#39;s Hollywood Walk of Fame Ceremony.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Ryan Reynolds/Ryan Reynolds_The Best Life Advice From Ryan Reynolds&#39; Speech Will Leave You Speechless (Must Watch).wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Ryan Reynolds/Ryan Reynolds_UNBELIEVABLE! Blake Lively &amp; Ryan Reynolds Give DELUSIONAL Speech At Time 100 Gala.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Ryan Reynolds/Ryan Reynolds_Ryan Reynolds - You&#39;ll Never Make It Out Alive! 

         player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         n = T2-zOUsnSkuPLP0 ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = iErFLSMyXgw2s9- ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = sl9URjiOCliTFur ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Downloaded: /kaggle/working/youtube_data/youtube_audio/Emma Watson/Emma Watson_Emma Watson to United Nations_ I&#39;m a feminist.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Emma Watson/Emma Watson_ENGLISH SPEECH _ EMMA WATSON_ HeForShe Impact (English Subtitles).wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Emma Watson/Emma Watson_Emma Watson&#39;s Speech on Gender Equality _ ENGLISH SPEECH with BIG Subtitles.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Emma Watson/Emma Watson_Emma Watson speech for HeForShe Second Year Anniversary (20_9_16).wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Emma Watson/Emma Watson_Emma Watson Speech for HeForShe IMPACT 10x10x10 Program at World Economic Forum 2015.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Emma Watson/Emma Watson_Emma Watson&#39;s emotional speech on feminism.wav




Downloaded: /kaggle/working/youtube_data/youtube_audio/Emma Watson/Emma Watson_Learn English with Emma Watson&#39;s Speech on the HeForShe Campaign - English Subtitle.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Emma Watson/Emma Watson_ENGLISH SPEECH _ LEARN ENGLISH with EMMA WATSON.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Emma Watson/Emma Watson_Emma Watson UN speech - Emma Watson #HeForShe.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Emma Watson/Emma Watson_Emma Watson&#39;s amazing speech at UN.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Emma Watson/Emma Watson_Emma Watson’s Powerful UN Speech on Gender Equality 🌍 #HeForShe.wav
Found: Scarlett Johansson - Variety&#39;s Power of Women Full Speech (Scarlett Johansson)
Found: ENGLISH SPEECH _ SCARLETT JOHANSSON_ Take Time for Yourself (English Subtitles) (Scarlett Johansson)
Found: Election 2012 _ Scarlett Johansson&#39;s Full DNC Speech _ The New York Times (Scarlett Johanss



Downloaded: /kaggle/working/youtube_data/youtube_audio/Scarlett Johansson/Scarlett Johansson_State of the Union Cold Open - SNL.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Scarlett Johansson/Scarlett Johansson_English Speech, Motivational Speech _ Scarlett Johansson Speech, Women Power _ English Big Subtitle.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Scarlett Johansson/Scarlett Johansson_SCARLETT JOHANSSON_ TAKE TIME FOR YOURSELF _ Learn English _ English Speech With Subtitles _ IES.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Scarlett Johansson/Scarlett Johansson_Scarlett Johansson&#39;s Speech 2017 Full.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Scarlett Johansson/Scarlett Johansson_SCARLETT JOHANSSON _ ENGLISH SPEECH.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Scarlett Johansson/Scarlett Johansson_scarlett johansson shutting down sexist comments for 5 min straight.wav
Found: ENGLISH SPEECH _ HRITHIK ROSHAN_ 

         player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         n = pQaLvOaTqEw8Jdk ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = aJP3Q3j4Hgexi3r ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = 3qZWi48knwJhRwI ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Downloaded: /kaggle/working/youtube_data/youtube_audio/Hrithik Roshan/Hrithik Roshan_Hrithik Roshan 7 Rules of Success Inspirational Speech _ Motivational Interviews.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Hrithik Roshan/Hrithik Roshan_Hrithik Roshan English Speech With Subtitles _ Give Your Best _ Inspiring Speech _.wav


         player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         n = d7dx3ptcp2UKpj_ ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = gJkXQHRSDp8UdBT ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = oMchdaaZUR1IWvQ ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Downloaded: /kaggle/working/youtube_data/youtube_audio/Hrithik Roshan/Hrithik Roshan_English Speech, Motivational Speech _ Hrithik Roshan Inspiration Speech _ English Big Subtitle.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Hrithik Roshan/Hrithik Roshan_SPEECH IN ENGLISH  _ HRITHIK ROSHAN EVERY ONE CAN BE A SUPER HERO _ BIG SUBTITLES _ 2023.wav
Found: Thoughts on humanity, fame and love _ Shah Rukh Khan _ TED (Shah Rukh Khan)
Found: &quot;You have to Cheat with Life&quot; Shah Rukh Khan _ English Speech with English subtitles (Shah Rukh Khan)
Found: Shah Rukh Khan, Bollywood Star _ Journal Interview (Shah Rukh Khan)
Found: Best of Shah Rukh Khan - Best of all Motivational Eye Opening Speeches _ Let Them Bark (Shah Rukh Khan)
Found: Shah Rukh Khan EMOTIONAL Speech _ SRK Success Story Pathaan _ DUNKI _ JAWAN _ Srk 2023 Motivation (Shah Rukh Khan)
Found: ENGLISH SPEECH _ SHAH RUKH KHAN_ Perseverance &amp; Success (English Subtitles) (Shah Rukh Khan)
Found: Shah Rukh Khan Ha



Downloaded: /kaggle/working/youtube_data/youtube_audio/Shah Rukh Khan/Shah Rukh Khan_Shah Rukh Khan, Bollywood Star _ Journal Interview.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Shah Rukh Khan/Shah Rukh Khan_Best of Shah Rukh Khan - Best of all Motivational Eye Opening Speeches _ Let Them Bark.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Shah Rukh Khan/Shah Rukh Khan_Shah Rukh Khan EMOTIONAL Speech _ SRK Success Story Pathaan _ DUNKI _ JAWAN _ Srk 2023 Motivation.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Shah Rukh Khan/Shah Rukh Khan_ENGLISH SPEECH _ SHAH RUKH KHAN_ Perseverance &amp; Success (English Subtitles).wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Shah Rukh Khan/Shah Rukh Khan_Shah Rukh Khan Has A &quot;No Kissing&quot; Contract _ Friday Night With Jonathan Ross.wav


         player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         n = SkgUdSpYDV8Ncw- ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = OoblwGWkLCJTN1d ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = tbaiuZRcFGs6om4 ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Downloaded: /kaggle/working/youtube_data/youtube_audio/Shah Rukh Khan/Shah Rukh Khan_&quot;KILL THAT WEAK VERSION OF YOURSELF __ MOTIVATIONAL SPEECH INSPIRED BY SHAH RUKH KHAN&quot;.wav




Downloaded: /kaggle/working/youtube_data/youtube_audio/Shah Rukh Khan/Shah Rukh Khan_&quot;FOCUS ON YOURSELF AND STAY SILENT __  MOTIVATIONAL SPEECH INSPIRED BY SHAH RUKH KHAN&quot;.wav


         player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         n = EhfOIP4brRsyC86 ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = ZvTyTNEDd7SysVn ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = 22y84E1tvFL8pgr ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Downloaded: /kaggle/working/youtube_data/youtube_audio/Shah Rukh Khan/Shah Rukh Khan_&quot;STOP BEING TOO FRIENDLY __ MOTIVATIONAL SPEECH INSPIRED BY SHAH RUKH KHAN&quot;.wav
Found: Aishwarya Rai Is The Most Beautiful Person In The World _ David Letterman (Aishwarya Rai)
Found: Exclusive Interview with Aishwarya Rai Bachchan _ Anupama Chopra _ FC at Cannes (Aishwarya Rai)
Found: Aishwarya Rai Bachchan Interview at Cannes 2018 (Aishwarya Rai)
Found: Aishwarya Rai Bachchan Interview with Anupama Chopra _ Cannes 2022 _ FC at Cannes _ Film Companion (Aishwarya Rai)
Found: Aishwarya Rai Bachchan Cannes Film Festival 2016 #Cannes2016 www.asianculturevulture.com (Aishwarya Rai)
Found: Cannes Film Festival_ Aishwarya Rai Bachchan Talks About Her Best Memories (Aishwarya Rai)
Found: Aishwarya Rai Interview at Cannes 2018 (AP) (Aishwarya Rai)
Found: Aishwarya Rai Interview at Cannes 2022 (Aishwarya Rai)
Found: Aishwarya Rai Bachchan Interview with Anupama Chopra _ Cannes Film Festival 2017 (Aish

         player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         n = eTnDNxttyNfaobi ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = z9ME17IvbNjPeA8 ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = HRAV0vChYbidaPS ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Downloaded: /kaggle/working/youtube_data/youtube_audio/Priyanka Chopra/Priyanka Chopra_ENGLISH SPEECH _ PRIYANKA CHOPRA_ Empower Each Other (English Subtitles).wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Priyanka Chopra/Priyanka Chopra_WATCH THIS EVERYDAY AND CHANGE YOUR LIFE - Priyanka Chopra Motivational Speech 2023.wav


         player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         n = JMUviqCmTpVvlH6 ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = Z169Hzi9UhJ7Irp ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = A6WaT441_nxI328 ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Downloaded: /kaggle/working/youtube_data/youtube_audio/Priyanka Chopra/Priyanka Chopra_They said Girls are WASTE OF MONEY  _ Priyanka Chopra _ Motivation _ English Speech _ WINNER GIRLS.wav


         player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         n = J9jQPL1f7yAGw9K ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = -fx6BdwZVC-Ix_P ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = yVe1xznwxpl07mX ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Downloaded: /kaggle/working/youtube_data/youtube_audio/Priyanka Chopra/Priyanka Chopra_PRIYANKA CHOPRA  - Motivational Speech - 12 Rules Of Life.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Priyanka Chopra/Priyanka Chopra_ENGLISH SPEECH _ LEARN ENGLISH with PRIYANKA CHOPRA.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Priyanka Chopra/Priyanka Chopra_ENGLISH SPEECH _ PRIYANKA CHOPRA_ Voice for the Voiceless (English Subtitles).wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Priyanka Chopra/Priyanka Chopra_Stay Quiet After Disrespect , powerful motivational speech by Priyanka Chopra.wav


         player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         n = ezMXPBb1WRNl9kY ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = 5OgjnchoXcHdyX- ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U
         n = IIMuOm_cFE0nBf8 ; player = https://www.youtube.com/s/player/fc2a56a5/player_ias.vflset/en_US/base.js
         Please report this issue on  https://github.com/yt-dlp/yt-dlp/issues?q= , filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U


Downloaded: /kaggle/working/youtube_data/youtube_audio/Priyanka Chopra/Priyanka Chopra_Grow Yourself - Priyanka Chopra inspirational Speech _  Priyanka Chopra Motivation @beginners01.wav
Downloaded: /kaggle/working/youtube_data/youtube_audio/Priyanka Chopra/Priyanka Chopra_Priyanka Chopra speech __ with Big Subtitles __ Learn English.wav
Saved metadata: /kaggle/working/youtube_data/youtube_metadata/Elon Musk/zlDmYkeQpVQ_metadata.json
Saved metadata: /kaggle/working/youtube_data/youtube_metadata/Elon Musk/8Pz7_Fx70gE_metadata.json
Saved metadata: /kaggle/working/youtube_data/youtube_metadata/Elon Musk/BDIRabVP24o_metadata.json
Saved metadata: /kaggle/working/youtube_data/youtube_metadata/Elon Musk/FFdd3bq2ti0_metadata.json
Saved metadata: /kaggle/working/youtube_data/youtube_metadata/Elon Musk/vChRfITEbEM_metadata.json
Saved metadata: /kaggle/working/youtube_data/youtube_metadata/Elon Musk/fCF8I_X1qKI_metadata.json
Saved metadata: /kaggle/working/youtube_data/youtube_metadata/Elon Musk/

In [12]:
root_dir = "/kaggle/working/youtube_data" 
for sub_dir in ["youtube_audio", "youtube_metadata"]:
    dir_path = os.path.join(root_dir, sub_dir)
    for speaker in os.listdir(dir_path):
        speaker_dir = os.path.join(dir_path, speaker)
        if os.path.isdir(speaker_dir):
            #Lists all .wav or .json files inside the speaker folder
            files = [f for f in os.listdir(speaker_dir) if f.endswith(".wav") or f.endswith(".json")]
            print(f"{sub_dir}/{speaker}: {len(files)} files")

youtube_audio/Aishwarya Rai: 10 files
youtube_audio/Ryan Reynolds: 8 files
youtube_audio/Priyanka Chopra: 14 files
youtube_audio/Emma Watson: 13 files
youtube_audio/Hrithik Roshan: 14 files
youtube_audio/Shah Rukh Khan: 10 files
youtube_audio/Elon Musk: 7 files
youtube_audio/Scarlett Johansson: 9 files
youtube_metadata/Aishwarya Rai: 10 files
youtube_metadata/Ryan Reynolds: 9 files
youtube_metadata/Priyanka Chopra: 14 files
youtube_metadata/Emma Watson: 13 files
youtube_metadata/Hrithik Roshan: 14 files
youtube_metadata/Shah Rukh Khan: 10 files
youtube_metadata/Elon Musk: 7 files
youtube_metadata/Scarlett Johansson: 9 files


In [13]:
import librosa
audio_dir = "/kaggle/working/youtube_data/youtube_audio"
for speaker in os.listdir(audio_dir):
    speaker_dir = os.path.join(audio_dir, speaker)
    if os.path.isdir(speaker_dir):
        total_duration = sum(librosa.get_duration(path=os.path.join(speaker_dir, f)) / 60 for f in os.listdir(speaker_dir) if f.endswith(".wav"))
        print(f"{speaker}: {total_duration:.2f} minutes")

Aishwarya Rai: 87.79 minutes
Ryan Reynolds: 65.45 minutes
Priyanka Chopra: 145.22 minutes
Emma Watson: 130.51 minutes
Hrithik Roshan: 100.92 minutes
Shah Rukh Khan: 134.44 minutes
Elon Musk: 67.97 minutes
Scarlett Johansson: 63.22 minutes


In [14]:
pip install librosa soundfile noisereduce pydub openai-whisper pyannote.audio

Collecting noisereduce
  Downloading noisereduce-3.0.3-py3-none-any.whl.metadata (14 kB)
Collecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m17.3 MB/s[0m eta [36m0:00:00[0m00:01[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting pyannote.audio
  Downloading pyannote.audio-3.3.2-py2.py3-none-any.whl.metadata (11 kB)
Collecting asteroid-filterbanks>=0.4 (from pyannote.audio)
  Downloading asteroid_filterbanks-0.4.0-py3-none-any.whl.metadata (3.3 kB)
Collecting lightning>=2.0.1 (from pyannote.audio)
  Downloading lightning-2.5.1.post0-py3-none-any.whl.metadata (39 kB)
Collecting pyannote.core>=5.0.0 (from pyannote.audio)
  Downloading pyannote.core-5.0.0-py3-none-any.whl.metadata (1.4 kB)
Collecting pyannote.database>=5.0.1 (from pya

In [15]:
import os
import librosa
import whisper
import soundfile as sf
from pydub import AudioSegment
from pyannote.audio import Pipeline
from tqdm import tqdm #progress bars 
import torch

In [16]:
AUDIO_DIR = "/kaggle/working/youtube_data/youtube_audio"
PROCESS_DIR = "/kaggle/working/processed_audio"
os.makedirs(PROCESS_DIR, exist_ok=True)

In [17]:
def preprocess_audio(file_path, output_path, target_sr=16000, target_dBFS=-20):
    try:
        # Load and convert to mono
        y, sr = librosa.load(file_path, sr=None, mono=True)
        # Resample(If the original sample rate isn’t 16,000 Hz, resample the audio to 16,000 Hz)
        if sr != target_sr:
            y = librosa.resample(y, orig_sr=sr, target_sr=target_sr)
            sr = target_sr
        # Trim silence
        y, _ = librosa.effects.trim(y, top_db=30)
        # Save temp file
        temp_path = output_path.replace(".wav", "_temp.wav")
        sf.write(temp_path, y, sr)
        # Normalize loudness
        sound = AudioSegment.from_wav(temp_path)
        change_in_dBFS = target_dBFS - sound.dBFS
        normalized_sound = sound.apply_gain(change_in_dBFS)
        normalized_sound.export(output_path, format="wav")
        os.remove(temp_path)  # Cleanup temp file
        print(f"Processed: {output_path}")
    except Exception as e:
        print(f"Failed to preprocess {file_path}: {e}")

In [18]:
def summarize_processed_data(processed_dir):
    for speaker in os.listdir(processed_dir):
        speaker_dir = os.path.join(processed_dir, speaker)
        total_minutes = 0
        num_files = 0
        for f in os.listdir(speaker_dir):
            if f.endswith(".wav"):
                file_path = os.path.join(speaker_dir, f)
                duration = librosa.get_duration(path=file_path)
                total_minutes += duration / 60
                num_files += 1
        print(f"{speaker}: {num_files} files, {total_minutes:.2f} minutes")

In [19]:
def main():
    for speaker in os.listdir(AUDIO_DIR):
        speaker_src_dir = os.path.join(AUDIO_DIR, speaker)
        speaker_dst_dir = os.path.join(PROCESS_DIR, speaker)
        os.makedirs(speaker_dst_dir, exist_ok=True)
        for file in tqdm(os.listdir(speaker_src_dir), desc=f"Processing {speaker}"):
            if file.endswith(".wav"):
                src_path = os.path.join(speaker_src_dir, file)
                dst_path = os.path.join(speaker_dst_dir, file)
                preprocess_audio(src_path, dst_path)
    summarize_processed_data(PROCESS_DIR)

if __name__ == "__main__":
    main()

Processing Aishwarya Rai:  10%|█         | 1/10 [00:11<01:41, 11.33s/it]

Processed: /kaggle/working/processed_audio/Aishwarya Rai/Aishwarya Rai_Aishwarya Rai Bachchan Interview at Cannes 2018.wav


Processing Aishwarya Rai:  20%|██        | 2/10 [00:14<00:51,  6.46s/it]

Processed: /kaggle/working/processed_audio/Aishwarya Rai/Aishwarya Rai_Exclusive Interview with Aishwarya Rai Bachchan _ Anupama Chopra _ FC at Cannes.wav


Processing Aishwarya Rai:  30%|███       | 3/10 [00:15<00:26,  3.85s/it]

Processed: /kaggle/working/processed_audio/Aishwarya Rai/Aishwarya Rai_Aishwarya Rai Interview at Cannes 2018 (AP).wav


Processing Aishwarya Rai:  40%|████      | 4/10 [00:16<00:17,  2.87s/it]

Processed: /kaggle/working/processed_audio/Aishwarya Rai/Aishwarya Rai_Cannes Film Festival_ Aishwarya Rai Bachchan Talks About Her Best Memories.wav


Processing Aishwarya Rai:  50%|█████     | 5/10 [00:17<00:11,  2.20s/it]

Processed: /kaggle/working/processed_audio/Aishwarya Rai/Aishwarya Rai_Aishwarya Rai Interview at Cannes 2022.wav


Processing Aishwarya Rai:  60%|██████    | 6/10 [00:18<00:06,  1.68s/it]

Processed: /kaggle/working/processed_audio/Aishwarya Rai/Aishwarya Rai_AISH 1 interview in cannes 2013..wav


Processing Aishwarya Rai:  70%|███████   | 7/10 [00:19<00:04,  1.47s/it]

Processed: /kaggle/working/processed_audio/Aishwarya Rai/Aishwarya Rai_Aishwarya Rai Is The Most Beautiful Person In The World _ David Letterman.wav


Processing Aishwarya Rai:  80%|████████  | 8/10 [00:21<00:03,  1.76s/it]

Processed: /kaggle/working/processed_audio/Aishwarya Rai/Aishwarya Rai_Aishwarya Rai Bachchan Interview with Anupama Chopra _ Cannes 2022 _ FC at Cannes _ Film Companion.wav


Processing Aishwarya Rai:  90%|█████████ | 9/10 [00:22<00:01,  1.59s/it]

Processed: /kaggle/working/processed_audio/Aishwarya Rai/Aishwarya Rai_Aishwarya Rai Bachchan Cannes Film Festival 2016 #Cannes2016 www.asianculturevulture.com.wav


Processing Aishwarya Rai: 100%|██████████| 10/10 [00:24<00:00,  2.40s/it]


Processed: /kaggle/working/processed_audio/Aishwarya Rai/Aishwarya Rai_Aishwarya Rai Bachchan Interview with Anupama Chopra _ Cannes Film Festival 2017.wav


Processing Ryan Reynolds:  12%|█▎        | 1/8 [00:03<00:21,  3.07s/it]

Processed: /kaggle/working/processed_audio/Ryan Reynolds/Ryan Reynolds_UNBELIEVABLE! Blake Lively &amp; Ryan Reynolds Give DELUSIONAL Speech At Time 100 Gala.wav


Processing Ryan Reynolds:  25%|██▌       | 2/8 [00:04<00:10,  1.83s/it]

Processed: /kaggle/working/processed_audio/Ryan Reynolds/Ryan Reynolds_Ryan Reynolds - You&#39;ll Never Make It Out Alive! (Motivational Speech).wav


Processing Ryan Reynolds:  38%|███▊      | 3/8 [00:05<00:07,  1.55s/it]

Processed: /kaggle/working/processed_audio/Ryan Reynolds/Ryan Reynolds_Ryan Reynolds Talks Wicked, Wizard Of Oz, Ariana Grande &amp; More At The National Board Of Review Gala.wav


Processing Ryan Reynolds:  50%|█████     | 4/8 [00:06<00:06,  1.57s/it]

Processed: /kaggle/working/processed_audio/Ryan Reynolds/Ryan Reynolds_Ryan Reynold’s Speech Will Leave You SPEECHLESS — Best Life Advice.wav


Processing Ryan Reynolds:  62%|██████▎   | 5/8 [00:07<00:03,  1.28s/it]

Processed: /kaggle/working/processed_audio/Ryan Reynolds/Ryan Reynolds_The Best Life Advice From Ryan Reynolds&#39; Speech Will Leave You Speechless (Must Watch).wav


Processing Ryan Reynolds:  75%|███████▌  | 6/8 [00:09<00:03,  1.52s/it]

Processed: /kaggle/working/processed_audio/Ryan Reynolds/Ryan Reynolds_ENGLISH SPEECH _ RYAN REYNOLDS_ Deadpool Interview (English Subtitles).wav


Processing Ryan Reynolds:  88%|████████▊ | 7/8 [00:10<00:01,  1.42s/it]

Processed: /kaggle/working/processed_audio/Ryan Reynolds/Ryan Reynolds_Ryan Reynolds&#39; Speech NO ONE Wants To Hear — One Of The Most Eye-Opening Speeches.wav


Processing Ryan Reynolds: 100%|██████████| 8/8 [00:12<00:00,  1.54s/it]


Processed: /kaggle/working/processed_audio/Ryan Reynolds/Ryan Reynolds_Ryan Reynolds&#39; Speech at Green Day&#39;s Hollywood Walk of Fame Ceremony.wav


Processing Priyanka Chopra:   7%|▋         | 1/14 [00:01<00:25,  1.93s/it]

Processed: /kaggle/working/processed_audio/Priyanka Chopra/Priyanka Chopra_ENGLISH SPEECH _ PRIYANKA CHOPRA_ Empower Each Other (English Subtitles).wav


Processing Priyanka Chopra:  14%|█▍        | 2/14 [00:02<00:15,  1.31s/it]

Processed: /kaggle/working/processed_audio/Priyanka Chopra/Priyanka Chopra_Grow Yourself - Priyanka Chopra inspirational Speech _  Priyanka Chopra Motivation @beginners01.wav


Processing Priyanka Chopra:  21%|██▏       | 3/14 [00:03<00:12,  1.16s/it]

Processed: /kaggle/working/processed_audio/Priyanka Chopra/Priyanka Chopra_They said Girls are WASTE OF MONEY  _ Priyanka Chopra _ Motivation _ English Speech _ WINNER GIRLS.wav


Processing Priyanka Chopra:  29%|██▊       | 4/14 [00:05<00:12,  1.21s/it]

Processed: /kaggle/working/processed_audio/Priyanka Chopra/Priyanka Chopra_ENGLISH SPEECH _ PRIYANKA CHOPRA_ Full Power of Women (English Subtitles).wav


Processing Priyanka Chopra:  36%|███▌      | 5/14 [00:06<00:12,  1.43s/it]

Processed: /kaggle/working/processed_audio/Priyanka Chopra/Priyanka Chopra_ENGLISH SPEECH _ LEARN ENGLISH with PRIYANKA CHOPRA.wav


Processing Priyanka Chopra:  43%|████▎     | 6/14 [00:07<00:10,  1.30s/it]

Processed: /kaggle/working/processed_audio/Priyanka Chopra/Priyanka Chopra_WATCH THIS EVERYDAY AND CHANGE YOUR LIFE - Priyanka Chopra Motivational Speech 2023.wav


Processing Priyanka Chopra:  50%|█████     | 7/14 [00:09<00:10,  1.49s/it]

Processed: /kaggle/working/processed_audio/Priyanka Chopra/Priyanka Chopra_Priyanka Chopra’s Speech Will Leave You SPEECHLESS — Best Life Advice.wav


Processing Priyanka Chopra:  57%|█████▋    | 8/14 [00:10<00:07,  1.31s/it]

Processed: /kaggle/working/processed_audio/Priyanka Chopra/Priyanka Chopra_Priyanka Chopra speech __ with Big Subtitles __ Learn English.wav


Processing Priyanka Chopra:  64%|██████▍   | 9/14 [00:12<00:06,  1.30s/it]

Processed: /kaggle/working/processed_audio/Priyanka Chopra/Priyanka Chopra_ENGLISH SPEECH _ PRIYANKA CHOPRA_ Voice for the Voiceless (English Subtitles).wav


Processing Priyanka Chopra:  71%|███████▏  | 10/14 [00:13<00:05,  1.44s/it]

Processed: /kaggle/working/processed_audio/Priyanka Chopra/Priyanka Chopra_Priyanka Chopra&#39;s Life Advice Will Leave You Speechless _ One of The Most Eye Opening Videos Ever.wav


Processing Priyanka Chopra:  79%|███████▊  | 11/14 [00:16<00:05,  1.73s/it]

Processed: /kaggle/working/processed_audio/Priyanka Chopra/Priyanka Chopra_Stay Quiet After Disrespect , powerful motivational speech by Priyanka Chopra.wav


Processing Priyanka Chopra:  86%|████████▌ | 12/14 [00:18<00:03,  1.83s/it]

Processed: /kaggle/working/processed_audio/Priyanka Chopra/Priyanka Chopra_PRIYANKA CHOPRA  - Motivational Speech - 12 Rules Of Life.wav


Processing Priyanka Chopra:  93%|█████████▎| 13/14 [00:21<00:02,  2.21s/it]

Processed: /kaggle/working/processed_audio/Priyanka Chopra/Priyanka Chopra_ENGLISH SPEECH _ PRIYANKA CHOPRA_ Be Fearless (English Subtitles).wav


Processing Priyanka Chopra: 100%|██████████| 14/14 [00:23<00:00,  1.65s/it]


Processed: /kaggle/working/processed_audio/Priyanka Chopra/Priyanka Chopra_Priyanka Chopra&#39;s Life Advice Will Change Your Future — One of the Best Motivational Videos Ever.wav


Processing Emma Watson:   8%|▊         | 1/13 [00:00<00:11,  1.01it/s]

Processed: /kaggle/working/processed_audio/Emma Watson/Emma Watson_Emma Watson Speech for HeForShe IMPACT 10x10x10 Program at World Economic Forum 2015.wav


Processing Emma Watson:  15%|█▌        | 2/13 [00:02<00:17,  1.55s/it]

Processed: /kaggle/working/processed_audio/Emma Watson/Emma Watson_ENGLISH SPEECH _ EMMA WATSON_ HeForShe Impact (English Subtitles).wav


Processing Emma Watson:  23%|██▎       | 3/13 [00:04<00:17,  1.76s/it]

Processed: /kaggle/working/processed_audio/Emma Watson/Emma Watson_Emma Watson’s Powerful UN Speech on Gender Equality 🌍 #HeForShe.wav


Processing Emma Watson:  31%|███       | 4/13 [00:06<00:16,  1.80s/it]

Processed: /kaggle/working/processed_audio/Emma Watson/Emma Watson_Emma Watson to United Nations_ I&#39;m a feminist.wav


Processing Emma Watson:  38%|███▊      | 5/13 [00:08<00:15,  1.89s/it]

Processed: /kaggle/working/processed_audio/Emma Watson/Emma Watson_ENGLISH SPEECH _ EMMA WATSON_ Find Your Tribe (English Subtitles).wav


Processing Emma Watson:  46%|████▌     | 6/13 [00:10<00:13,  1.93s/it]

Processed: /kaggle/working/processed_audio/Emma Watson/Emma Watson_Emma Watson&#39;s Speech on Gender Equality _ ENGLISH SPEECH with BIG Subtitles.wav


Processing Emma Watson:  54%|█████▍    | 7/13 [00:12<00:11,  1.91s/it]

Processed: /kaggle/working/processed_audio/Emma Watson/Emma Watson_Learn English with Emma Watson&#39;s Speech on the HeForShe Campaign - English Subtitle.wav


Processing Emma Watson:  62%|██████▏   | 8/13 [00:15<00:10,  2.05s/it]

Processed: /kaggle/working/processed_audio/Emma Watson/Emma Watson_Emma Watson&#39;s amazing speech at UN.wav


Processing Emma Watson:  69%|██████▉   | 9/13 [00:18<00:10,  2.60s/it]

Processed: /kaggle/working/processed_audio/Emma Watson/Emma Watson_Emma Watson UN speech - Emma Watson #HeForShe.wav


Processing Emma Watson:  77%|███████▋  | 10/13 [00:20<00:07,  2.42s/it]

Processed: /kaggle/working/processed_audio/Emma Watson/Emma Watson_Emma Watson&#39;s emotional speech on feminism.wav


Processing Emma Watson:  85%|████████▍ | 11/13 [00:23<00:04,  2.42s/it]

Processed: /kaggle/working/processed_audio/Emma Watson/Emma Watson_Emma Watson HeForShe Speech at the United Nations _ UN Women 2014.wav


Processing Emma Watson:  92%|█████████▏| 12/13 [00:24<00:02,  2.01s/it]

Processed: /kaggle/working/processed_audio/Emma Watson/Emma Watson_ENGLISH SPEECH _ LEARN ENGLISH with EMMA WATSON.wav


Processing Emma Watson: 100%|██████████| 13/13 [00:25<00:00,  1.95s/it]


Processed: /kaggle/working/processed_audio/Emma Watson/Emma Watson_Emma Watson speech for HeForShe Second Year Anniversary (20_9_16).wav


Processing Hrithik Roshan:   7%|▋         | 1/14 [00:01<00:21,  1.67s/it]

Processed: /kaggle/working/processed_audio/Hrithik Roshan/Hrithik Roshan_ENGLISH SPEECH _ HRITHIK ROSHAN_ Know Who You Are (English Subtitles).wav


Processing Hrithik Roshan:  14%|█▍        | 2/14 [00:03<00:23,  1.94s/it]

Processed: /kaggle/working/processed_audio/Hrithik Roshan/Hrithik Roshan_Hrithik Roshan 7 Rules of Success Inspirational Speech _ Motivational Interviews.wav


Processing Hrithik Roshan:  21%|██▏       | 3/14 [00:04<00:17,  1.57s/it]

Processed: /kaggle/working/processed_audio/Hrithik Roshan/Hrithik Roshan_Hrithik Roshan English Speech With Subtitles _ Give Your Best _ Inspiring Speech _.wav


Processing Hrithik Roshan:  29%|██▊       | 4/14 [00:08<00:24,  2.43s/it]

Processed: /kaggle/working/processed_audio/Hrithik Roshan/Hrithik Roshan_Hrithik Roshan&#39;s Speech Inspires Million Crowd Talking About An Impossible Dream Hrx Store Launch.wav


Processing Hrithik Roshan:  36%|███▌      | 5/14 [00:09<00:17,  1.96s/it]

Processed: /kaggle/working/processed_audio/Hrithik Roshan/Hrithik Roshan_English Speech, Motivational Speech _ Hrithik Roshan Inspiration Speech _ English Big Subtitle.wav


Processing Hrithik Roshan:  43%|████▎     | 6/14 [00:11<00:14,  1.87s/it]

Processed: /kaggle/working/processed_audio/Hrithik Roshan/Hrithik Roshan_ENGLISH SPEECH _ HRITHIK ROSHAN_ Speech to Students GIIS leadership Lecture Series.wav


Processing Hrithik Roshan:  50%|█████     | 7/14 [00:12<00:10,  1.55s/it]

Processed: /kaggle/working/processed_audio/Hrithik Roshan/Hrithik Roshan_HRITHIK ROSHAN_ Know Who You Are _ English speech _ Motivational speech with english subtitles.wav


Processing Hrithik Roshan:  57%|█████▋    | 8/14 [00:13<00:09,  1.51s/it]

Processed: /kaggle/working/processed_audio/Hrithik Roshan/Hrithik Roshan_SPEECH IN ENGLISH  _ HRITHIK ROSHAN EVERY ONE CAN BE A SUPER HERO _ BIG SUBTITLES _ 2023.wav


Processing Hrithik Roshan:  64%|██████▍   | 9/14 [00:14<00:06,  1.39s/it]

Processed: /kaggle/working/processed_audio/Hrithik Roshan/Hrithik Roshan_Impeccable English Speeches __ English Speech __ Hrithik Roshan - Speech __ English Subtitles __.wav


Processing Hrithik Roshan:  71%|███████▏  | 10/14 [00:15<00:04,  1.20s/it]

Processed: /kaggle/working/processed_audio/Hrithik Roshan/Hrithik Roshan_Hrithik Roshan Speech _ IIFA Awards.wav


Processing Hrithik Roshan:  79%|███████▊  | 11/14 [00:16<00:03,  1.08s/it]

Processed: /kaggle/working/processed_audio/Hrithik Roshan/Hrithik Roshan_Every Student &amp; Parent Must Watch - HRITHIK ROSHAN Inspiring Speech.wav


Processing Hrithik Roshan:  86%|████████▌ | 12/14 [00:17<00:02,  1.07s/it]

Processed: /kaggle/working/processed_audio/Hrithik Roshan/Hrithik Roshan_HRITHIK ROSHAN SPEECH _ Confused A Lot Then Do This One Thing (Speech With English Subtitle).wav


Processing Hrithik Roshan:  93%|█████████▎| 13/14 [00:18<00:01,  1.13s/it]

Processed: /kaggle/working/processed_audio/Hrithik Roshan/Hrithik Roshan_Hrithik Roshan Motivational Speech For all Engineering Students _ Bollywood Actor _ Filmkraft __.wav


Processing Hrithik Roshan: 100%|██████████| 14/14 [00:19<00:00,  1.42s/it]


Processed: /kaggle/working/processed_audio/Hrithik Roshan/Hrithik Roshan_Morning Motivation Video For STUDENTS by Hrithik Roshan _ Inspiring Speech.wav


Processing Shah Rukh Khan:  10%|█         | 1/10 [00:02<00:22,  2.51s/it]

Processed: /kaggle/working/processed_audio/Shah Rukh Khan/Shah Rukh Khan_&quot;STOP BEING TOO FRIENDLY __ MOTIVATIONAL SPEECH INSPIRED BY SHAH RUKH KHAN&quot;.wav


Processing Shah Rukh Khan:  20%|██        | 2/10 [00:06<00:26,  3.26s/it]

Processed: /kaggle/working/processed_audio/Shah Rukh Khan/Shah Rukh Khan_&quot;KILL THAT WEAK VERSION OF YOURSELF __ MOTIVATIONAL SPEECH INSPIRED BY SHAH RUKH KHAN&quot;.wav


Processing Shah Rukh Khan:  30%|███       | 3/10 [00:09<00:23,  3.42s/it]

Processed: /kaggle/working/processed_audio/Shah Rukh Khan/Shah Rukh Khan_Thoughts on humanity, fame and love _ Shah Rukh Khan _ TED.wav


Processing Shah Rukh Khan:  40%|████      | 4/10 [00:11<00:16,  2.70s/it]

Processed: /kaggle/working/processed_audio/Shah Rukh Khan/Shah Rukh Khan_Shah Rukh Khan EMOTIONAL Speech _ SRK Success Story Pathaan _ DUNKI _ JAWAN _ Srk 2023 Motivation.wav


Processing Shah Rukh Khan:  50%|█████     | 5/10 [00:14<00:14,  2.94s/it]

Processed: /kaggle/working/processed_audio/Shah Rukh Khan/Shah Rukh Khan_ENGLISH SPEECH _ SHAH RUKH KHAN_ Perseverance &amp; Success (English Subtitles).wav


Processing Shah Rukh Khan:  60%|██████    | 6/10 [00:15<00:09,  2.30s/it]

Processed: /kaggle/working/processed_audio/Shah Rukh Khan/Shah Rukh Khan_Best of Shah Rukh Khan - Best of all Motivational Eye Opening Speeches _ Let Them Bark.wav


Processing Shah Rukh Khan:  70%|███████   | 7/10 [00:19<00:07,  2.61s/it]

Processed: /kaggle/working/processed_audio/Shah Rukh Khan/Shah Rukh Khan_&quot;FOCUS ON YOURSELF AND STAY SILENT __  MOTIVATIONAL SPEECH INSPIRED BY SHAH RUKH KHAN&quot;.wav


Processing Shah Rukh Khan:  80%|████████  | 8/10 [00:20<00:04,  2.28s/it]

Processed: /kaggle/working/processed_audio/Shah Rukh Khan/Shah Rukh Khan_Shah Rukh Khan, Bollywood Star _ Journal Interview.wav


Processing Shah Rukh Khan:  90%|█████████ | 9/10 [00:24<00:02,  2.64s/it]

Processed: /kaggle/working/processed_audio/Shah Rukh Khan/Shah Rukh Khan_Shah Rukh Khan Has A &quot;No Kissing&quot; Contract _ Friday Night With Jonathan Ross.wav


Processing Shah Rukh Khan: 100%|██████████| 10/10 [00:26<00:00,  2.65s/it]


Processed: /kaggle/working/processed_audio/Shah Rukh Khan/Shah Rukh Khan_&quot;You have to Cheat with Life&quot; Shah Rukh Khan _ English Speech with English subtitles.wav


Processing Elon Musk:  14%|█▍        | 1/7 [00:00<00:05,  1.19it/s]

Processed: /kaggle/working/processed_audio/Elon Musk/Elon Musk_Elon Musk &amp; His Enterprises _ TJ Cheong _ TEDxYouth@IASA.wav


Processing Elon Musk:  29%|██▊       | 2/7 [00:02<00:06,  1.37s/it]

Processed: /kaggle/working/processed_audio/Elon Musk/Elon Musk_IT WILL GIVE YOU GOOSEBUMPS - Elon Musk Motivational Speech 2022.wav


Processing Elon Musk:  43%|████▎     | 3/7 [00:05<00:07,  1.90s/it]

Processed: /kaggle/working/processed_audio/Elon Musk/Elon Musk_Elon Musk&#39;s Speech Will Leave You SPEECHLESS _ One of the Most Eye Opening Speeches Ever 2022.wav


Processing Elon Musk:  57%|█████▋    | 4/7 [00:07<00:05,  1.96s/it]

Processed: /kaggle/working/processed_audio/Elon Musk/Elon Musk_Elon Musk’s Incredible Speech on the Education System _ Eye Opening Video on Education.wav


Processing Elon Musk:  71%|███████▏  | 5/7 [00:08<00:03,  1.65s/it]

Processed: /kaggle/working/processed_audio/Elon Musk/Elon Musk_Elon Musk Monologue - SNL.wav


Processing Elon Musk:  86%|████████▌ | 6/7 [00:11<00:02,  2.22s/it]

Processed: /kaggle/working/processed_audio/Elon Musk/Elon Musk_ENGLISH SPEECH _ ELON MUSK_ Think Big &amp; Dream Even Bigger (English Subtitles).wav


Processing Elon Musk: 100%|██████████| 7/7 [00:13<00:00,  1.86s/it]


Processed: /kaggle/working/processed_audio/Elon Musk/Elon Musk_Dare to Be Different_ Elon Musk _ Matthew Sotomey _ TEDxRiverHillHS.wav


Processing Scarlett Johansson:  11%|█         | 1/9 [00:00<00:06,  1.20it/s]

Processed: /kaggle/working/processed_audio/Scarlett Johansson/Scarlett Johansson_Scarlett Johansson - Variety&#39;s Power of Women Full Speech.wav


Processing Scarlett Johansson:  22%|██▏       | 2/9 [00:01<00:05,  1.28it/s]

Processed: /kaggle/working/processed_audio/Scarlett Johansson/Scarlett Johansson_Election 2012 _ Scarlett Johansson&#39;s Full DNC Speech _ The New York Times.wav


Processing Scarlett Johansson:  33%|███▎      | 3/9 [00:04<00:09,  1.59s/it]

Processed: /kaggle/working/processed_audio/Scarlett Johansson/Scarlett Johansson_SCARLETT JOHANSSON _ ENGLISH SPEECH.wav


Processing Scarlett Johansson:  44%|████▍     | 4/9 [00:05<00:07,  1.57s/it]

Processed: /kaggle/working/processed_audio/Scarlett Johansson/Scarlett Johansson_Scarlett Johansson&#39;s Speech 2017 Full.wav


Processing Scarlett Johansson:  56%|█████▌    | 5/9 [00:07<00:06,  1.54s/it]

Processed: /kaggle/working/processed_audio/Scarlett Johansson/Scarlett Johansson_SCARLETT JOHANSSON_ TAKE TIME FOR YOURSELF _ Learn English _ English Speech With Subtitles _ IES.wav


Processing Scarlett Johansson:  67%|██████▋   | 6/9 [00:07<00:03,  1.30s/it]

Processed: /kaggle/working/processed_audio/Scarlett Johansson/Scarlett Johansson_English Speech, Motivational Speech _ Scarlett Johansson Speech, Women Power _ English Big Subtitle.wav


Processing Scarlett Johansson:  78%|███████▊  | 7/9 [00:08<00:02,  1.15s/it]

Processed: /kaggle/working/processed_audio/Scarlett Johansson/Scarlett Johansson_scarlett johansson shutting down sexist comments for 5 min straight.wav


Processing Scarlett Johansson:  89%|████████▉ | 8/9 [00:10<00:01,  1.21s/it]

Processed: /kaggle/working/processed_audio/Scarlett Johansson/Scarlett Johansson_State of the Union Cold Open - SNL.wav


Processing Scarlett Johansson: 100%|██████████| 9/9 [00:11<00:00,  1.31s/it]

Processed: /kaggle/working/processed_audio/Scarlett Johansson/Scarlett Johansson_ENGLISH SPEECH _ SCARLETT JOHANSSON_ Take Time for Yourself (English Subtitles).wav
Aishwarya Rai: 10 files, 87.54 minutes
Ryan Reynolds: 8 files, 65.28 minutes
Priyanka Chopra: 14 files, 144.64 minutes
Emma Watson: 13 files, 129.88 minutes
Hrithik Roshan: 14 files, 100.38 minutes
Shah Rukh Khan: 10 files, 134.11 minutes
Elon Musk: 7 files, 67.48 minutes
Scarlett Johansson: 9 files, 63.12 minutes





In [20]:
PROCESS_DIR = "/kaggle/working/processed_audio"
TRANSCRIPTS_DIR = "/kaggle/working/transcripts"
os.makedirs(TRANSCRIPTS_DIR, exist_ok=True)

In [21]:
def transcribe_audio(input_path, output_path):
    if os.path.exists(output_path):
        print(f"Skipping {output_path}: already exists")
        return True
    try:
        model = whisper.load_model("base")
        result = model.transcribe(input_path) #Run the transcription on the input audio file
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(result["text"]) #Save the transcribed text into a .txt file
        print(f"Transcribed: {output_path}")
        return True
    except Exception as e:
        print(f"Error transcribing {input_path}: {e}")
        return False

In [22]:
def summarize_transcriptions(transcripts_dir):
    for speaker in os.listdir(transcripts_dir):
        speaker_dir = os.path.join(transcripts_dir, speaker)
        if os.path.isdir(speaker_dir):
            files = [f for f in os.listdir(speaker_dir) if f.endswith(".txt")]
            print(f"{speaker}: {len(files)} transcription files")

In [23]:
def main():
    for speaker in os.listdir(PROCESS_DIR):
        speaker_src_dir = os.path.join(PROCESS_DIR, speaker)
        speaker_dst_dir = os.path.join(TRANSCRIPTS_DIR, speaker)
        os.makedirs(speaker_dst_dir, exist_ok=True)
        transcribed_count = 0
        for file in tqdm(os.listdir(speaker_src_dir), desc=f"Transcribing {speaker}"):
            if file.endswith(".wav"):
                src_path = os.path.join(speaker_src_dir, file)
                dst_path = os.path.join(speaker_dst_dir, f"{os.path.splitext(file)[0]}.txt")
                if transcribe_audio(src_path, dst_path):
                    transcribed_count += 1
        print(f"Summary for {speaker}: Transcribed {transcribed_count} files")
    summarize_transcriptions(TRANSCRIPTS_DIR)

if __name__ == "__main__":
    main()

Transcribing Aishwarya Rai:   0%|          | 0/10 [00:00<?, ?it/s]
  0%|                                               | 0.00/139M [00:00<?, ?iB/s][A
  9%|███▍                                   | 12.3M/139M [00:00<00:01, 129MiB/s][A
 21%|████████▏                              | 28.9M/139M [00:00<00:00, 155MiB/s][A
 33%|████████████▉                          | 45.9M/139M [00:00<00:00, 163MiB/s][A
 48%|██████████████████▊                    | 66.9M/139M [00:00<00:00, 185MiB/s][A
 61%|███████████████████████▊               | 84.5M/139M [00:00<00:00, 183MiB/s][A
 75%|█████████████████████████████▉          | 103M/139M [00:00<00:00, 188MiB/s][A
100%|████████████████████████████████████████| 139M/139M [00:00<00:00, 183MiB/s][A
Transcribing Aishwarya Rai:  10%|█         | 1/10 [00:14<02:12, 14.76s/it]

Transcribed: /kaggle/working/transcripts/Aishwarya Rai/Aishwarya Rai_Aishwarya Rai Bachchan Interview at Cannes 2018.txt


Transcribing Aishwarya Rai:  20%|██        | 2/10 [00:52<03:46, 28.29s/it]

Transcribed: /kaggle/working/transcripts/Aishwarya Rai/Aishwarya Rai_Exclusive Interview with Aishwarya Rai Bachchan _ Anupama Chopra _ FC at Cannes.txt


Transcribing Aishwarya Rai:  30%|███       | 3/10 [01:05<02:29, 21.36s/it]

Transcribed: /kaggle/working/transcripts/Aishwarya Rai/Aishwarya Rai_Aishwarya Rai Interview at Cannes 2018 (AP).txt


Transcribing Aishwarya Rai:  40%|████      | 4/10 [01:27<02:09, 21.53s/it]

Transcribed: /kaggle/working/transcripts/Aishwarya Rai/Aishwarya Rai_Cannes Film Festival_ Aishwarya Rai Bachchan Talks About Her Best Memories.txt


Transcribing Aishwarya Rai:  50%|█████     | 5/10 [01:38<01:29, 17.87s/it]

Transcribed: /kaggle/working/transcripts/Aishwarya Rai/Aishwarya Rai_Aishwarya Rai Interview at Cannes 2022.txt


Transcribing Aishwarya Rai:  60%|██████    | 6/10 [01:50<01:03, 15.93s/it]

Transcribed: /kaggle/working/transcripts/Aishwarya Rai/Aishwarya Rai_AISH 1 interview in cannes 2013..txt


Transcribing Aishwarya Rai:  70%|███████   | 7/10 [02:08<00:49, 16.60s/it]

Transcribed: /kaggle/working/transcripts/Aishwarya Rai/Aishwarya Rai_Aishwarya Rai Is The Most Beautiful Person In The World _ David Letterman.txt


Transcribing Aishwarya Rai:  80%|████████  | 8/10 [02:44<00:45, 22.54s/it]

Transcribed: /kaggle/working/transcripts/Aishwarya Rai/Aishwarya Rai_Aishwarya Rai Bachchan Interview with Anupama Chopra _ Cannes 2022 _ FC at Cannes _ Film Companion.txt


Transcribing Aishwarya Rai:  90%|█████████ | 9/10 [03:02<00:21, 21.20s/it]

Transcribed: /kaggle/working/transcripts/Aishwarya Rai/Aishwarya Rai_Aishwarya Rai Bachchan Cannes Film Festival 2016 #Cannes2016 www.asianculturevulture.com.txt


Transcribing Aishwarya Rai: 100%|██████████| 10/10 [03:19<00:00, 19.93s/it]


Transcribed: /kaggle/working/transcripts/Aishwarya Rai/Aishwarya Rai_Aishwarya Rai Bachchan Interview with Anupama Chopra _ Cannes Film Festival 2017.txt
Summary for Aishwarya Rai: Transcribed 10 files


Transcribing Ryan Reynolds:  12%|█▎        | 1/8 [00:43<05:05, 43.60s/it]

Transcribed: /kaggle/working/transcripts/Ryan Reynolds/Ryan Reynolds_UNBELIEVABLE! Blake Lively &amp; Ryan Reynolds Give DELUSIONAL Speech At Time 100 Gala.txt


Transcribing Ryan Reynolds:  25%|██▌       | 2/8 [00:59<02:44, 27.38s/it]

Transcribed: /kaggle/working/transcripts/Ryan Reynolds/Ryan Reynolds_Ryan Reynolds - You&#39;ll Never Make It Out Alive! (Motivational Speech).txt


Transcribing Ryan Reynolds:  38%|███▊      | 3/8 [01:13<01:46, 21.25s/it]

Transcribed: /kaggle/working/transcripts/Ryan Reynolds/Ryan Reynolds_Ryan Reynolds Talks Wicked, Wizard Of Oz, Ariana Grande &amp; More At The National Board Of Review Gala.txt


Transcribing Ryan Reynolds:  50%|█████     | 4/8 [01:33<01:22, 20.68s/it]

Transcribed: /kaggle/working/transcripts/Ryan Reynolds/Ryan Reynolds_Ryan Reynold’s Speech Will Leave You SPEECHLESS — Best Life Advice.txt


Transcribing Ryan Reynolds:  62%|██████▎   | 5/8 [01:45<00:52, 17.59s/it]

Transcribed: /kaggle/working/transcripts/Ryan Reynolds/Ryan Reynolds_The Best Life Advice From Ryan Reynolds&#39; Speech Will Leave You Speechless (Must Watch).txt


Transcribing Ryan Reynolds:  75%|███████▌  | 6/8 [02:13<00:41, 20.99s/it]

Transcribed: /kaggle/working/transcripts/Ryan Reynolds/Ryan Reynolds_ENGLISH SPEECH _ RYAN REYNOLDS_ Deadpool Interview (English Subtitles).txt


Transcribing Ryan Reynolds:  88%|████████▊ | 7/8 [02:30<00:19, 19.93s/it]

Transcribed: /kaggle/working/transcripts/Ryan Reynolds/Ryan Reynolds_Ryan Reynolds&#39; Speech NO ONE Wants To Hear — One Of The Most Eye-Opening Speeches.txt


Transcribing Ryan Reynolds: 100%|██████████| 8/8 [02:50<00:00, 21.30s/it]


Transcribed: /kaggle/working/transcripts/Ryan Reynolds/Ryan Reynolds_Ryan Reynolds&#39; Speech at Green Day&#39;s Hollywood Walk of Fame Ceremony.txt
Summary for Ryan Reynolds: Transcribed 8 files


Transcribing Priyanka Chopra:   7%|▋         | 1/14 [00:32<07:00, 32.38s/it]

Transcribed: /kaggle/working/transcripts/Priyanka Chopra/Priyanka Chopra_ENGLISH SPEECH _ PRIYANKA CHOPRA_ Empower Each Other (English Subtitles).txt


Transcribing Priyanka Chopra:  14%|█▍        | 2/14 [00:47<04:25, 22.13s/it]

Transcribed: /kaggle/working/transcripts/Priyanka Chopra/Priyanka Chopra_Grow Yourself - Priyanka Chopra inspirational Speech _  Priyanka Chopra Motivation @beginners01.txt


Transcribing Priyanka Chopra:  21%|██▏       | 3/14 [01:00<03:20, 18.23s/it]

Transcribed: /kaggle/working/transcripts/Priyanka Chopra/Priyanka Chopra_They said Girls are WASTE OF MONEY  _ Priyanka Chopra _ Motivation _ English Speech _ WINNER GIRLS.txt


Transcribing Priyanka Chopra:  29%|██▊       | 4/14 [01:15<02:48, 16.83s/it]

Transcribed: /kaggle/working/transcripts/Priyanka Chopra/Priyanka Chopra_ENGLISH SPEECH _ PRIYANKA CHOPRA_ Full Power of Women (English Subtitles).txt


Transcribing Priyanka Chopra:  36%|███▌      | 5/14 [01:39<02:55, 19.52s/it]

Transcribed: /kaggle/working/transcripts/Priyanka Chopra/Priyanka Chopra_ENGLISH SPEECH _ LEARN ENGLISH with PRIYANKA CHOPRA.txt


Transcribing Priyanka Chopra:  43%|████▎     | 6/14 [01:58<02:32, 19.08s/it]

Transcribed: /kaggle/working/transcripts/Priyanka Chopra/Priyanka Chopra_WATCH THIS EVERYDAY AND CHANGE YOUR LIFE - Priyanka Chopra Motivational Speech 2023.txt


Transcribing Priyanka Chopra:  50%|█████     | 7/14 [02:25<02:33, 21.93s/it]

Transcribed: /kaggle/working/transcripts/Priyanka Chopra/Priyanka Chopra_Priyanka Chopra’s Speech Will Leave You SPEECHLESS — Best Life Advice.txt


Transcribing Priyanka Chopra:  57%|█████▋    | 8/14 [02:36<01:50, 18.36s/it]

Transcribed: /kaggle/working/transcripts/Priyanka Chopra/Priyanka Chopra_Priyanka Chopra speech __ with Big Subtitles __ Learn English.txt


Transcribing Priyanka Chopra:  64%|██████▍   | 9/14 [02:54<01:30, 18.06s/it]

Transcribed: /kaggle/working/transcripts/Priyanka Chopra/Priyanka Chopra_ENGLISH SPEECH _ PRIYANKA CHOPRA_ Voice for the Voiceless (English Subtitles).txt


Transcribing Priyanka Chopra:  71%|███████▏  | 10/14 [03:20<01:23, 20.75s/it]

Transcribed: /kaggle/working/transcripts/Priyanka Chopra/Priyanka Chopra_Priyanka Chopra&#39;s Life Advice Will Leave You Speechless _ One of The Most Eye Opening Videos Ever.txt


Transcribing Priyanka Chopra:  79%|███████▊  | 11/14 [03:51<01:11, 23.82s/it]

Transcribed: /kaggle/working/transcripts/Priyanka Chopra/Priyanka Chopra_Stay Quiet After Disrespect , powerful motivational speech by Priyanka Chopra.txt


Transcribing Priyanka Chopra:  86%|████████▌ | 12/14 [04:20<00:50, 25.38s/it]

Transcribed: /kaggle/working/transcripts/Priyanka Chopra/Priyanka Chopra_PRIYANKA CHOPRA  - Motivational Speech - 12 Rules Of Life.txt


Transcribing Priyanka Chopra:  93%|█████████▎| 13/14 [04:59<00:29, 29.41s/it]

Transcribed: /kaggle/working/transcripts/Priyanka Chopra/Priyanka Chopra_ENGLISH SPEECH _ PRIYANKA CHOPRA_ Be Fearless (English Subtitles).txt


Transcribing Priyanka Chopra: 100%|██████████| 14/14 [05:29<00:00, 23.52s/it]


Transcribed: /kaggle/working/transcripts/Priyanka Chopra/Priyanka Chopra_Priyanka Chopra&#39;s Life Advice Will Change Your Future — One of the Best Motivational Videos Ever.txt
Summary for Priyanka Chopra: Transcribed 14 files


Transcribing Emma Watson:   8%|▊         | 1/13 [00:11<02:17, 11.49s/it]

Transcribed: /kaggle/working/transcripts/Emma Watson/Emma Watson_Emma Watson Speech for HeForShe IMPACT 10x10x10 Program at World Economic Forum 2015.txt


Transcribing Emma Watson:  15%|█▌        | 2/13 [00:28<02:40, 14.58s/it]

Transcribed: /kaggle/working/transcripts/Emma Watson/Emma Watson_ENGLISH SPEECH _ EMMA WATSON_ HeForShe Impact (English Subtitles).txt


Transcribing Emma Watson:  23%|██▎       | 3/13 [00:43<02:29, 14.92s/it]

Transcribed: /kaggle/working/transcripts/Emma Watson/Emma Watson_Emma Watson’s Powerful UN Speech on Gender Equality 🌍 #HeForShe.txt


Transcribing Emma Watson:  31%|███       | 4/13 [02:33<07:53, 52.58s/it]

Transcribed: /kaggle/working/transcripts/Emma Watson/Emma Watson_Emma Watson to United Nations_ I&#39;m a feminist.txt


Transcribing Emma Watson:  38%|███▊      | 5/13 [02:46<05:06, 38.26s/it]

Transcribed: /kaggle/working/transcripts/Emma Watson/Emma Watson_ENGLISH SPEECH _ EMMA WATSON_ Find Your Tribe (English Subtitles).txt


Transcribing Emma Watson:  46%|████▌     | 6/13 [03:04<03:38, 31.26s/it]

Transcribed: /kaggle/working/transcripts/Emma Watson/Emma Watson_Emma Watson&#39;s Speech on Gender Equality _ ENGLISH SPEECH with BIG Subtitles.txt


Transcribing Emma Watson:  54%|█████▍    | 7/13 [06:06<08:03, 80.60s/it]

Transcribed: /kaggle/working/transcripts/Emma Watson/Emma Watson_Learn English with Emma Watson&#39;s Speech on the HeForShe Campaign - English Subtitle.txt


Transcribing Emma Watson:  62%|██████▏   | 8/13 [08:10<07:51, 94.28s/it]

Transcribed: /kaggle/working/transcripts/Emma Watson/Emma Watson_Emma Watson&#39;s amazing speech at UN.txt


Transcribing Emma Watson:  69%|██████▉   | 9/13 [08:35<04:50, 72.58s/it]

Transcribed: /kaggle/working/transcripts/Emma Watson/Emma Watson_Emma Watson UN speech - Emma Watson #HeForShe.txt


Transcribing Emma Watson:  77%|███████▋  | 10/13 [09:28<03:19, 66.65s/it]

Transcribed: /kaggle/working/transcripts/Emma Watson/Emma Watson_Emma Watson&#39;s emotional speech on feminism.txt


Transcribing Emma Watson:  85%|████████▍ | 11/13 [10:29<02:09, 64.81s/it]

Transcribed: /kaggle/working/transcripts/Emma Watson/Emma Watson_Emma Watson HeForShe Speech at the United Nations _ UN Women 2014.txt


Transcribing Emma Watson:  92%|█████████▏| 12/13 [10:40<00:48, 48.48s/it]

Transcribed: /kaggle/working/transcripts/Emma Watson/Emma Watson_ENGLISH SPEECH _ LEARN ENGLISH with EMMA WATSON.txt


Transcribing Emma Watson: 100%|██████████| 13/13 [10:54<00:00, 50.33s/it]


Transcribed: /kaggle/working/transcripts/Emma Watson/Emma Watson_Emma Watson speech for HeForShe Second Year Anniversary (20_9_16).txt
Summary for Emma Watson: Transcribed 13 files


Transcribing Hrithik Roshan:   7%|▋         | 1/14 [00:12<02:43, 12.60s/it]

Transcribed: /kaggle/working/transcripts/Hrithik Roshan/Hrithik Roshan_ENGLISH SPEECH _ HRITHIK ROSHAN_ Know Who You Are (English Subtitles).txt


Transcribing Hrithik Roshan:  14%|█▍        | 2/14 [00:32<03:24, 17.01s/it]

Transcribed: /kaggle/working/transcripts/Hrithik Roshan/Hrithik Roshan_Hrithik Roshan 7 Rules of Success Inspirational Speech _ Motivational Interviews.txt


Transcribing Hrithik Roshan:  21%|██▏       | 3/14 [00:44<02:39, 14.48s/it]

Transcribed: /kaggle/working/transcripts/Hrithik Roshan/Hrithik Roshan_Hrithik Roshan English Speech With Subtitles _ Give Your Best _ Inspiring Speech _.txt


Transcribing Hrithik Roshan:  29%|██▊       | 4/14 [01:10<03:10, 19.05s/it]

Transcribed: /kaggle/working/transcripts/Hrithik Roshan/Hrithik Roshan_Hrithik Roshan&#39;s Speech Inspires Million Crowd Talking About An Impossible Dream Hrx Store Launch.txt


Transcribing Hrithik Roshan:  36%|███▌      | 5/14 [01:22<02:30, 16.74s/it]

Transcribed: /kaggle/working/transcripts/Hrithik Roshan/Hrithik Roshan_English Speech, Motivational Speech _ Hrithik Roshan Inspiration Speech _ English Big Subtitle.txt


Transcribing Hrithik Roshan:  43%|████▎     | 6/14 [01:37<02:09, 16.15s/it]

Transcribed: /kaggle/working/transcripts/Hrithik Roshan/Hrithik Roshan_ENGLISH SPEECH _ HRITHIK ROSHAN_ Speech to Students GIIS leadership Lecture Series.txt


Transcribing Hrithik Roshan:  50%|█████     | 7/14 [01:47<01:39, 14.16s/it]

Transcribed: /kaggle/working/transcripts/Hrithik Roshan/Hrithik Roshan_HRITHIK ROSHAN_ Know Who You Are _ English speech _ Motivational speech with english subtitles.txt


Transcribing Hrithik Roshan:  57%|█████▋    | 8/14 [01:59<01:20, 13.33s/it]

Transcribed: /kaggle/working/transcripts/Hrithik Roshan/Hrithik Roshan_SPEECH IN ENGLISH  _ HRITHIK ROSHAN EVERY ONE CAN BE A SUPER HERO _ BIG SUBTITLES _ 2023.txt


Transcribing Hrithik Roshan:  64%|██████▍   | 9/14 [02:11<01:04, 12.86s/it]

Transcribed: /kaggle/working/transcripts/Hrithik Roshan/Hrithik Roshan_Impeccable English Speeches __ English Speech __ Hrithik Roshan - Speech __ English Subtitles __.txt


Transcribing Hrithik Roshan:  71%|███████▏  | 10/14 [02:22<00:49, 12.31s/it]

Transcribed: /kaggle/working/transcripts/Hrithik Roshan/Hrithik Roshan_Hrithik Roshan Speech _ IIFA Awards.txt


Transcribing Hrithik Roshan:  79%|███████▊  | 11/14 [02:31<00:33, 11.22s/it]

Transcribed: /kaggle/working/transcripts/Hrithik Roshan/Hrithik Roshan_Every Student &amp; Parent Must Watch - HRITHIK ROSHAN Inspiring Speech.txt


Transcribing Hrithik Roshan:  86%|████████▌ | 12/14 [02:42<00:22, 11.21s/it]

Transcribed: /kaggle/working/transcripts/Hrithik Roshan/Hrithik Roshan_HRITHIK ROSHAN SPEECH _ Confused A Lot Then Do This One Thing (Speech With English Subtitle).txt


Transcribing Hrithik Roshan:  93%|█████████▎| 13/14 [02:55<00:11, 11.96s/it]

Transcribed: /kaggle/working/transcripts/Hrithik Roshan/Hrithik Roshan_Hrithik Roshan Motivational Speech For all Engineering Students _ Bollywood Actor _ Filmkraft __.txt


Transcribing Hrithik Roshan: 100%|██████████| 14/14 [03:06<00:00, 13.34s/it]


Transcribed: /kaggle/working/transcripts/Hrithik Roshan/Hrithik Roshan_Morning Motivation Video For STUDENTS by Hrithik Roshan _ Inspiring Speech.txt
Summary for Hrithik Roshan: Transcribed 14 files


Transcribing Shah Rukh Khan:  10%|█         | 1/10 [00:30<04:36, 30.71s/it]

Transcribed: /kaggle/working/transcripts/Shah Rukh Khan/Shah Rukh Khan_&quot;STOP BEING TOO FRIENDLY __ MOTIVATIONAL SPEECH INSPIRED BY SHAH RUKH KHAN&quot;.txt


Transcribing Shah Rukh Khan:  20%|██        | 2/10 [01:13<05:02, 37.83s/it]

Transcribed: /kaggle/working/transcripts/Shah Rukh Khan/Shah Rukh Khan_&quot;KILL THAT WEAK VERSION OF YOURSELF __ MOTIVATIONAL SPEECH INSPIRED BY SHAH RUKH KHAN&quot;.txt


Transcribing Shah Rukh Khan:  30%|███       | 3/10 [01:51<04:26, 38.10s/it]

Transcribed: /kaggle/working/transcripts/Shah Rukh Khan/Shah Rukh Khan_Thoughts on humanity, fame and love _ Shah Rukh Khan _ TED.txt


Transcribing Shah Rukh Khan:  40%|████      | 4/10 [02:12<03:06, 31.14s/it]

Transcribed: /kaggle/working/transcripts/Shah Rukh Khan/Shah Rukh Khan_Shah Rukh Khan EMOTIONAL Speech _ SRK Success Story Pathaan _ DUNKI _ JAWAN _ Srk 2023 Motivation.txt


Transcribing Shah Rukh Khan:  50%|█████     | 5/10 [02:45<02:39, 31.93s/it]

Transcribed: /kaggle/working/transcripts/Shah Rukh Khan/Shah Rukh Khan_ENGLISH SPEECH _ SHAH RUKH KHAN_ Perseverance &amp; Success (English Subtitles).txt


Transcribing Shah Rukh Khan:  60%|██████    | 6/10 [03:03<01:48, 27.03s/it]

Transcribed: /kaggle/working/transcripts/Shah Rukh Khan/Shah Rukh Khan_Best of Shah Rukh Khan - Best of all Motivational Eye Opening Speeches _ Let Them Bark.txt


Transcribing Shah Rukh Khan:  70%|███████   | 7/10 [03:37<01:28, 29.47s/it]

Transcribed: /kaggle/working/transcripts/Shah Rukh Khan/Shah Rukh Khan_&quot;FOCUS ON YOURSELF AND STAY SILENT __  MOTIVATIONAL SPEECH INSPIRED BY SHAH RUKH KHAN&quot;.txt


Transcribing Shah Rukh Khan:  80%|████████  | 8/10 [04:00<00:54, 27.19s/it]

Transcribed: /kaggle/working/transcripts/Shah Rukh Khan/Shah Rukh Khan_Shah Rukh Khan, Bollywood Star _ Journal Interview.txt


Transcribing Shah Rukh Khan:  90%|█████████ | 9/10 [04:50<00:34, 34.33s/it]

Transcribed: /kaggle/working/transcripts/Shah Rukh Khan/Shah Rukh Khan_Shah Rukh Khan Has A &quot;No Kissing&quot; Contract _ Friday Night With Jonathan Ross.txt


Transcribing Shah Rukh Khan: 100%|██████████| 10/10 [05:14<00:00, 31.46s/it]


Transcribed: /kaggle/working/transcripts/Shah Rukh Khan/Shah Rukh Khan_&quot;You have to Cheat with Life&quot; Shah Rukh Khan _ English Speech with English subtitles.txt
Summary for Shah Rukh Khan: Transcribed 10 files


Transcribing Elon Musk:  14%|█▍        | 1/7 [00:10<01:04, 10.70s/it]

Transcribed: /kaggle/working/transcripts/Elon Musk/Elon Musk_Elon Musk &amp; His Enterprises _ TJ Cheong _ TEDxYouth@IASA.txt


Transcribing Elon Musk:  29%|██▊       | 2/7 [00:29<01:17, 15.58s/it]

Transcribed: /kaggle/working/transcripts/Elon Musk/Elon Musk_IT WILL GIVE YOU GOOSEBUMPS - Elon Musk Motivational Speech 2022.txt


Transcribing Elon Musk:  43%|████▎     | 3/7 [00:55<01:21, 20.27s/it]

Transcribed: /kaggle/working/transcripts/Elon Musk/Elon Musk_Elon Musk&#39;s Speech Will Leave You SPEECHLESS _ One of the Most Eye Opening Speeches Ever 2022.txt


Transcribing Elon Musk:  57%|█████▋    | 4/7 [01:18<01:04, 21.43s/it]

Transcribed: /kaggle/working/transcripts/Elon Musk/Elon Musk_Elon Musk’s Incredible Speech on the Education System _ Eye Opening Video on Education.txt


Transcribing Elon Musk:  71%|███████▏  | 5/7 [01:28<00:34, 17.04s/it]

Transcribed: /kaggle/working/transcripts/Elon Musk/Elon Musk_Elon Musk Monologue - SNL.txt


Transcribing Elon Musk:  86%|████████▌ | 6/7 [01:56<00:21, 21.08s/it]

Transcribed: /kaggle/working/transcripts/Elon Musk/Elon Musk_ENGLISH SPEECH _ ELON MUSK_ Think Big &amp; Dream Even Bigger (English Subtitles).txt


Transcribing Elon Musk: 100%|██████████| 7/7 [02:13<00:00, 19.01s/it]


Transcribed: /kaggle/working/transcripts/Elon Musk/Elon Musk_Dare to Be Different_ Elon Musk _ Matthew Sotomey _ TEDxRiverHillHS.txt
Summary for Elon Musk: Transcribed 7 files


Transcribing Scarlett Johansson:  11%|█         | 1/9 [00:08<01:06,  8.30s/it]

Transcribed: /kaggle/working/transcripts/Scarlett Johansson/Scarlett Johansson_Scarlett Johansson - Variety&#39;s Power of Women Full Speech.txt


Transcribing Scarlett Johansson:  22%|██▏       | 2/9 [00:16<00:56,  8.00s/it]

Transcribed: /kaggle/working/transcripts/Scarlett Johansson/Scarlett Johansson_Election 2012 _ Scarlett Johansson&#39;s Full DNC Speech _ The New York Times.txt


Transcribing Scarlett Johansson:  33%|███▎      | 3/9 [00:43<01:42, 17.09s/it]

Transcribed: /kaggle/working/transcripts/Scarlett Johansson/Scarlett Johansson_SCARLETT JOHANSSON _ ENGLISH SPEECH.txt


Transcribing Scarlett Johansson:  44%|████▍     | 4/9 [00:57<01:19, 15.81s/it]

Transcribed: /kaggle/working/transcripts/Scarlett Johansson/Scarlett Johansson_Scarlett Johansson&#39;s Speech 2017 Full.txt


Transcribing Scarlett Johansson:  56%|█████▌    | 5/9 [01:15<01:05, 16.47s/it]

Transcribed: /kaggle/working/transcripts/Scarlett Johansson/Scarlett Johansson_SCARLETT JOHANSSON_ TAKE TIME FOR YOURSELF _ Learn English _ English Speech With Subtitles _ IES.txt


Transcribing Scarlett Johansson:  67%|██████▋   | 6/9 [01:24<00:41, 13.87s/it]

Transcribed: /kaggle/working/transcripts/Scarlett Johansson/Scarlett Johansson_English Speech, Motivational Speech _ Scarlett Johansson Speech, Women Power _ English Big Subtitle.txt


Transcribing Scarlett Johansson:  78%|███████▊  | 7/9 [01:35<00:26, 13.13s/it]

Transcribed: /kaggle/working/transcripts/Scarlett Johansson/Scarlett Johansson_scarlett johansson shutting down sexist comments for 5 min straight.txt


Transcribing Scarlett Johansson:  89%|████████▉ | 8/9 [01:52<00:14, 14.15s/it]

Transcribed: /kaggle/working/transcripts/Scarlett Johansson/Scarlett Johansson_State of the Union Cold Open - SNL.txt


Transcribing Scarlett Johansson: 100%|██████████| 9/9 [02:10<00:00, 14.55s/it]

Transcribed: /kaggle/working/transcripts/Scarlett Johansson/Scarlett Johansson_ENGLISH SPEECH _ SCARLETT JOHANSSON_ Take Time for Yourself (English Subtitles).txt
Summary for Scarlett Johansson: Transcribed 9 files
Aishwarya Rai: 10 transcription files
Ryan Reynolds: 8 transcription files
Priyanka Chopra: 14 transcription files
Emma Watson: 13 transcription files
Hrithik Roshan: 14 transcription files
Shah Rukh Khan: 10 transcription files
Elon Musk: 7 transcription files
Scarlett Johansson: 9 transcription files





In [24]:
PROCESS_DIR = "/kaggle/working/processed_audio"
FILTERED_DIR = "/kaggle/working/filtered_audio"
REPORT_PATH = "/kaggle/working/diarization_report.json"
HUGGINGFACE_TOKEN = user_secrets.get_secret("HUGGINGFACE_TOKEN")
os.makedirs(FILTERED_DIR, exist_ok=True)
MIN_SPEECH_DURATION = 40  # Minimum seconds of speech to keep
MIN_SPEAKER_DOMINANCE = 0.80  # Main speaker must have at least 80% of speaking time

In [25]:
def get_wav_files(root_dir):
    """Recursively find all WAV files in directory tree"""
    return [os.path.join(dirpath, f)
            for dirpath, _, filenames in os.walk(root_dir)
            for f in filenames if f.lower().endswith('.wav')]

In [26]:
def diarize_and_filter(input_path, output_path, pipeline):
    result = {
        "file": os.path.basename(input_path),
        "status": "failed",
        "speakers": {},
        "total_duration": 0,
        "kept_duration": 0,
        "reason": ""
    }
    
    try:
        # Load audio with pydub and get duration
        audio = AudioSegment.from_wav(input_path)
        result["total_duration"] = len(audio) / 1000  # Convert to seconds
        
        # Run speaker diarization using the Hugging Face pipelin
        diarization = pipeline(input_path)
        
        # Analyze speakers
        speaker_segments = {}
        for turn, _, speaker in diarization.itertracks(yield_label=True):
            duration = turn.end - turn.start #duration is time spoken in seconds
            if speaker not in result["speakers"]:
                result["speakers"][speaker] = 0
            result["speakers"][speaker] += duration #tracks how long each speaker talked
            
            start_ms = int(turn.start * 1000)
            end_ms = int(turn.end * 1000)
            segment = audio[start_ms:end_ms] #Extracts the audio for this segment
            
            if speaker not in speaker_segments:
                speaker_segments[speaker] = []
            speaker_segments[speaker].append(segment)
        
        # Decision logic
        if not speaker_segments:
            result["reason"] = "No speech detected"
            return result
            
        # Find dominant speaker
        total_speech = sum(result["speakers"].values())
        main_speaker = max(result["speakers"].items(), key=lambda x: x[1])[0]
        main_ratio = result["speakers"][main_speaker] / total_speech
        
        if (len(speaker_segments) == 1 or 
            (main_ratio >= MIN_SPEAKER_DOMINANCE and 
             result["speakers"][main_speaker] >= MIN_SPEECH_DURATION)):
            
            # Combine segments for main speaker
            combined = sum(speaker_segments[main_speaker])
            result["kept_duration"] = len(combined) / 1000
            
            os.makedirs(os.path.dirname(output_path), exist_ok=True)
            combined.export(output_path, format='wav')
            
            result["status"] = "success"
            result["reason"] = f"Main speaker {main_speaker} had {main_ratio:.1%} of speech"
            return result
            
        else:
            result["reason"] = (f"Rejected - {len(speaker_segments)} speakers detected, "
                             f"main speaker had {main_ratio:.1%} of speech")
            return result
            
    except Exception as e:
        result["reason"] = f"Processing error: {str(e)}"
        return result

In [27]:
def main():
    # Initialize
    os.makedirs(FILTERED_DIR, exist_ok=True)
    report = {
        "config": {
            "min_speech_duration": MIN_SPEECH_DURATION,
            "min_speaker_dominance": MIN_SPEAKER_DOMINANCE
        },
        "results": []
    }

    # Get files
    wav_files = get_wav_files(PROCESS_DIR)
    if not wav_files:
        print(f"No WAV files found in {PROCESS_DIR}")
        return
    
    print(f"Found {len(wav_files)} WAV files to process")

    # Initialize pipeline
    try:
        pipeline = Pipeline.from_pretrained(
            "pyannote/speaker-diarization-3.1",
            use_auth_token=HUGGINGFACE_TOKEN
        )
        pipeline.to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    except Exception as e:
        print(f"Pipeline initialization failed: {e}")
        return

    # Process files
    success_count = 0
    with tqdm(wav_files, desc="Processing") as pbar:
        for input_path in pbar:
            relative_path = os.path.relpath(input_path, PROCESS_DIR)
            output_path = os.path.join(FILTERED_DIR, relative_path)
            
            result = diarize_and_filter(input_path, output_path, pipeline)
            report["results"].append(result)
            
            if result["status"] == "success":
                success_count += 1
                pbar.set_postfix(success=success_count)

    # Save report
    with open(REPORT_PATH, 'w') as f:
        json.dump(report, f, indent=2)

    # Print summary
    total_hours = sum(r["kept_duration"] for r in report["results"]) / 3600
    print(f"\nCompleted: {success_count}/{len(wav_files)} files processed successfully")
    print(f"Total filtered audio: {total_hours:.2f} hours")
    print(f"Detailed report saved to: {REPORT_PATH}")

if __name__ == "__main__":
    main()

Found 85 WAV files to process


config.yaml:   0%|          | 0.00/469 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/5.91M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/399 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/26.6M [00:00<?, ?B/s]

config.yaml:   0%|          | 0.00/221 [00:00<?, ?B/s]

Processing: 100%|██████████| 85/85 [17:40<00:00, 12.48s/it, success=69]


Completed: 69/85 files processed successfully
Total filtered audio: 7.41 hours
Detailed report saved to: /kaggle/working/diarization_report.json





In [28]:
def summarize_diarize_data(FILTERED_DIR):
    for speaker in os.listdir(FILTERED_DIR):
        speaker_dir = os.path.join(FILTERED_DIR, speaker)
        total_minutes = 0
        num_files = 0
        for f in os.listdir(speaker_dir):
            if f.endswith(".wav"):
                file_path = os.path.join(speaker_dir, f)
                duration = librosa.get_duration(path=file_path)
                total_minutes += duration / 60
                num_files += 1
        print(f"{speaker}: {num_files} files, {total_minutes:.2f} minutes")

summarize_diarize_data(FILTERED_DIR)

Aishwarya Rai: 5 files, 41.29 minutes
Ryan Reynolds: 8 files, 50.85 minutes
Priyanka Chopra: 13 files, 98.99 minutes
Emma Watson: 12 files, 71.55 minutes
Hrithik Roshan: 12 files, 41.11 minutes
Shah Rukh Khan: 7 files, 67.41 minutes
Elon Musk: 7 files, 46.81 minutes
Scarlett Johansson: 5 files, 26.40 minutes


In [29]:
zip_path = "/kaggle/working/filtered_audio"
shutil.make_archive(zip_path, "zip", FILTERED_DIR)
display(FileLink('filtered_audio.zip')) 

In [30]:
abc  

NameError: name 'abc' is not defined

In [31]:
FILTERED_DIR = "/kaggle/working/filtered_audio"  # Input directory with filtered WAV files
PREPROCESSED_DIR = "/kaggle/working/preprocessed_audio"  # Output directory
os.makedirs(PREPROCESSED_DIR, exist_ok=True)

In [32]:
# Preprocessing parameters
SAMPLE_RATE = 22050

def process_audio(path, output_dir, sample_rate=SAMPLE_RATE):
    """Simplified audio processing for voice cloning."""
    try:
        # Load audio
        audio, sr = librosa.load(path, sr=sample_rate, mono=True)
        
        # Skip empty or very short files
        if len(audio) < sample_rate * 0.5:  # Less than 0.5 seconds
            print(f"Skipping {path}: Too short")
            return None
        
        # Trim silence
        trimmed, _ = librosa.effects.trim(audio, top_db=20)  # Less aggressive trimming
        
        # Normalize volume
        trimmed = librosa.util.normalize(trimmed)
        
        # Save processed audio
        output_path = os.path.join(output_dir, os.path.basename(path))
        sf.write(output_path, trimmed, sample_rate)
        return output_path
    
    except Exception as e:
        print(f"Error processing {path}: {e}")
        return None

In [33]:
def main():
    # Process each speaker's audio
    for speaker in tqdm(os.listdir(FILTERED_DIR), desc="Processing Speakers"):
        speaker_dir = os.path.join(FILTERED_DIR, speaker)
        output_speaker_dir = os.path.join(PREPROCESSED_DIR, speaker)
        os.makedirs(output_speaker_dir, exist_ok=True)
        
        if not os.path.isdir(speaker_dir):
            continue
            
        for file in tqdm(os.listdir(speaker_dir), desc=speaker, leave=False):
            if file.endswith(".wav"):
                input_path = os.path.join(speaker_dir, file)
                process_audio(input_path, output_speaker_dir)
    
    # Summarize results
    print("\nPreprocessed Dataset Summary:")
    for speaker in os.listdir(PREPROCESSED_DIR):
        speaker_dir = os.path.join(PREPROCESSED_DIR, speaker)
        if not os.path.isdir(speaker_dir):
            continue
        files = [f for f in os.listdir(speaker_dir) if f.endswith(".wav")]
        total_duration = sum(librosa.get_duration(path=os.path.join(speaker_dir, f)) for f in files)
        print(f"{speaker}: {len(files)} files, {total_duration / 60:.2f} minutes")
    
    # Create zip file
    zip_path = "/kaggle/working/preprocessed_audio"
    shutil.make_archive(zip_path, "zip", PREPROCESSED_DIR)
    display(FileLink('preprocessed_audio.zip'))

if __name__ == "__main__":
    main()

Processing Speakers:   0%|          | 0/8 [00:00<?, ?it/s]
Aishwarya Rai:   0%|          | 0/5 [00:00<?, ?it/s][A
Aishwarya Rai:  20%|██        | 1/5 [00:01<00:04,  1.22s/it][A
Aishwarya Rai:  40%|████      | 2/5 [00:01<00:01,  1.52it/s][A
Aishwarya Rai:  60%|██████    | 3/5 [00:01<00:01,  1.96it/s][A
Aishwarya Rai:  80%|████████  | 4/5 [00:02<00:00,  1.56it/s][A
Aishwarya Rai: 100%|██████████| 5/5 [00:03<00:00,  1.80it/s][A
Processing Speakers:  12%|█▎        | 1/8 [00:03<00:21,  3.07s/it]
Ryan Reynolds:   0%|          | 0/8 [00:00<?, ?it/s][A
Ryan Reynolds:  12%|█▎        | 1/8 [00:00<00:06,  1.07it/s][A
Ryan Reynolds:  25%|██▌       | 2/8 [00:01<00:03,  1.82it/s][A
Ryan Reynolds:  38%|███▊      | 3/8 [00:01<00:02,  2.24it/s][A
Ryan Reynolds:  50%|█████     | 4/8 [00:01<00:01,  2.27it/s][A
Ryan Reynolds:  62%|██████▎   | 5/8 [00:02<00:01,  2.77it/s][A
Ryan Reynolds:  75%|███████▌  | 6/8 [00:02<00:00,  2.54it/s][A
Ryan Reynolds:  88%|████████▊ | 7/8 [00:02<00:00,  2.76it/


Preprocessed Dataset Summary:
Aishwarya Rai: 5 files, 41.25 minutes
Ryan Reynolds: 8 files, 50.83 minutes
Priyanka Chopra: 13 files, 98.95 minutes
Emma Watson: 12 files, 71.54 minutes
Hrithik Roshan: 12 files, 41.10 minutes
Shah Rukh Khan: 7 files, 67.40 minutes
Elon Musk: 7 files, 46.79 minutes
Scarlett Johansson: 5 files, 26.38 minutes


In [34]:
audio_root = "/kaggle/working/preprocessed_audio"
transcript_root = "/kaggle/working/transcripts"

# Track missing transcripts
missing_transcripts = []

# Match and copy/generate .txt files
for speaker in os.listdir(audio_root):
    speaker_audio_path = os.path.join(audio_root, speaker)
    speaker_transcript_path = os.path.join(transcript_root, speaker)

    if not os.path.isdir(speaker_audio_path):
        continue

    for audio_file in os.listdir(speaker_audio_path):
        if audio_file.endswith(".wav"):
            base_name = os.path.splitext(audio_file)[0]
            transcript_file = base_name + ".txt"
            source_transcript_path = os.path.join(speaker_transcript_path, transcript_file)
            target_transcript_path = os.path.join(speaker_audio_path, transcript_file)

            if os.path.exists(source_transcript_path):
                shutil.copy(source_transcript_path, target_transcript_path)
            else:
                # Log missing transcript
                missing_transcripts.append(target_transcript_path)
                # Create dummy transcript
                with open(target_transcript_path, "w", encoding="utf-8") as f:
                    f.write("This is a placeholder transcript for voice cloning purposes.")

# Show a preview of missing transcripts handled
missing_transcripts[:5]

[]

In [35]:
!pip install resemblyzer

Collecting resemblyzer
  Downloading Resemblyzer-0.1.4-py3-none-any.whl.metadata (5.8 kB)
Collecting webrtcvad>=2.0.10 (from resemblyzer)
  Downloading webrtcvad-2.0.10.tar.gz (66 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m66.2/66.2 kB[0m [31m2.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting typing (from resemblyzer)
  Downloading typing-3.7.4.3.tar.gz (78 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m78.6/78.6 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading Resemblyzer-0.1.4-py3-none-any.whl (15.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.7/15.7 MB[0m [31m91.0 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hBuilding wheels for collected packages: webrtcvad, typing
  Building wheel for webrtcvad (setup.py) ... [?25l[?25hdone
  Created wheel for webrtcvad: filename=webrtcvad-2.0.1

In [36]:
from resemblyzer import VoiceEncoder, preprocess_wav

In [37]:
encoder = VoiceEncoder()

voice_root = "/kaggle/working/preprocessed_audio"
output_embeddings = {}

for speaker in os.listdir(voice_root):
    speaker_path = os.path.join(voice_root, speaker)
    if not os.path.isdir(speaker_path):
        continue

    speaker_embeddings = []
    for file in os.listdir(speaker_path):
        if file.endswith(".wav"):
            wav_fpath = os.path.join(speaker_path, file)
            try:
                wav = preprocess_wav(wav_fpath)
                embed = encoder.embed_utterance(wav)
                speaker_embeddings.append(embed)
            except Exception as e:
                print(f"Error processing {file}: {e}")

    if speaker_embeddings:
        # Mean embedding for the speaker
        mean_embedding = np.mean(speaker_embeddings, axis=0)
        output_embeddings[speaker] = mean_embedding
        print(f"Embedding generated for: {speaker}")
    else:
        print(f"No embeddings created for: {speaker}")

Loaded the voice encoder model on cuda in 0.02 seconds.
Embedding generated for: Aishwarya Rai
Embedding generated for: Ryan Reynolds
Embedding generated for: Priyanka Chopra
Embedding generated for: Emma Watson
Embedding generated for: Hrithik Roshan
Embedding generated for: Shah Rukh Khan
Embedding generated for: Elon Musk
Embedding generated for: Scarlett Johansson


In [38]:
pip install TTS --upgrade

Collecting TTS
  Downloading TTS-0.22.0-cp311-cp311-manylinux1_x86_64.whl.metadata (21 kB)
Collecting scikit-learn>=1.3.0 (from TTS)
  Downloading scikit_learn-1.7.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (17 kB)
Collecting anyascii>=0.3.0 (from TTS)
  Downloading anyascii-0.3.2-py3-none-any.whl.metadata (1.5 kB)
Collecting pysbd>=0.3.4 (from TTS)
  Downloading pysbd-0.3.4-py3-none-any.whl.metadata (6.1 kB)
Collecting pandas<2.0,>=1.4 (from TTS)
  Downloading pandas-1.5.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting trainer>=0.0.32 (from TTS)
  Downloading trainer-0.0.36-py3-none-any.whl.metadata (8.1 kB)
Collecting coqpit>=0.0.16 (from TTS)
  Downloading coqpit-0.0.17-py3-none-any.whl.metadata (11 kB)
Collecting pypinyin (from TTS)
  Downloading pypinyin-0.54.0-py2.py3-none-any.whl.metadata (12 kB)
Collecting hangul-romanize (from TTS)
  Downloading hangul_romanize-0.1.0-py3-none-any.whl.metadata (1.2 kB)
Collectin

In [39]:
import torch

# Save the original torch.load function
_original_torch_load = torch.load

# Monkey-patch torch.load to always use weights_only=False
def patched_torch_load(*args, **kwargs):
    kwargs['weights_only'] = False
    return _original_torch_load(*args, **kwargs)

torch.load = patched_torch_load

In [40]:
from TTS.api import TTS

tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")

 > You must confirm the following:
 | > "I have purchased a commercial license from Coqui: licensing@coqui.ai"
 | > "Otherwise, I agree to the terms of the non-commercial CPML: https://coqui.ai/cpml" - [y/n]


 | | >  y


 > Downloading model to /root/.local/share/tts/tts_models--multilingual--multi-dataset--xtts_v2


100%|█████████▉| 1.86G/1.87G [00:17<00:00, 108MiB/s] 
100%|██████████| 1.87G/1.87G [00:17<00:00, 107MiB/s]
100%|██████████| 4.37k/4.37k [00:00<00:00, 27.3kiB/s]

100%|██████████| 361k/361k [00:00<00:00, 1.61MiB/s]
100%|██████████| 32.0/32.0 [00:00<00:00, 153iB/s]


 > Model's license - CPML
 > Check https://coqui.ai/cpml.txt for more info.


2025-06-06 10:47:31.097853: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749206851.276606      35 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749206851.327261      35 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


 > Using model: xtts


GPT2InferenceModel has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.


In [44]:
text = "Time is tricky. You have whole months, even years, when nothing changes a speck, when you don’t go anywhere or do anything or think one new thought. And then you can get hit with a day or an hour, or half a second, when so much happens its almost like you are born all over again into some brand-new person you for damn sure never expected to meet."
output_dir = "/kaggle/working/synthesized_audio"
os.makedirs(output_dir, exist_ok=True)

for speaker in os.listdir("/kaggle/working/preprocessed_audio"):
    speaker_dir = os.path.join("/kaggle/working/preprocessed_audio", speaker)
    if os.path.isdir(speaker_dir):
        wav_file = next((f for f in os.listdir(speaker_dir) if f.endswith(".wav")), None)
        if wav_file:
            speaker_wav = os.path.join(speaker_dir, wav_file)
            output_path = os.path.join(output_dir, f"{speaker}_test.wav")
            tts.tts_to_file(text=text, speaker_wav=speaker_wav, language="en", file_path=output_path)
            print(f"Synthesized audio for {speaker}: {output_path}")

 > Text splitted to sentences.
['Time is tricky.', 'You have whole months, even years, when nothing changes a speck, when you don’t go anywhere or do anything or think one new thought.', 'And then you can get hit with a day or an hour, or half a second, when so much happens its almost like you are born all over again into some brand-new person you for damn sure never expected to meet.']
 > Processing time: 141.0643768310547
 > Real-time factor: 4.519875016165567
Synthesized audio for Aishwarya Rai: /kaggle/working/synthesized_audio/Aishwarya Rai_test.wav
 > Text splitted to sentences.
['Time is tricky.', 'You have whole months, even years, when nothing changes a speck, when you don’t go anywhere or do anything or think one new thought.', 'And then you can get hit with a day or an hour, or half a second, when so much happens its almost like you are born all over again into some brand-new person you for damn sure never expected to meet.']
 > Processing time: 95.88901257514954
 > Real-tim

In [45]:
encoder = VoiceEncoder()
for speaker in os.listdir("/kaggle/working/preprocessed_audio"):
    speaker_dir = os.path.join("/kaggle/working/preprocessed_audio", speaker)
    wav_file = next((f for f in os.listdir(speaker_dir) if f.endswith(".wav")), None)
    if wav_file:
        ref_wav_path = os.path.join(speaker_dir, wav_file)
        synth_wav_path = os.path.join("/kaggle/working/synthesized_audio", f"{speaker}_test.wav")
        ref_wav = preprocess_wav(ref_wav_path)
        synth_wav = preprocess_wav(synth_wav_path)
        ref_embed = encoder.embed_utterance(ref_wav)
        synth_embed = encoder.embed_utterance(synth_wav)
        similarity = np.dot(ref_embed, synth_embed) / (np.linalg.norm(ref_embed) * np.linalg.norm(synth_embed))
        print(f"Cosine similarity for {speaker}: {similarity:.3f}")

Loaded the voice encoder model on cuda in 0.01 seconds.
Cosine similarity for Aishwarya Rai: 0.871
Cosine similarity for Ryan Reynolds: 0.908
Cosine similarity for Priyanka Chopra: 0.868
Cosine similarity for Emma Watson: 0.811
Cosine similarity for Hrithik Roshan: 0.839
Cosine similarity for Shah Rukh Khan: 0.906
Cosine similarity for Elon Musk: 0.859
Cosine similarity for Scarlett Johansson: 0.910


In [None]:
zip_path = "/kaggle/working/synthesized_audio"
shutil.make_archive(zip_path, "zip", output_dir)
display(FileLink('synthesized_audio.zip'))

In [46]:
pip install gradio

Collecting gradio
  Downloading gradio-5.33.0-py3-none-any.whl.metadata (16 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.6.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.10.2 (from gradio)
  Downloading gradio_client-1.10.2-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.13-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (from gradio)
  Downloading safehttpx-0.1.6-py3-none-any.whl.metadata (4.2 kB)
Collecting semantic-version~=2.0 (from gradio)
  Downloading semantic_version-2.10.0-py2.py3-none-any.whl.metadata (9.7 kB)
Co

In [47]:
import uuid
import gradio as gr
from TTS.api import TTS

# Initialize TTS model
tts = TTS(model_name="tts_models/multilingual/multi-dataset/xtts_v2")

# Speaker reference WAVs directory
speaker_dir = "/kaggle/working/preprocessed_audio"
speakers = sorted(os.listdir(speaker_dir))

# Function to generate speech
def generate_speech(speaker_name, text):
    speaker_path = os.path.join(speaker_dir, speaker_name)
    wav_file = next((f for f in os.listdir(speaker_path) if f.endswith(".wav")), None)
    
    if not wav_file:
        return "No reference audio found.", None

    speaker_wav = os.path.join(speaker_path, wav_file)
    output_path = f"/kaggle/working/synthesized_audio/{uuid.uuid4().hex}.wav"
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    
    # Generate speech
    tts.tts_to_file(text=text, speaker_wav=speaker_wav, language="en", file_path=output_path)
    
    return f"Generated voice for: {speaker_name}", output_path

# Build Gradio interface
demo = gr.Interface(
    fn=generate_speech,
    inputs=[
        gr.Dropdown(choices=speakers, label="Choose a Speaker"),
        gr.Textbox(label="Enter Text to Synthesize", lines=4, placeholder="Type something here...")
    ],
    outputs=[
        gr.Textbox(label="Status"),
        gr.Audio(label="Generated Audio")
    ],
    title="Voice Cloning",
    description="Choose a speaker and enter text to synthesize a cloned voice."
)

# Launch the app
demo.launch(share=True)

 > tts_models/multilingual/multi-dataset/xtts_v2 is already downloaded.
 > Using model: xtts
* Running on local URL:  http://127.0.0.1:7860
* Running on public URL: https://f1d7165a24a2a8d0c7.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




 > Text splitted to sentences.
['Life is like riding a bicycle.', 'To keep your balance, you must keep moving.']
 > Processing time: 24.114545583724976
 > Real-time factor: 4.3532693388225026
 > Text splitted to sentences.
['Life is like riding a bicycle.', 'To keep your balance, you must keep moving.']
 > Processing time: 26.83378291130066
 > Real-time factor: 4.359857000075008
 > Text splitted to sentences.
['Life is like riding a bicycle.', 'To keep your balance, you must keep moving.']
 > Processing time: 22.636836051940918
 > Real-time factor: 4.23748841131229
 > Text splitted to sentences.
['Life is like riding a bicycle.', 'To keep your balance, you must keep moving.']
 > Processing time: 21.002528429031372
 > Real-time factor: 3.7212791837566033
 > Text splitted to sentences.
['Life is like riding a bicycle.', 'To keep your balance, you must keep moving.']
 > Processing time: 20.199196338653564
 > Real-time factor: 3.9351169711912557
 > Text splitted to sentences.
['Life is lik