In [None]:
# --- Install dependencies ---
# Lines starting with `!` are notebook shell escapes (IPython/Colab), not Python
# statements, so this cell only runs inside a notebook environment.
!pip install -q git+https://github.com/openai/whisper.git
!pip install -q pydub better_profanity ffmpeg-python
# ffmpeg-python drives the ffmpeg executable, so the binary itself is installed via apt.
!apt -q update && apt install -y ffmpeg

# --- Imports ---
import whisper
from pydub import AudioSegment
from better_profanity import profanity
import ffmpeg
import os
from google.colab import files

# --- Upload multiple files ---
# The loop below iterates the keys of the returned mapping and uses each one
# as a file name that ffmpeg can open.
uploaded = files.upload()  # Upload 1–∞ MP4s

# --- Load model once ---
# Done before the loop so the model and the profanity word list are shared by
# every uploaded file instead of being reloaded per video.
model = whisper.load_model("base")
profanity.load_censor_words()

# --- Process each video ---
for filename in uploaded:
    # All derived artifacts share the upload's base name (extension stripped).
    base_name = os.path.splitext(filename)[0]
    audio_output = f"{base_name}_audio.wav"
    transcript_file = f"{base_name}_censored_transcript.txt"
    output_video = f"{base_name}_censored.mp4"

    print(f"\n🎬 Processing: {filename}")

    # Extract audio as mono WAV 16kHz
    ffmpeg.input(filename).output(audio_output, ac=1, ar='16000').run(overwrite_output=True)

    # Transcribe with word timestamps
    result = model.transcribe(audio_output, word_timestamps=True, verbose=False)
    transcript = result["text"]

    # Save and censor transcript
    censored_text = profanity.censor(transcript)
    # Explicit UTF-8: transcripts can contain non-ASCII characters, and relying
    # on the platform default encoding can raise UnicodeEncodeError or produce
    # a file that other tools misread.
    with open(transcript_file, "w", encoding="utf-8") as f:
        f.write(censored_text)

    # Detect profanity mute ranges
    # Each entry is a (start, end) pair taken from a word flagged as profane.
    mute_ranges = []
    for segment in result['segments']:
        # Segments without a "words" entry contribute nothing.
        for word in segment.get("words", []):
            # Normalise surrounding whitespace and case before the check.
            if profanity.contains_profanity(word['word'].strip().lower()):
                mute_ranges.append((word['start'], word['end']))

    # Build volume filter
    def generate_combined_volume_filter(ranges):
        """Build one ffmpeg ``volume`` filter string covering all ranges.

        ``ranges`` is a sequence of ``(start, end)`` pairs. Each pair becomes
        a ``between(t,start,end)`` term; the terms are joined with ``+`` to
        form the filter's ``enable`` expression, and the volume is set to 0.

        Returns ``None`` when ``ranges`` is empty, otherwise the filter string.
        """
        if not ranges:
            return None

        terms = []
        for begin, finish in ranges:
            terms.append("between(t,{},{})".format(begin, finish))

        enable_expr = "+".join(terms)
        return "volume=enable='" + enable_expr + "':volume=0"

    volume_filter = generate_combined_volume_filter(mute_ranges)

    # The video stream is stream-copied in both cases; only the audio
    # handling differs between the two option sets.
    if volume_filter:
        # Profanity found: apply the mute filter and encode the audio as AAC
        output_options = {"af": volume_filter, "vcodec": "copy", "acodec": "aac"}
    else:
        # No profanity, just copy original
        output_options = {"vcodec": "copy", "acodec": "copy"}

    source_stream = ffmpeg.input(filename)
    censored_stream = source_stream.output(output_video, **output_options)
    censored_stream.run(overwrite_output=True)

    # Offer downloads
    for produced_path in (transcript_file, output_video):
        files.download(produced_path)

# Runs once, after the loop over all uploaded files has finished.
print("\n✨ All videos processed and purified.")


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Get:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Hit:3 http://security.ubuntu.com/ubuntu jammy-security InRelease
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Hit:5 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:6 http://archive.ubuntu.com/ubuntu jammy-updates InRelease
Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Fetched 3,632 B in 1s (2,658 B/s)
Reading package lists...
Building dependency tree...
Reading state informa

KeyboardInterrupt: 