In [1]:
#  Level 1 - Basic Profanity Censorship

import pandas as pd
import string
import re

# --- Load the CSV file ---
# Expects at least the columns "text" and "severity_description".
profanity_df = pd.read_csv("profanity.csv")

# --- Normalize columns ---
# Lower-case and trim both columns so lookups are case- and whitespace-insensitive.
profanity_df["text"] = profanity_df["text"].str.lower().str.strip()
profanity_df["severity_description"] = profanity_df["severity_description"].str.lower().str.strip()


def _words_with_severity(levels):
    """Return the set of profanity strings whose severity is one of ``levels``.

    Rows whose ``text`` is missing are dropped so that NaN never ends up in
    a word set.
    """
    selected = profanity_df["severity_description"].isin(levels)
    return set(profanity_df.loc[selected, "text"].dropna())


# --- Create sets of swear words ---
# Each tier is cumulative: "low" censors everything, "strong" only the worst.
low_swear_words = _words_with_severity(["mild", "strong", "severe"])
mid_swear_words = _words_with_severity(["strong", "severe"])
strong_swear_words = _words_with_severity(["severe"])

# --- Choose which level to censor ---
# Change this to mid_swear_words or strong_swear_words as needed.
# A copy is taken so that later additions to `swear_words` do not silently
# modify the tier set it was built from.
swear_words = set(low_swear_words)

# --- Sample text ---
sample_text = """
I have had it with these motherfucking snakes on this motherfucking plane!
"""

# --- Define censoring function ---
def censor_swear_words(text, words=None):
    """Mask every profane word in ``text`` with asterisks.

    A token is a run of word characters, apostrophes and hyphens. Its
    punctuation is stripped and it is lower-cased before lookup, so
    "Damn-it" is looked up as "damnit". When a token matches, each of its
    letters and digits is replaced by ``*`` while punctuation inside the
    token (hyphens, apostrophes, underscores) is kept.

    Args:
        text: The string to censor.
        words: Optional collection of lower-case profanities to look up.
            When omitted, the notebook-level ``swear_words`` is used; it is
            resolved at call time, so later changes to it are honoured.

    Returns:
        A copy of ``text`` with matching tokens masked.
    """
    banned = swear_words if words is None else words
    # Escape the punctuation characters so none of them can be read as
    # character-class syntax (']', '\\', '^', '-').
    punctuation = re.compile(rf"[{re.escape(string.punctuation)}]")

    def censor_match(match):
        word = match.group()
        clean_word = punctuation.sub("", word).lower()
        if clean_word in banned:
            # Mask every alphanumeric character, not only ASCII letters, so
            # entries such as "sh1t" are not left partly readable.
            return re.sub(r"[^\W_]", "*", word)
        return word

    # Replace while preserving punctuation
    return re.sub(r"\b[\w'-]+\b", censor_match, text)

# --- Censor the sample ---
censored_text = censor_swear_words(sample_text)

# --- Show the before/after pair, trimmed of surrounding whitespace ---
for _label, _shown in (("Original Text:", sample_text), ("Censored Text:", censored_text)):
    print(_label, _shown.strip())


Original Text: I have had it with these motherfucking snakes on this motherfucking plane!
Censored Text: I have had it with these ************* snakes on this ************* plane!


In [4]:
# Level 2 - Video Transcription with Profanity Censorship

import os,whisper

# --- Config ---
video_path = r"C:\Users\jjhip\WorkSpace\AI-Video-Censorship\Whiplash.mp4"
Video_Name = os.path.splitext(os.path.basename(video_path))[0]
model_size = "base"
# Extra compound insults for this clip. censor_swear_words strips hyphens
# before lookup, so "faggot-lip" is looked up as "faggotlip".
# A new set is bound instead of calling .update(), so the set created in the
# earlier cell (and anything aliasing it) is not mutated in place.
swear_words = swear_words | {"lipdick", "limpdick", "faggotlip", "limpdicks"}


# --- Load Whisper and transcribe ---
model = whisper.load_model(model_size)
result = model.transcribe(video_path)
text = result["text"].strip()

# --- Save transcription ---
# Files are written to the current working directory, named after the video.
with open(f"{Video_Name}_transcription.txt", "w", encoding="utf-8") as f:
    f.write(text)
print("Transcription Preview:\n", text, "\n")

# --- Censor transcription ---
censored_text = censor_swear_words(text)
with open(f"{Video_Name}_censored_transcription.txt", "w", encoding="utf-8") as f:
    f.write(censored_text)
print("Censored Transcription Preview:\n", censored_text)



Transcription Preview:
 Yes. Then why the fuck didn't you say so? I've carried your fat ass for too long, man. So I'm not going to have you cost us a competition because your mind's on a fucking happy meal instead of on pitch. You are a worthless, friendless, faggot-lip little piece of shit whose mommy left daddy when she figured out he wasn't Eugene O'Neill and who was now weeping and slapping all over my drum set and fucking nine-year-old girl. A bunch of fucking limp-dicks sour-note flattered on their girlfriends, flexible tempo dipshits. You got ten minutes, you fucking pathetic pansy ass, fruit fuck. We will stay here for as long as it takes until one of you faggot can play in time. There's enough cock suckers! Hurry the fuck up. You hear me cock suckers? You better start shitting me perfect four-hundreds. Fucking you're looking for there's no pot of gold down there. Adjusting the seat really? That's been your fucking problem the whole time, the seat height. So now you have it, ri

In [None]:

#  Level 3 - Iterative Video Profanity Censorship 

import os, re, whisper, subprocess, tempfile, shutil

# --- Input media and output location ---
video_path = r"C:\Users\jjhip\WorkSpace\AI-Video-Censorship\Snakes.mp4"
bleep_path = r"C:\Users\jjhip\WorkSpace\AI-Video-Censorship\Deep.mp3"  # tone laid over each muted span
# The cleaned file sits next to the source video with a "_clean" suffix.
output_final = f"{os.path.splitext(video_path)[0]}_clean.mp4"

# --- Tunables ---
MAX_PASSES = 10   # upper bound on transcribe-and-censor rounds
PAD = 0.15        # seconds of padding added on each side of a detected word
MIN_PROB = 0.8    # minimum word confidence required to act on a match
model_size = "small"  # Whisper checkpoint used for word-level timestamps

# --- Load Whisper ---
print("🎧 Loading Whisper model...")
model = whisper.load_model(model_size)

# --- Function: censor a single pass ---
def censor_pass(input_video, output_video):
    """Run one transcribe-and-bleep pass over ``input_video``.

    The video is transcribed with word timestamps. Every word that, once
    reduced to its a-z letters, is in ``swear_words`` and whose confidence
    exceeds ``MIN_PROB`` is muted (with ``PAD`` seconds on each side) and
    overlaid with the clip at ``bleep_path``. The video stream is copied
    unchanged; audio is re-encoded to AAC.

    Args:
        input_video: Path of the video to scan.
        output_video: Path the processed (or, if nothing was found, copied)
            video is written to.

    Returns:
        Number of profane words censored in this pass; 0 means the input was
        copied to ``output_video`` unchanged.

    Raises:
        subprocess.CalledProcessError: If ffmpeg exits with a non-zero status.
    """
    result = model.transcribe(input_video, word_timestamps=True)
    mute_intervals = []

    for segment in result["segments"]:
        for w in segment.get("words", ()):
            clean = re.sub(r"[^a-z]", "", w["word"].lower())
            if clean not in swear_words:
                continue
            # openai-whisper reports word confidence under "probability".
            # Looking up "prob" alone always hit the 1.0 default, which made
            # the MIN_PROB threshold a no-op. "prob" is kept as a fallback.
            confidence = w.get("probability", w.get("prob", 1.0))
            if confidence <= MIN_PROB:
                print(f"⏭️ Skipped low-confidence match: '{clean}' (p={confidence:.2f})")
                continue
            start = max(0, w["start"] - PAD)
            end = w["end"] + PAD
            mute_intervals.append((start, end))
            print(f"💢 Found profanity: '{clean}' {start:.2f}-{end:.2f}s")

    if not mute_intervals:
        shutil.copy(input_video, output_video)
        return 0

    # --- Build FFmpeg filter ---
    # Silence the original track wherever any interval is active; the sum of
    # between() terms is non-zero inside every interval.
    # Fixed-precision formatting keeps exponent notation out of the expression.
    volume_filter = " + ".join(
        f"between(t,{s:.3f},{e:.3f})" for s, e in mute_intervals
    )
    filters = [f"[0:a]volume=enable='{volume_filter}':volume=0[base]"]

    # One trimmed, delayed copy of the bleep per interval.
    for i, (s, e) in enumerate(mute_intervals):
        dur = e - s
        delay_ms = int(s * 1000)
        filters.append(
            f"[1:a]atrim=0:{dur:.3f},adelay={delay_ms}|{delay_ms},volume=1[a{i}]"
        )

    # NOTE(review): amix scales its inputs by default, so the programme audio
    # may come out quieter with each pass. Newer ffmpeg builds accept
    # normalize=0 to disable that; confirm against the installed version
    # before enabling it.
    mix_inputs = "[base]" + "".join(f"[a{i}]" for i in range(len(mute_intervals)))
    filters.append(f"{mix_inputs}amix=inputs={len(mute_intervals)+1}:duration=longest[aout]")

    filter_str = ";".join(filters)

    cmd = [
        "ffmpeg", "-y",
        "-i", input_video,
        "-i", bleep_path,
        "-filter_complex", filter_str,
        "-map", "0:v", "-map", "[aout]",
        "-c:v", "copy", "-c:a", "aac", output_video
    ]
    subprocess.run(cmd, check=True)
    return len(mute_intervals)

# --- Iterative loop ---
# Re-transcribe the output of each pass until a pass finds nothing, or
# MAX_PASSES is reached.
passes = 0
current_input = video_path

print("\n🚀 Starting iterative censorship...\n")

# TemporaryDirectory removes the per-pass files even when a pass raises
# (e.g. ffmpeg failing), so intermediate videos are never left behind.
with tempfile.TemporaryDirectory() as tmp_dir:
    while passes < MAX_PASSES:
        passes += 1
        temp_output = os.path.join(tmp_dir, f"pass_{passes}.mp4")
        print(f"\n🔁 Pass {passes}: scanning & censoring...")
        hits = censor_pass(current_input, temp_output)

        if hits == 0:
            print(f"\n✅ Video is clean after {passes} pass(es)!")
            # On the first pass nothing was changed, so the source is used as-is.
            shutil.copy(temp_output if passes > 1 else current_input, output_final)
            break
        else:
            print(f"⚙️ {hits} profanities censored; re-checking...")
            current_input = temp_output
    else:
        # Loop ended without a clean pass: keep the most recent result.
        print(f"\n⚠️ Max passes ({MAX_PASSES}) reached; stopping.")
        shutil.copy(current_input, output_final)

print(f"\n✅ Final clean video saved to:\n{output_final}")


🎧 Loading Whisper model...

🚀 Starting iterative censorship...


🔁 Pass 1: scanning & censoring...




💢 Found profanity: 'motherfucking' 3.27-4.35s
💢 Found profanity: 'motherfucking' 5.37-6.55s
💢 Found profanity: 'fucking' 10.55-11.09s
⚙️ 3 profanities censored; re-checking...

🔁 Pass 2: scanning & censoring...

✅ Video is clean after 2 pass(es)!

✅ Final clean video saved to:
C:\Users\jjhip\WorkSpace\AI-Video-Censorship\Snakes_clean.mp4
