In [1]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
!pip install moviepy
!pip install git+https://github.com/openai/whisper.git

Collecting git+https://github.com/openai/whisper.git
  Cloning https://github.com/openai/whisper.git to /tmp/pip-req-build-51uclz5o
  Running command git clone --filter=blob:none --quiet https://github.com/openai/whisper.git /tmp/pip-req-build-51uclz5o
  Resolved https://github.com/openai/whisper.git to commit ba3f3cd54b0e5b8ce1ab3de13e32122d0d5f98ab
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


In [3]:
import joblib
hate_speech_model_path = '/content/drive/MyDrive/insta-reel/lr_pipeline.joblib'
hate_speech_model = joblib.load(hate_speech_model_path)
hate_speech_model

In [4]:
import numpy as np
def predict_hate_speech(text):
    pred = hate_speech_model.predict([text])[0]
    prob = round(np.max(hate_speech_model.predict_proba([text])[0]),2)
    return pred,prob

In [5]:
text = 'i am very angry'
pred,prob = predict_hate_speech(text)
pred,prob

('neither', 0.49)

In [6]:
import tensorflow as tf
cls_model_path = '/content/drive/MyDrive/insta-reel/xception_checkpoint.keras'
cls_model = tf.keras.models.load_model(cls_model_path)
cls_model

<keras.src.engine.sequential.Sequential at 0x7c3c652cf490>

In [7]:
from PIL import Image
cls_label = {'not_smoking': 0, 'smoking': 1}
label = list(cls_label.keys())

def predict_smoking(img_path):
    img = Image.open(img_path)
    resized_img = img.resize((299, 299))
    img = np.asarray(resized_img)
    img = np.expand_dims(img, axis=0)
    img = img / 255
    predictions = cls_model.predict(img)
    return label[np.argmax(predictions)],round(np.max(predictions),2)

In [8]:
img_path = '/content/drive/MyDrive/insta-reel/sample_vid_data/images.jpeg'
pred,prob = predict_smoking(img_path)
pred,prob



('smoking', 0.95)

In [9]:
import cv2
import numpy as np
from tqdm import tqdm

def predict_smoking_video_1(video_path):
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    predictions_dict = {}

    # Get total number of frames
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Set the frame skip interval
    frame_skip_interval = 30  # Change this value as needed for performance

    # tqdm progress bar
    pbar = tqdm(total=total_frames)
    pred = 'not smoking'
    predictions = 1
    while True:
        # Read the frame
        ret, frame = cap.read()
        if not ret:
            break

        # Only process every nth frame
        if frame_count % frame_skip_interval == 0:
            resized_img = cv2.resize(frame, (299, 299))
            img = np.asarray(resized_img)
            img = np.expand_dims(img, axis=0)
            # print(img.shape)

            img = img / 255
            predictions = cls_model.predict(img)

            if np.argmax(predictions) == 1:
                if np.max(predictions) >= 0.9:
                    pred = 'smoking'
                    break

        frame_count += 1
        pbar.update(1)

    return pred, round(np.max(predictions), 2)


In [10]:
import cv2
import numpy as np
from tqdm import tqdm

def predict_smoking_video_2(video_path):
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    predictions_dict = {}

    # Get total number of frames
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Calculate the interval between frames to select 20 frames
    interval = total_frames // 30
    selected_frames = [i * interval for i in range(20)]

    # tqdm progress bar
    pbar = tqdm(total=20)
    pred = 'not smoking'
    predictions = 1
    while frame_count < total_frames:
        ret = cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
        if not ret:
            break

        ret, frame = cap.read()
        if not ret:
            break

        if frame_count in selected_frames:
            resized_img = cv2.resize(frame, (299, 299))
            img = np.asarray(resized_img)
            img = np.expand_dims(img, axis=0)

            img = img / 255
            predictions = cls_model.predict(img)

            if np.argmax(predictions) == 1 and np.max(predictions) >= 0.9:
                pred = 'smoking'
                break

            pbar.update(1)

        frame_count += 1

    return pred, round(np.max(predictions), 2)

In [11]:
import cv2
import numpy as np
from tqdm import tqdm

def predict_smoking_video_1(video_path):
    cap = cv2.VideoCapture(video_path)
    frame_count = 0
    predictions_dict = {}

    # Get total number of frames
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Get frames per second (FPS)
    fps = cap.get(cv2.CAP_PROP_FPS)

    # Calculate the number of frames to skip to have one frame per second
    frames_to_skip = int(round(fps))

    # tqdm progress bar
    pbar = tqdm(total=total_frames)
    pred = 'not smoking'
    predictions = 1
    while True:
        # Skip frames until one frame per second
        for _ in range(frames_to_skip):
            ret, frame = cap.read()
            if not ret:
                break
            frame_count += 1
            pbar.update(1)

        if not ret:
            break

        resized_img = cv2.resize(frame, (299, 299))
        img = np.asarray(resized_img)
        img = np.expand_dims(img, axis=0)
        # print(img.shape)

        img = img / 255
        predictions = cls_model.predict(img)

        if np.argmax(predictions) == 1:
            if np.max(predictions) >= 0.9:
                pred = 'smoking'
                break

    return pred, round(np.max(predictions), 2)


In [12]:
import cv2
import numpy as np
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor

def predict_frame(frame, cls_model):
    resized_img = cv2.resize(frame, (299, 299))
    img = np.asarray(resized_img)
    img = np.expand_dims(img, axis=0)
    img = img / 255
    predictions = cls_model.predict(img)
    return predictions

def predict_smoking_video_3(video_path):
    cap = cv2.VideoCapture(video_path)
    frame_count = 0

    # Get total number of frames
    total_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))

    # Calculate the interval between frames to select 20 frames
    interval = total_frames // 30
    selected_frames = [i * interval for i in range(20)]

    predictions_dict = {}

    # tqdm progress bar
    pbar = tqdm(total=20)

    pred = 'not smoking'
    predictions = []

    # Function to process each frame
    def process_frame(frame):
        predictions.append(predict_frame(frame, cls_model))
        pbar.update(1)

    with ThreadPoolExecutor() as executor:
        while frame_count < total_frames:
            ret = cap.set(cv2.CAP_PROP_POS_FRAMES, frame_count)
            if not ret:
                break

            ret, frame = cap.read()
            if not ret:
                break

            if frame_count in selected_frames:
                executor.submit(process_frame, frame)

            frame_count += 1

    for prediction in predictions:
        if np.argmax(prediction) == 1 and np.max(prediction) >= 0.9:
            pred = 'smoking'
            break

    return pred, round(np.max(predictions), 2)


In [13]:
video_path = '/content/drive/MyDrive/insta-reel/sample_vid_data/sample_smoking.mp4'
pred,prob = predict_smoking_video_2(video_path)
pred,prob

  0%|          | 0/20 [00:00<?, ?it/s]



  0%|          | 0/20 [00:00<?, ?it/s]


('smoking', 0.92)

In [14]:
video_path = '/content/drive/MyDrive/insta-reel/sample_vid_data/non-smoking.mp4'
pred,prob = predict_smoking_video_1(video_path)
pred,prob

  0%|          | 0/379 [00:00<?, ?it/s]



  7%|▋         | 26/379 [00:00<00:02, 152.02it/s]



 13%|█▎        | 51/379 [00:00<00:02, 152.52it/s]



 20%|██        | 76/379 [00:00<00:01, 154.14it/s]



 27%|██▋       | 101/379 [00:00<00:01, 153.43it/s]



 33%|███▎      | 126/379 [00:00<00:01, 152.71it/s]



 40%|███▉      | 151/379 [00:00<00:01, 155.13it/s]



 46%|████▋     | 176/379 [00:01<00:01, 154.76it/s]



 53%|█████▎    | 201/379 [00:01<00:01, 156.27it/s]



 60%|█████▉    | 226/379 [00:01<00:00, 157.04it/s]



 66%|██████▌   | 251/379 [00:01<00:00, 155.42it/s]



 73%|███████▎  | 276/379 [00:01<00:00, 155.38it/s]



 79%|███████▉  | 301/379 [00:01<00:00, 156.49it/s]



 86%|████████▌ | 326/379 [00:02<00:00, 156.48it/s]



 93%|█████████▎| 351/379 [00:02<00:00, 157.80it/s]



100%|██████████| 379/379 [00:02<00:00, 156.77it/s]


('not smoking', 0.84)

In [15]:
explicit_model_path = '/content/drive/MyDrive/insta-reel/vid_model_checkpoint.keras'
explicit_model = tf.keras.models.load_model(explicit_model_path)
explicit_model

<keras.src.engine.sequential.Sequential at 0x7c34d85ad840>

In [16]:
# Specify the height and width to which each video frame will be resized in our dataset.
IMAGE_HEIGHT , IMAGE_WIDTH = 64, 64

# Specify the number of frames of a video that will be fed to the model as one sequence.
SEQUENCE_LENGTH = 20

# Specify the list containing the names of the classes used for training. Feel free to choose any set of classes.
CLASSES_LIST = ['explicit', 'normal']

In [17]:
import cv2
def frames_extraction(video_path):
    '''
    This function will extract the required frames from a video after resizing and normalizing them.
    Args:
        video_path: The path of the video in the disk, whose frames are to be extracted.
    Returns:
        frames_list: A list containing the resized and normalized frames of the video.
    '''

    # Declare a list to store video frames.
    frames_list = []

    # Read the Video File using the VideoCapture object.
    video_reader = cv2.VideoCapture(video_path)

    # Get the total number of frames in the video.
    video_frames_count = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))

    # Calculate the the interval after which frames will be added to the list.
    skip_frames_window = max(int(video_frames_count/SEQUENCE_LENGTH), 1)

    # Iterate through the Video Frames.
    for frame_counter in range(SEQUENCE_LENGTH):

        # Set the current frame position of the video.
        video_reader.set(cv2.CAP_PROP_POS_FRAMES, frame_counter * skip_frames_window)

        # Reading the frame from the video.
        success, frame = video_reader.read()

        # Check if Video frame is not successfully read then break the loop
        if not success:
            break

        # Resize the Frame to fixed height and width.
        resized_frame = cv2.resize(frame, (IMAGE_HEIGHT, IMAGE_WIDTH))

        # Normalize the resized frame by dividing it with 255 so that each pixel value then lies between 0 and 1
        normalized_frame = resized_frame / 255

        # Append the normalized frame into the frames list
        frames_list.append(normalized_frame)

    # Release the VideoCapture object.
    video_reader.release()

    # Return the frames list.
    return frames_list

In [18]:
def vid_class_pred(path,class_list):
    arr = np.array(frames_extraction(path))
    arr = np.expand_dims(arr, axis=0)
    model_pred = explicit_model.predict(arr).ravel()
    pred_prob = max(model_pred)
    pred_class = class_list[np.argmax(model_pred)]
    return pred_class,round(pred_prob,2)

In [19]:
class_list = CLASSES_LIST
path = '/content/drive/MyDrive/insta-reel/sample_vid_data/explicit/SaveInsta.App - 3037680951642907011_1327453630.mp4'
pred_class,pred_prob = vid_class_pred(path,class_list)
pred_class,pred_prob



('explicit', 1.0)

In [20]:
class_list = CLASSES_LIST
path = '/content/drive/MyDrive/insta-reel/sample_vid_data/normal/SaveInsta.App - 3241741895495626070_55489127536.mp4'
pred_class,pred_prob = vid_class_pred(path,class_list)
pred_class,pred_prob



('normal', 0.95)

In [21]:
from moviepy.editor import VideoFileClip

# Function to separate audio and save it as MP3
def separate_audio(video_path, save_path):
    video = VideoFileClip(video_path)
    audio = video.audio
    if audio is not None:
        audio.write_audiofile(save_path)
    else:
        print("No audio track found in the video.")
    video.close()

# Function to mute video and save it
def mute_video(video_path, save_path):
    video = VideoFileClip(video_path)
    muted_video = video.set_audio(None)
    muted_video.write_videofile(save_path, codec="libx264")
    video.close()

In [22]:
import os
video_name = 'sample_int_vid.mp4'
video_path = '/content/drive/MyDrive/insta-reel/sample_vid_data/sample_int_vid.mp4'
audio_save_path = os.path.join("audio", video_name.replace(".mp4", ".mp3"))
muted_video_save_path = os.path.join("muted_videos", video_name)

# Create directories if they don't exist
os.makedirs("uploads", exist_ok=True)
os.makedirs("audio", exist_ok=True)
os.makedirs("muted_videos", exist_ok=True)

# Separate audio and save it as MP3
separate_audio(video_path, audio_save_path)
print(f"Audio separated and saved as {audio_save_path}")

# Mute the video and save it
# mute_video(video_path, muted_video_save_path)
# print(f"Muted video saved as {muted_video_save_path}")

MoviePy - Writing audio in audio/sample_int_vid.mp3


                                                                      

MoviePy - Done.
Audio separated and saved as audio/sample_int_vid.mp3




In [23]:
import whisper
model = whisper.load_model("base")

def transcribe(audio):

    # load audio and pad/trim it to fit 30 seconds
    audio = whisper.load_audio(audio)
    audio = whisper.pad_or_trim(audio)

    # make log-Mel spectrogram and move to the same device as the model
    mel = whisper.log_mel_spectrogram(audio).to(model.device)

    # detect the spoken language
    _, probs = model.detect_language(mel)
    print(f"Detected language: {max(probs, key=probs.get)}")

    # decode the audio
    options = whisper.DecodingOptions()
    result = whisper.decode(model, mel, options)
    return result.text

100%|████████████████████████████████████████| 139M/139M [00:00<00:00, 181MiB/s]


In [24]:
easy_text = transcribe("/content/audio/sample_int_vid.mp3")
print(easy_text)

Detected language: en
I'm Ashley. If you're watching this video, it's a safe bet that you're preparing for a job interview and feel like you need a little help. Your interview is also likely to be virtual, i.e. a remote video call. To get hired in today's job market, it is essential to master the art of the virtual interview.


In [25]:
!pip install gradio
!pip install profanity

Collecting gradio
  Downloading gradio-4.28.3-py3-none-any.whl (12.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.2/12.2 MB[0m [31m37.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.110.2-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.9/91.9 kB[0m [31m9.1 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==0.16.0 (from gradio)
  Downloading gradio_client-0.16.0-py3-none-any.whl (314 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m314.4/314.4 kB[0m [31m26.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━

In [26]:
from profanity import profanity
profanity_res = profanity.contains_profanity("You smell like shit.")
profanity_res

True

In [27]:
def prediction_on_audio(audio_path):
    if not os.path.exists(audio_path):
        print("No audio file found.")
        return 'no_audio', 0, False

    text = transcribe(audio_path)
    pred, prob = predict_hate_speech(text)
    profanity_res = profanity.contains_profanity(text)
    return pred, prob, profanity_res

In [28]:
def respond(video_path):
    video_name = os.path.basename(video_path)
    audio_save_path = video_name.replace(".mp4", ".mp3")
    separate_audio(video_path, audio_save_path)
    # muted_video_save_path = video_path
    # mute_video(video_path, muted_video_save_path)
    pred1,prob1,prof_res = prediction_on_audio(audio_save_path)
    hate_speech_pred = f"Profanity Rating = {prof_res}, Hate speech Pred = {pred1}, Hate speech prob = {prob1} "

    pred2,prob2 = predict_smoking_video_2(video_path)
    smoking_pred = f"Pred = {pred2}, prob = {prob2}"

    class_list = ['explicit', 'normal']
    pred3,prob3 = vid_class_pred(video_path,class_list)

    explicit_pred = f"Pred = {pred3}, prob = {prob3}"

    if (pred1 != 'neither') or (pred2 != 'not smoking') or (pred3 != 'normal'):
        final_pred = 'detrimental'
    else:
        final_pred = 'non-detrimental'

    final_pred = f"Pred = {final_pred}"

    return hate_speech_pred,smoking_pred,explicit_pred,final_pred

In [29]:
video_path = '/content/drive/MyDrive/insta-reel/sample_vid_data/normal/SaveInsta.App - 3241741895495626070_55489127536.mp4'
hate_speech_pred,smoking_pred,explicit_pred,final_pred = respond(video_path)
hate_speech_pred,smoking_pred,explicit_pred,final_pred

MoviePy - Writing audio in SaveInsta.App - 3241741895495626070_55489127536.mp3




MoviePy - Done.
Detected language: en


  0%|          | 0/20 [00:00<?, ?it/s]



  5%|▌         | 1/20 [00:00<00:04,  4.45it/s]



 10%|█         | 2/20 [00:00<00:06,  2.97it/s]



 15%|█▌        | 3/20 [00:01<00:06,  2.48it/s]



 20%|██        | 4/20 [00:01<00:07,  2.12it/s]



 25%|██▌       | 5/20 [00:02<00:07,  1.89it/s]



 30%|███       | 6/20 [00:02<00:07,  1.76it/s]



 35%|███▌      | 7/20 [00:03<00:08,  1.62it/s]



 40%|████      | 8/20 [00:04<00:08,  1.48it/s]



 45%|████▌     | 9/20 [00:05<00:07,  1.39it/s]



 50%|█████     | 10/20 [00:06<00:07,  1.29it/s]



 55%|█████▌    | 11/20 [00:07<00:07,  1.21it/s]



 60%|██████    | 12/20 [00:08<00:07,  1.13it/s]



 65%|██████▌   | 13/20 [00:09<00:06,  1.06it/s]



 70%|███████   | 14/20 [00:10<00:06,  1.01s/it]



 75%|███████▌  | 15/20 [00:11<00:05,  1.01s/it]



 80%|████████  | 16/20 [00:11<00:03,  1.16it/s]



 85%|████████▌ | 17/20 [00:12<00:02,  1.29it/s]



 90%|█████████ | 18/20 [00:13<00:01,  1.34it/s]



 95%|█████████▌| 19/20 [00:13<00:00,  1.31it/s]



100%|██████████| 20/20 [00:28<00:00,  1.41s/it]




('Profanity Rating = False, Hate speech Pred = offensive_language, Hate speech prob = 0.41 ',
 'Pred = not smoking, prob = 0.6899999976158142',
 'Pred = normal, prob = 0.949999988079071',
 'Pred = detrimental')

In [31]:
import gradio as gr
import os

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        """
        # Video Content Moderation Model Demo WebApp
        Detects obscene, smoking, hate speech content in a given video
        """)

    hate_speech_pred = gr.Textbox(label="hate_speech_pred")
    smoking_pred = gr.Textbox(label="smoking_pred")
    explicit_pred = gr.Textbox(label="explicit_pred")
    final_pred = gr.Textbox(label="final_pred")
    input_video = gr.Video(sources=["upload","webcam"],label="input_video")
    submit_btn = gr.Button(value='submit')
    clear = gr.ClearButton([hate_speech_pred,smoking_pred,explicit_pred,final_pred,input_video])

    def respond(video_path):
      video_name = os.path.basename(video_path)
      audio_save_path = video_name.replace(".mp4", ".mp3")
      separate_audio(video_path, audio_save_path)
      # muted_video_save_path = video_path
      # mute_video(video_path, muted_video_save_path)
      pred1,prob1,prof_res = prediction_on_audio(audio_save_path)
      hate_speech_pred = f"Profanity Rating = {prof_res}, Hate speech Pred = {pred1}, Hate speech prob = {prob1} "

      pred2,prob2 = predict_smoking_video_2(video_path)
      smoking_pred = f"Pred = {pred2}, prob = {prob2}"

      class_list = ['explicit', 'normal']
      pred3,prob3 = vid_class_pred(video_path,class_list)

      explicit_pred = f"Pred = {pred3}, prob = {prob3}"

      if (pred1 != 'neither') or (pred2 != 'not smoking') or (pred3 != 'normal'):
          final_pred = 'detrimental'
      else:
          final_pred = 'non-detrimental'

      final_pred = f"Pred = {final_pred}"

      return hate_speech_pred,smoking_pred,explicit_pred,final_pred

    submit_btn.click(respond, [input_video], [hate_speech_pred,smoking_pred,explicit_pred,final_pred])

demo.launch(debug=True)

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://0b2c2bf16ffeb8b95c.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


MoviePy - Writing audio in How to light a cigarette in style.mp3




MoviePy - Done.
Detected language: en


  0%|          | 0/20 [00:00<?, ?it/s]



  5%|▌         | 1/20 [00:00<00:03,  5.39it/s]



 10%|█         | 2/20 [00:00<00:03,  4.53it/s]



 10%|█         | 2/20 [00:00<00:06,  2.85it/s]


Keyboard interruption in main thread... closing server.
Killing tunnel 127.0.0.1:7860 <> https://0b2c2bf16ffeb8b95c.gradio.live


