In [2]:
# Pinned to the release shown in this cell's install log so re-runs resolve the same Gradio version.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install gradio==4.36.1

Collecting gradio
  Downloading gradio-4.36.1-py3-none-any.whl (12.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.3/12.3 MB[0m [31m51.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl (15 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.111.0-py3-none-any.whl (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting ffmpy (from gradio)
  Downloading ffmpy-0.3.2.tar.gz (5.5 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting gradio-client==1.0.1 (from gradio)
  Downloading gradio_client-1.0.1-py3-none-any.whl (318 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m318.1/318.1 kB[0m [31m32.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━

In [4]:
# Pinned to the release shown in this cell's install log so re-runs resolve the same version.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install SpeechRecognition==3.10.4


Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.4-py2.py3-none-any.whl (32.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m32.8/32.8 MB[0m [31m40.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: SpeechRecognition
Successfully installed SpeechRecognition-3.10.4


In [5]:
import os
import string
import tempfile

import gradio as gr
import joblib
import nltk
import numpy as np
import speech_recognition as sr
from moviepy.editor import VideoFileClip
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from pydub import AudioSegment


In [8]:
# Mount Google Drive so the serialized artifacts stored there can be read from
# this runtime. The google.colab import means this cell only runs inside Colab.
from google.colab import drive
drive.mount('/content/drive')
# NOTE(security): joblib.load unpickles the file, which can execute arbitrary
# code — only load artifacts from a Drive you trust.
# These two objects are the globals used by predict_toxicity(). Presumably they
# were fitted together in a separate training run — TODO confirm they match.
tfidf_vectorizer = joblib.load('/content/drive/MyDrive/tfidf_vectorizer.pkl')
lgbm_model = joblib.load('/content/drive/MyDrive/lgbm_multi_classifier.pkl')

Mounted at /content/drive


In [9]:

# Download the NLTK resources that preprocessing() relies on:
#   punkt     - tokenizer models used by word_tokenize
#   stopwords - corpus read through stopwords.words('english')
#   wordnet   - corpus used by WordNetLemmatizer
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [10]:
# Initialize lemmatizer
# One module-level instance, referenced as a global by preprocessing().
lemmatizer = WordNetLemmatizer()



# Function to preprocess text
def preprocessing(text):
    """Normalize raw text into a space-separated string of lemmatized tokens.

    Steps: lowercase the text, tokenize it with ``word_tokenize``, drop English
    stop words and punctuation tokens, lemmatize what remains with the
    module-level ``lemmatizer``, and join the result with single spaces.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned text as a single string (empty if no token survives).
    """
    tokens = word_tokenize(text.lower())
    english_stopwords = set(stopwords.words('english'))

    kept = []
    for token in tokens:
        # The punctuation check is a substring test against string.punctuation,
        # so it matches single punctuation characters (and any run of characters
        # that appears contiguously in that constant).
        if token in english_stopwords or token in string.punctuation:
            continue
        kept.append(lemmatizer.lemmatize(token))

    return ' '.join(kept)



In [11]:
# Function to predict toxicity
def predict_toxicity(text):
    """Classify ``text`` and return a human-readable toxicity report.

    The text is cleaned with ``preprocessing``, vectorized with the global
    ``tfidf_vectorizer`` and scored by the global ``lgbm_model``. Every label
    whose predicted value is 1 is listed with its positive-class probability,
    highest first.

    Args:
        text: The raw comment / transcript to analyse.

    Returns:
        A warning string with a per-label percentage breakdown when at least
        one label is predicted, otherwise a "safe" message.
    """
    features = tfidf_vectorizer.transform([preprocessing(text)])
    prediction = lgbm_model.predict(features)
    probabilities = lgbm_model.predict_proba(features)

    class_names = ['toxic', 'severe toxic', 'obscene', 'threat', 'insult', 'identity hate']

    # predict_proba is indexed as [label][sample][class]; [0][1] is the
    # positive-class probability of the single sample that was submitted.
    flagged = [
        (class_names[i], probabilities[i][0][1])
        for i in range(len(prediction[0]))
        if prediction[0][i] == 1
    ]

    if not flagged:
        return "✅ This comment is safe and non-toxic. ✅"

    labels = [label for label, _ in flagged]
    scores = [score for _, score in flagged]

    report = ["⚠️ This comment has been flagged as toxic. ⚠️\n\nIntensity Breakdown:\n"]
    # Reverse of the ascending argsort -> most probable label first
    for idx in np.argsort(scores)[::-1]:
        report.append(f"{labels[idx]}: {scores[idx]*100:.2f}%\n")
    return "".join(report)



In [12]:
# Function to convert video to audio
def video_to_audio(video_file, audio_path="audio.mp3"):
    """Extract the audio track of a video into an audio file.

    Args:
        video_file: Path to the video to read.
        audio_path: Where to write the extracted audio. Defaults to
            ``"audio.mp3"`` in the current working directory.

    Returns:
        ``audio_path``, the path of the file that was written.

    Raises:
        ValueError: If the video has no audio track.
    """
    video_clip = VideoFileClip(video_file)
    try:
        # MoviePy sets .audio to None for videos without sound; fail with a
        # clear message instead of an AttributeError on None.
        if video_clip.audio is None:
            raise ValueError("The video does not contain an audio track.")
        video_clip.audio.write_audiofile(audio_path)
    finally:
        # Release the reader resources held by the clip even when writing fails
        video_clip.close()
    return audio_path



In [13]:
# Function to convert audio to text
def audio_to_text(audio_path):
    """Transcribe an MP3 file to text.

    The MP3 is converted to a temporary WAV file with pydub, read through
    ``sr.AudioFile`` and transcribed with ``Recognizer.recognize_google``.
    The temporary WAV file is always removed, including when recognition fails.

    Args:
        audio_path: Path to the MP3 file to transcribe.

    Returns:
        The recognized text.

    Raises:
        Whatever ``recognize_google`` raises (per the SpeechRecognition docs,
        ``sr.UnknownValueError`` for unintelligible speech and
        ``sr.RequestError`` for API failures) is propagated unchanged.
    """
    recognizer = sr.Recognizer()
    audio = AudioSegment.from_mp3(audio_path)

    # Unique scratch file: a fixed name would be overwritten when two requests
    # are processed at the same time.
    fd, wav_path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # only the path is needed; pydub reopens the file itself
    try:
        audio.export(wav_path, format="wav")
        with sr.AudioFile(wav_path) as source:
            audio_data = recognizer.record(source)
            text = recognizer.recognize_google(audio_data)
    finally:
        # Clean up even if export or recognition raised
        os.remove(wav_path)
    return text



In [21]:
# Combined function to handle video input and return toxicity analysis
def analyze_video_toxicity(video_file):
    """Run the full video -> audio -> transcript -> toxicity pipeline.

    Args:
        video_file: Path to the video to analyse.

    Returns:
        A ``(transcript, toxicity_report)`` tuple: the text produced by
        ``audio_to_text`` and the string returned by ``predict_toxicity``.
    """
    transcript = audio_to_text(video_to_audio(video_file))
    return transcript, predict_toxicity(transcript)



In [None]:
# Create Gradio interface
def analyze_and_print(video_file):
    """Gradio callback: analyse an uploaded video and return the toxicity report.

    The transcript is printed to the cell output; only the toxicity report is
    returned to the UI.

    Args:
        video_file: Path of the uploaded video, or ``None`` when the user
            submitted the form without uploading anything.

    Returns:
        The toxicity report string, or a short explanatory message when no
        video was supplied or the speech could not be transcribed.
    """
    # Gradio passes None when nothing was uploaded; avoid crashing in MoviePy
    if video_file is None:
        return "Please upload a video before submitting."

    try:
        converted_text, toxicity_result = analyze_video_toxicity(video_file)
    except sr.UnknownValueError:
        # Raised by the recognizer when the audio contains no intelligible speech
        return "Could not recognize any speech in the video's audio track."
    except sr.RequestError as err:
        # Raised by the recognizer when the speech API cannot be reached or rejects the request
        return f"Speech recognition request failed: {err}"

    print(f"Text from the video: {converted_text}")
    return toxicity_result

# Wire analyze_and_print into a one-input / one-output Gradio UI: the uploaded
# video goes in, the toxicity report string comes out in a text box.
interface = gr.Interface(
    fn=analyze_and_print,
    inputs=gr.Video(label="Upload your video"),
    outputs=gr.Textbox(label="Toxicity Analysis"),
    title="Video Toxicity Classifier",
    description="Upload a video to analyze its audio content for toxicity.",
    theme="default",
    allow_flagging='never'  # turn off Gradio's flagging feature
)

# Launch the interface
# debug=True keeps this cell running so errors and logs stay visible in the
# notebook (see the Colab message in this cell's output).
if __name__ == "__main__":
    interface.launch(debug=True)

Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
Running on public URL: https://514b072e67a391f7d6.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)


MoviePy - Writing audio in audio.mp3




MoviePy - Done.
Text from the video: what's the best pizza advice you ever gotten really amazing piece of advice I've got which I followed till today is respect people's time if you want your time to be respected
