In [13]:
# Install the app's dependencies. %pip (rather than !pip) installs into the
# environment of the running kernel.
%pip install -q youtube-transcript-api nltk scikit-learn joblib
# Only alt-profanity-check is installed: it provides the same `profanity_check`
# module as the older `profanity-check` package, so installing both makes the
# two distributions overwrite each other's files.
# The separate `sklearn` PyPI name is a deprecated placeholder for scikit-learn
# and is intentionally not installed.
%pip install -q alt-profanity-check
# app.py also imports matplotlib, wordcloud and googleapiclient.
%pip install -q matplotlib wordcloud google-api-python-client
%pip install -q streamlit pyngrok

In [14]:
%%writefile app.py
import os
import warnings
from urllib.parse import parse_qs, urlparse

import matplotlib.pyplot as plt
import nltk
import streamlit as st
from googleapiclient.discovery import build
from matplotlib.cbook import MatplotlibDeprecationWarning
from nltk.sentiment import SentimentIntensityAnalyzer
from profanity_check import predict_prob
from wordcloud import WordCloud
from youtube_transcript_api import YouTubeTranscriptApi

# Ignore Streamlit warnings and errors
try:
    st.set_option('deprecation.showPyplotGlobalUse', False)
except Exception:
    # Best effort only: this option just silences a deprecation notice.
    # NOTE(review): Streamlit releases that dropped the option are expected to
    # reject unknown config keys; the app must still start in that case.
    pass
warnings.filterwarnings("ignore", category=MatplotlibDeprecationWarning)

# Download NLTK data (if not already downloaded)
nltk.download('vader_lexicon')

# Set up YouTube API client with your API key.
# The key is read from the YOUTUBE_API_KEY environment variable so it does not
# have to be committed with the notebook; the original placeholder is kept as
# the fallback value.
API_KEY = os.environ.get("YOUTUBE_API_KEY", "API_KEY")
youtube = build("youtube", "v3", developerKey=API_KEY)

# Function to fetch the video transcript with the youtube_transcript_api package
def generate_transcript(video_id, language='en'):
    """Fetch a video's subtitles and return them as one string.

    Works with both API styles of ``youtube_transcript_api``: the instance
    method ``YouTubeTranscriptApi().fetch(...)`` when the class provides it,
    otherwise the older static ``YouTubeTranscriptApi.get_transcript(...)``.

    Args:
        video_id: YouTube video ID.
        language: Subtitle language code to request (default ``'en'``).

    Returns:
        str: Every caption text except the literal ``'[Music]'`` marker, each
        followed by a single space (a non-empty result ends with a space).

    Raises:
        Any exception raised by the transcript library propagates unchanged.
    """
    if hasattr(YouTubeTranscriptApi, "fetch"):
        # Instance API: iterating the result yields snippets with a ``text`` attribute.
        fetched = YouTubeTranscriptApi().fetch(video_id, languages=[language])
        texts = (snippet.text for snippet in fetched)
    else:
        # Older static API: returns a list of dicts with a "text" key.
        segments = YouTubeTranscriptApi.get_transcript(video_id, languages=[language])
        texts = (segment["text"] for segment in segments)

    return "".join(text + " " for text in texts if text != '[Music]')

# Shared VADER analyzer, created lazily on first use
_SENTIMENT_ANALYZER = None


def _get_sentiment_analyzer():
    """Return the shared SentimentIntensityAnalyzer, creating it on first call."""
    global _SENTIMENT_ANALYZER
    if _SENTIMENT_ANALYZER is None:
        _SENTIMENT_ANALYZER = SentimentIntensityAnalyzer()
    return _SENTIMENT_ANALYZER


# Function to analyze sentiment in text using the NLTK library
def analyze_sentiment(text):
    """Classify ``text`` as ``'positive'``, ``'negative'`` or ``'neutral'``.

    The label comes from the ``'compound'`` entry of the analyzer's
    ``polarity_scores`` result: ``>= 0.05`` is positive, ``<= -0.05`` is
    negative, anything in between is neutral.

    The analyzer is built once and reused, because this function is called
    once per word by ``count_sentiment_words``.

    Args:
        text: The string to score.

    Returns:
        str: One of ``'positive'``, ``'negative'``, ``'neutral'``.
    """
    compound_score = _get_sentiment_analyzer().polarity_scores(text)['compound']

    # Assign a sentiment label based on the compound score
    if compound_score >= 0.05:
        return 'positive'
    if compound_score <= -0.05:
        return 'negative'
    return 'neutral'

# Function to plot a pie chart for sentiment distribution
def plot_pie_chart(labels, sizes, title):
    """Build a pie chart and return its figure for Streamlit to display.

    Args:
        labels: Iterable of wedge labels (dict views are accepted).
        sizes: Iterable of wedge sizes, in the same order as ``labels``.
        title: Title placed above the chart.

    Returns:
        The Matplotlib figure containing the chart.
    """
    # Materialise the inputs: callers pass dict views, which are not sequences
    # and cannot be relied on to convert into the numeric array the pie needs.
    labels = list(labels)
    sizes = list(sizes)

    fig, ax = plt.subplots()
    if any(sizes):
        ax.pie(sizes, labels=labels, autopct='%1.1f%%', startangle=90)
    else:
        # Nothing to draw: every wedge would have zero size.
        ax.text(0.5, 0.5, 'No data', ha='center', va='center', transform=ax.transAxes)
        ax.axis('off')
    ax.set_title(title)
    return fig  # Return the figure for Streamlit to display

# Function to count sentiment words in text
def count_sentiment_words(text):
    """Count how many whitespace-separated words fall into each sentiment class.

    Each word is classified on its own with ``analyze_sentiment``.

    Args:
        text: The text whose words are counted.

    Returns:
        dict: Counts keyed by ``'positive'``, ``'neutral'`` and ``'negative'``
        (all zero for empty text).
    """
    sentiment_words = {'positive': 0, 'neutral': 0, 'negative': 0}

    for word in text.split():
        sentiment_words[analyze_sentiment(word)] += 1

    return sentiment_words

# Function to plot a bar chart for sentiment word counts
def plot_bar_chart(data, title, xlabel, ylabel):
    """Draw one bar per entry of ``data`` and return the figure.

    Args:
        data: Mapping of category name to count; keys become the bar labels
            and values the bar heights, in the mapping's iteration order.
        title: Chart title.
        xlabel: Label for the x axis.
        ylabel: Label for the y axis.

    Returns:
        The Matplotlib figure, ready to hand to Streamlit.
    """
    figure, axes = plt.subplots()

    categories = [name for name in data.keys()]
    counts = [count for count in data.values()]
    # Bar colours are assigned by position, not by category name.
    axes.bar(categories, counts, color=['green', 'yellow', 'red'])

    axes.set_title(title)
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    return figure

# Function to analyze explicit content using the profanity_check library
def analyze_explicit_content(text):
    """Score ``text`` with ``profanity_check.predict_prob``.

    Args:
        text: The text to score, passed to ``predict_prob`` as a one-item list.

    Returns:
        The first (and only) element of the ``predict_prob`` result.
    """
    return predict_prob([text])[0]

# Function to classify explicit content based on a threshold
def classify_explicit_content(explicit_score, threshold=0.5):
    """Turn a numeric score into an ``'explicit'`` / ``'non-explicit'`` label.

    Args:
        explicit_score: Score to classify.
        threshold: Scores at or above this value are ``'explicit'`` (default 0.5).

    Returns:
        str: ``'explicit'`` or ``'non-explicit'``.
    """
    return 'explicit' if explicit_score >= threshold else 'non-explicit'

# Function to generate word cloud from text
def generate_word_cloud(text):
    """Render a word cloud of ``text`` in the Streamlit page.

    The cloud is drawn on its own figure, which is passed explicitly to
    ``st.pyplot`` and closed afterwards, so no global pyplot state is used
    and figures do not accumulate across reruns.

    Args:
        text: Text to build the cloud from.

    Returns:
        None. A warning is shown instead of a chart when the text contains
        no words a cloud can be built from.
    """
    try:
        wordcloud = WordCloud(width=800, height=400, background_color='white').generate(text)
    except ValueError:
        # WordCloud raises ValueError when no words are left to plot.
        st.warning("Not enough words in the transcript to build a word cloud.")
        return

    fig, ax = plt.subplots(figsize=(10, 5))
    ax.imshow(wordcloud, interpolation='bilinear')
    ax.axis('off')
    st.pyplot(fig)
    plt.close(fig)  # release the figure once Streamlit has rendered it

# Helper: pull the video ID out of the common YouTube URL shapes
def _extract_video_id(video_url):
    """Return the video ID contained in ``video_url``, or ``None`` if none is found.

    Handles ``watch?v=<id>`` links (with or without further query parameters),
    ``youtu.be/<id>`` short links, and ``/shorts/``, ``/embed/``, ``/live/``
    and ``/v/`` paths. Any other string containing ``v=`` falls back to the
    text after it, cut at the next ``&`` or ``#``.
    """
    url = video_url.strip()
    try:
        # Prefix scheme-less input with "//" so the host is still split from the path.
        parsed = urlparse(url if "://" in url else "//" + url)
    except ValueError:
        return None

    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    host = (parsed.hostname or "").lower()
    segments = [segment for segment in parsed.path.split("/") if segment]
    if host.endswith("youtu.be") and segments:
        return segments[0]
    if len(segments) >= 2 and segments[0] in ("shorts", "embed", "live", "v"):
        return segments[1]

    if "v=" in url:
        return url.split("v=")[1].split("&")[0].split("#")[0] or None
    return None


# Helper: render the "About" details of the analysed video
def _show_video_details(video_id, transcript):
    """Show title, description, thumbnail URL and subtitles for ``video_id``."""
    st.subheader(f"Details for Main Video - {video_id}")
    st.write(f"Video ID: {video_id}")
    try:
        video_info = youtube.videos().list(part='snippet', id=video_id).execute()
        items = video_info.get('items') or []
        if items:
            snippet = items[0]['snippet']
            st.write(f"Title: {snippet['title']}")
            st.write(f"Description: {snippet['description']}")
            st.write(f"Thumbnail URL: {snippet['thumbnails']['default']['url']}")
        else:
            st.warning("No video details were returned for this video.")
    except Exception as exc:
        # Report API failures (e.g. a missing or invalid key) in the page
        # instead of aborting after the analysis has already been shown.
        st.error(f"Could not load video details from the YouTube Data API: {exc}")
    st.write(f"Subtitles: {transcript}")


# Function to analyze and plot various aspects of a YouTube video
def analyze_and_plot(video_url, language):
    """Analyse a video's subtitles and render the results in the Streamlit page.

    Steps: extract the video ID, fetch the transcript, classify each
    ``'.'``-separated sentence by sentiment, count per-word sentiment, score
    explicit content, draw the charts and word cloud, then show video details.

    Args:
        video_url: YouTube video link entered by the user.
        language: Subtitle language code to request.

    Returns:
        None. Problems (no video ID, transcript unavailable or empty) are
        reported in the page and end the analysis early.
    """
    # Extract video ID from the YouTube link
    video_id = _extract_video_id(video_url)
    if not video_id:
        st.error("Could not find a video ID in the URL. Please enter a valid YouTube video link.")
        return

    try:
        transcript = generate_transcript(video_id, language)
    except Exception as exc:
        # Any fetch failure is shown to the user rather than raised.
        st.error(f"Could not fetch '{language}' subtitles for video {video_id}: {exc}")
        return

    if not transcript.strip():
        st.warning("The transcript for this video is empty; nothing to analyze.")
        return

    # Analyze sentiment for the main video
    sentiments = {'positive': 0, 'neutral': 0, 'negative': 0}
    negative_words_count = 0

    # Analyze explicit content for the main video
    explicit_content_score = analyze_explicit_content(transcript)

    # Classify explicit content
    explicit_class = classify_explicit_content(explicit_content_score)

    # Loop through each sentence in the transcript
    for sentence in transcript.split('.'):
        if not sentence.strip():
            # Blank fragments (e.g. after a trailing '.') are not sentences.
            continue
        sentiment = analyze_sentiment(sentence)
        sentiments[sentiment] += 1

        # Counts every word of a sentence classified as negative
        if sentiment == 'negative':
            negative_words_count += len(sentence.split())

    # Display results
    st.header("YouTube Video Classification Results")
    st.subheader(f"Results for Video: {video_id}")

    # Classification Tab
    st.write("## Classification")

    # Sentiment Analysis
    st.subheader("Sentiment Analysis:")
    st.write(sentiments)
    # Plot pie chart for sentiment distribution of the main video
    if any(sentiments.values()):
        fig_pie = plot_pie_chart(list(sentiments.keys()), list(sentiments.values()),
                                 f'Sentiment Distribution - {video_id}')
        st.pyplot(fig_pie)
        plt.close(fig_pie)
    else:
        st.info("No sentences were found to chart.")

    # Sentiment Word Counts
    st.subheader("Sentiment Word Counts:")
    sentiment_words_count = count_sentiment_words(transcript)
    st.write(sentiment_words_count)
    # Plot bar chart for sentiment word counts of the main video
    fig_bar = plot_bar_chart(sentiment_words_count, f'Sentiment Word Counts - {video_id}', 'Sentiment', 'Word Count')
    st.pyplot(fig_bar)
    plt.close(fig_bar)

    # Total Number of Negative Words
    st.subheader(f"Total Number of Negative Words - {video_id}")
    st.write(negative_words_count)

    # Explicit Content Analysis
    st.subheader(f"Explicit Content Analysis - {video_id}")
    st.write(f"Explicit Content Score: {explicit_content_score}")
    st.write(f"Explicit Content Class: {explicit_class}")

    # Word Cloud
    st.subheader(f"Word Cloud - {video_id}")
    generate_word_cloud(transcript)

    # About Tab
    st.write("## About")
    # Display details of the main video
    _show_video_details(video_id, transcript)

# Streamlit App
def main():
    """Render the app: a title plus the page chosen in the sidebar selector.

    "Home" and "About" show static text. "Classification" asks for a video
    URL and subtitle language and runs ``analyze_and_plot`` when the button
    is pressed; with no URL entered it shows a warning and stops the run.
    """
    st.title("YouTube Video Classification App")

    # Sidebar selector choosing which page to render
    page = st.sidebar.selectbox("Select Tab", ["Home", "Classification", "About"])

    if page == "Home":
        home_paragraphs = (
            "## Welcome to the YouTube Video Classification App!",
            "This app classify YouTube videos based on sentiment and explicit content.",
            "## YouTube's Community Guidelines ",
            "Violent or dangerous content: Hate speech, predatory behavior, graphic violence, malicious attacks, and content that promotes harmful or dangerous behavior isn't allowed on YouTube.",
            "Words that are often flagged in content moderation are ",
            "1. Profanity: Common swear words and offensive language.",
            "2. Hate Speech: Words or phrases that promote discrimination, violence, or hatred towards individuals or groups based on attributes such as race, ethnicity, religion, gender, etc.",
            "3. Harassment: Words that are used to harass or threaten others.",
            "4. Violence: Terms related to violent actions or harm.",
            "5. Explicit Content: Words related to explicit or adult content.",
            " ",
        )
        for paragraph in home_paragraphs:
            st.write(paragraph)
        return

    if page == "About":
        about_paragraphs = (
            "## About the App",
            "This app uses natural language processing techniques to analyze and classify YouTube videos based on sentiment and explicit content.",
            "Built with Streamlit, NLTK, Matplotlib, YouTube API, and Profanity Check.",
            "Team Members: Manigandan, Sai Kiran, Sarinika",
            "Guide: Mr. Muthumani, Dr. M. Deivamani",
        )
        for paragraph in about_paragraphs:
            st.write(paragraph)
        return

    if page != "Classification":
        return

    # The URL is required before the remaining inputs are shown
    video_url = st.text_input("Enter YouTube Video URL:")
    if not video_url:
        st.warning("Please enter a YouTube Video URL.")
        st.stop()

    # Subtitle language to request (add more options here if needed)
    language = st.selectbox("Select Subtitles Language:", ["en", "es", "fr", "de","ta","hi"])

    # Run the analysis only when the button is pressed
    if st.button("Classify Video"):
        analyze_and_plot(video_url, language)

# Entry point: build the page when this file is executed as a script, which is
# how the notebook launches it (`streamlit run app.py`).
if __name__ == "__main__":
    main()


Overwriting app.py


In [16]:
# Start the Streamlit app from app.py in the background, then open a localtunnel
# to port 8501 (presumably the port Streamlit serves on by default; confirm if
# the port is changed).
!streamlit run app.py & npx localtunnel --port 8501