In [2]:
import os

from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from langdetect import detect
import pandas as pd
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer


In [3]:
def fetch_youtube_comments(api_key, education_query, max_results=50):
    """Collect English top-level comments for videos matching a search query.

    Runs one YouTube Data API search for ``education_query`` and, for every
    video returned, requests a single page of comment threads. Only comments
    that ``langdetect`` identifies as English are kept.

    Args:
        api_key: Developer key passed to the YouTube Data API v3 client.
        education_query: Search string sent as the ``q`` parameter.
        max_results: Maximum number of videos requested from the search
            endpoint. It is not applied to the comment requests.

    Returns:
        pandas.DataFrame with the columns ``video_id``, ``video_link`` and
        ``comment``. The columns are present even when nothing was collected,
        so callers can group by ``video_id`` without a KeyError.
    """
    columns = ['video_id', 'video_link', 'comment']

    # Create a YouTube API client
    youtube = build('youtube', 'v3', developerKey=api_key)

    comments_dataset = []

    try:
        search_response = youtube.search().list(
            q=education_query,
            type="video",
            part=["snippet"],
            maxResults=max_results,
            relevanceLanguage='en'
        ).execute()
    except HttpError as e:
        # Without search results there is nothing to iterate over.
        print(f"HttpError during search request: {e}")
        return pd.DataFrame(comments_dataset, columns=columns)

    for item in search_response.get('items', []):
        video_id = item['id']['videoId']
        video_link = f'https://www.youtube.com/watch?v={video_id}'

        # Get comments for the video
        comments_request = youtube.commentThreads().list(
            part='snippet',
            videoId=video_id,
            textFormat='plainText'
        )

        try:
            comments_response = comments_request.execute()
        except HttpError as e:
            # A failure for one video must not abort the remaining videos.
            print(f"HttpError during comments request for video {video_id}: {e}")
            continue

        for comment in comments_response.get('items', []):
            comment_text = comment['snippet']['topLevelComment']['snippet']['textDisplay']

            try:
                comment_language = detect(comment_text)
            except Exception:
                # Best effort: skip comments that langdetect cannot process,
                # without swallowing KeyboardInterrupt/SystemExit.
                continue

            if comment_language == 'en':
                comments_dataset.append({'video_id': video_id, 'video_link': video_link, 'comment': comment_text})

    return pd.DataFrame(comments_dataset, columns=columns)

# Read the API key from the YOUTUBE_API_KEY environment variable so the
# credential is never stored in the notebook; a missing variable raises
# KeyError before any request is made.
api_key = os.environ["YOUTUBE_API_KEY"]
education_query = 'java_course_tamil'
df_comments = fetch_youtube_comments(api_key, education_query)

HttpError during comments request:


In [4]:
def analyze_sentiment(df_comments):
    """Score and label the comments of videos that have at least four comments.

    Args:
        df_comments: DataFrame with at least the columns ``video_id`` and
            ``comment``.

    Returns:
        DataFrame with the columns ``video_id``, ``comment``,
        ``sentiment_score`` (the analyzer's ``compound`` value) and
        ``sentiment_label`` (``'Positive'``, ``'Neutral'`` or ``'Negative'``),
        restricted to videos with four or more rows in ``df_comments``.
    """
    # Keep only the rows of videos with four or more comments.
    well_commented = df_comments.groupby('video_id').filter(lambda group: len(group) >= 4)

    vader = SentimentIntensityAnalyzer()

    def compound_of(text):
        # Only the aggregate "compound" value of the polarity scores is used.
        return vader.polarity_scores(text)['compound']

    def label_for(compound_score):
        # >= 0.05 is positive, strictly inside (-0.05, 0.05) is neutral,
        # everything else is negative.
        if compound_score >= 0.05:
            return 'Positive'
        if abs(compound_score) < 0.05:
            return 'Neutral'
        return 'Negative'

    scored = well_commented.assign(
        sentiment_score=lambda frame: frame['comment'].apply(compound_of),
        sentiment_label=lambda frame: frame['sentiment_score'].apply(label_for),
    )

    return scored[['video_id', 'comment', 'sentiment_score', 'sentiment_label']]
    
# Per-comment sentiment scores and labels for the fetched comments.
grouped_df = analyze_sentiment(df_comments)

In [5]:
def calculate_final_sentiment(grouped_df):
    """Aggregate per-comment sentiment into one verdict per video.

    The input frame is not modified.

    Args:
        grouped_df: DataFrame with the columns ``video_id``,
            ``sentiment_score`` and ``sentiment_label``.

    Returns:
        DataFrame indexed by ``video_id`` with the columns
        ``positive_score`` (sum of the scores of comments labelled
        ``'Positive'``), ``negative_score`` (sum of the magnitudes of the
        scores of comments labelled ``'Negative'``) and ``final_sentiment``
        (``'Positive'`` when ``positive_score`` is strictly greater than
        ``negative_score``, otherwise ``'Negative'``). An empty input yields
        an empty frame with the same columns.
    """
    scores = grouped_df['sentiment_score'].astype(float)
    labels = grouped_df['sentiment_label']

    # Build the per-comment contributions in a separate frame so the caller's
    # DataFrame does not gain extra columns. Negative scores are negated
    # before summing so the total is a magnitude and never "-0.0".
    contributions = pd.DataFrame({
        'video_id': grouped_df['video_id'],
        'positive_score': scores.where(labels == 'Positive', 0.0),
        'negative_score': (-scores).where(labels == 'Negative', 0.0),
    })

    video_sentiment_scores = contributions.groupby('video_id')[['positive_score', 'negative_score']].sum()

    # Column-wise comparison instead of a row-wise apply: it also works when
    # there are no videos at all. Ties (including 0 vs 0) stay 'Negative'.
    is_positive = video_sentiment_scores['positive_score'] > video_sentiment_scores['negative_score']
    video_sentiment_scores['final_sentiment'] = 'Negative'
    video_sentiment_scores.loc[is_positive, 'final_sentiment'] = 'Positive'

    return video_sentiment_scores[['positive_score', 'negative_score', 'final_sentiment']]
    
# Per-video positive/negative totals and the resulting verdict.
video_sentiment_scores = calculate_final_sentiment(grouped_df)

In [6]:
def selection_positive_comments_video(video_sentiment_scores):
    """Return watch links for the videos whose final sentiment is positive.

    Args:
        video_sentiment_scores: DataFrame indexed by video id with the
            columns ``positive_score``, ``negative_score`` and
            ``final_sentiment``.

    Returns:
        Series named ``link``, indexed by video id and ordered by
        ``positive_score`` from highest to lowest, holding one YouTube watch
        URL per video whose ``final_sentiment`` is ``'Positive'``.
    """
    def to_watch_url(video_id):
        return f'https://www.youtube.com/watch?v={video_id}'

    ranked_links = (
        video_sentiment_scores
        .drop(columns=['negative_score'])
        .loc[lambda frame: frame['final_sentiment'] == 'Positive']
        .sort_values(by='positive_score', ascending=False)
        .assign(link=lambda frame: frame.index.map(to_watch_url))
    )

    return ranked_links['link']
    
# Watch links of the positively rated videos, highest positive score first.
filtered_videos = selection_positive_comments_video(video_sentiment_scores)

# Show the links, indexed by video id.
print(filtered_videos)

video_id
BGTx91t8q50    https://www.youtube.com/watch?v=BGTx91t8q50
F3bELzeDa4U    https://www.youtube.com/watch?v=F3bELzeDa4U
jmHabsMagFc    https://www.youtube.com/watch?v=jmHabsMagFc
7Io11L9sPgk    https://www.youtube.com/watch?v=7Io11L9sPgk
XLnimroGCIg    https://www.youtube.com/watch?v=XLnimroGCIg
qOWPCPCDRZs    https://www.youtube.com/watch?v=qOWPCPCDRZs
toymwoKBtbM    https://www.youtube.com/watch?v=toymwoKBtbM
55lEuBLBTB8    https://www.youtube.com/watch?v=55lEuBLBTB8
EmxzVY-h_bo    https://www.youtube.com/watch?v=EmxzVY-h_bo
5vziI8ixDro    https://www.youtube.com/watch?v=5vziI8ixDro
T48rcoLss0I    https://www.youtube.com/watch?v=T48rcoLss0I
3FP1XbVVn0M    https://www.youtube.com/watch?v=3FP1XbVVn0M
udZJxhK28L8    https://www.youtube.com/watch?v=udZJxhK28L8
Zh11XoxDglg    https://www.youtube.com/watch?v=Zh11XoxDglg
BIOYwqmcDNk    https://www.youtube.com/watch?v=BIOYwqmcDNk
Xn6oijwoEPI    https://www.youtube.com/watch?v=Xn6oijwoEPI
_becmw4khX4    https://www.youtube.com/watch?v=