In [3]:
# import dependencies
import pandas as pd
from googleapiclient.discovery import build
from config import api_key
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

In [4]:
# YouTube Data API v3 client, authenticated with the key imported from config
youtube = build(serviceName='youtube', version='v3', developerKey=api_key)
# single VADER analyzer instance, reused by sentiment_analyzer_scores
analyzer = SentimentIntensityAnalyzer()

In [10]:
# load the input table
# NOTE(review): a later cell writes merged_df back to this same path, so a
# re-run reads this notebook's own previous output -- confirm this is intended
df = pd.read_csv(filepath_or_buffer='other_csvs/merged_df.csv')

In [4]:
# keep only rows whose comment count is above zero and renumber them from 0
comment_df = df.loc[df['video_comment_count'] > 0].reset_index(drop=True)

In [5]:
def get_comments(video_id, max_results=20):
    """Fetch the text of a video's top-level comments via the YouTube client.

    Parameters
    ----------
    video_id
        Video identifier sent as ``videoId`` to ``commentThreads().list``.
    max_results : int, optional
        Value sent as ``maxResults``; defaults to 20, the previously
        hard-coded value.

    Returns
    -------
    list or None
        The ``textDisplay`` of each returned thread's top-level comment, in
        response order (an empty list when the response has no items), or
        ``None`` when the request or the parsing of its response raises.
    """
    try:
        response = youtube.commentThreads().list(
            part="snippet",
            videoId=video_id,
            textFormat="plainText",
            maxResults=max_results,
        ).execute()
        return [
            item["snippet"]["topLevelComment"]["snippet"]["textDisplay"]
            for item in response["items"]
        ]
    except Exception:
        # Best-effort lookup: any failure (for example an API error response
        # or an unexpected payload shape) is reported to the caller as None.
        # Catching Exception instead of using a bare `except` lets
        # KeyboardInterrupt / SystemExit propagate, so a long scoring loop
        # can still be interrupted.
        return None

In [6]:
def sentiment_analyzer_scores(test):
    """Return the VADER compound score for a text or a collection of texts.

    Parameters
    ----------
    test : str or iterable
        A single text, or a collection of texts (such as the list returned
        by ``get_comments``). A collection is joined with single spaces and
        scored as one document.

    Returns
    -------
    The ``'compound'`` entry of ``analyzer.polarity_scores``.
    """
    if not isinstance(test, (str, bytes)):
        # polarity_scores is documented for a single string. Joining with a
        # space keeps the last word of one comment from running into the
        # first word of the next.
        test = " ".join(str(part) for part in test)
    return analyzer.polarity_scores(test)['compound']

In [7]:
# Score each distinct video once: a duplicated id would otherwise repeat the
# API request, and a boolean-mask write per video rescans the whole frame on
# every iteration.
sentiment_by_video = {}
for video in comment_df['video_id'].dropna().unique():
    comments = get_comments(video)
    # None means the fetch failed; leave that video unscored
    if comments is not None:
        sentiment_by_video[video] = sentiment_analyzer_scores(comments)

# ids missing from the mapping become NaN
video_scores = comment_df['video_id'].map(sentiment_by_video).astype('float64')
if 'sentiment' in comment_df.columns:
    # keep a previously stored score for videos that could not be fetched now
    video_scores = video_scores.fillna(comment_df['sentiment'])
comment_df['sentiment'] = video_scores

In [8]:
# show the frame with its new sentiment column (the rendered output below is
# truncated to the first and last rows)
comment_df

Unnamed: 0,channel_id,video_title,video_title_clean,video_id,published,video_views,video_madeforkids,video_likes,video_comment_count,video_length,video_description,video_tags,sentiment
0,UCEGGyGmo0NbAPmw1zVNdXbg,Python Flatten 2D Nested List into 1D (100 Pyt...,Python Flatten 2D Nested List into 1D 100 Pyth...,R7EInXxh62k,2022-10-29T16:03:51Z,39,False,2,2,1:18,Python Flatten 2D Nested List into 1D (100 Pyt...,"['python', 'python for beginners', 'python pro...",0.6249
1,UCEGGyGmo0NbAPmw1zVNdXbg,Python Add 2 Lists TASK - (100 Python Coding ...,Python Add 2 Lists TASK 100 Python Coding Exe...,QfJoKGeNlPU,2022-10-27T17:46:15Z,36,False,2,1,31,Python Add 2 Lists TASK - Quick\n#python #pyth...,"['python', 'python for beginners', 'python pro...",0.0000
2,UCEGGyGmo0NbAPmw1zVNdXbg,Python Multiple Arguments *args #shorts #pytho...,Python Multiple Arguments args shorts python p...,SJfYPj5WRkU,2022-10-26T13:03:28Z,290,False,15,2,59,Python Multiple Arguments *args #shorts #pyt...,"['python', 'python for beginners', 'python pro...",0.4199
3,UCEGGyGmo0NbAPmw1zVNdXbg,Python *args in functions #shorts #python #pyt...,Python args in functions shorts python pythonf...,GCzfeBjusAs,2022-10-21T17:22:27Z,2154,False,93,5,59,Python *args in functions #shorts #python #p...,"['python', 'python for beginners', 'python pro...",0.5673
4,UCEGGyGmo0NbAPmw1zVNdXbg,Python String Palindrome or Not #python,Python String Palindrome or Not python,lDpXGU077UE,2022-10-20T14:48:24Z,24,False,0,2,3:48,Python String Palindrome or Not #python #pytho...,"['python', 'python for beginners', 'python pro...",0.3182
...,...,...,...,...,...,...,...,...,...,...,...,...,...
2142,UCC7jlYxfWti7WAW8r7ef1RQ,Citrus colored buildings in Menton in the Fren...,Citrus colored buildings in Menton in the Fren...,KtwVHRPfzWg,2022-04-11T16:00:19Z,1035,False,40,2,8,,,0.4939
2143,UCC7jlYxfWti7WAW8r7ef1RQ,How to visit Calanques National Park in France,How to visit Calanques National Park in France,rC4cHkOjlms,2022-04-10T15:00:26Z,3696,False,51,12,7:47,Calanques National Park in France is a beautif...,"['calanques national park', 'parc national de ...",0.9954
2144,UCC7jlYxfWti7WAW8r7ef1RQ,Southern France Travel Vlog | Eze and Monaco D...,Southern France Travel Vlog Eze and Monaco Day...,LCtBcIvb7J8,2022-04-03T15:00:16Z,3891,False,92,6,6:31,With beautiful weather year round the Southern...,"['southern france travel vlog', 'eze', 'monaco...",0.9763
2145,UCC7jlYxfWti7WAW8r7ef1RQ,How to save money in Paris by using the Paris ...,How to save money in Paris by using the Paris ...,C-34pIsWZPk,2022-03-27T15:00:04Z,4642,False,166,79,4:49,"We all want to save money while traveling, so ...","['how to save money in paris', 'using the pari...",0.9964


In [9]:
# write the scored videos to disk without the row index
comment_df.to_csv(path_or_buf='other_csvs/random_comment_df.csv', index=False)

In [12]:
# read the channel table and expose its 'id' column under the key name
# used by comment_df
channels_df = (
    pd.read_csv('other_csvs/all_channels_df.csv')
    .rename(columns={'id': 'channel_id'})
)

# join channel rows to their scored videos (default inner join on channel_id)
merged_df = channels_df.merge(comment_df, on='channel_id')

In [45]:
# persist the merged table without the row index
# NOTE(review): this is the same path the data-loading cell reads, so the
# write replaces that input file -- confirm this is intended
merged_df.to_csv(path_or_buf='other_csvs/merged_df.csv', index=False)

In [15]:
# list the columns of the merged table to check the result of the join
merged_df.columns

Index(['channel_id', 'title', 'description', 'customUrl', 'publishedAt',
       'thumbnails.default.url', 'defaultLanguage', 'viewCount',
       'subscriberCount', 'videoCount', 'topicIds', 'topicCategories',
       'relatedPlaylists.uploads', 'category_title', 'video_id', 'video_title',
       'video_title_clean', 'published', 'video_views', 'video_madeforkids',
       'video_likes', 'video_comment_count', 'video_length',
       'video_description', 'video_tags', 'sentiment'],
      dtype='object')