In [None]:
import html
import json
import os
import re
import urllib
import urllib.request

import demoji
import googleapiclient.discovery
import matplotlib.pyplot as plt
import nltk
import pandas as pd
import sklearn.metrics as metrics
from langdetect import DetectorFactory, detect
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.model_selection import train_test_split
from textblob import TextBlob, Word, Blobber
from textblob.classifiers import NaiveBayesClassifier
from textblob.taggers import NLTKTagger

In [None]:
def get_all_video_objects(channel_id):
    """Return the videos found for a channel via the YouTube Data API v3 search endpoint.

    Requests results 25 at a time, ordered by date, and keeps following
    ``nextPageToken`` until a response no longer carries one. Search results
    whose kind is not ``youtube#video`` are skipped.

    Reads the module-level ``api_key``.

    Args:
        channel_id: Id of the YouTube channel to list.

    Returns:
        list[dict]: One ``{'video_title': ..., 'video_id': ...}`` dict per
        video, in the order the API returned them.

    Raises:
        Any error raised by ``urllib.request.urlopen`` or ``json.load`` is
        propagated to the caller.
    """
    # Local import keeps the block self-contained; urlencode escapes the
    # parameter values instead of pasting them into the URL verbatim.
    from urllib.parse import urlencode

    base_search_url = 'https://www.googleapis.com/youtube/v3/search?'
    query = {
        'key': api_key,
        'channelId': channel_id,
        'part': 'snippet,id',
        'order': 'date',
        'maxResults': 25,
    }

    video_object_list = []
    while True:
        url = base_search_url + urlencode(query)
        # Close the HTTP response deterministically once it has been parsed.
        with urllib.request.urlopen(url) as inp:
            resp = json.load(inp)

        for item in resp.get('items', []):
            if item['id']['kind'] == "youtube#video":
                video_object_list.append({
                    'video_title': item['snippet']['title'],
                    'video_id': item['id']['videoId']
                })

        # A missing token marks the last page; an explicit lookup replaces the
        # bare ``except`` that previously doubled as loop control.
        next_page_token = resp.get('nextPageToken')
        if not next_page_token:
            break
        query['pageToken'] = next_page_token
    return video_object_list

In [None]:
def _fetch_comment_objects(youtube, video_id):
    """Return every top-level comment of one video as a list of dicts.

    Each dict has the keys ``comment``, ``comment_id`` and ``like_count``.
    Pages are requested with ``order='relevance'`` until a response carries no
    ``nextPageToken``.
    """
    comment_object_list = []
    page_token = None
    while True:
        list_kwargs = {
            'part': 'snippet',
            'order': 'relevance',
            'videoId': video_id
        }
        # The first request is sent without a page token, as before.
        if page_token:
            list_kwargs['pageToken'] = page_token
        response = youtube.commentThreads().list(**list_kwargs).execute()

        for comment_details in response['items']:
            top_level = comment_details['snippet']['topLevelComment']
            comment_object_list.append({
                'comment': top_level['snippet']['textDisplay'],
                'comment_id': top_level['id'],
                'like_count': top_level['snippet']['likeCount']
            })

        page_token = response.get('nextPageToken')
        if not page_token:
            break
    return comment_object_list


def get_all_comment_objects(video_object_list):
    """Fetch the top-level comments of every video and flatten them into one list.

    Reads the module-level ``api_key`` and sets the environment variable
    ``OAUTHLIB_INSECURE_TRANSPORT`` to ``'1'`` (kept from the original code).

    Args:
        video_object_list: Iterable of dicts with ``video_title`` and
            ``video_id`` keys, as produced by ``get_all_video_objects``.

    Returns:
        list[dict]: One dict per comment with the keys ``video_title``,
        ``video_id``, ``comment_id``, ``comment`` and ``comment_like_count``.

    Raises:
        Errors raised by ``request.execute()`` are not caught here and abort
        the whole run.
        NOTE(review): a video with comments disabled presumably makes the API
        call fail - confirm and decide whether such videos should be skipped.
    """
    if not video_object_list:
        return []

    # Loop-invariant setup: one API client serves every video.
    os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
    api_service_name = 'youtube'
    api_version = 'v3'
    youtube = googleapiclient.discovery.build(
        api_service_name,
        api_version,
        developerKey=api_key
    )

    new_video_object_list = []
    for video in video_object_list:
        for comment in _fetch_comment_objects(youtube, video['video_id']):
            new_video_object_list.append({
                'video_title': video['video_title'],
                'video_id': video['video_id'],
                'comment_id': comment['comment_id'],
                'comment': comment['comment'],
                'comment_like_count': comment['like_count']
            })

    return new_video_object_list

In [None]:
# Read the YouTube Data API key from the YOUTUBE_API_KEY environment variable so
# the secret never has to be saved inside the notebook. The original placeholder
# is kept as the fallback when the variable is not set.
api_key = os.environ.get('YOUTUBE_API_KEY', '#enter your api key here')

In [None]:
# Id of the YouTube channel passed to get_all_video_objects below.
# The commented-out id is an alternative channel kept for reference.
# channel_id = 'UCArmutk8nAbYQdaYzgqKOwA'
channel_id = 'UCOGxnF5dWQtoEz6E-msabZA'

In [None]:
# Fetch the channel's video objects (title + id) and print one dict per line.
video_list = get_all_video_objects(channel_id)
for video in video_list:
    print(video)

{'video_title': 'Kaori Sakamoto wins NHK Trophy, Mana Kawabe silver, Young You bronze | THAT FIGURE SKATING SHOW', 'video_id': 'Z3Ss0bD2B8c'}
{'video_title': 'World Cup Qualifying: What to expect from Canada vs. Mexico', 'video_id': '8xFk5TicFN8'}
{'video_title': 'Shoma Uno takes gold at NHK Trophy, Yuzuru Hanyu missing in Japan | THAT FIGURE SKATING SHOW', 'video_id': 'XFWDthVyKt0'}
{'video_title': 'Sinitsina &amp; Katsalapov win NHK Trophy, Chock &amp; Bates, Takahashi | THAT FIGURE SKATING SHOW', 'video_id': 'EQGbo3rGCkM'}
{'video_title': 'If So, Do So - The Journey of Olympic Bobsledder Christopher Spring | Sports Documentary', 'video_id': 'OrbLUaL3E4I'}
{'video_title': 'Behind the Scenes of World Cup Qualifying: Canada vs. Costa Rica', 'video_id': 'CLYzx5nFf8g'}
{'video_title': 'Alphonso Davies&#39;s magical goal has filled Canadian soccer fans with a new belief', 'video_id': 'QwixhZOh4dA'}
{'video_title': 'Kingston Frontenacs’ Shane Wright on OHL return to play, Team Canada, and 

In [None]:
# Fetch the comments of every listed video; each element is one flat dict that
# combines the video fields with a single comment. Prints one dict per line.
combined_list = get_all_comment_objects(video_list)
for x in combined_list:
    print(x)

{'video_title': 'Kaori Sakamoto wins NHK Trophy, Mana Kawabe silver, Young You bronze | THAT FIGURE SKATING SHOW', 'video_id': 'Z3Ss0bD2B8c', 'comment_id': 'Ugwv_uZozXHCgj1zN-l4AaABAg', 'comment': 'If a skater receives a ! or an e on a jump [like Kaori gets on her lutz] can they still receive positive GOE on it? Or does the +GOE go away?', 'comment_like_count': 0}
{'video_title': 'Kaori Sakamoto wins NHK Trophy, Mana Kawabe silver, Young You bronze | THAT FIGURE SKATING SHOW', 'video_id': 'Z3Ss0bD2B8c', 'comment_id': 'Ugxbjh7VDK176jFRLcd4AaABAg', 'comment': 'Really hope all who are injured heal well and are healthy.', 'comment_like_count': 0}
{'video_title': 'Kaori Sakamoto wins NHK Trophy, Mana Kawabe silver, Young You bronze | THAT FIGURE SKATING SHOW', 'video_id': 'Z3Ss0bD2B8c', 'comment_id': 'UgwHouLhKYPjhceDTiZ4AaABAg', 'comment': 'У Каори Сакомото заслуженное золото. Она не односезонная, сколько лет она борется с односезонками.', 'comment_like_count': 1}
{'video_title': 'World Cu

{'video_title': 'Songs To NEVER Skate To Again ft. Moulin Rouge, Romeo &amp; Juliet, Swan Lake', 'video_id': 'Ax7dQhtX2C0', 'comment_id': 'Ugy1g-jSxI0z4cSas254AaABAg', 'comment': 'Swan Lake , Phantom of the  Opera, Turandot, Romeo and Juliet, etc....', 'comment_like_count': 9}
{'video_title': 'Songs To NEVER Skate To Again ft. Moulin Rouge, Romeo &amp; Juliet, Swan Lake', 'video_id': 'Ax7dQhtX2C0', 'comment_id': 'Ugw94zk4-gnawFW1rHt4AaABAg', 'comment': 'My mind is completely blanking on the name but there is one piece of music that russian skater in all diciplines have been skating to lately.  It needs to stop. I&#39;m also over, Je Suie Malade, Celine Dion, Muse, anything by angsty female pop songs, Buble, and The Prayer', 'comment_like_count': 3}
{'video_title': 'Songs To NEVER Skate To Again ft. Moulin Rouge, Romeo &amp; Juliet, Swan Lake', 'video_id': 'Ax7dQhtX2C0', 'comment_id': 'Ugyq_8YjiiTyGofh5Jx4AaABAg', 'comment': 'Don&#39;t get me started.  Women&#39;s SP at Nationals this y

{'video_title': 'Nadia Comaneci Reflects on her Perfect 10, 41 Years Later', 'video_id': 'huXxqrC6WDw', 'comment_id': 'UgzQXDP7rcBT5k_vnN94AaABAg', 'comment': 'She is the greatest Gymnastic ever.', 'comment_like_count': 1}
{'video_title': 'Nadia Comaneci Reflects on her Perfect 10, 41 Years Later', 'video_id': 'huXxqrC6WDw', 'comment_id': 'Ugxra0H1MRWJU0FtclB4AaABAg', 'comment': 'Totally awsome,aboslutely perfect.', 'comment_like_count': 0}
{'video_title': 'Nadia Comaneci Reflects on her Perfect 10, 41 Years Later', 'video_id': 'huXxqrC6WDw', 'comment_id': 'UgzbKv7E5Z7lYq5xphJ4AaABAg', 'comment': 'Oh wow, i remember watching her from tv back then', 'comment_like_count': 0}
{'video_title': 'Nadia Comaneci Reflects on her Perfect 10, 41 Years Later', 'video_id': 'huXxqrC6WDw', 'comment_id': 'Ugzb7fMSyTyEvD1yvyF4AaABAg', 'comment': 'One word...legend', 'comment_like_count': 1}
{'video_title': 'Nadia Comaneci Reflects on her Perfect 10, 41 Years Later', 'video_id': 'huXxqrC6WDw', 'comment_

In [None]:
# Pivot the per-comment records into one list per field (column-oriented),
# then bundle the lists under their column names.
video_title = [record['video_title'] for record in combined_list]
video_id = [record['video_id'] for record in combined_list]
comment_id = [record['comment_id'] for record in combined_list]
comment = [record['comment'] for record in combined_list]
comment_like_count = [record['comment_like_count'] for record in combined_list]

output_dict = dict(
    video_title=video_title,
    video_id=video_id,
    comment_id=comment_id,
    comment=comment,
    comment_like_count=comment_like_count,
)

In [None]:
# Build the DataFrame with its columns in the same order as the dict's keys.
column_order = list(output_dict)
output_df = pd.DataFrame(data=output_dict, columns=column_order)
output_df

Unnamed: 0,video_title,video_id,comment_id,comment,comment_like_count
0,"Kaori Sakamoto wins NHK Trophy, Mana Kawabe si...",Z3Ss0bD2B8c,Ugwv_uZozXHCgj1zN-l4AaABAg,If a skater receives a ! or an e on a jump [li...,0
1,"Kaori Sakamoto wins NHK Trophy, Mana Kawabe si...",Z3Ss0bD2B8c,Ugxbjh7VDK176jFRLcd4AaABAg,Really hope all who are injured heal well and ...,0
2,"Kaori Sakamoto wins NHK Trophy, Mana Kawabe si...",Z3Ss0bD2B8c,UgwHouLhKYPjhceDTiZ4AaABAg,У Каори Сакомото заслуженное золото. Она не од...,1
3,World Cup Qualifying: What to expect from Cana...,8xFk5TicFN8,UgwUOQdkZ937AYHyzy14AaABAg,It&#39;s going to be -10 or lower for Tuesday ...,13
4,World Cup Qualifying: What to expect from Cana...,8xFk5TicFN8,UgxBR0QU7ginLxzF4hx4AaABAg,-16 at game time! We need to take advantage of...,13
...,...,...,...,...,...
8870,A Tour of the Mattamy National Cycling Centre ...,LliIdJVgVAA,UgzQg6--OmyOAzgdf9V4AaABAg,Great to see that Ontario residents are making...,0
8871,A Tour of the Mattamy National Cycling Centre ...,LliIdJVgVAA,Ugiiv0lPeCzEh3gCoAEC,I wonder if it would be faster to have a 3-d p...,0
8872,A Tour of the Mattamy National Cycling Centre ...,LliIdJVgVAA,Uggg0xfiIAMfEngCoAEC,"Like can&#39;t you see I am taking a nap, like...",2
8873,A Tour of the Mattamy National Cycling Centre ...,LliIdJVgVAA,UggC1z2HQBpkLHgCoAEC,2nd,0


In [None]:
# Work on a copy: the cleaning cells below assign columns on `data` in place, and
# a plain alias would silently overwrite the raw `output_df` as well.
data = output_df.copy()

In [None]:
# Removing Emojis
def _strip_emojis(text):
    """Return `text` with every emoji replaced by an empty string."""
    return demoji.replace(text, '')


for text_column in ('video_title', 'comment'):
    data[text_column] = data[text_column].apply(_strip_emojis)
data

Unnamed: 0,video_title,video_id,comment_id,comment,comment_like_count
0,"Kaori Sakamoto wins NHK Trophy, Mana Kawabe si...",Z3Ss0bD2B8c,Ugwv_uZozXHCgj1zN-l4AaABAg,If a skater receives a ! or an e on a jump [li...,0
1,"Kaori Sakamoto wins NHK Trophy, Mana Kawabe si...",Z3Ss0bD2B8c,Ugxbjh7VDK176jFRLcd4AaABAg,Really hope all who are injured heal well and ...,0
2,"Kaori Sakamoto wins NHK Trophy, Mana Kawabe si...",Z3Ss0bD2B8c,UgwHouLhKYPjhceDTiZ4AaABAg,У Каори Сакомото заслуженное золото. Она не од...,1
3,World Cup Qualifying: What to expect from Cana...,8xFk5TicFN8,UgwUOQdkZ937AYHyzy14AaABAg,It&#39;s going to be -10 or lower for Tuesday ...,13
4,World Cup Qualifying: What to expect from Cana...,8xFk5TicFN8,UgxBR0QU7ginLxzF4hx4AaABAg,-16 at game time! We need to take advantage of...,13
...,...,...,...,...,...
8870,A Tour of the Mattamy National Cycling Centre ...,LliIdJVgVAA,UgzQg6--OmyOAzgdf9V4AaABAg,Great to see that Ontario residents are making...,0
8871,A Tour of the Mattamy National Cycling Centre ...,LliIdJVgVAA,Ugiiv0lPeCzEh3gCoAEC,I wonder if it would be faster to have a 3-d p...,0
8872,A Tour of the Mattamy National Cycling Centre ...,LliIdJVgVAA,Uggg0xfiIAMfEngCoAEC,"Like can&#39;t you see I am taking a nap, like...",2
8873,A Tour of the Mattamy National Cycling Centre ...,LliIdJVgVAA,UggC1z2HQBpkLHgCoAEC,2nd,0


In [None]:
# Language Detection
# langdetect is non-deterministic by default; fixing the seed makes the detected
# language of a given comment repeatable from run to run.
DetectorFactory.seed = 0


def try_detect(comment):
    """Return the language code langdetect reports for `comment`.

    Args:
        comment: Text to classify.

    Returns:
        str: The code returned by ``detect`` (e.g. 'en'), or the sentinel
        'error' when detection raises.
    """
    try:
        return detect(comment)
    except Exception:
        # Deliberate best effort: a comment that cannot be classified is tagged
        # rather than aborting the run. Catching Exception (not a bare except)
        # still lets KeyboardInterrupt stop a long run.
        return 'error'


data['language'] = data['comment'].apply(try_detect)
data

Unnamed: 0,video_title,video_id,comment_id,comment,comment_like_count,language
0,"Kaori Sakamoto wins NHK Trophy, Mana Kawabe si...",Z3Ss0bD2B8c,Ugwv_uZozXHCgj1zN-l4AaABAg,If a skater receives a ! or an e on a jump [li...,0,en
1,"Kaori Sakamoto wins NHK Trophy, Mana Kawabe si...",Z3Ss0bD2B8c,Ugxbjh7VDK176jFRLcd4AaABAg,Really hope all who are injured heal well and ...,0,en
2,"Kaori Sakamoto wins NHK Trophy, Mana Kawabe si...",Z3Ss0bD2B8c,UgwHouLhKYPjhceDTiZ4AaABAg,У Каори Сакомото заслуженное золото. Она не од...,1,ru
3,World Cup Qualifying: What to expect from Cana...,8xFk5TicFN8,UgwUOQdkZ937AYHyzy14AaABAg,It&#39;s going to be -10 or lower for Tuesday ...,13,en
4,World Cup Qualifying: What to expect from Cana...,8xFk5TicFN8,UgxBR0QU7ginLxzF4hx4AaABAg,-16 at game time! We need to take advantage of...,13,en
...,...,...,...,...,...,...
8870,A Tour of the Mattamy National Cycling Centre ...,LliIdJVgVAA,UgzQg6--OmyOAzgdf9V4AaABAg,Great to see that Ontario residents are making...,0,en
8871,A Tour of the Mattamy National Cycling Centre ...,LliIdJVgVAA,Ugiiv0lPeCzEh3gCoAEC,I wonder if it would be faster to have a 3-d p...,0,en
8872,A Tour of the Mattamy National Cycling Centre ...,LliIdJVgVAA,Uggg0xfiIAMfEngCoAEC,"Like can&#39;t you see I am taking a nap, like...",2,en
8873,A Tour of the Mattamy National Cycling Centre ...,LliIdJVgVAA,UggC1z2HQBpkLHgCoAEC,2nd,0,sq


In [None]:
# Extracting the english comments only
# Keep the rows tagged 'en', then discard the helper column.
is_english = data['language'] == 'en'
data = data.loc[is_english].drop(columns=['language'])
data

Unnamed: 0,video_title,video_id,comment_id,comment,comment_like_count
0,"Kaori Sakamoto wins NHK Trophy, Mana Kawabe si...",Z3Ss0bD2B8c,Ugwv_uZozXHCgj1zN-l4AaABAg,If a skater receives a ! or an e on a jump [li...,0
1,"Kaori Sakamoto wins NHK Trophy, Mana Kawabe si...",Z3Ss0bD2B8c,Ugxbjh7VDK176jFRLcd4AaABAg,Really hope all who are injured heal well and ...,0
3,World Cup Qualifying: What to expect from Cana...,8xFk5TicFN8,UgwUOQdkZ937AYHyzy14AaABAg,It&#39;s going to be -10 or lower for Tuesday ...,13
4,World Cup Qualifying: What to expect from Cana...,8xFk5TicFN8,UgxBR0QU7ginLxzF4hx4AaABAg,-16 at game time! We need to take advantage of...,13
5,World Cup Qualifying: What to expect from Cana...,8xFk5TicFN8,UgxNGWxaImsEzo2IdA54AaABAg,Pitch is for ️ even though the fans are grea...,3
...,...,...,...,...,...
8868,Mark McMorris&#39;s next stop: a 2018 Olympics...,fIaWgbHdwIU,UghXMHuU8pmGu3gCoAEC,i had a seizure from this,7
8869,Mark McMorris&#39;s next stop: a 2018 Olympics...,fIaWgbHdwIU,Uggd8cxZW0BmrngCoAEC,the video effects have ruined this video,3
8870,A Tour of the Mattamy National Cycling Centre ...,LliIdJVgVAA,UgzQg6--OmyOAzgdf9V4AaABAg,Great to see that Ontario residents are making...,0
8871,A Tour of the Mattamy National Cycling Centre ...,LliIdJVgVAA,Ugiiv0lPeCzEh3gCoAEC,I wonder if it would be faster to have a 3-d p...,0


In [None]:
# Removing Special Characters
# The scraped titles and comments are HTML-escaped (e.g. "&#39;" for an
# apostrophe, "&amp;" for "&"). Entities are decoded first; otherwise the regex
# below strips only the punctuation and leaves junk tokens such as "39" or "amp".
regex = r'[^0-9A-Za-z\t]'


def _clean_text(text):
    """Decode HTML entities, then replace every character matched by `regex` with a space."""
    return re.sub(regex, ' ', html.unescape(text))


comment = data['comment'].apply(_clean_text)
video_title = data['video_title'].apply(_clean_text)

# Drop the raw columns and append the cleaned ones, so the resulting column
# order is the same as before (comment, then video_title, at the end).
data = data.drop(columns=['comment', 'video_title']).assign(
    comment=comment,
    video_title=video_title,
)

data

Unnamed: 0,video_id,comment_id,comment_like_count,comment,video_title
0,Z3Ss0bD2B8c,Ugwv_uZozXHCgj1zN-l4AaABAg,0,If a skater receives a or an e on a jump li...,Kaori Sakamoto wins NHK Trophy Mana Kawabe si...
1,Z3Ss0bD2B8c,Ugxbjh7VDK176jFRLcd4AaABAg,0,Really hope all who are injured heal well and ...,Kaori Sakamoto wins NHK Trophy Mana Kawabe si...
3,8xFk5TicFN8,UgwUOQdkZ937AYHyzy14AaABAg,13,It 39 s going to be 10 or lower for Tuesday ...,World Cup Qualifying What to expect from Cana...
4,8xFk5TicFN8,UgxBR0QU7ginLxzF4hx4AaABAg,13,16 at game time We need to take advantage of...,World Cup Qualifying What to expect from Cana...
5,8xFk5TicFN8,UgxNGWxaImsEzo2IdA54AaABAg,3,Pitch is for even though the fans are grea...,World Cup Qualifying What to expect from Cana...
...,...,...,...,...,...
8868,fIaWgbHdwIU,UghXMHuU8pmGu3gCoAEC,7,i had a seizure from this,Mark McMorris 39 s next stop a 2018 Olympics...
8869,fIaWgbHdwIU,Uggd8cxZW0BmrngCoAEC,3,the video effects have ruined this video,Mark McMorris 39 s next stop a 2018 Olympics...
8870,LliIdJVgVAA,UgzQg6--OmyOAzgdf9V4AaABAg,0,Great to see that Ontario residents are making...,A Tour of the Mattamy National Cycling Centre ...
8871,LliIdJVgVAA,Ugiiv0lPeCzEh3gCoAEC,0,I wonder if it would be faster to have a 3 d p...,A Tour of the Mattamy National Cycling Centre ...


In [None]:
def _comment_polarity(text):
    """Return the TextBlob sentiment polarity of `text`."""
    return TextBlob(text).sentiment.polarity


# Score every comment; the result is stored in a new 'polarity' column.
data['polarity'] = data['comment'].apply(_comment_polarity)
data

Unnamed: 0,video_id,comment_id,comment_like_count,comment,video_title,polarity
0,Z3Ss0bD2B8c,Ugwv_uZozXHCgj1zN-l4AaABAg,0,If a skater receives a or an e on a jump li...,Kaori Sakamoto wins NHK Trophy Mana Kawabe si...,0.227273
1,Z3Ss0bD2B8c,Ugxbjh7VDK176jFRLcd4AaABAg,0,Really hope all who are injured heal well and ...,Kaori Sakamoto wins NHK Trophy Mana Kawabe si...,0.350000
3,8xFk5TicFN8,UgwUOQdkZ937AYHyzy14AaABAg,13,It 39 s going to be 10 or lower for Tuesday ...,World Cup Qualifying What to expect from Cana...,0.000000
4,8xFk5TicFN8,UgxBR0QU7ginLxzF4hx4AaABAg,13,16 at game time We need to take advantage of...,World Cup Qualifying What to expect from Cana...,-0.400000
5,8xFk5TicFN8,UgxNGWxaImsEzo2IdA54AaABAg,3,Pitch is for even though the fans are grea...,World Cup Qualifying What to expect from Cana...,0.650000
...,...,...,...,...,...,...
8868,fIaWgbHdwIU,UghXMHuU8pmGu3gCoAEC,7,i had a seizure from this,Mark McMorris 39 s next stop a 2018 Olympics...,0.000000
8869,fIaWgbHdwIU,Uggd8cxZW0BmrngCoAEC,3,the video effects have ruined this video,Mark McMorris 39 s next stop a 2018 Olympics...,0.000000
8870,LliIdJVgVAA,UgzQg6--OmyOAzgdf9V4AaABAg,0,Great to see that Ontario residents are making...,A Tour of the Mattamy National Cycling Centre ...,0.533333
8871,LliIdJVgVAA,Ugiiv0lPeCzEh3gCoAEC,0,I wonder if it would be faster to have a 3 d p...,A Tour of the Mattamy National Cycling Centre ...,0.000000


In [None]:
def _polarity_label(score):
    """Map a polarity score to 1 when it is >= 0 (zero included), otherwise to -1."""
    if score >= 0:
        return 1
    return -1


# Binary label derived from the polarity score.
data['polarity_category'] = data['polarity'].apply(_polarity_label)
data

Unnamed: 0,video_id,comment_id,comment_like_count,comment,video_title,polarity,polarity_category
0,Z3Ss0bD2B8c,Ugwv_uZozXHCgj1zN-l4AaABAg,0,If a skater receives a or an e on a jump li...,Kaori Sakamoto wins NHK Trophy Mana Kawabe si...,0.227273,1
1,Z3Ss0bD2B8c,Ugxbjh7VDK176jFRLcd4AaABAg,0,Really hope all who are injured heal well and ...,Kaori Sakamoto wins NHK Trophy Mana Kawabe si...,0.350000,1
3,8xFk5TicFN8,UgwUOQdkZ937AYHyzy14AaABAg,13,It 39 s going to be 10 or lower for Tuesday ...,World Cup Qualifying What to expect from Cana...,0.000000,1
4,8xFk5TicFN8,UgxBR0QU7ginLxzF4hx4AaABAg,13,16 at game time We need to take advantage of...,World Cup Qualifying What to expect from Cana...,-0.400000,-1
5,8xFk5TicFN8,UgxNGWxaImsEzo2IdA54AaABAg,3,Pitch is for even though the fans are grea...,World Cup Qualifying What to expect from Cana...,0.650000,1
...,...,...,...,...,...,...,...
8868,fIaWgbHdwIU,UghXMHuU8pmGu3gCoAEC,7,i had a seizure from this,Mark McMorris 39 s next stop a 2018 Olympics...,0.000000,1
8869,fIaWgbHdwIU,Uggd8cxZW0BmrngCoAEC,3,the video effects have ruined this video,Mark McMorris 39 s next stop a 2018 Olympics...,0.000000,1
8870,LliIdJVgVAA,UgzQg6--OmyOAzgdf9V4AaABAg,0,Great to see that Ontario residents are making...,A Tour of the Mattamy National Cycling Centre ...,0.533333,1
8871,LliIdJVgVAA,Ugiiv0lPeCzEh3gCoAEC,0,I wonder if it would be faster to have a 3 d p...,A Tour of the Mattamy National Cycling Centre ...,0.000000,1


In [None]:
# Lower Case conversion of video titles and comments
for text_column in ('video_title', 'comment'):
    data[text_column] = data[text_column].str.lower()
data

Unnamed: 0,video_id,comment_id,comment_like_count,comment,video_title,polarity,polarity_category
0,Z3Ss0bD2B8c,Ugwv_uZozXHCgj1zN-l4AaABAg,0,if a skater receives a or an e on a jump li...,kaori sakamoto wins nhk trophy mana kawabe si...,0.227273,1
1,Z3Ss0bD2B8c,Ugxbjh7VDK176jFRLcd4AaABAg,0,really hope all who are injured heal well and ...,kaori sakamoto wins nhk trophy mana kawabe si...,0.350000,1
3,8xFk5TicFN8,UgwUOQdkZ937AYHyzy14AaABAg,13,it 39 s going to be 10 or lower for tuesday ...,world cup qualifying what to expect from cana...,0.000000,1
4,8xFk5TicFN8,UgxBR0QU7ginLxzF4hx4AaABAg,13,16 at game time we need to take advantage of...,world cup qualifying what to expect from cana...,-0.400000,-1
5,8xFk5TicFN8,UgxNGWxaImsEzo2IdA54AaABAg,3,pitch is for even though the fans are grea...,world cup qualifying what to expect from cana...,0.650000,1
...,...,...,...,...,...,...,...
8868,fIaWgbHdwIU,UghXMHuU8pmGu3gCoAEC,7,i had a seizure from this,mark mcmorris 39 s next stop a 2018 olympics...,0.000000,1
8869,fIaWgbHdwIU,Uggd8cxZW0BmrngCoAEC,3,the video effects have ruined this video,mark mcmorris 39 s next stop a 2018 olympics...,0.000000,1
8870,LliIdJVgVAA,UgzQg6--OmyOAzgdf9V4AaABAg,0,great to see that ontario residents are making...,a tour of the mattamy national cycling centre ...,0.533333,1
8871,LliIdJVgVAA,Ugiiv0lPeCzEh3gCoAEC,0,i wonder if it would be faster to have a 3 d p...,a tour of the mattamy national cycling centre ...,0.000000,1


In [None]:
# Stripping leading and trailing whitespace from video titles and comments
for text_column in ('video_title', 'comment'):
    data[text_column] = data[text_column].str.strip()
data

Unnamed: 0,video_id,comment_id,comment_like_count,comment,video_title,polarity,polarity_category
0,Z3Ss0bD2B8c,Ugwv_uZozXHCgj1zN-l4AaABAg,0,if a skater receives a or an e on a jump li...,kaori sakamoto wins nhk trophy mana kawabe si...,0.227273,1
1,Z3Ss0bD2B8c,Ugxbjh7VDK176jFRLcd4AaABAg,0,really hope all who are injured heal well and ...,kaori sakamoto wins nhk trophy mana kawabe si...,0.350000,1
3,8xFk5TicFN8,UgwUOQdkZ937AYHyzy14AaABAg,13,it 39 s going to be 10 or lower for tuesday ...,world cup qualifying what to expect from cana...,0.000000,1
4,8xFk5TicFN8,UgxBR0QU7ginLxzF4hx4AaABAg,13,16 at game time we need to take advantage of ...,world cup qualifying what to expect from cana...,-0.400000,-1
5,8xFk5TicFN8,UgxNGWxaImsEzo2IdA54AaABAg,3,pitch is for even though the fans are grea...,world cup qualifying what to expect from cana...,0.650000,1
...,...,...,...,...,...,...,...
8868,fIaWgbHdwIU,UghXMHuU8pmGu3gCoAEC,7,i had a seizure from this,mark mcmorris 39 s next stop a 2018 olympics...,0.000000,1
8869,fIaWgbHdwIU,Uggd8cxZW0BmrngCoAEC,3,the video effects have ruined this video,mark mcmorris 39 s next stop a 2018 olympics...,0.000000,1
8870,LliIdJVgVAA,UgzQg6--OmyOAzgdf9V4AaABAg,0,great to see that ontario residents are making...,a tour of the mattamy national cycling centre ...,0.533333,1
8871,LliIdJVgVAA,Ugiiv0lPeCzEh3gCoAEC,0,i wonder if it would be faster to have a 3 d p...,a tour of the mattamy national cycling centre ...,0.000000,1


In [None]:
# Removing stopwords
# Download NLTK's stopword corpus, then keep the English list as a set so
# remove_stopwords can test membership per token.
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Jaideep\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
def remove_stopwords(line):
    """Tokenize `line` and return it without the tokens found in `stop_words`.

    Args:
        line: Text to filter.

    Returns:
        str: The remaining tokens, in their original order, joined by single
        spaces.
    """
    kept_tokens = []
    for token in word_tokenize(line):
        if token in stop_words:
            continue
        kept_tokens.append(token)
    return ' '.join(kept_tokens)

In [None]:
# Fetch the 'punkt' tokenizer data before tokenizing, then drop stopwords from
# the video titles and the comments.
nltk.download('punkt')
for text_column in ('video_title', 'comment'):
    data[text_column] = data[text_column].apply(remove_stopwords)
data

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Jaideep\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


Unnamed: 0,video_id,comment_id,comment_like_count,comment,video_title,polarity,polarity_category
0,Z3Ss0bD2B8c,Ugwv_uZozXHCgj1zN-l4AaABAg,0,skater receives e jump like kaori gets lutz st...,kaori sakamoto wins nhk trophy mana kawabe sil...,0.227273,1
1,Z3Ss0bD2B8c,Ugxbjh7VDK176jFRLcd4AaABAg,0,really hope injured heal well healthy,kaori sakamoto wins nhk trophy mana kawabe sil...,0.350000,1
3,8xFk5TicFN8,UgwUOQdkZ937AYHyzy14AaABAg,13,39 going 10 lower tuesday night hope players f...,world cup qualifying expect canada vs mexico,0.000000,1
4,8xFk5TicFN8,UgxBR0QU7ginLxzF4hx4AaABAg,13,16 game time need take advantage mexico strugg...,world cup qualifying expect canada vs mexico,-0.400000,-1
5,8xFk5TicFN8,UgxNGWxaImsEzo2IdA54AaABAg,3,pitch even though fans great hopefully injurie...,world cup qualifying expect canada vs mexico,0.650000,1
...,...,...,...,...,...,...,...
8868,fIaWgbHdwIU,UghXMHuU8pmGu3gCoAEC,7,seizure,mark mcmorris 39 next stop 2018 olympics test ...,0.000000,1
8869,fIaWgbHdwIU,Uggd8cxZW0BmrngCoAEC,3,video effects ruined video,mark mcmorris 39 next stop 2018 olympics test ...,0.000000,1
8870,LliIdJVgVAA,UgzQg6--OmyOAzgdf9V4AaABAg,0,great see ontario residents making great use v...,tour mattamy national cycling centre milton,0.533333,1
8871,LliIdJVgVAA,Ugiiv0lPeCzEh3gCoAEC,0,wonder would faster 3 printed bike,tour mattamy national cycling centre milton,0.000000,1


In [None]:
# Save the cleaned, labelled comments to comments.csv; index=False leaves the
# DataFrame index out of the file.
data.to_csv('comments.csv', index=False)