In [172]:
import pandas as pd
import numpy as np
import os
import requests
import json
import googleapiclient.discovery

In [173]:
# Lift every pandas display limit so frames are rendered without truncation.
for _display_option in (
    'display.max_rows',
    'display.max_columns',
    'display.width',
    'display.max_colwidth',
):
    pd.set_option(_display_option, None)

## Data acquisition

#### Getting a list with videos

The list of videos was retrieved with the *Video List* module of [YouTube Data Tools](https://tools.digitalmethods.net/netvizz/youtube/mod_videos_list.php), which queries the YouTube Data API.

Querying for the terms: `Global warming`, `Climate change`, `Paris agreement`, `Climate realism`

#### Getting all comments (including replies) to all videos in the list

Get all comments to a video using the [CommentThreads method of YouTube Developer API](https://developers.google.com/youtube/v3/docs/commentThreads/list)

The API documentation for `commentThreads` states that a thread might not contain all replies:

>A commentThread resource contains information about a YouTube comment thread, which comprises a top-level comment and replies, if any exist, to that comment. A commentThread resource can represent comments about either a video or a channel.

>Both the top-level comment and the replies are actually comment resources nested inside the commentThread resource. The commentThread resource does not necessarily contain all replies to a comment, and you need to use the comments.list method if you want to retrieve all replies for a particular comment. Also note that some comments do not have replies.

Therefore we use the [Comments list method](https://developers.google.com/youtube/v3/docs/comments/list) to get all replies to a comment.

In [174]:
# The YouTube Data API key is read from the environment so that it is never
# stored in (or published with) the notebook. Export it before starting Jupyter:
#     export YOUTUBE_API_KEY=<your key>
# NOTE: the key that used to be hardcoded here has been exposed and should be revoked.
API_KEY = os.environ.get('YOUTUBE_API_KEY', '')
if not API_KEY:
    print('YOUTUBE_API_KEY is not set; requests to the YouTube Data API will be rejected.')

In [175]:
# Location of the tab-separated video list that is loaded with pandas.
data_path = (
    'data_raw/'
    'videolist_search50_2021_01_19-13_55_33.tab'
)

In [191]:
# Load the video list; the first line of the file holds the column names.
videos = pd.read_csv(
    data_path,
    sep='\t',
    header=0,
)
videos.head()

Unnamed: 0,position,channelId,channelTitle,videoId,publishedAt,publishedAtSQL,videoTitle,videoDescription,tags,videoCategoryId,videoCategoryLabel,duration,durationSec,dimension,definition,caption,thumbnail_maxres,licensedContent,viewCount,likeCount,dislikeCount,favoriteCount,commentCount
0,1,UC3XTzVzaHQEd30rQbuvCtTQ,LastWeekTonight,5scez5dqtAc,2017-06-05T06:30:00Z,2017-06-05 06:30:00,Paris Agreement: Last Week Tonight with John Oliver (HBO),Donald Trump plans to withdraw the United States from the Paris agreement on climate change. That's bad news for anyone who happens to live on this planet. Connect with Last Week Tonight online... Subscribe to the Last Week Tonight YouTube channel for more almost news as it almost happens: www.youtube.com/user/LastWeekTonight Find Last Week Tonight on Facebook like your mom would: http://Facebook.com/LastWeekTonight Follow us on Twitter for news about jokes and jokes about news: http://Twitter.com/LastWeekTonight Visit our official site for all that other stuff at once: http://www.hbo.com/lastweektonight,"last week tonight paris agreement,paris accord,john oliver paris agreement",24,Entertainment,PT20M58S,1258,2d,hd,False,https://i.ytimg.com/vi/5scez5dqtAc/maxresdefault.jpg,1.0,13021725,176853,12626,0,13747.0
1,2,UCuLEr-GWiwCBB6zBDX3elOQ,UN Climate Change,WiGD0OgK2ug,2020-09-24T16:19:23Z,2020-09-24 16:19:23,"Ever wondered: What is the 'Paris Agreement', and how does it work?","This video explains the basics of the Paris Agreement: the international UN treaty of 2015 that aims to tackle climate change head on. It's central aim is to strengthen the global response to the threat of climate change by keeping a global temperature rise this century well below 2 degrees Celsius above pre-industrial levels, with ambition to limit the increase even further to 1.5 degrees Celsius. Additionally, the agreement aims to strengthen the ability of countries to deal with the impacts of climate change.","Paris agreement,unfccc,United Nations climate change,climate change,Climate ambition,UN treaty,United Nations",25,News & Politics,PT1M40S,100,2d,hd,False,https://i.ytimg.com/vi/WiGD0OgK2ug/maxresdefault.jpg,,21031,93,5,0,6.0
2,3,UCAuUUnT6oDeKwE6v1NGQxug,TED,MIA_1xQc7x8,2016-05-11T15:15:47Z,2016-05-11 15:15:47,The inside story of the Paris climate agreement | Christiana Figueres,"What would you do if your job was to save the planet? When Christiana Figueres was tapped by the UN to lead the Paris climate conference (COP 21) in December 2015, she reacted the way many people would: she thought it would be impossible to bring the leaders of 195 countries into agreement on how to slow climate change. Find out how she turned her skepticism into optimism — and helped the world achieve the most important climate agreement in history. TEDTalks is a daily video podcast of the best talks and performances from the TED Conference, where the world's leading thinkers and doers give the talk of their lives in 18 minutes (or less). Look for talks on Technology, Entertainment and Design -- plus science, business, global issues, the arts and much more. Find closed captions and translated subtitles in many languages at http://www.ted.com/translate Follow TED news on Twitter: http://www.twitter.com/tednews Like TED on Facebook: https://www.facebook.com/TED Subscribe to our channel: http://www.youtube.com/user/TEDtalksDirector","TED Talk,TED Talks,Christiana Figueres,UNFCCC,COP 21,Paris Agreement,climate change",29,Nonprofits & Activism,PT14M51S,891,2d,hd,True,https://i.ytimg.com/vi/MIA_1xQc7x8/maxresdefault.jpg,1.0,205893,2786,635,0,537.0
3,4,UC8exuytmPBqeM6klg7zBNsQ,Mindscape,1DdfNU5iATU,2017-06-10T21:42:52Z,2017-06-10 21:42:52,Paris agreement simplified,"Everything you need to know about Paris agreement is given here. which is easy to understand and gives at most clarity about this agreement. involvement of different countries . we have explained how actually the Paris agreement is going to work Supported by : Designed by Freepik"" Designed by starline / Freepik Designed by Brgfx / Freepik"" Designed by Graphiqastock / Freepik"" Designed by Photoroyalty / Freepik"" Designed by Ibrandify / Freepik"" Designed by brgfx / Freepik"" Designed by Frimufilms / Freepik Designed by Macrovector / Freepik"" Music by AShamaluevMusic","paris agreement Detail understanding,paris agreement USA,United states paris agreement,China paris agreement,India paris agreement,paris agreement news,paris agreement simple,what is paris agreement?,paris agreement simplified,paris agreement quick,paris agreement and its effects,paris agreement latest update,co2 intensity,paris agreement,mindscape",25,News & Politics,PT8M32S,512,2d,hd,False,https://i.ytimg.com/vi/1DdfNU5iATU/maxresdefault.jpg,1.0,113592,1678,200,0,171.0
4,5,UCrp_UI8XtuYfpiqluWLD7Lw,CNBC Television,HzLEYJE33ww,2020-10-23T02:43:37Z,2020-10-23 02:43:37,President Donald Trump defends his decision to terminate the Paris agreement on climate change,"President Donald Trump and former Vice President Joe Biden participate in the final presidential debate on Thursday at 9 p.m. ET in Nashville, Tenn., moderated by NBC News’ Kristen Welker. For access to live and exclusive video from CNBC subscribe to CNBC PRO: https://cnb.cx/2NGeIvi » Subscribe to CNBC TV: https://cnb.cx/SubscribeCNBCtelevision » Subscribe to CNBC: https://cnb.cx/SubscribeCNBC » Subscribe to CNBC Classic: https://cnb.cx/SubscribeCNBCclassic Turn to CNBC TV for the latest stock market news and analysis. From market futures to live price updates CNBC is the leader in business news worldwide. The News with Shepard Smith is CNBC’s daily news podcast providing deep, non-partisan coverage and perspective on the day’s most important stories. Available to listen by 8:30pm ET / 5:30pm PT daily beginning September 30: https://www.cnbc.com/2020/09/29/the-news-with-shepard-smith-podcast.html?__source=youtube%7Cshepsmith%7Cpodcast Connect with CNBC News Online Get the latest news: http://www.cnbc.com/ Follow CNBC on LinkedIn: https://cnb.cx/LinkedInCNBC Follow CNBC News on Facebook: https://cnb.cx/LikeCNBC Follow CNBC News on Twitter: https://cnb.cx/FollowCNBC Follow CNBC News on Instagram: https://cnb.cx/InstagramCNBC https://www.cnbc.com/select/best-credit-cards/ #CNBC #CNBCTV","Bottom Line,CNBC,business news,finance stock,stock market,news channel,news station,breaking news,us news,world news,cable,cable news,finance news,money,money tips",25,News & Politics,PT5M11S,311,2d,hd,False,,1.0,16479,173,41,0,217.0


In [177]:
# Tab-separated comment export for video MIA_1xQc7x8; first line is the header.
comments_vid2 = pd.read_csv(
    'data_raw/videoinfo_MIA_1xQc7x8_2021_01_20-12_13_25_comments.tab',
    sep='\t',
    header=0,
)

In [178]:
# Number of videos with fewer than 50 comments (rows with a missing count are not counted).
int((videos['commentCount'] < 50).sum())

19

In [179]:
# Id of the video whose comments are collected.
# Alternative id kept from development: 's2coXdufOzE'
video_id = "TRG2o7ISk40"

In [180]:
# Build the YouTube Data API v3 client used for all requests in this notebook.
api_service_name = "youtube"
api_version = "v3"
DEVELOPER_KEY = API_KEY

youtube = googleapiclient.discovery.build(
    serviceName=api_service_name,
    version=api_version,
    developerKey=DEVELOPER_KEY,
)

In [181]:
class Video_comments:
    """Collect all top-level comments and all replies of one YouTube video.

    Top-level comments come from ``commentThreads.list``; the replies of each
    thread are fetched separately with ``comments.list`` (following every
    result page), because a comment thread resource is not guaranteed to
    contain all of its replies.

    Parameters
    ----------
    api_key : str
        API key; stored on the instance as ``api_key``.
    video_id : str
        Id of the video whose comments are collected.
    client : optional
        YouTube Data API client exposing ``commentThreads()`` and
        ``comments()``. When omitted, the module-level ``youtube`` client is
        used (looked up at request time).

    Attributes
    ----------
    comments_df : pandas.DataFrame
        Result of the most recent ``get_comments`` call, one row per comment
        or reply, with the columns listed in ``COLUMNS``.
    """

    # Column order of the resulting frame. 'cleaned' and 'video_published_at'
    # are not filled by this class and therefore stay empty (NaN).
    COLUMNS = [
        'id',
        'published_at',
        'author_name',
        'text',
        'likeCount',
        'is_reply',
        'cleaned',
        'video_id',
        'video_published_at',
    ]

    def __init__(self, api_key, video_id, client=None):
        self.api_key = api_key
        self.video_id = video_id
        self.max_results = 100  # page size sent with every list request
        self._client = client
        # Rows are gathered as plain dicts and turned into a frame once;
        # growing a DataFrame row by row is quadratic and relied on
        # DataFrame.append, which pandas 2.0 removed.
        self._rows = []
        self.comments_df = pd.DataFrame(columns=self.COLUMNS)

    def _api(self):
        """Return the injected client, or the module-level ``youtube`` client."""
        if self._client is not None:
            return self._client
        return youtube

    def _list_threads(self, page_token=None):
        """Request one page of comment threads for the video and return the response."""
        params = {
            'part': "id, snippet",
            'videoId': self.video_id,
            'order': "relevance",
            'maxResults': self.max_results,
        }
        if page_token:
            params['pageToken'] = page_token
        return self._api().commentThreads().list(**params).execute()

    def _fetch_replies(self, parent_id):
        """Return all reply resources of a top-level comment, following every page."""
        replies = []
        page_token = None
        while True:
            params = {
                'part': "snippet",
                'parentId': parent_id,
                'maxResults': self.max_results,
            }
            if page_token:
                params['pageToken'] = page_token
            response = self._api().comments().list(**params).execute()
            replies.extend(response.get('items', []))
            page_token = response.get('nextPageToken')
            if not page_token:
                return replies

    def _add_to_dataframe(self, response):
        """Record every thread of one ``commentThreads.list`` response.

        For each thread the top-level comment is stored with ``is_reply`` 0,
        followed by its replies with ``is_reply`` 1. Rows are buffered on the
        instance; ``get_comments`` converts them into ``comments_df``.
        """
        for thread in response.get('items', []):
            top_level = thread['snippet']['topLevelComment']
            comment = top_level['snippet']

            self._rows.append({
                'id': top_level['id'],
                'published_at': comment['publishedAt'],
                'author_name': comment['authorDisplayName'],
                'text': comment['textOriginal'],
                'likeCount': comment['likeCount'],
                'is_reply': 0,
                'video_id': comment['videoId'],
            })

            # Skip the extra request when the thread reports zero replies.
            # If the count is missing, fetch anyway to stay on the safe side.
            if thread['snippet'].get('totalReplyCount') == 0:
                continue

            for reply_item in self._fetch_replies(top_level['id']):
                reply = reply_item['snippet']
                self._rows.append({
                    # Replies carry the id of their parent comment, which
                    # keeps a thread groupable by 'id'.
                    'id': reply['parentId'],
                    'published_at': reply['publishedAt'],
                    'author_name': reply['authorDisplayName'],
                    'text': reply['textOriginal'],
                    'likeCount': reply['likeCount'],
                    'is_reply': 1,
                    'video_id': comment['videoId'],
                })

    def _get_next_page(self, response):
        """Fetch and record every page that follows ``response``.

        Iterates (instead of recursing once per page) so that videos with a
        very large number of comment pages cannot exhaust the recursion limit.
        Does nothing when ``response`` has no ``nextPageToken``.
        """
        page_token = response.get('nextPageToken')
        while page_token:
            page = self._list_threads(page_token)
            self._add_to_dataframe(page)
            print('there is still a next page ', len(page.get('items', [])))
            page_token = page.get('nextPageToken')

    def get_comments(self):
        """Download all comments and replies of the video.

        Returns
        -------
        pandas.DataFrame
            One row per comment/reply in retrieval order (each top-level
            comment is directly followed by its replies). The same frame is
            stored in ``comments_df``.

        Every call starts from scratch, so calling it twice does not
        duplicate rows. If a request fails part-way, the exception propagates
        and ``comments_df`` holds the rows collected up to that point.
        """
        self._rows = []
        try:
            response = self._list_threads()
            print('first page ', len(response.get('items', [])))
            self._add_to_dataframe(response)

            if 'nextPageToken' in response:
                print('there is a next page')
                self._get_next_page(response)
        finally:
            self.comments_df = pd.DataFrame(self._rows, columns=self.COLUMNS)

        return self.comments_df

In [182]:
# Collector for the comments of the selected video.
vid_comments = Video_comments(
    api_key=API_KEY,
    video_id=video_id,
)

In [183]:
# Download the comments and replies of the video (issues YouTube API requests).
comments_df = vid_comments.get_comments()

first page  22


In [184]:
# (number of rows, number of columns) of the collected comments
comments_df.shape

(45, 9)

In [186]:
# Show a preview only: 'display.max_rows' is set to None in this notebook, so a
# bare `comments_df` would render every collected comment into the output.
comments_df.head(10)

Unnamed: 0,id,published_at,author_name,text,likeCount,is_reply,cleaned,video_id,video_published_at
0,UgzzJeKl8HjXJcuuZKZ4AaABAg,2020-12-12T19:12:45Z,Abhishek K,3 Librandus till now have given their attendance,16.0,0.0,,TRG2o7ISk40,
1,UgzzJeKl8HjXJcuuZKZ4AaABAg,2020-12-16T17:55:40Z,Arnav Singh,96 😂,0.0,1.0,,TRG2o7ISk40,
2,UgzzJeKl8HjXJcuuZKZ4AaABAg,2020-12-13T12:10:46Z,Kalluri Medhanand,62 🙃,0.0,1.0,,TRG2o7ISk40,
3,UgzzJeKl8HjXJcuuZKZ4AaABAg,2020-12-12T21:50:53Z,Sandipan Das,37 now 😀,0.0,1.0,,TRG2o7ISk40,
4,UgxGKoA-43vUB-dvmkt4AaABAg,2020-12-12T19:47:21Z,pintu lalu,Haha.. someone told there is no global warming and climate change.. only old age ppl are feeling it 😂😂😂,4.0,0.0,,TRG2o7ISk40,
5,Ugx6Im8BWSa1iuLSxS54AaABAg,2020-12-13T03:03:47Z,Gurdeep Dhiman,🙏🏻🙏🏻,0.0,0.0,,TRG2o7ISk40,
6,UgwLShhuIknM6_1CK4Z4AaABAg,2020-12-13T04:30:54Z,Musirhythm,2047 tak BJP ko power m rkhenge toh pkka HIndu rashtra bnn jayega,7.0,0.0,,TRG2o7ISk40,
7,UgwLShhuIknM6_1CK4Z4AaABAg,2020-12-14T04:49:13Z,Sidharth Mohapatro,😂😂😂😂,0.0,1.0,,TRG2o7ISk40,
8,Ugwq95Yjuwu1Gxh6dHd4AaABAg,2020-12-18T09:52:07Z,najeeb khalifa,Hahahaha\nWhat a joke,0.0,0.0,,TRG2o7ISk40,
9,UgwZ9Q9PflJEqYrc5HZ4AaABAg,2020-12-12T20:18:31Z,jos john,.,0.0,0.0,,TRG2o7ISk40,
