In [100]:
import pandas as pd
import numpy as np
import os
import requests
import json
import googleapiclient.discovery
import altair as alt

In [101]:
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

## Data acquisition

#### Getting a list with videos

List of videos using the YouTube Data API [YouTube Data API](https://tools.digitalmethods.net/netvizz/youtube/mod_videos_list.php)

Querying for the terms: `Global warming`, `Climate change`, `Paris agreement`, `Climate realism`.

#### Getting all comments (including replies) to all videos in the list

Get all comments to a video using the [CommentThreads method of YouTube Developer API](https://developers.google.com/youtube/v3/docs/commentThreads/list)

The API documentation of CommentsThread states that it might not contain all replies: 

>A commentThread resource contains information about a YouTube comment thread, which comprises a top-level comment and replies, if any exist, to that comment. A commentThread resource can represent comments about either a video or a channel.

>Both the top-level comment and the replies are actually comment resources nested inside the commentThread resource. The commentThread resource does not necessarily contain all replies to a comment, and you need to use the comments.list method if you want to retrieve all replies for a particular comment. Also note that some comments do not have replies.

Therefore we use the [Coments list method](https://developers.google.com/youtube/v3/docs/commentThreads/list) to get all replies to a comment. 

In [102]:
API_KEY = 'AIzaSyCo58wzF-1eZXXTvb71cUJlzBJ2a9Dt3ms'

#### List of vidoes containing the term `Paris agreement`

In [103]:
data_path = 'data_raw/videolist_search50_2021_01_19-13_55_33.tab'

In [104]:
def load_videos(data_path, min_comments_count = 3):
    videos = pd.read_csv(data_path, sep='\t',header=(0))
    #remove entries where commentCount is None
    videos = videos.dropna(how='all', subset=['commentCount'])
    #remove videos where comments count is lesser then minimum
    videos.drop(videos[videos['commentCount'] < min_comments_count].index, inplace = True)
    videos.sort_values(['commentCount'], ascending=[False])  
    return videos

In [105]:
videos = load_videos(data_path)

In [106]:
print(len(videos))
print(len(videos[videos.commentCount.isna()]))
print(np.min(videos.commentCount))

40
0
3.0


We need to find more videos with more than a minimum number of comments. E.g. 2 comments.

In [108]:
print(f"We have in total {np.sum(videos['commentCount'])} from comments distributed in {len(videos)} for videos containing the term Paris Agreement.")
print(f"Mean commen count: {np.mean(videos['commentCount'])} Median: {np.median(videos['commentCount'])}")

We have in total 33282.0 from comments distributed in 40 for videos containing the term Paris Agreement.
Mean commen count: 832.05 Median: 115.5


In [51]:
video_id = 'MIA_1xQc7x8'

In [52]:
#TODO: Concat video lists remove duplicates based on videoId
#TODO: find number of users commented multiple videos

#### Class to load all comments of a video 

In [53]:
class Video_comments:
    
    def __init__(self, api_key):
        self.api_key  = api_key
        #self.video_id = video_id
        self.max_results = 100
        
        #TODO: Include ThreadId
        comments_df = pd.DataFrame({
                            'id':[],
                            'threadId':[],
                            'published_at': [], 
                            'author_name': [], 
                            'text': [],
                            'is_reply': [],
                            'likeCount': [],
                            'cleaned': [],
                            'video_id': [],
                            'video_published_at': []}, 
                            columns = [ 'id',
                                        'threadId',
                                        'published_at', 
                                        'author_name', 
                                        'text', 
                                        'likeCount',
                                        'is_reply', 
                                        'cleaned', 
                                        'video_id', 
                                        'video_published_at'])
        
        self.comments_df = comments_df
        
    
    '''load all replies of top level comments and append dataframe witth all top level comments and replies. 
    (appendingt to df and loading replies should be devided to different methods.)'''
    def _add_to_dataframe(self, response):
        for i, main_comment in enumerate(response['items']):
            comment = main_comment['snippet']['topLevelComment']['snippet']

            new_row = pd.Series(data={
                                    'id':main_comment['snippet']['topLevelComment']['id'],
                                    'threadId':main_comment['snippet']['topLevelComment']['id'],
                                    'published_at':comment['publishedAt'] , 
                                    'author_name': comment['authorDisplayName'], 
                                    'text': comment['textOriginal'],
                                    'likeCount':comment['likeCount'],
                                    'is_reply': 0,
                                    'video_id': comment['videoId']})

            self.comments_df = self.comments_df.append(new_row, ignore_index=True)

            
            #check if the top level comment has replies. If yey then get these too and add to df
            request_replies = requests.get(f"https://youtube.googleapis.com/youtube/v3/comments?part=snippet&parentId={main_comment['snippet']['topLevelComment']['id']}&key={self.api_key}")
            response_replies = json.loads(request_replies.text)
        
            #if response_replies['items'] > 0 then the main comment has replies
            if(len(response_replies['items']) > 0):
                for i, main_reply in enumerate(response_replies['items']):      
                    reply = main_reply['snippet']

                    new_row = pd.Series(data={
                                            'id':reply['parentId'],
                                            'threadId':main_comment['snippet']['topLevelComment']['id'],
                                            'published_at':reply['publishedAt'] , 
                                            'author_name': reply['authorDisplayName'], 
                                            'text': reply['textOriginal'],
                                            'likeCount':reply['likeCount'],
                                            'is_reply': 1,
                                            'video_id': comment['videoId']})

                    self.comments_df = self.comments_df.append(new_row, ignore_index=True)
                    
    
    '''Load (and append comments dataframe) recursively comments from next page until there are no next page. '''
    def _get_next_page(self, response):     
        request1 = requests.get(f"https://www.googleapis.com/youtube/v3/commentThreads?part=snippet&maxResults={self.max_results}&pageToken={str(response['nextPageToken'])}&videoId={self.video_id}&key={self.api_key}")
        response1 = json.loads(request1.text)
        self._add_to_dataframe(response1)

        
        if ('nextPageToken' in response1.keys()):
            self._get_next_page(response1)
    
    '''Start loading comments. Paginated.'''
    def get_comments(self, video_id):  
        video_id = video_id
        request = requests.get(f"https://www.googleapis.com/youtube/v3/commentThreads?part=snippet&maxResults={self.max_results}&videoId={self.video_id}&key={self.api_key}")
        response = json.loads(request.text)     
        
        self._add_to_dataframe(response)
        
        if 'nextPageToken' in response.keys():
            self._get_next_page(response)
        
        return self.comments_df

In [54]:
vid_comments = Video_comments(API_KEY)

In [55]:
comments_df = vid_comments.get_comments(video_id)

In [56]:
comments_df.shape

(555, 10)

In [None]:
'''
totalVideoCount = videos.shape[0]
counter = 1

for i, video in videos.iterrows():
    if(counter > 10):
        print('video: ',counter,' of ',totalVideoCount,' # of comments: ',video.commentCount)
        comments_df = vid_comments.get_comments(video.videoId)
        all_comments_df = pd.concat([all_comments_df, comments_df], axis=0)
        print(all_comments_df.shape)
    counter+=1    
'''

In [57]:
comments_df

Unnamed: 0,id,threadId,published_at,author_name,text,likeCount,is_reply,cleaned,video_id,video_published_at
0,UgwMMvtqhUAJAmkMmit4AaABAg,UgwMMvtqhUAJAmkMmit4AaABAg,2021-01-31T12:55:40Z,BOSSMATE,Lol your never de Carbon the planet with solar power without using fossil fuels lol,1.0,0.0,,MIA_1xQc7x8,
1,UgyrcDXosYTdYJrfEC54AaABAg,UgyrcDXosYTdYJrfEC54AaABAg,2021-01-31T05:16:09Z,Jasmeet Singh,"Why don't u guys understand its step 1, there are lot of issues and problems. But if u are concerned ? Work for its resolution.",0.0,0.0,,MIA_1xQc7x8,
2,Ugyobbm-z4DSP2OYs_54AaABAg,Ugyobbm-z4DSP2OYs_54AaABAg,2021-01-31T00:39:44Z,libation14221,What a fraud. No enforcement provision.,1.0,0.0,,MIA_1xQc7x8,
3,Ugy7ZAu6YBx1wK6pU9N4AaABAg,Ugy7ZAu6YBx1wK6pU9N4AaABAg,2021-01-29T21:42:37Z,Ric Di clemente,CCP CHINA IS A FRIEND TOO NOBODY ! THEIR ONLY AGENDA IS TO CONTROL; DOMINATE & TAKEOVER THE WORLD !!! BELIEVE ?,0.0,0.0,,MIA_1xQc7x8,
4,UgzvfKRzwQ4h3keC2Od4AaABAg,UgzvfKRzwQ4h3keC2Od4AaABAg,2021-01-29T21:37:09Z,Ric Di clemente,FACT FOR ALL OF SPACE CLIMATE CHANGE IS AS NATURAL AS THE RAIN ? \nThe Paris Accord ; ONLY BENEFITS CCP CHINA !,0.0,0.0,,MIA_1xQc7x8,
5,UgztjIGTJWm6WnNNGpx4AaABAg,UgztjIGTJWm6WnNNGpx4AaABAg,2021-01-29T02:23:57Z,Agri-Life Organic Farm,Bla Bla Bla Bla; Im a third of the way through this video and all I've heard is verbal diarrhea. Cut to the chase; the Paris Climate Accord is a fraud. The US was the only country to commit to reducing carbon emissions. All the other countries said they would in the future but now and in the ten years to come they could keep increasing carbon emissions and then they would only keep immission at their levels in ten year. I'm sure that fool we know have in the white house doesn't even know what is in the Paris Climate Accord.,0.0,0.0,,MIA_1xQc7x8,
6,Ugze93ouUvmazA-AP7N4AaABAg,Ugze93ouUvmazA-AP7N4AaABAg,2021-01-28T17:29:51Z,buckhaa1502,Climate had always been changing and will continue to do that regardless if humans are here or not...trillions of dollars can be spent trying to CHANGE the climate and we will still does very very little if at all in affecting its outcome.. best to spent that effort and money on REAL issues in our society,0.0,0.0,,MIA_1xQc7x8,
7,UgxivsLSnskclydg-eh4AaABAg,UgxivsLSnskclydg-eh4AaABAg,2021-01-27T09:07:38Z,daskippa lawson,"This is what the Great Reset looks like. Put your mask on and get your Covid 19 shot. Shut up and be happy World, Welcome to The New World Order.",2.0,0.0,,MIA_1xQc7x8,
8,Ugyyx-7RqK4Jo33dsuR4AaABAg,Ugyyx-7RqK4Jo33dsuR4AaABAg,2021-01-26T23:34:35Z,M locascio,And it's now 2021. And?,0.0,0.0,,MIA_1xQc7x8,
9,UgytiBlIiyL0TTOi8_94AaABAg,UgytiBlIiyL0TTOi8_94AaABAg,2021-01-24T03:17:25Z,John Brandt,Really ? Were you called in to fix it? Ego v,0.0,0.0,,MIA_1xQc7x8,
