<a href="https://colab.research.google.com/github/Dan5Playground/colab/blob/master/YouTube_Comment_Dan_v2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Code for [Extracting YouTube Comments with YouTube API & Python](https://python.gotrained.com/youtube-api-extracting-comments/)


# Step 1 : Project Setup
Follow the instructions in [Extracting YouTube Comments with YouTube API & Python](https://python.gotrained.com/youtube-api-extracting-comments/) to obtain the authorization credentials needed to make API calls.

In [0]:
# upload the credential file 

In [0]:
from google.colab import files
# Upload client_secret.json here; the authentication step reads it by that name.
uploaded = files.upload()

Saving client_secret.json to client_secret.json


In [0]:
# check the uploaded file, make sure client_secret.json exists
! ls

client_secret.json  sample_data


# Step 2 : Code to download the comments 

Note:

- Keyword-based video search.
- Only the first 3 pages of search results are used, and some of those videos may have comments disabled. More pages can be fetched by changing `max_pages`.
- The saved data has the columns 'Video ID', 'Title', and 'Comment'. 'Comment' contains top-level comments only (no replies).

In [0]:
# Client installation
# To set up the credentials used to access the API, install the Google API client library.
! pip install google-api-python-client --quiet
# Install the additional libraries that handle authentication.
! pip install google-auth google-auth-oauthlib google-auth-httplib2 --quiet

In [0]:
import csv
import os
import pickle
import pdb
import google.oauth2.credentials

from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request

In [0]:
# Define the OAuth client-secrets file, the access scope, and the API to build.

# The CLIENT_SECRETS_FILE variable specifies the name of a file that contains
# the OAuth 2.0 information for this application, including its client_id and
# client_secret.
CLIENT_SECRETS_FILE = "client_secret.json"

# This OAuth 2.0 access scope allows for full read/write access to the
# authenticated user's account and requires requests to use an SSL connection.
SCOPES = ['https://www.googleapis.com/auth/youtube.force-ssl']
# Service name and version handed to build() when the API client is created.
API_SERVICE_NAME = 'youtube'
API_VERSION = 'v3'

In [0]:
# build a service that will be responsible for interacting with the API.  
def get_authenticated_service():
    """Return a YouTube API client, reusing cached OAuth credentials when possible.

    Credentials are read from ``token.pickle`` if that file exists. When they
    are missing or not valid they are either refreshed (expired, and a refresh
    token is available) or obtained through the console OAuth flow configured
    by ``CLIENT_SECRETS_FILE`` and ``SCOPES``. Freshly obtained or refreshed
    credentials are written back to ``token.pickle`` for the next run.

    Returns:
        The object produced by ``build(API_SERVICE_NAME, API_VERSION, ...)``.
    """
    token_path = 'token.pickle'

    # Pick up whatever a previous run cached. The file is unpickled as-is, so
    # it must only ever be a file this notebook wrote itself.
    cached = None
    if os.path.exists(token_path):
        with open(token_path, 'rb') as token_file:
            cached = pickle.load(token_file)

    # Still-valid cached credentials need no further work.
    if cached and cached.valid:
        return build(API_SERVICE_NAME, API_VERSION, credentials = cached)

    if cached and cached.expired and cached.refresh_token:
        # Expired but refreshable: renew without asking the user again.
        cached.refresh(Request())
        credentials = cached
    else:
        # Nothing usable is cached: run the interactive consent flow.
        # NOTE(review): run_console relies on the out-of-band OAuth flow, which
        # newer google-auth-oauthlib releases appear to have dropped - confirm
        # against the installed version.
        flow = InstalledAppFlow.from_client_secrets_file(
            CLIENT_SECRETS_FILE, SCOPES)
        credentials = flow.run_console()

    # Save the credentials for the next run
    with open(token_path, 'wb') as token_file:
        pickle.dump(credentials, token_file)

    return build(API_SERVICE_NAME, API_VERSION, credentials = credentials)


def get_video_comments(service, **kwargs):
    """Collect the text of every top-level comment returned for a request.

    Args:
        service: API client exposing ``commentThreads().list(...).execute()``.
        **kwargs: Parameters forwarded to ``commentThreads().list``. A
            ``pageToken`` entry is added/overwritten while paging.

    Returns:
        A list with the ``textDisplay`` of each top-level comment, in the order
        the pages were returned. If a request fails (for example because the
        video has comments disabled) the error is printed and the comments
        gathered so far are returned; an empty list when the very first
        request fails.
    """
    comments = []

    while True:
        # Every page request is guarded, not just the first one, so a failure
        # halfway through paging does not discard what was already collected.
        try:
            results = service.commentThreads().list(**kwargs).execute()
        except Exception as e:
            # add exception to deal with closed comments
            print (str(e))
            break

        if not results:
            break

        for item in results.get('items', []):
            comment = item['snippet']['topLevelComment']['snippet']['textDisplay']
            comments.append(comment)

        # Check if another page exists
        if 'nextPageToken' not in results:
            break
        kwargs['pageToken'] = results['nextPageToken']

    return comments


def write_to_csv(comments, keyword = 'None'):
    """Write comment rows to ``comments_<keyword>.csv`` in the working directory.

    Args:
        comments: Iterable of ``(video_id, title, comment)`` rows.
        keyword: Search keyword used to build the file name; spaces are
            replaced with underscores.

    The file starts with the header row ``Video ID, Title, Comment`` and is
    overwritten if it already exists.
    """
    file_name = 'comments_'+ keyword.replace(" ", "_") + '.csv'
    # newline='' leaves line endings to the csv module, so rows are not
    # double-spaced on Windows and newlines inside a comment survive intact.
    # utf-8 is set explicitly because comment text is arbitrary Unicode and the
    # platform default encoding may not be able to represent it.
    with open(file_name, 'w', newline='', encoding='utf-8') as comments_file:
        comments_writer = csv.writer(comments_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        comments_writer.writerow(['Video ID', 'Title', 'Comment'])
        # convert each tuple to a list and write it to the output file
        comments_writer.writerows(list(row) for row in comments)


def get_videos(service, *, max_pages=3, **kwargs):
    """Run a search and return the items from at most ``max_pages`` pages.

    Args:
        service: API client exposing ``search().list(...).execute()``.
        max_pages: Keyword-only upper bound on the number of result pages to
            request (default 3). It is not forwarded to the API.
        **kwargs: Parameters forwarded to ``search().list``. A ``pageToken``
            entry is added/overwritten while paging.

    Returns:
        A list with the ``items`` of every page that was requested, in order.
    """
    final_results = []

    # Exactly one request per page that is kept: a page is only fetched when
    # its items will be used, so no search request is spent on a page that
    # would be thrown away once the limit is reached.
    for _ in range(max_pages):
        results = service.search().list(**kwargs).execute()
        if not results:
            break

        final_results.extend(results.get('items', []))

        # Check if another page exists
        if 'nextPageToken' not in results:
            break
        kwargs['pageToken'] = results['nextPageToken']

    return final_results


def search_videos_by_keyword(service, **kwargs):
    """Search for videos, gather their top-level comments and save them to CSV.

    Args:
        service: API client passed through to the search and comment helpers.
        **kwargs: Search parameters forwarded to ``get_videos``; must contain
            ``q``, which is printed and used to name the output file.

    One ``(video_id, title, comment)`` row is produced per comment and the
    complete list is handed to ``write_to_csv``. Nothing is returned.
    """
    videos = get_videos(service, **kwargs)
    keyword = kwargs['q']
    print (keyword)

    rows = []
    for video in videos:
        title = video['snippet']['title']
        video_id = video['id']['videoId']
        video_comments = get_video_comments(service, part='snippet', videoId=video_id, textFormat='plainText')
        # one (video id, title, comment) tuple per comment of this video
        for text in video_comments:
            rows.append((video_id, title, text))

    write_to_csv(rows, keyword)



In [0]:
# Main():
# When running locally, disable OAuthlib's HTTPs verification. When
# running in production *do not* leave this option enabled.
# os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'
# Authenticate, ask for a search keyword, then search and save the comments to CSV.
service = get_authenticated_service()
keyword = input('Enter a keyword: ')
# NOTE(review): per the YouTube search.list docs, eventType='completed' appears to
# restrict results to finished live broadcasts - confirm this filter is intended.
search_videos_by_keyword(service, q=keyword, part='id,snippet', eventType='completed', type='video')


Enter a keyword: climate change
climate change
<HttpError 403 when requesting https://www.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId=_r9Xdb-mRxk&textFormat=plainText&alt=json returned "The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.">
<HttpError 403 when requesting https://www.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId=l3guUTgoDOQ&textFormat=plainText&alt=json returned "The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.">
<HttpError 403 when requesting https://www.googleapis.com/youtube/v3/commentThreads?part=snippet&videoId=rYoNKq_aFyM&textFormat=plainText&alt=json returned "The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.">


In [0]:
# Download the comments CSV for the current keyword (same file name that write_to_csv builds).
files.download('comments_'+ keyword.replace(" ", "_") + '.csv') 

In [0]:
# Display the current value of `keyword`; the cells below use it to locate the CSV.
keyword

# Step 3 : Basic Analysis 

In [0]:
import pandas as pd 
import numpy as np


In [0]:
# Load the comments CSV for the current keyword into a DataFrame.
data = pd.read_csv('comments_'+ keyword.replace(" ", "_") + '.csv')


In [0]:
# Summary: the keyword, one row per comment, and the number of distinct video IDs.
print ('query key word: ', keyword)
print ('total number of comments: ', data.shape[0])
print ('number of unique videos: ', len(data['Video ID'].unique()))


query key word:  coming out
total number of comments:  888
number of unique videos:  13
