### **Extracting YouTube Comments**

In [2]:
import pandas as pd
import json
import requests
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

In [3]:
# Read the YouTube Data API key from a local JSON file so the secret is not
# hardcoded in the notebook. The `with` block guarantees the file handle is
# closed even if the JSON is malformed.
with open("api.json", encoding="utf-8") as file:
    key_dict = json.load(file)
api_key = key_dict["YouTube_api_key"]

In [6]:
def generateYouTubeVideoList(query):
  """Search YouTube for `query` and return the ids of the matching videos.

  Args:
    query: Free-text search string.

  Returns:
    A list of video id strings. Search hits that are not videos
    (any item whose id kind is not 'youtube#video') are skipped.

  Raises:
    requests.HTTPError: if the API answers with a 4xx/5xx status.
  """
  # Maximum number of search results to request
  max_results = 25
  url = 'https://www.googleapis.com/youtube/v3/search'
  # Pass the parameters separately so requests URL-encodes them; a query
  # containing '&', '#' or '+' would otherwise corrupt the request.
  params = {
      'part': 'id',
      'q': query,
      'maxResults': max_results,
      'key': api_key,
  }
  response = requests.get(url, params=params, timeout=30)
  # Fail with a clear HTTP error instead of a KeyError on a missing 'items'.
  response.raise_for_status()
  data = response.json()
  # Keep only the hits that are videos
  return [
      item['id']['videoId']
      for item in data.get('items', [])
      if item['id']['kind'] == 'youtube#video'
  ]

In [7]:
def getComments(video_id,api_key):
  """Fetch the first page of top-level comments for one YouTube video.

  Args:
    video_id: Id of the video whose comment threads are requested.
    api_key: YouTube Data API developer key.

  Returns:
    A list with the `textDisplay` string of each top-level comment.
    If the API call raises HttpError (presumably when comments are
    disabled for the video), the error is printed and an empty list is
    returned, so callers can always iterate over the result safely.
  """
  # Create a YouTube API client
  youtube = build('youtube', 'v3', developerKey=api_key)
  try:
      response = youtube.commentThreads().list(
          part='snippet',
          videoId=video_id,
          maxResults=100  # documented upper bound for commentThreads.list
      ).execute()
  except HttpError as e:
      print(f'An HTTP error occurred: {e}')
      # Return an empty list rather than a marker string: iterating a string
      # yields one "comment" per character.
      return []

  # Extract the text of every top-level comment in the response
  return [
      item['snippet']['topLevelComment']['snippet']['textDisplay']
      for item in response.get('items', [])
  ]

In [9]:
def generateData(query):
  """Collect the comments of every video found for a search query.

  Args:
    query: Free-text search string passed to generateYouTubeVideoList.

  Returns:
    A dict with two parallel lists: 'YoutubeLink' (the watch URL of the
    video) and 'Comment' (one comment text), one entry per comment.
  """
  data = {'YoutubeLink':[],'Comment':[]}
  for video_id in generateYouTubeVideoList(query):
    comments = getComments(video_id,api_key)
    # Defensive: if getComments hands back a string (an error marker) instead
    # of a list, skip the video; iterating it would add one row per character.
    if isinstance(comments, str):
      continue
    video = 'https://www.youtube.com/watch?v=' + video_id
    data['Comment'].extend(comments)
    data['YoutubeLink'].extend([video] * len(comments))

  return data

In [10]:
# Enter the list of queries
query = [
    "Modi visit to usa",
    "Impact of Modi's visit to usa",
    "Highlights of PM Modi's usa visit",
    "India usa relationship after Modi's visit",
    "Reaction on pm Modi's visit to usa",
]
dataDict = []
# Build one frame per query and concatenate once: calling pd.concat inside the
# loop re-copies all previous rows each time, and seeding with an empty frame
# triggers a FutureWarning on recent pandas versions.
frames = [pd.DataFrame(generateData(each)) for each in query]
# newDf holds the final comments together with the link of their video
if frames:
  newDf = pd.concat(frames, ignore_index=True)
else:
  newDf = pd.DataFrame(columns=['YoutubeLink', 'Comment'])

In [11]:

# Display the combined comments table as this cell's output.
newDf

Unnamed: 0,YoutubeLink,Comment
0,https://www.youtube.com/watch?v=3Qt-OgNvYps,China can learn a thing or 2 from India in the...
1,https://www.youtube.com/watch?v=3Qt-OgNvYps,🔱🔥🔱🌍⏳🌍✌️🇮🇳🌅🤗🛣️👍
2,https://www.youtube.com/watch?v=3Qt-OgNvYps,"As an bharatiya and gujarati, proud moment ❤"
3,https://www.youtube.com/watch?v=3Qt-OgNvYps,Nice video
4,https://www.youtube.com/watch?v=3Qt-OgNvYps,Garba in white house
...,...,...
2295,https://www.youtube.com/watch?v=54LHQoa85AE,"Zoot bolnaa band Karo, pakistanio"
2296,https://www.youtube.com/watch?v=54LHQoa85AE,Shame .. any terrorist activities from India ?...
2297,https://www.youtube.com/watch?v=54LHQoa85AE,No single difference between a educated and no...
2298,https://www.youtube.com/watch?v=54LHQoa85AE,Indian pakistan ke Bare me Koi Sochte hi nhi 😂


In [12]:
# Save the collected comments to CSV, leaving out the DataFrame index column.
newDf.to_csv(
    "Raw_Youtube_Search_Results.csv",
    index=False,
)