# Grabbing YouTube comments

In [1]:
import os
import googleapiclient.discovery
from dotenv import load_dotenv
# Load variables from the project's .env file into the process environment.
load_dotenv()

# API key used to authenticate requests to the YouTube Data API.
YOUTUBE_API_KEY = os.getenv("YOUTUBE_API_KEY")

# Fail fast with an actionable message rather than letting a later API call
# fail with a less obvious authentication error.
if not YOUTUBE_API_KEY:
    raise ValueError("YOUTUBE_API_KEY not loaded. Please check your .env file.")

# Confirm the key is present without echoing the secret itself, so the
# cell's recorded output is reproducible on a fresh run.
print("YOUTUBE_API_KEY successfully loaded")

YOUTUBE_API_KEY successfully loaded


In [3]:
# Which Google API, and which version of it, the discovery client targets.
api_service_name = "youtube"
api_version = "v3"

# Build the API client, authenticating with the developer key loaded earlier.
youtube = googleapiclient.discovery.build(
    serviceName=api_service_name,
    version=api_version,
    developerKey=YOUTUBE_API_KEY,
)

# Request the comment threads (top-level comment plus replies) of one video.
# No page token is supplied, so this is a single request/response round trip.
comment_threads = youtube.commentThreads()
request = comment_threads.list(part="snippet,replies", videoId="-2k1rcRzsLA")
response = request.execute()

The response is a dictionary containing detailed information about each comment thread:

In [4]:
# Bare last expression: the notebook renders the whole response dictionary
# as this cell's output.
response

{'kind': 'youtube#commentThreadListResponse',
 'etag': 'siH11pIYr1xfaDCRYzlWMPb92Rc',
 'nextPageToken': 'Z2V0X25ld2VzdF9maXJzdC0tQ2dnSWdBUVZGN2ZST0JJRkNKMGdHQUVTQlFpSklCZ0FFZ1VJaUNBWUFCSUZDS2dnR0FBU0JRaUhJQmdBSWcwS0N3aW51dGU4QmhDb2k4ZGU=',
 'pageInfo': {'totalResults': 20, 'resultsPerPage': 20},
 'items': [{'kind': 'youtube#commentThread',
   'etag': 'jACuBxSzZRuMzPGKqnjLeBlAR_M',
   'id': 'UgzJNA5OXuCx1euq6n94AaABAg',
   'snippet': {'channelId': 'UCsBjURrPoezykLs9EqgamOA',
    'videoId': '-2k1rcRzsLA',
    'topLevelComment': {'kind': 'youtube#comment',
     'etag': 'gzHU2XVklFyYYGsUkIs6fG0kUVU',
     'id': 'UgzJNA5OXuCx1euq6n94AaABAg',
     'snippet': {'channelId': 'UCsBjURrPoezykLs9EqgamOA',
      'videoId': '-2k1rcRzsLA',
      'textDisplay': 'This time even Hainan Island is missing. Why can&#39;t you build a competent AI? It’s because you fail to get even the most basic information right—once again, an erroneous map is presented upfront.',
      'textOriginal': "This time even Hain

To get the comments, let's find the exact key which contains the information we are looking for.

In [5]:
# Path to a comment's text, shown here for the first thread:
# items -> snippet -> topLevelComment -> snippet -> textDisplay
response['items'][0]['snippet']['topLevelComment']['snippet']['textDisplay']

'This time even Hainan Island is missing. Why can&#39;t you build a competent AI? It’s because you fail to get even the most basic information right—once again, an erroneous map is presented upfront.'

Let's now store all the comments from this response in a list.

In [6]:
# Walk the comment threads one by one and keep the display text of each
# thread's top-level comment.
comments = []
for thread in response['items']:
    top_level_snippet = thread['snippet']['topLevelComment']['snippet']
    comments.append(top_level_snippet['textDisplay'])

A shorter way of doing the same thing, using a list comprehension:

In [7]:
# Same result as the explicit loop, expressed as one list comprehension.
comments = [
    thread['snippet']['topLevelComment']['snippet']['textDisplay']
    for thread in response['items']
]

In [8]:
# Display the collected comment texts.
comments

['This time even Hainan Island is missing. Why can&#39;t you build a competent AI? It’s because you fail to get even the most basic information right—once again, an erroneous map is presented upfront.',
 'Is this better for bouncing ideas off of about things in law/sociology/anthropology topics than something like Claude?',
 'lol',
 'i watch either this channel or linus tech tips after postnut clarity hits me ... then i fall sleep.. every night',
 'The age of AGI is here ❤🚀🚀❤️',
 '科學沒有政治、性別、階級之分！',
 'Idk kinda the same if not worse.',
 '<a href="https://www.youtube.com/watch?v=-2k1rcRzsLA&amp;t=2">0:02</a> where is hainan island?',
 'Nice.. that it is open source.... other wise these us startuos will use ai to control our mind ... and will kill whistle blowers',
 'And this is <i>actually</i> open source unlike &quot;Open&quot;AI. Great to see!',
 '美国的朋友们你们搜索不会遇到“对不起，我无法回答这个问题”吗？',
 'Saying AI is human smart, is like saying submarines can swim. A two year old has more intelligence than 

In [9]:
# Number of comment texts collected from the response.
len(comments)

20

Before storing everything into a database, I want to grab some more information regarding the comments.

In [10]:
# Extract each thread's top-level comment snippet once, instead of repeating
# the same four-level lookup for every field below.
top_level_snippets = [
    item['snippet']['topLevelComment']['snippet'] for item in response['items']
]

authors = [snippet['authorDisplayName'] for snippet in top_level_snippets]

# viewerRating is a string in the API response ('none' when the viewer has not
# rated the comment). The values end up in an INT column, so encode them as
# integers (1 for 'like', 0 otherwise) rather than mixing 0 with raw strings.
# .get() keeps a snippet without the field from aborting the whole cell.
rating = [
    1 if snippet.get('viewerRating') == 'like' else 0
    for snippet in top_level_snippets
]

likes = [snippet['likeCount'] for snippet in top_level_snippets]
published = [snippet['publishedAt'] for snippet in top_level_snippets]

We need to structure the data in the format our database insert expects: a list of tuples, one tuple per row.

In [11]:
# Combine the parallel per-comment lists into one tuple per row, ordered as
# (author, comment, likes, rating, published). zip pairs the lists
# positionally, replacing the manual index loop.
data = list(zip(authors, comments, likes, rating, published))

In [12]:
# Display the assembled row tuples.
data

[('@正文Eureka',
  'This time even Hainan Island is missing. Why can&#39;t you build a competent AI? It’s because you fail to get even the most basic information right—once again, an erroneous map is presented upfront.',
  0,
  0,
  '2025-01-26T10:31:21Z'),
 ('@dylanhuntly3517',
  'Is this better for bouncing ideas off of about things in law/sociology/anthropology topics than something like Claude?',
  0,
  0,
  '2025-01-26T09:59:30Z'),
 ('@贝娜丽莎', 'lol', 0, 0, '2025-01-26T09:46:20Z'),
 ('@ipchwn',
  'i watch either this channel or linus tech tips after postnut clarity hits me ... then i fall sleep.. every night',
  0,
  0,
  '2025-01-26T09:43:29Z'),
 ('@NextFuckingLevel',
  'The age of AGI is here ❤🚀🚀❤️',
  0,
  0,
  '2025-01-26T09:33:14Z'),
 ('@Alex-ji3me', '科學沒有政治、性別、階級之分！', 0, 0, '2025-01-26T09:16:32Z'),
 ('@ghostafridi8255',
  'Idk kinda the same if not worse.',
  0,
  0,
  '2025-01-26T08:56:11Z'),
 ('@ryana5435',
  '<a href="https://www.youtube.com/watch?v=-2k1rcRzsLA&amp;t=2">0:02<

# Storing data into a MySQL database

In [13]:
# Database credentials are read from the environment so they never appear
# in the notebook itself.
MYSQL_USERNAME = os.getenv("MYSQL_USERNAME")
MYSQL_PASSWORD = os.getenv("MYSQL_PASSWORD")

# Validate each credential separately so the error names the missing one.
# On success, print a confirmation that does not reveal the value, so the
# cell's recorded output is reproducible on a fresh run.
if not MYSQL_USERNAME:
    raise ValueError("MYSQL_USERNAME not loaded. Please check your .env file.")
print("MYSQL_USERNAME successfully loaded")

if not MYSQL_PASSWORD:
    raise ValueError("MYSQL_PASSWORD not loaded. Please check your .env file.")
print("MYSQL_PASSWORD successfully loaded")

MYSQL_USERNAME successfully loaded
MYSQL_PASSWORD successfully loaded


In [14]:
import mysql.connector

# Connection settings: a local MySQL server, with credentials taken from the
# environment variables loaded earlier.
db_config = {
    "host": "localhost",
    "user": MYSQL_USERNAME,
    "password": MYSQL_PASSWORD,
    "database": "youtube_api_project",
}
connection = mysql.connector.connect(**db_config)

# Last expression of the cell: reports whether the connection is live.
connection.is_connected()

True

In [15]:
# DDL for the destination table. IF NOT EXISTS makes the statement safe to
# run again when the table is already present.
create_table_query = """
CREATE TABLE IF NOT EXISTS comments_table (
    id INT AUTO_INCREMENT PRIMARY KEY,
    authorName VARCHAR(255),
    comment MEDIUMTEXT,
    likes INT,
    rating INT,
    publishedTime VARCHAR(255)
);
"""

# Open a cursor on the live connection and run the DDL.
cursor = connection.cursor()
cursor.execute(create_table_query)

In [16]:
# Parameterized insert: the %s placeholders are filled by the connector, one
# tuple from `data` per row, so values are never spliced into the SQL text.
insert_query = "INSERT INTO comments_table (authorName, comment, likes, rating, publishedTime) VALUES (%s, %s, %s, %s, %s)"

try:
    cursor.executemany(insert_query, data)

    # Make the inserted rows permanent.
    connection.commit()

    print(f"{cursor.rowcount} rows were inserted.")
except mysql.connector.Error:
    # Undo any partial batch, then re-raise so the failure stays visible.
    connection.rollback()
    raise
finally:
    # Always release the cursor and connection, even when the insert fails.
    cursor.close()
    connection.close()

20 rows were inserted.
