In [None]:
# Install required packages
# !pip install python-dotenv
# !pip install numpy
# !pip install pandas
# !pip install google-api-python-client
# !pip install mysql-connector-python
# !pip install sqlalchemy

In [1]:
# Import required packages
from dotenv import load_dotenv
import os
import numpy as np
import pandas as pd
from googleapiclient.discovery import build
import mysql.connector
from sqlalchemy import create_engine

In [2]:
# Load environment variables from .env file
load_dotenv()

# Get YouTube API key from .env 
youtube_api_key = os.getenv("youtube_api_key")
# Get MySQL username from .env
mysql_user = os.getenv("mysql_user")
# Get MySQL password from .env
mysql_password = os.getenv("mysql_password")

In [3]:
# Build the YouTube service object
youtube = build("youtube", "v3", developerKey=youtube_api_key)

In [4]:
# Extract data from a single YouTube channel
channel_name = "AlexTheAnalyst"

# Get channel data using the YouTube channels API
# Note: Uses 1 out of 10.000 units from the daily usage limit 
channel_data = youtube.channels().list(part="statistics,snippet,contentDetails", forHandle=channel_name).execute()  

In [5]:
# Extract relevant channel data and store as pandas DataFrame
channel_df = pd.DataFrame([{
    "channel_name": channel_data["items"][0]["snippet"]["title"],
    "channel_id": channel_data["items"][0]["id"],
    "view_count": int(channel_data["items"][0]["statistics"]["viewCount"]),
    "video_count": int(channel_data["items"][0]["statistics"]["videoCount"]),
    "subscriber_count": int(channel_data["items"][0]["statistics"]["subscriberCount"])
}])
channel_df

Unnamed: 0,channel_name,channel_id,view_count,video_count,subscriber_count
0,Alex The Analyst,UC7cs8q-gJRlGwj4A8OmCmXg,31918404,293,734000


In [29]:
# Extract uploads playlist ID containing all videos of the channel 
uploads_playlist_id = channel_data["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]
uploads_playlist_id

'UU7cs8q-gJRlGwj4A8OmCmXg'

In [11]:
# Extract video IDs from the playlist

# Initialize an empty list to store video IDs
video_ids = []
next_page_token = None

# Loop through each video in the uploads playlist
while True:
    # Get playlist data using the YouTube PlaylistItems API 
    # Note: Each loop uses 1 out of 10.000 units from the daily usage limit (1 unit for 50 videos)
    playlist_data = youtube.playlistItems().list(
        part="snippet", 
        playlistId=uploads_playlist_id, 
        maxResults=50,
        pageToken=next_page_token
    ).execute()

    # Extract video IDs from the playlist data
    video_ids += [video_data["snippet"]["resourceId"]["videoId"] for video_data in playlist_data["items"]]
    
    # Get the next page token
    next_page_token = playlist_data.get("nextPageToken")
    
    # Exit the loop if there are no more pages
    if next_page_token is None:
        break

In [42]:
# Get video data using the YouTube Videos API 
# Note: Uses 1 out of 10.000 units from the daily usage limit (1 unit per 50 videos)
video_data = youtube.videos().list(part="statistics,snippet,contentDetails", id=video_ids[0:3]).execute()    

In [43]:
# Initialize an empty list to store dictionaries for each video
videos_ls = []

# Loop through each video 
for video in video_data["items"]:
    # Extract relevant data 
    video_dict = {
        "video_id": video["id"],
        "channel_id": video["snippet"]["channelId"],
        "video_title": video["snippet"]["title"],
        "video_description": video["snippet"]["description"],
        "published_at": video["snippet"]["publishedAt"],
        "video_duration": video["contentDetails"]["duration"],
        "views": video["statistics"]["viewCount"],
        "likes": video["statistics"]["likeCount"],
        "comments": video["statistics"]["commentCount"],
        "thumbnail_url": video["snippet"]["thumbnails"]["maxres"]["url"]
    }
    # Append video data dictionary to the list
    videos_ls.append(video_dict)

# Convert list of dictionaries to pandas DataFrame
videos_df = pd.DataFrame(videos_ls)    
videos_df

Unnamed: 0,video_id,channel_id,video_title,video_description,published_at,video_duration,views,likes,comments,thumbnail_url
0,7vnxpcqmqNQ,UC7cs8q-gJRlGwj4A8OmCmXg,Stored Procedures in MySQL | Advanced MySQL Se...,Full MySQL Course: https://www.analystbuilder....,2024-03-26T12:00:12Z,PT12M37S,1873,90,6,https://i.ytimg.com/vi/7vnxpcqmqNQ/maxresdefau...
1,uEk07jXdKOo,UC7cs8q-gJRlGwj4A8OmCmXg,Temp Tables in MySQL | Advanced MySQL Series,Full MySQL Course: https://www.analystbuilder....,2024-03-19T12:00:56Z,PT7M46S,4487,156,8,https://i.ytimg.com/vi/uEk07jXdKOo/maxresdefau...
2,UC7uvOqcUTs,UC7cs8q-gJRlGwj4A8OmCmXg,CTEs in MySQL | Advanced MySQL Series,Full MySQL Course: https://www.analystbuilder....,2024-03-12T12:00:23Z,PT10M31S,7354,225,15,https://i.ytimg.com/vi/UC7uvOqcUTs/maxresdefau...


In [40]:
# Extract comments 

# Initialize an empty list to store comments
comments_ls = []

# Loop through each video
for video_id in video_ids:
    # Initialize next_page_token to None
    next_page_token = None

    # Loop through data batches of 100 comments 
    while True:
        # Get data from 100 comments using the YouTube CommentThreads API 
        # Note: Each loop uses 1 out of 10.000 units from the daily usage limit (1 unit for 100 comments)
        comments_data = youtube.commentThreads().list(
            part="snippet", 
            videoId=video_id, 
            maxResults=100,
            pageToken=next_page_token
        ).execute()

        # Loop through each comment
        for comment in comments_data["items"]:
            # Extract comment data in dictionary format
            comment_dict = {
                "comment_id": comment["snippet"]["topLevelComment"]["id"],
                "video_id": comment["snippet"]["topLevelComment"]["snippet"]["videoId"],
                "channel_id": comment["snippet"]["topLevelComment"]["snippet"]["channelId"],
                "comment_text": comment["snippet"]["topLevelComment"]["snippet"]["textOriginal"]
            }
            # Append comment data dictionary to the list
            comments_ls.append(comment_dict)

        # Get the next page token
        next_page_token = playlist_data.get("nextPageToken")

        # Exit the loop if there are no more pages
        if next_page_token is None: 
            break
        
# Convert list of dictionaries to pandas DataFrame
comments_df = pd.DataFrame(comments_ls)    
comments_df 

Unnamed: 0,comment_id,video_id,channel_id,comment_text
0,UgxWFbxtmtXNGcjbKyB4AaABAg,7vnxpcqmqNQ,UC7cs8q-gJRlGwj4A8OmCmXg,Work out the problems that God has assigned you
1,Ugy_W5U-LBIwrC0T_Bt4AaABAg,7vnxpcqmqNQ,UC7cs8q-gJRlGwj4A8OmCmXg,Does that require write permissions?
2,Ugw8qi8vhP3rxHiv0wx4AaABAg,7vnxpcqmqNQ,UC7cs8q-gJRlGwj4A8OmCmXg,excellent
3,UgxMQae8IV5kV8uTHCB4AaABAg,7vnxpcqmqNQ,UC7cs8q-gJRlGwj4A8OmCmXg,Dear Alex\r\nMy name is Molwedi Ramoeletsi Aug...
4,Ugwpbyi1L1r2gXzeatJ4AaABAg,7vnxpcqmqNQ,UC7cs8q-gJRlGwj4A8OmCmXg,Pls do video on creation of views
...,...,...,...,...
16017,UgztX5Zp0jjsOBtRqdp4AaABAg,6lQzbk6_OTw,UC7cs8q-gJRlGwj4A8OmCmXg,"Hey Alex, what do you think about COGNOS?"
16018,Ugx5i3bb5-5V8zNge_x4AaABAg,6lQzbk6_OTw,UC7cs8q-gJRlGwj4A8OmCmXg,"Hi Alex,\nfound your channel on Reddit and am ..."
16019,Ugz9os89TlxWUGtj0zR4AaABAg,6lQzbk6_OTw,UC7cs8q-gJRlGwj4A8OmCmXg,"Great video, Alex! I definitely agree that Exc..."
16020,UgxgUXmGkengAMwgAGt4AaABAg,6lQzbk6_OTw,UC7cs8q-gJRlGwj4A8OmCmXg,"Hey Alex, great video, just went through all o..."


In [19]:
# Load data into a MySQL database

# Connect to MySQL database
connection = mysql.connector.connect(
    host = "localhost",
    user = mysql_user,
    password = mysql_password,
    database = "youtube_analytics"
)

# Create a cursor object for executing SQL queries
cursor = connection.cursor()

# Save pandas DataFrame to MySQL table
try:
    # Create a SQLAlchemy engine for interacting with the MySQL database
    engine = create_engine(f"mysql+mysqlconnector://{mysql_user}:{mysql_password}@localhost/youtube_analytics")
    # Save the DataFrame to a MySQL table 
    channel_df.to_sql("channels", con=engine, if_exists="replace", index=False)
    # Print a success message
    print("DataFrame successfully saved to MySQL table.")
except Exception as e:
    # Print an error message if any exception occurs
    print("Error:", e)
finally:
    # Close the cursor and connection to free up resources
    cursor.close()
    connection.close()

DataFrame successfully saved to MySQL table.
