In [None]:
# Install required packages
# !pip install python-dotenv
# !pip install numpy
# !pip install pandas
# !pip install google-api-python-client
# !pip install mysql-connector-python
# !pip install sqlalchemy

In [1]:
# Import required packages
from dotenv import load_dotenv
import os
import numpy as np
import pandas as pd
from googleapiclient.discovery import build
import mysql.connector
from sqlalchemy import create_engine

In [2]:
# Load environment variables from .env file
load_dotenv()

# Get YouTube API key from .env 
youtube_api_key = os.getenv("youtube_api_key")
# Get MySQL username from .env
mysql_user = os.getenv("mysql_user")
# Get MySQL password from .env
mysql_password = os.getenv("mysql_password")

In [3]:
# Build the YouTube service object
youtube = build("youtube", "v3", developerKey=youtube_api_key)

In [4]:
# Extract data from a single YouTube channel
channel_name = "AlexTheAnalyst"

# Make YouTube API request (uses 1 out of 10.000 units from the daily usage limit)
channel_data = youtube.channels().list(part="statistics,snippet,contentDetails", forHandle=channel_name).execute()  

In [5]:
# Extract relevant channel data and store as pandas DataFrame
channel_df = pd.DataFrame([{
    "channel_name": channel_data["items"][0]["snippet"]["title"],
    "channel_id": channel_data["items"][0]["id"],
    "view_count": int(channel_data["items"][0]["statistics"]["viewCount"]),
    "video_count": int(channel_data["items"][0]["statistics"]["videoCount"]),
    "subscriber_count": int(channel_data["items"][0]["statistics"]["subscriberCount"])
}])
channel_df

Unnamed: 0,channel_name,channel_id,view_count,video_count,subscriber_count
0,Alex The Analyst,UC7cs8q-gJRlGwj4A8OmCmXg,31918404,293,734000


In [6]:
# Extract uploads playlist ID containing all videos of the channel 
uploads_playlist_id = channel_data["items"][0]["contentDetails"]["relatedPlaylists"]["uploads"]
uploads_playlist_id

'UU7cs8q-gJRlGwj4A8OmCmXg'

In [11]:
# Extract video IDs from the playlist
video_ids = []
next_page_token = None

while True:
    # Get uploads playlist data using the YouTube PlaylistItems API 
    # Note: Each iteration of the while loop uses 1 out of 10.000 units from the daily usage limit (10 units for 500 videos)
    playlist_data = youtube.playlistItems().list(
        part="snippet", 
        playlistId=uploads_playlist_id, 
        maxResults=50,
        pageToken=next_page_token
    ).execute()

    # Extract video IDs from the playlist data
    video_ids += [video_data["snippet"]["resourceId"]["videoId"] for video_data in playlist_data["items"]]
    
    # Get the next page token, if available, and repeat the loop
    next_page_token = playlist_data.get("nextPageToken")
    if next_page_token is None:
        break

In [37]:
# Get video data using the YouTube Videos API (uses 1 out of 10.000 units from the daily usage limit)
video_data = youtube.videos().list(part="statistics,snippet,contentDetails", id=video_ids).execute()    

In [49]:
video_data

{'kind': 'youtube#videoListResponse',
 'etag': 'OSEk55St8DrZ07y0RRA3OXjqKt0',
 'items': [{'kind': 'youtube#video',
   'etag': 'P1JN1V292R_pE8Qxm6HqcSrRA_o',
   'id': '7vnxpcqmqNQ',
   'snippet': {'publishedAt': '2024-03-26T12:00:12Z',
    'channelId': 'UC7cs8q-gJRlGwj4A8OmCmXg',
    'title': 'Stored Procedures in MySQL | Advanced MySQL Series',
    'description': "Full MySQL Course: https://www.analystbuilder.com/courses/mysql-for-data-analytics\n\nIn this lesson we are going to take a look at Stored Procedures in MySQL!\n\nGitHub Code: https://github.com/AlexTheAnalyst/MySQL-YouTube-Series/blob/main/Advanced%20-%20Stored%20Procedures.sql\n____________________________________________ \n\nSUBSCRIBE!\nDo you want to become a Data Analyst? That's what this channel is all about! My goal is to help you learn everything you need in order to start your career or even switch your career into Data Analytics. Be sure to subscribe to not miss out on any content!\n_________________________________

In [19]:
# Load data into a MySQL database

# Connect to MySQL database
connection = mysql.connector.connect(
    host = "localhost",
    user = mysql_user,
    password = mysql_password,
    database = "youtube_analytics"
)

# Create a cursor object for executing SQL queries
cursor = connection.cursor()

# Save pandas DataFrame to MySQL table
try:
    # Create a SQLAlchemy engine for interacting with the MySQL database
    engine = create_engine(f"mysql+mysqlconnector://{mysql_user}:{mysql_password}@localhost/youtube_analytics")
    # Save the DataFrame to a MySQL table 
    channel_df.to_sql("channels", con=engine, if_exists="replace", index=False)
    # Print a success message
    print("DataFrame successfully saved to MySQL table.")
except Exception as e:
    # Print an error message if any exception occurs
    print("Error:", e)
finally:
    # Close the cursor and connection to free up resources
    cursor.close()
    connection.close()

DataFrame successfully saved to MySQL table.
