# Setup

## Import Libraries

In [1]:
from googleapiclient.discovery import build
import pandas as pd
import os

## Create YouTube API object

In [2]:
# Read the API key from the environment so it never has to be typed into the notebook.
API_KEY = os.environ.get('YOUTUBE_API_KEY')
if not API_KEY:
    # Stop here with a clear message instead of letting later API calls fail
    # with a less obvious error caused by a missing key.
    raise RuntimeError(
        "YOUTUBE_API_KEY environment variable is not set; "
        "export it before running this notebook."
    )

# Create a YouTube Data API v3 resource object; all later cells call through it.
youtube = build('youtube', 'v3', developerKey=API_KEY)

## Create target list of channels

In [3]:
# First batch of target channel IDs (selected when num_list == 1).
CHANNEL_LIST1 = [
    'UClWUQqWTL6xSK2Bx1bRlKPw',
    'UCrYmtJBtLdtm2ov84ulV-yg',
    'UCl2oCaw8hdR_kbqyqd2klIA',
    'UCqK_GSMbpiV8spgD3ZGloSw',
    'UCbLhGKVY-bJPcawebgtNfbw',
    'UCQQ_fGcMDxlKre3SEqEWrLA',
    'UCMtJYS0PrtiUwlk6zjGDEMA',
    'UCi7RBPfTtRkVchV6qO8PUzg',
    'UCc4Rz_T9Sb1w5rqqo9pL1Og',
    'UCI7M65p3A-D3P4v5qW8POxQ',
]

In [4]:
# Second batch of target channel IDs (selected when num_list == 2).
# NOTE(review): the first ID also appears in CHANNEL_LIST1 - confirm the overlap is intended.
CHANNEL_LIST2 = [
    'UClWUQqWTL6xSK2Bx1bRlKPw',
    'UCN9Nj4tjXbVTLYWN0EKly_Q',
    'UCviqt5aaucA1jP3qFmorZLQ',
    'UCo_xHWmMxdodlIufRlZzRPw',
    'UCla2jS8BrfLJj7kbKyy5_ew',
    'UCQglaVhGOBI0BR5S6IJnQPg',
    'UCJWCJCWOxBYSi5DhCieLOLQ',
    'UCiUnrCUGCJTCC7KjuW493Ww',
    'UCRqBu-grVX1p97WaX4d-OuQ',
    'UCKQvGU-qtjEthINeViNbn6A',
]

## Get Channel Metadata

In [8]:
# Which CHANNEL_LIST<n> to process; the same number is reused as the suffix
# of the CSV files written by the cells below.
num_list = 1
# Name of the module-level list variable, resolved later through globals().
var_name = "CHANNEL_LIST{}".format(num_list)

In [18]:
# Collect one metadata record per channel in the selected list, then save them as CSV.
channel_data = []

for CHANNEL_ID in globals()[var_name]:
    # One channels.list request per channel ID.
    request = youtube.channels().list(
        part='snippet,contentDetails,statistics,brandingSettings',
        id=CHANNEL_ID
    )
    # API errors (e.g. quota exhaustion) are not caught here and stop the cell.
    response = request.execute()

    # Only lookup failures count as "not found": a missing or empty 'items'
    # list, or a missing 'snippet'/'statistics' part. Anything else
    # (KeyboardInterrupt, typos, ...) is no longer silently swallowed.
    try:
        channel = response['items'][0]
        snippet = channel['snippet']
        statistics = channel['statistics']
    except (KeyError, IndexError):
        print(f"{CHANNEL_ID} Channel not found.")
        continue

    # Individual fields are read with .get(), so an absent field becomes
    # None (or 'N/A' for the country) instead of dropping the whole channel.
    channel_data.append({
        "Title": snippet.get('title'),
        "Published At": snippet.get('publishedAt'),
        "Country": snippet.get('country', 'N/A'),
        "View Count": statistics.get('viewCount'),
        "Subscriber Count": statistics.get('subscriberCount'),
        "Video Count": statistics.get('videoCount')
    })

channel_df = pd.DataFrame(channel_data)

# File name carries the list number so runs for different lists do not overwrite each other.
channel_df.to_csv(f"channel_data{num_list}.csv")


## Get TOP N Channel videos

In [9]:
# Number of top videos to retrieve per channel
TOP_N = 300

# Step 1: Retrieve videos from the channel ordered by view count
videos_data = []
for CHANNEL_ID in globals()[var_name]:
    video_ids = []
    next_page_token = None
    # Page through the search results until TOP_N ids are collected or the
    # results run out. A non-positive TOP_N now issues no request at all.
    while len(video_ids) < TOP_N:
        search_response = youtube.search().list(
            part='id',
            channelId=CHANNEL_ID,
            maxResults=min(TOP_N, 50),  # pages are requested in chunks of at most 50
            order='viewCount',
            type='video',
            pageToken=next_page_token
        ).execute()

        # Tolerate a response without an 'items' key instead of raising KeyError.
        video_ids.extend(item['id']['videoId'] for item in search_response.get('items', []))

        next_page_token = search_response.get('nextPageToken')
        if not next_page_token:
            break

    # The last page can overshoot when TOP_N is not a multiple of the page
    # size, so trim to exactly TOP_N ids.
    video_ids = video_ids[:TOP_N]

    # Step 2: Retrieve video details for the collected IDs, 50 IDs per request

    for i in range(0, len(video_ids), 50):
        batch_ids = video_ids[i:i+50]
        videos_response = youtube.videos().list(
            part='snippet,statistics',
            id=','.join(batch_ids)
        ).execute()

        for item in videos_response.get('items', []):
            videos_data.append({
                'title': item['snippet']['title'],
                'video_id': item['id']
            })

# Step 3: Convert the list of dictionaries to a DataFrame
# NOTE: an API error raised above (e.g. quotaExceeded) aborts the cell before
# this point, so neither videos_df nor the CSV is produced for that run.
videos_df = pd.DataFrame(videos_data)

videos_df.to_csv(f'video_data{num_list}.csv')


HttpError: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/videos?part=snippet%2Cstatistics&id=w_R3lxNcr7U%2CcZXHuitFsoo%2CJs9uNHHDAfw%2CFg5MdhaWLww%2C0KsSgtou_2I%2CCCAch4NbF1M%2CCF-WugfTv2s%2CejRC_1TgJFU%2CZ1YNLxxBad4%2CAtz5XzhlEdc%2C3AZg58VDYig%2CO5v1Us0FRKM%2CJh_wPk1T7LY%2CoDPy_tIo_wE%2CWZXq3TUqxLs%2C2M3in0FYZFU%2CDZkYkAC-BVQ%2CK6rRHYIXvE8%2Ctx-JKTQ-V9o%2CDnuULWQdV5Q%2CjkxNIZS7CR0%2CcSprmxg-cks%2CqfWFjS5PjiI%2CU5F3_Ede_zA%2CxEszucWyx0w%2Cio7xJgmBU_A%2CyEaX9WH9drM%2CfpneeOVpuhE%2CalMJXIiTXb8%2CL7cI2NpBG5I%2C1rRX1wI3kDY%2C7Z4yI3jxBd4%2CFvGgnsijpRo%2CnsoYLMVmz0g%2CCUKwHhxINf0%2CihhMNJRbEec%2CJ1RC3SUastI%2CKQjB8NXa1c4%2Co2A6EkuwJ_M%2CcrX7VrftRhY%2Cwslypyl8doQ%2Cvboml1MJ2rA%2CLPI6NYq0FGM%2CnDVfSvV1YaQ%2Ccn-AKCC9Ecw%2CBHH-QEKuAlQ%2CdSbObQ2O0EU%2CsYKGFYDolK8%2CrRSmWwZpwtU%2CRLTw8_qOshQ&key=REDACTED&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">

In [7]:
# Display the dimensions of the collected video table as (rows, columns).
# videos_df is only assigned at the end of the fetch cell, so that cell must
# have run to completion before this one.
videos_df.shape

(1099, 2)

## Get Video Metadata

In [13]:
# Look up a single video by ID and print a short summary of its metadata.
video_id = 'NVwUMyYuLtw'

# Build the videos.list request for the descriptive, duration and counter parts, then run it
request = youtube.videos().list(part='snippet,contentDetails,statistics', id=video_id)
response = request.execute()

if not response['items']:
    # An empty result list means no video was returned for this ID
    print("Video not found.")
else:
    video = response['items'][0]
    snippet, statistics, content_details = (
        video['snippet'],
        video['statistics'],
        video['contentDetails'],
    )

    # (label, value) pairs in display order; the description is deliberately left out
    report = (
        ("Title", snippet.get('title')),
        ("Published At", snippet.get('publishedAt')),
        ("Channel Title", snippet.get('channelTitle')),
        ("Tags", snippet.get('tags', [])),
        ("Duration", content_details.get('duration')),
        ("View Count", statistics.get('viewCount')),
        ("Like Count", statistics.get('likeCount')),
        ("Comment Count", statistics.get('commentCount')),
    )
    for label, value in report:
        print(f"{label}: {value}")


HttpError: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/videos?part=snippet%2CcontentDetails%2Cstatistics&id=NVwUMyYuLtw&key=REDACTED&alt=json returned "The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.". Details: "[{'message': 'The request cannot be completed because you have exceeded your <a href="/youtube/v3/getting-started#quota">quota</a>.', 'domain': 'youtube.quota', 'reason': 'quotaExceeded'}]">