In [None]:
import pandas as pd
from googleapiclient.discovery import build
from dotenv import load_dotenv
import os
import time

# Notebook setup: load credentials, build the YouTube client, read the channel
# list and prepare the empty result frame used by the collection loop.
load_dotenv()
# API key settings
API_KEY = os.getenv('youtube_api_key_3')
if not API_KEY:
    # Fail fast: without a key every request below would fail inside the
    # per-channel try/except and the notebook would silently write an empty CSV.
    raise RuntimeError(
        "Environment variable 'youtube_api_key_3' is not set; "
        "add it to the environment or to the .env file."
    )
youtube = build('youtube', 'v3', developerKey=API_KEY)

df = pd.read_csv('youtubeChannelData.csv')

# Only the first 10 rows (channels) for testing (quota limit)
df_channels = df[:10]

# Columns that will be collected for every video
columns = ['Channel Id', 'Channel Name', 'Category', 'publishedAt', 'title', 'description', 
          'channelTitle', 'categoryId', 'viewCount', 'likeCount', 'commentCount']
df_videos = pd.DataFrame(columns=columns)

def get_shorts_videos(playlist_id):
    """Return the IDs of all videos in a playlist, following pagination.

    Args:
        playlist_id: ID of the playlist to list through the module-level
            ``youtube`` client.

    Returns:
        list: Video IDs in the order the API returned them, across all pages.
    """
    collected_ids = []
    page_token = None  # None asks for the first page
    more_pages = True

    while more_pages:
        page = youtube.playlistItems().list(
            part="contentDetails",
            playlistId=playlist_id,
            maxResults=50,
            pageToken=page_token,
        ).execute()

        # pull the video id out of every playlist entry on this page
        collected_ids.extend(
            entry['contentDetails']['videoId'] for entry in page.get('items', [])
        )

        # a missing or empty token means this was the last page
        page_token = page.get('nextPageToken')
        more_pages = bool(page_token)

    return collected_ids

def get_videos_metadata(video_ids, year='2024'):
    """Fetch snippet and statistics data for the videos published in one year.

    Video IDs are looked up in batches of 50 through the module-level
    ``youtube`` client. Videos whose ``publishedAt`` year differs from
    ``year`` are skipped. ``video_ids`` is assumed to be ordered newest first
    (as the caller's playlist listing is presumed to be), so once a batch
    contains a video older than ``year`` no further batches are requested.

    Args:
        video_ids: List of video ID strings.
        year: Four-digit publication year to keep, as str or int.
            Defaults to '2024'.

    Returns:
        pandas.DataFrame: One row per matching video with the columns
        publishedAt, title, description, channelTitle, categoryId, viewCount,
        likeCount and commentCount. The columns are present even when no
        video matches.
    """
    target_year = str(year)
    output_columns = ['publishedAt', 'title', 'description', 'channelTitle',
                      'categoryId', 'viewCount', 'likeCount', 'commentCount']
    all_video_data = []

    for i in range(0, len(video_ids), 50):  # API max limit is 50
        request = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            id=','.join(video_ids[i:i+50]),
            fields="items(id,snippet(publishedAt,title,description,channelTitle,categoryId),statistics(viewCount,likeCount,commentCount))"
        )
        response = request.execute()

        reached_older_videos = False
        for video in response.get('items', []):
            snippet = video['snippet']
            published_at = snippet['publishedAt']
            published_year = published_at[:4]

            if published_year != target_year:
                # Newer videos are only skipped: stopping on them would drop
                # every target-year video that follows in the list.
                # Four-digit year strings compare correctly as text.
                if published_year < target_year:
                    reached_older_videos = True
                continue

            # Counters missing from the response fall back to 0
            statistics = video.get('statistics', {})
            all_video_data.append({
                'publishedAt': published_at,
                'title': snippet['title'],
                'description': snippet['description'],
                'channelTitle': snippet['channelTitle'],
                'categoryId': snippet['categoryId'],
                'viewCount': statistics.get('viewCount', 0),
                'likeCount': statistics.get('likeCount', 0),
                'commentCount': statistics.get('commentCount', 0)
            })

        if reached_older_videos:
            # Later batches hold even older videos; do not spend quota on them.
            break

    return pd.DataFrame(all_video_data, columns=output_columns)

# Collect Shorts metadata for every channel in df_channels and write the CSV.
channel_frames = []  # one frame per channel; concatenated once after the loop

for _, channel in df_channels.iterrows():
    try:
        # getting Shorts Playlist ID
        channel_id = channel['Channel ID']
        playlist_id = 'UUSH' + channel_id[2:]  # replace first 2 characters with 'UUSH'

        # get shorts videos from playlist
        video_ids = get_shorts_videos(playlist_id)
        if not video_ids:
            continue

        # get metadata of videos
        videos_metadata = get_videos_metadata(video_ids)
        if videos_metadata.empty:
            # nothing to add for this channel
            continue

        # add channel info
        videos_metadata['Channel Id'] = channel_id
        videos_metadata['Channel Name'] = channel['Channel Name']
        videos_metadata['Category'] = channel['Category']

        channel_frames.append(videos_metadata)

    except Exception as e:
        # Quota Error or No shorts videos.
        # The text of a failed request contains the request URL including the
        # API key, so the key is redacted before it reaches the notebook output.
        error_text = str(e)
        if API_KEY:
            error_text = error_text.replace(API_KEY, '<REDACTED>')
        print(f"Error processing channel {channel['Channel ID']}: {error_text}")
        continue

# add to dataframe in a single concat instead of re-copying it per channel
if channel_frames:
    df_videos = pd.concat([df_videos, *channel_frames], ignore_index=True)

# save to csv
df_videos.to_csv('youtube_videos_2024.csv', index=False, encoding='utf-8-sig')


Error processing channel UCTheFErn4MureanSiqgKIag: <HttpError 404 when requesting https://youtube.googleapis.com/youtube/v3/playlistItems?part=contentDetails&playlistId=UUSHTheFErn4MureanSiqgKIag&maxResults=50&key=<REDACTED>&alt=json returned "The playlist identified with the request's <code>playlistId</code> parameter cannot be found.". Details: "[{'message': "The playlist identified with the request's <code>playlistId</code> parameter cannot be found.", 'domain': 'youtube.playlistItem', 'reason': 'playlistNotFound', 'location': 'playlistId', 'locationType': 'parameter'}]">
Error processing channel UCU2zNeYhf9pi_wSqFbYE96w: <HttpError 404 when requesting https://youtube.googleapis.com/youtube/v3/playlistItems?part=contentDetails&playlistId=UUSHU2zNeYhf9pi_wSqFbYE96w&maxResults=50&key=<REDACTED>&alt=json returned "The playlist identified with the request's <code>playlistId</code> parameter cannot be found.". Details: "[{'message

In [None]:
# Second batch of channels: rows 20-29 of the channel list.
# NOTE(review): rows 10-19 are not processed by any cell visible here - confirm
# that the gap is intentional.
df_channels = df[20:30]

df_videos = pd.DataFrame(columns=columns)
channel_frames = []  # one frame per channel; concatenated once after the loop

for _, channel in df_channels.iterrows():
    try:
        # getting Shorts Playlist ID
        channel_id = channel['Channel ID']
        playlist_id = 'UUSH' + channel_id[2:]  # replace first 2 characters with 'UUSH'

        # get shorts videos from playlist
        video_ids = get_shorts_videos(playlist_id)
        if not video_ids:
            continue

        # get metadata of videos
        videos_metadata = get_videos_metadata(video_ids)
        if videos_metadata.empty:
            # nothing to add for this channel
            continue

        # add channel info
        videos_metadata['Channel Id'] = channel_id
        videos_metadata['Channel Name'] = channel['Channel Name']
        videos_metadata['Category'] = channel['Category']

        channel_frames.append(videos_metadata)

    except Exception as e:
        # The text of a failed request contains the request URL including the
        # API key, so the key is redacted before it reaches the notebook output.
        error_text = str(e)
        if API_KEY:
            error_text = error_text.replace(API_KEY, '<REDACTED>')
        print(f"Error processing channel {channel['Channel ID']}: {error_text}")
        continue

# add to dataframe in a single concat instead of re-copying it per channel
if channel_frames:
    df_videos = pd.concat([df_videos, *channel_frames], ignore_index=True)

# save to csv by adding to existing csv
existing_csv_path = 'youtube_videos_2024.csv'
# Write the header only when the file is missing or empty, so the appended rows
# never end up in a headerless CSV.
needs_header = (
    not os.path.exists(existing_csv_path)
    or os.path.getsize(existing_csv_path) == 0
)
df_videos.to_csv(existing_csv_path, mode='a', header=needs_header, index=False, encoding='utf-8-sig')
