In [1]:
import csv
import os
import re
import time

from googleapiclient.discovery import build

In [2]:
# YouTube Data API setup.
# The API key is read from the YOUTUBE_API_KEY environment variable so the
# credential is never stored in the notebook or committed to version control.
# NOTE: any key that was ever hard-coded in this cell should be revoked.
DEVELOPER_KEY = os.environ.get('YOUTUBE_API_KEY', '')
if not DEVELOPER_KEY:
    # Warn rather than raise so the remaining cells can still be defined.
    print('WARNING: YOUTUBE_API_KEY is not set; YouTube API requests are expected to fail.')
YOUTUBE_API_SERVICE_NAME = 'youtube'
YOUTUBE_API_VERSION = 'v3'

In [3]:
# Region code -> display name for every region whose trending chart is fetched.
# Insertion order matters: it is the order regions are fetched and written out.
COUNTRY_CODES = {
    'US': 'United States',
    'GB': 'United Kingdom',
    'CA': 'Canada',
    'DE': 'Germany',
    'FR': 'France',
    'IN': 'India',
    'JP': 'Japan',
    'KR': 'South Korea',
    'BR': 'Brazil',
    'AU': 'Australia',
    'RU': 'Russia',
    'MX': 'Mexico',
    'ES': 'Spain',
    'IT': 'Italy',
    'ID': 'Indonesia',
}

In [4]:
# Shared YouTube Data API client; the request helpers in this notebook read it
# as a module-level global.
youtube = build(
    YOUTUBE_API_SERVICE_NAME,
    YOUTUBE_API_VERSION,
    developerKey=DEVELOPER_KEY,
)

In [5]:
def get_video_categories(region_code='US'):
    """Fetch the video categories the API lists for a region.

    Args:
        region_code: Region code forwarded to the API as ``regionCode``.

    Returns:
        dict mapping each category id to its snippet title.
    """
    listing = youtube.videoCategories().list(
        part='snippet',
        regionCode=region_code,
    ).execute()

    return {entry['id']: entry['snippet']['title'] for entry in listing['items']}

In [6]:
def parse_duration(duration):
    """Convert an ISO 8601 duration string into a number of seconds.

    Handles the common ``PT#H#M#S`` form as well as durations of a day or
    more, where a date component precedes the ``T`` separator
    (``P#DT#H#M#S``). Year and month components are not supported because
    they have no fixed length in seconds.

    Args:
        duration: Duration string such as ``'PT1H2M3S'`` or ``'P1DT2H'``.

    Returns:
        int: Total number of seconds; 0 when ``duration`` is empty or holds
        no recognised component.
    """
    if not duration:
        return 0

    # Split at 'T' so each unit is only searched for in the part where it is
    # valid; in particular 'M' is then always minutes, never months.
    date_part, _, time_part = duration.partition('T')

    unit_table = (
        (r'(\d+)W', date_part, 7 * 24 * 3600),
        (r'(\d+)D', date_part, 24 * 3600),
        (r'(\d+)H', time_part, 3600),
        (r'(\d+)M', time_part, 60),
        (r'(\d+)S', time_part, 1),
    )

    seconds = 0
    for pattern, text, factor in unit_table:
        found = re.search(pattern, text)
        if found:
            seconds += int(found.group(1)) * factor

    return seconds

In [7]:
def _build_video_row(item, rank, region_code, categories):
    """Flatten one ``videos.list`` item into the flat row dict used for the CSV."""
    snippet = item['snippet']
    statistics = item.get('statistics', {})
    category_id = snippet['categoryId']

    return {
        "rank": rank,
        # Fall back to the raw code for regions missing from COUNTRY_CODES.
        'region': COUNTRY_CODES.get(region_code, region_code),
        "title": snippet['title'],
        'language': snippet.get('defaultAudioLanguage', 'unknown'),
        'live_broadcast_content': snippet['liveBroadcastContent'],
        # The category map comes from a single region, so an id may be absent.
        "category": categories.get(category_id, 'unknown'),
        "channel_title": snippet['channelTitle'],
        "description": snippet['description'],
        "thumbnail": snippet.get('thumbnails', {}).get('medium', {}).get('url', ''),
        "publish_at": snippet['publishedAt'],
        # Tags are joined into one comma-separated string so they fit one cell.
        "tags": ', '.join(snippet.get('tags', [])),
        'duration_second': parse_duration(item.get('contentDetails', {}).get('duration', '')),
        # Statistics can be hidden per video, so every counter has a default.
        "likes": statistics.get('likeCount', 0),
        "views": statistics.get('viewCount', 0),
        "comments": statistics.get('commentCount', 0),
        'region_code': region_code,
        "video_id": item['id'],
        "category_id": category_id,
        "channel_id": snippet['channelId'],
    }


def get_trending_videos(region_code='US', max_results=50):
    """Fetch every page of the ``mostPopular`` chart for one region.

    Category titles are resolved from the 'US' category list regardless of
    ``region_code`` -- NOTE(review): confirm this is intended; ids missing
    from that list are reported as 'unknown'.

    Args:
        region_code: Region code forwarded to the API as ``regionCode``.
        max_results: Page size forwarded to the API as ``maxResults``.

    Returns:
        list of row dicts, one per video, with ``rank`` counting from 1 in the
        order the API returned the videos.
    """
    categories = get_video_categories('US')

    videos_data = []
    next_page_token = None

    while True:
        response = youtube.videos().list(
            part='snippet,statistics,contentDetails',
            chart='mostPopular',
            regionCode=region_code,
            maxResults=max_results,
            pageToken=next_page_token,
        ).execute()

        items = response.get('items', [])
        for item in items:
            rank = len(videos_data) + 1
            videos_data.append(_build_video_row(item, rank, region_code, categories))

        next_page_token = response.get('nextPageToken')
        total_videos = response.get('pageInfo', {}).get('totalResults')
        reached_total = total_videos is not None and len(videos_data) >= total_videos

        # Stop on the last page. The API can return fewer videos than
        # totalResults announces, so a missing token (or an empty page) ends
        # the loop instead of raising KeyError or spinning forever.
        if not next_page_token or not items or reached_total:
            break

        # Pause only between page requests, never after the final one.
        time.sleep(1)

    return videos_data

In [8]:
def save_to_csv(data, filename='trending_videos.csv'):
    """Write a collection of row dicts to a UTF-8 CSV file.

    The header is the union of the keys of all rows, in first-seen order, so
    a row with extra keys no longer aborts the write; keys a row lacks are
    written as empty cells.

    Args:
        data: Iterable of dicts, one per CSV row.
        filename: Destination path; an existing file is overwritten.

    Returns:
        None. When ``data`` is empty an empty file is created (no header,
        since there are no keys to derive one from).
    """
    rows = list(data)

    # dict.fromkeys keeps first-seen order while dropping repeated keys.
    fieldnames = list(dict.fromkeys(key for row in rows for key in row))

    with open(filename, 'w', encoding='utf8', newline='') as output_file:
        if not rows:
            return
        writer = csv.DictWriter(output_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)

In [12]:
# Fetch the trending chart for every configured region and gather all rows
# into one flat list.
countries = COUNTRY_CODES.keys()

data = []
for country in countries:
    # extend(), not append: get_trending_videos returns a list of rows and
    # `data` must stay a flat list of row dicts for the CSV writer.
    data.extend(get_trending_videos(country))
    print(f'{country} fetched')

US fetched
GB fetched
CA fetched
DE fetched
FR fetched
IN fetched
JP fetched
KR fetched
BR fetched
AU fetched
RU fetched
MX fetched
ES fetched
IT fetched
ID fetched


In [None]:
# Persist every collected row to one CSV, one directory above the working directory.
save_to_csv(data, filename='./../trending_videos_global.csv')