# In [16]:
# This script fetches and writes YouTube trending video data into separate CSV files for each specified country.
# It ensures that data is only fetched once per day per country.

from googleapiclient.discovery import build
import csv
import os
from datetime import datetime

# Initialize the YouTube API client.
# The API key is a credential, so it is read from the YOUTUBE_API_KEY
# environment variable instead of being stored in the source file. Any key
# that was previously committed here should be revoked and replaced.
api_key = os.environ.get('YOUTUBE_API_KEY')
if not api_key:
    raise RuntimeError(
        "Set the YOUTUBE_API_KEY environment variable to a valid "
        "YouTube Data API key before running this script."
    )
youtube = build('youtube', 'v3', developerKey=api_key)

# Country codes whose trending charts are collected; each one gets its own
# CSV file.
countries = [
    "CA", "FR", "GB", "IN",
    "JP", "MX", "RU", "US",
]

# Column order shared by the header row and every data row of the CSV files.
column_headers = [
    # Video and channel identity
    "video_id", "title", "publishedAt", "channelId", "channelTitle", "categoryId",
    # Snapshot date and tags
    "trending_date", "tags",
    # Counters
    "view_count", "likes", "comment_count",
    # Remaining metadata
    "thumbnail_link", "comments_disabled", "ratings_disabled", "description",
]

# All CSV files are stored in the 'input' directory; create it if it is
# missing and leave it untouched when it already exists.
data_directory = 'input'
os.makedirs(data_directory, exist_ok=True)

def already_fetched_today(csv_path, today, date_column=6):
    """Return True when the last non-empty row of *csv_path* is dated *today*.

    Args:
        csv_path: Path of the per-country CSV file; it may not exist yet.
        today: Date string in ``YYYY-MM-DD`` form to compare against.
        date_column: Index of the trending-date column within a row.

    Returns:
        False when the file is missing, has no rows, or its last row is too
        short to contain the date column; otherwise whether the date part of
        that column (text before any ``T``) equals *today*.
    """
    if not os.path.isfile(csv_path):
        return False

    last_row = None
    with open(csv_path, 'r', newline='', encoding='utf-8') as handle:
        for row in csv.reader(handle):
            if row:  # csv.reader yields [] for blank lines; ignore them
                last_row = row

    # A truncated or malformed last row must not abort the run with an
    # IndexError; treat it as "not fetched yet".
    if last_row is None or len(last_row) <= date_column:
        return False
    return last_row[date_column].split('T')[0] == today


def video_to_row(video, trending_date):
    """Convert one ``videos.list`` item into a CSV row.

    Args:
        video: Item dict from the API response; must contain ``id`` and
            ``snippet``. ``statistics`` is optional.
        trending_date: Date string stored in the ``trending_date`` column.

    Returns:
        A list of 15 values in the same order as the CSV header.
    """
    snippet = video['snippet']
    statistics = video.get('statistics', {})
    return [
        video['id'],
        snippet['title'],
        snippet['publishedAt'],
        snippet['channelId'],
        snippet['channelTitle'],
        snippet['categoryId'],
        trending_date,
        ','.join(snippet.get('tags', [])),
        statistics.get('viewCount', 0),
        statistics.get('likeCount', 0),
        statistics.get('commentCount', 0),
        snippet['thumbnails']['default']['url'],
        # The snippet has no 'comments_disabled' / 'ratings_disabled' keys, so
        # looking them up there always produced False. A count missing from
        # the statistics is used as the signal that the feature is disabled.
        'commentCount' not in statistics,
        'likeCount' not in statistics,
        snippet.get('description', ''),
    ]


def fetch_trending_rows(client, region, trending_date, limit=200, page_size=50):
    """Collect up to *limit* trending-video rows for *region*.

    Args:
        client: YouTube API client exposing ``videos().list(...).execute()``.
        region: Country code passed to the API as ``regionCode``.
        trending_date: Date string stored in each row.
        limit: Maximum number of videos to return.
        page_size: ``maxResults`` sent with each request.

    Returns:
        A list of CSV rows. It is shorter than *limit* when the API runs out
        of pages first.
    """
    rows = []
    page_token = None
    while len(rows) < limit:
        response = client.videos().list(
            part="snippet,contentDetails,statistics",
            chart='mostPopular',
            regionCode=region,
            maxResults=page_size,
            pageToken=page_token,
        ).execute()

        for video in response.get('items', []):
            rows.append(video_to_row(video, trending_date))
            if len(rows) >= limit:
                break

        # No token means the last page has been consumed.
        page_token = response.get('nextPageToken')
        if not page_token:
            break
    return rows


def append_rows(csv_path, header, rows):
    """Append *rows* to *csv_path*, writing *header* first if the file is empty.

    Args:
        csv_path: Destination CSV file; created when missing.
        header: Column names written once, at the top of a new file.
        rows: Iterable of row lists to append.
    """
    with open(csv_path, 'a', newline='', encoding='utf-8') as handle:
        writer = csv.writer(handle)
        # Append mode has just created the file if it was missing, so a size
        # of zero means nothing has been written to it yet.
        if not os.path.getsize(csv_path):
            writer.writerow(header)
        writer.writerows(rows)


# Fetch and save the trending video data for each country. The guard is true
# when the file is run as a script or as a notebook cell, and lets the helpers
# above be imported without triggering API calls.
if __name__ == "__main__":
    # Computed once so that every country in this run carries the same date,
    # even if the run crosses midnight.
    today_date = datetime.now().strftime("%Y-%m-%d")

    for country_code in countries:
        file_path = os.path.join(data_directory, f'{country_code}_youtube_trending_data.csv')

        # Skip countries whose file already ends with a row from today.
        if already_fetched_today(file_path, today_date):
            continue

        # Fetch everything before touching the file, so an API failure cannot
        # leave a partial day that later runs would mistake for a complete one.
        trending_rows = fetch_trending_rows(youtube, country_code, today_date)
        append_rows(file_path, column_headers, trending_rows)


# Notebook output: YouTube video categories have been saved to 'US_category_id.json'
