# In [None]:  (Jupyter notebook cell prompt left over from the export)
from datetime import datetime, timedelta
import os
import time

from googleapiclient.discovery import build
import isodate
import pandas as pd
import schedule

# Total number of days the scheduler keeps running before it stops itself
day_to_run = 30

# YouTube Data API key. It is read from the YOUTUBE_API_KEY environment variable
# so the secret does not have to live in the source file. When the variable is
# not set, the placeholder below is used and must be replaced with a real key.
api_key = os.environ.get('YOUTUBE_API_KEY', 'Your_API_Key')

def daily_job(region_code='HK'):
    """Download the current "most popular" chart and save it as an Excel file.

    One row is written per video. The spreadsheet is saved in the current
    working directory as ``youtube_trending_<region>_<date>.xlsx`` where
    ``<date>`` is today's local date.

    Args:
        region_code: Region code passed to the API both for the video chart
            and for the category-name lookup, so the two always agree.
            Defaults to ``'HK'``.
    """
    youtube = build('youtube', 'v3', developerKey=api_key)

    # Category names are looked up for the same region as the chart itself.
    category_mapping = _fetch_category_mapping(youtube, region_code)

    # The date the snapshot was taken is recorded on every row.
    trending_date = datetime.now().date()

    video_data = [
        _build_video_row(item, trending_date, category_mapping)
        for item in _iter_trending_videos(youtube, region_code)
    ]

    df = pd.DataFrame(video_data)
    excel_file_name = f'youtube_trending_{region_code.lower()}_{trending_date}.xlsx'
    df.to_excel(excel_file_name, index=False)


def _fetch_category_mapping(youtube, region_code):
    """Return a dict mapping category id to category title for *region_code*."""
    response = youtube.videoCategories().list(
        part='snippet',
        regionCode=region_code,
    ).execute()
    return {
        category['id']: category['snippet']['title']
        for category in response.get('items', [])
    }


def _iter_trending_videos(youtube, region_code):
    """Yield every item of the "mostPopular" chart, following page tokens."""
    page_token = None
    while True:
        response = youtube.videos().list(
            part="snippet,contentDetails,statistics",
            chart="mostPopular",
            regionCode=region_code,
            maxResults=50,
            pageToken=page_token,
        ).execute()
        yield from response.get('items', [])

        # No token in the response means the last page has been reached.
        page_token = response.get('nextPageToken')
        if not page_token:
            return


def _format_duration(duration):
    """Format a parsed duration as a zero-padded ``HH:MM:SS`` string.

    Hours are not wrapped at 24, so a 26-hour video becomes ``26:00:00``.
    Anything that is not a plain ``timedelta`` is returned via ``str()``
    because it cannot be converted to an exact number of seconds here.
    """
    if not isinstance(duration, timedelta):
        return str(duration)
    hours, remainder = divmod(int(duration.total_seconds()), 3600)
    minutes, seconds = divmod(remainder, 60)
    return f'{hours:02d}:{minutes:02d}:{seconds:02d}'


def _build_video_row(item, trending_date, category_mapping):
    """Flatten one API video item into the dict used as a spreadsheet row.

    Optional fields that are absent from the item are reported as ``'N/A'``
    instead of raising, so one incomplete video does not abort the whole run.
    """
    snippet = item['snippet']
    statistics = item.get('statistics', {})
    content_details = item.get('contentDetails', {})

    raw_duration = content_details.get('duration')
    if raw_duration:
        duration_formatted = _format_duration(isodate.parse_duration(raw_duration))
    else:
        duration_formatted = 'N/A'

    # A count is treated as "enabled" when the API includes it in the item.
    # NOTE(review): public dislike counts appear to no longer be returned by
    # the API, so the dislike columns may always read 'No' / 'N/A' - confirm.
    likes_enabled = 'likeCount' in statistics
    dislikes_enabled = 'dislikeCount' in statistics

    thumbnail_link = snippet.get('thumbnails', {}).get('default', {}).get('url', 'N/A')
    tags = snippet.get('tags', [])
    category_id = snippet.get('categoryId')

    # NOTE(review): the keys below are the spreadsheet column headers and are
    # kept exactly as before (including 'Publish Date' and 'Disikes_Enabled')
    # so existing consumers of the files keep working.
    return {
        'Trending_Date': trending_date,
        'Channel_Name': snippet['channelTitle'],
        'Publish Date': snippet['publishedAt'],
        'Video_Name': snippet['title'],
        'Category_ID': category_id,
        'Category_Name': category_mapping.get(category_id, 'Unknown'),
        'View_Number': statistics.get('viewCount', 'N/A'),
        'Likes_Enabled': 'Yes' if likes_enabled else 'No',
        'Like_Number': statistics.get('likeCount', 'N/A'),
        'Disikes_Enabled': 'Yes' if dislikes_enabled else 'No',
        'Dislike_Number': statistics.get('dislikeCount', 'N/A'),
        'Fav_Number': statistics.get('favoriteCount', 'N/A'),
        'Comment_Number': statistics.get('commentCount', 'N/A'),
        'HD': 'Yes' if content_details.get('definition') == 'hd' else 'No',
        'Duration': duration_formatted,
        'Thumbnail_Link': thumbnail_link,
        'Tags': ','.join(tags),
    }

def stop_schedule():
    """Count one day off the module-level ``day_to_run`` budget.

    When the remaining count falls below 1, every job registered with
    ``schedule`` is removed.
    """
    global day_to_run
    day_to_run = day_to_run - 1
    if day_to_run >= 1:
        return
    # Budget used up: drop all scheduled jobs.
    schedule.clear()

# Register the two daily jobs: the data pull at 13:00 and, one minute later,
# the countdown that decides whether the schedule should be cleared.
schedule.every().day.at("13:00").do(daily_job)
schedule.every().day.at("13:01").do(stop_schedule)

# Keep the script alive, checking for due jobs once a minute, until the
# day_to_run countdown is used up.
while True:
    if day_to_run <= 0:
        break
    schedule.run_pending()
    time.sleep(60)