In [None]:
import os
import pandas as pd
import googleapiclient.discovery
from datetime import datetime
from googleapiclient.errors import HttpError
import sys
import argparse

In [None]:
# SECURITY: a literal API key used to be committed on this line. Treat that key
# as compromised and revoke it in the Google Cloud console. The key is now read
# from the YOUTUBE_API_KEY environment variable (empty string when unset) so
# that no credential lives in source control.
api_key = os.environ.get("YOUTUBE_API_KEY", "")

In [None]:
def _prompt_for_date(prompt):
    """Ask on stdin until the reply parses as YYYY-MM-DD; return the datetime."""
    while True:
        try:
            return datetime.strptime(input(prompt), "%Y-%m-%d")
        except ValueError:
            print("Invalid date format. Please use YYYY-MM-DD.")


def _first_item(response):
    """Return the first entry of an API list response, or None if it is empty."""
    items = response.get('items') or []
    return items[0] if items else None


def _search_video_ids(youtube, published_after, published_before, page_size=50):
    """Page through search results and return the matching video IDs.

    On an HttpError the error is printed and the IDs gathered so far are
    returned, so a failure part-way through does not discard earlier pages.
    """
    video_ids = []
    page_token = None
    try:
        while True:
            page = youtube.search().list(
                part='id',
                type='video',
                q='sport,sports',
                order='relevance',
                maxResults=page_size,
                publishedAfter=published_after,
                publishedBefore=published_before,
                pageToken=page_token
            ).execute()

            for item in page.get('items', []):
                video_id = item.get('id', {}).get('videoId')
                if video_id:
                    video_ids.append(video_id)

            page_token = page.get('nextPageToken')
            if not page_token:
                break
    except HttpError as e:
        print(f"An error occurred: {e}")
    return video_ids


def _collect_video_rows(youtube, video_ids):
    """Fetch video and channel details and build the three row lists.

    Returns a ``(search_rows, stats_rows, channel_rows)`` tuple of lists of
    dicts. Videos for which the API returns no item (e.g. deleted or private)
    are skipped with a message. On an HttpError the error is printed and the
    rows collected so far are returned.
    """
    search_rows = []
    stats_rows = []
    channel_rows = []
    # Many videos share a channel; remember each channel snippet so the same
    # channel is requested only once.
    channel_snippets = {}

    try:
        for video_id in video_ids:
            video = _first_item(youtube.videos().list(
                # topicDetails / recordingDetails must be requested explicitly:
                # topicCategories and locationDescription live there, not in
                # snippet / contentDetails.
                part='snippet,contentDetails,statistics,topicDetails,recordingDetails',
                id=video_id
            ).execute())
            if video is None:
                print(f"Skipping video {video_id}: no details returned")
                continue

            snippet = video['snippet']
            content_details = video.get('contentDetails', {})
            statistics = video.get('statistics', {})
            topic_details = video.get('topicDetails', {})
            recording_details = video.get('recordingDetails', {})
            channel_id = snippet['channelId']

            if channel_id not in channel_snippets:
                channel = _first_item(youtube.channels().list(
                    part='snippet',
                    id=channel_id
                ).execute())
                # An empty dict stands in for a channel the API did not return,
                # so the video rows are still recorded.
                channel_snippets[channel_id] = channel['snippet'] if channel else {}
            channel_snippet = channel_snippets[channel_id]

            search_rows.append({
                'videoid': video_id,
                'publishedAt': snippet['publishedAt'],
                'channelid': channel_id,
                'title': snippet['title'],
                'description': snippet['description'],
                'locationdescription': recording_details.get('locationDescription', ''),
                'topiccategories': topic_details.get('topicCategories', []),
                'duration': content_details.get('duration', '')
            })

            stats_rows.append({
                'videoid': video_id,
                'viewcounts': statistics.get('viewCount', 0),
                'likecounts': statistics.get('likeCount', 0),
                'favouritecounts': statistics.get('favoriteCount', 0),
                'commentcounts': statistics.get('commentCount', 0)
            })

            channel_rows.append({
                'channelid': channel_id,
                'videoid': video_id,
                'title': channel_snippet.get('title', ''),
                'description': channel_snippet.get('description', ''),
                'country': channel_snippet.get('country', ''),
                'customurl': channel_snippet.get('customUrl', ''),
                'defaultlanguage': channel_snippet.get('defaultLanguage', '')
            })
    except HttpError as e:
        print(f"An error occurred: {e}")

    return search_rows, stats_rows, channel_rows


def main():
    """Collect sports videos published in a date range and export them to CSV.

    Prompts for an API key and a start/end date, searches YouTube for videos
    matching 'sport,sports' in that range, fetches per-video and per-channel
    details, prints how many records were processed, and writes three files
    to the current directory: video_search_data1.csv, video_stats_data1.csv
    and channel_data1.csv.
    """
    api_key = input("API key: ")
    youtube = googleapiclient.discovery.build('youtube', 'v3', developerKey=api_key)

    # The range covers whole days: midnight of the start date through the
    # last second of the end date.
    start_date = _prompt_for_date("Enter the start date (YYYY-MM-DD): ").strftime('%Y-%m-%dT00:00:00Z')
    end_date = _prompt_for_date("Enter the end date (YYYY-MM-DD): ").strftime('%Y-%m-%dT23:59:59Z')

    all_video_ids = _search_video_ids(youtube, start_date, end_date, page_size=50)
    video_search_data_list, video_stats_data_list, channel_data_list = _collect_video_rows(
        youtube, all_video_ids
    )

    print(f"Total video IDs: {len(all_video_ids)}")
    print(f"Total search(videoids) processed: {len(video_search_data_list)}")
    print(f"Total stats(videoids) processed: {len(video_stats_data_list)}")
    print(f"Total channels(videoids) processed: {len(channel_data_list)}")

    # Create DataFrames and write one CSV per record type.
    pd.DataFrame(video_search_data_list).to_csv('video_search_data1.csv', index=False)
    pd.DataFrame(video_stats_data_list).to_csv('video_stats_data1.csv', index=False)
    pd.DataFrame(channel_data_list).to_csv('channel_data1.csv', index=False)

# Run the collector only when this file is executed directly, not when it is
# imported as a module.
if __name__ == "__main__":
    main()
