<a href="https://www.kaggle.com/code/maimoonakhilji/scrap-worldcup-2023-data?scriptVersionId=154968814" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [1]:
!pip install isodate

Collecting isodate
  Downloading isodate-0.6.1-py2.py3-none-any.whl (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.7/41.7 kB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: isodate
Successfully installed isodate-0.6.1


In [2]:
import os
import csv
import pandas as pd
import datetime
import googleapiclient.discovery
from isodate import parse_duration

In [3]:
# YouTube Data API configuration.
#
# SECURITY: supply the key through the YOUTUBE_API_KEY environment variable
# rather than committing it to the notebook. The literal below is kept only as
# a backward-compatible fallback; because it has been published it should be
# treated as compromised and rotated.
API_KEY = os.environ.get('YOUTUBE_API_KEY') or 'AIzaSyBW30IqKyWTAEDXT6NEN3ZYcviJgvjs79o'
API_NAME = 'youtube'
API_VERSION = 'v3'

def get_channel_name(api_service, channel_id):
    """Return the title of a YouTube channel, or None if it cannot be found.

    Args:
        api_service: API client object exposing ``channels().list(...)``.
        channel_id: ID of the channel to look up.

    Returns:
        The ``snippet.title`` of the first returned item, or ``None`` when the
        response has no ``items`` key or the ``items`` list is empty.
    """
    request = api_service.channels().list(part='snippet', id=channel_id, key=API_KEY)
    response = request.execute()

    # 'items' may be absent *or* present but empty; indexing [0] on an empty
    # list would raise IndexError, so both cases are treated as "not found".
    items = response.get('items') or []
    if not items:
        return None
    return items[0]['snippet']['title']

def get_videos_in_channel(api_service, channel_id, start_date, end_date):
    """Collect all video search results for a channel within a date window.

    Args:
        api_service: API client object exposing ``search().list(...)``.
        channel_id: ID of the channel whose videos are listed.
        start_date: ``datetime`` lower bound, sent as ``publishedAfter``.
        end_date: ``datetime`` upper bound, sent as ``publishedBefore``.

    Returns:
        A list with the ``items`` entries of every result page, in the order
        the pages were returned.
    """
    # Both bounds are sent in the same RFC 3339 layout.
    timestamp_format = '%Y-%m-%dT%H:%M:%SZ'
    published_after = start_date.strftime(timestamp_format)
    published_before = end_date.strftime(timestamp_format)

    collected = []
    page_token = None
    has_more_pages = True

    # Follow 'nextPageToken' until a page arrives without one.
    while has_more_pages:
        page = api_service.search().list(
            part='id',
            channelId=channel_id,
            maxResults=50,  # page size requested from the API
            order='date',
            pageToken=page_token,
            type='video',
            key=API_KEY,
            publishedAfter=published_after,
            publishedBefore=published_before,
        ).execute()

        collected.extend(page.get('items', []))

        page_token = page.get('nextPageToken')
        has_more_pages = bool(page_token)

    return collected

def get_video_details(api_service, video_id):
    """Fetch snippet, statistics and content details for a single video.

    Args:
        api_service: API client object exposing ``videos().list(...)``.
        video_id: ID of the video to look up.

    Returns:
        The first entry of the response's ``items`` list, or ``None`` when the
        response has no ``items`` key or the list is empty.
    """
    request = api_service.videos().list(
        part='snippet,statistics,contentDetails',
        id=video_id,
        key=API_KEY
    )
    response = request.execute()

    # An empty 'items' list must not be indexed; without this guard a single
    # unavailable video would raise IndexError and abort the caller's loop.
    items = response.get('items') or []
    if not items:
        return None
    return items[0]

def _safe_file_stem(name):
    """Replace characters that are commonly invalid in file names with '_'."""
    return ''.join('_' if ch in '<>:"/\\|?*' else ch for ch in name)


def _video_row(video_id, video_details):
    """Flatten one video-details item into a dict keyed by the CSV column names.

    Fields missing from the response are written as 'N/A'; missing tags become
    an empty string.
    """
    snippet = video_details.get("snippet", {})
    statistics = video_details.get("statistics", {})
    content_details = video_details.get("contentDetails", {})

    return {
        'Video Name': snippet.get('title', 'N/A'),
        'Video Link': f"https://www.youtube.com/watch?v={video_id}",
        'View Count': statistics.get('viewCount', 'N/A'),
        'Comment Count': statistics.get('commentCount', 'N/A'),
        'Channel Name': snippet.get('channelTitle', 'N/A'),
        'Published Date': snippet.get('publishedAt', 'N/A'),
        'Like count': statistics.get('likeCount', 'N/A'),
        'Dislikes': statistics.get('dislikeCount', 'N/A'),
        'Video Duration': content_details.get('duration', 'N/A'),
        'Description': snippet.get('description', 'N/A'),
        'Tags': ','.join(snippet.get('tags', [])),
        'Category': snippet.get('categoryId', 'N/A'),
        'Thumbnail URL': snippet.get('thumbnails', {}).get('default', {}).get('url', 'N/A'),
    }


def main():
    """Export World Cup 2023 video metadata for every channel in the input CSV.

    Reads channel IDs from the 'Channel ID' column of the input file, looks up
    each channel's videos published inside the tournament date range, and
    writes one ``<channel name>_worldcup2023.csv`` file per channel into the
    current working directory. Progress is reported on standard output.
    """
    # Create the YouTube API service
    api_service = googleapiclient.discovery.build(API_NAME, API_VERSION, developerKey=API_KEY)

    # Date range of Worldcup 2023; identical for every channel, so parsed once.
    start_date = datetime.datetime.strptime("2023-10-05", '%Y-%m-%d')
    end_date = datetime.datetime.strptime("2023-11-20", '%Y-%m-%d')

    fieldnames = ['Video Name', 'Video Link', 'View Count', 'Comment Count', 'Channel Name', 'Published Date', 'Like count', 'Dislikes', 'Video Duration', 'Description', 'Tags', 'Category', 'Thumbnail URL']

    # Every file written, so the final summary never touches an unbound name
    # and reports all outputs rather than only the last one.
    saved_files = []

    # Read channel IDs from "sports channels.csv"
    with open("/kaggle/input/pakistani-sports-channels/sports channels.csv", mode='r', newline='',encoding='latin-1') as channels_file:
        csv_reader = csv.DictReader(channels_file)

        for row in csv_reader:
            channel_id = row['Channel ID']
            channel_name = get_channel_name(api_service, channel_id)

            if not channel_name:
                # channel_name is None here, so report the ID that failed.
                print(f"Channel '{channel_id}' not found.")
                continue

            # Get all videos in the channel within the specified date range
            videos = get_videos_in_channel(api_service, channel_id, start_date, end_date)

            # Name the CSV file after the channel; the title is sanitised
            # because it may contain characters such as '/'.
            csv_file_name = f"{_safe_file_stem(channel_name)}_worldcup2023.csv"

            with open(csv_file_name, mode='w', newline='', encoding='utf-8') as csv_file:
                writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
                writer.writeheader()

                for video in videos:
                    video_id = video['id']['videoId']
                    video_details = get_video_details(api_service, video_id)

                    # Videos whose details could not be fetched are skipped.
                    if video_details:
                        writer.writerow(_video_row(video_id, video_details))

            saved_files.append(csv_file_name)
            print(f"Video data for '{channel_name}' saved.")

    if saved_files:
        print(f"All video data saved to {len(saved_files)} file(s): {', '.join(saved_files)}")
    else:
        print("No video data was saved.")

# Run the scraper only when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()


Video data for 'A Sports' saved.
Video data for 'Hasna Mana Hai' saved.
Video data for 'Har Lamha Purjosh' saved.
Video data for 'SAMAA TV' saved.
Video data for 'GEO SUPER' saved.
Video data for 'SUNO NEWS HD' saved.
All video data saved to 'SUNO NEWS HD_worldcup2023.csv'
