# YouTube Comment Scraper

## Qatar 2022 World Cup

This section collects public YouTube comments related to the **FIFA World Cup Qatar 2022**.  
It uses the YouTube Data API to:

- Fetch video metadata (title, category, channel)
- Extract top-level comments and engagement info (likes, replies)
- Save all output into a structured `.csv` file for later processing

In [5]:
import csv
import os
import time

import requests

# YouTube Data API key.
# Prefer supplying the key through the YOUTUBE_API_KEY environment variable so
# the real credential never has to be saved inside the notebook; the masked
# placeholder below is only used when that variable is not set.
API_KEY = os.environ.get("YOUTUBE_API_KEY", "******************************")

# YouTube video IDs whose comments are scraped for the Qatar 2022 World Cup topic
VIDEO_IDS = [
    "RVnQnCcVtek", "AwTaNr5DsfA", "Gczo2oc14oY", "BlKVfJg4hHE", "C-0CebFpF_s",
    "ynVikeFUvW8", "dt_Q03HNbTk", "UMqLDhl8PXw", "pesRWBLk5Gs", "pgqjGoO1PrQ",
    "4fOGEGmW6kI", "dECVkNBVlqw", "vxXfK2-2tyc", "XE-IoVirk7A", "gHjbay54F4U",
    "cmWh40u4tqc", "8ANHtDlM0Yk", "QPaGtOLkUt8", "CDc065zB85I", "Qf8TGA2yOKI",
    "Mfx-fux7NGE", "I0EsOFDA6uM", "qTlG7WyUuLQ", "zQG07WqUD10", "xyjnDBoJY7k",
    "e61NcyUJbIg", "7KL9LExsPLI", "SO23de1XJ7g", "Esc-QKITW5k", "QLNp5JA3g_A",
    "RVvfAOqau8E", "U0onvrn71Qc", "ejd7Zmz1r64", "g5ujVKuWRPI", "c9sXyihOO8Y",
    "kQdrgesj8p8", "0LG-u2RZXPo", "9HNRDGlU_-4", "RIJC9WzD7tc", "xPta_JXwSc0",
    "09MPzF03iec", "u7rzezRtkj0", "x8Hu3V4XS_o", "UehxGD04Y2U", "T7OO1mOk7_g",
    "i3qXyUX9320", "ITKOCKpEca8", "Cr3d4Oi0Mas", "H9VQPwPMxz8", "LhSPzahjZfs",
    "DI8Ksl0Rn64", "TEVev8tIq0c", "u5jTVcx_a7g"
]

# This pulls YouTube category names using the API, maps them to category IDs
def get_category_mapping():
    """Return a dict mapping YouTube category IDs to category titles.

    Queries the ``videoCategories`` endpoint for region ``US`` using the
    module-level ``API_KEY``. If the request cannot be completed or does not
    answer with HTTP 200, a warning is printed and an empty dict is returned,
    so lookups on the result fall back to the caller's default label.
    """
    try:
        response = requests.get(
            "https://www.googleapis.com/youtube/v3/videoCategories",
            params={"part": "snippet", "regionCode": "US", "key": API_KEY},
            timeout=30,  # never wait indefinitely on a stalled connection
        )
    except requests.RequestException as error:
        print(f"Failed to fetch video categories: {error}")
        return {}

    if response.status_code != 200:
        print(f"Failed to fetch video categories: {response.status_code}")
        return {}

    return {
        category["id"]: category["snippet"]["title"]
        for category in response.json().get("items", [])
    }

# Get full dictionary of category ID → category name
CATEGORY_MAPPING = get_category_mapping()

# Name of the CSV where all comments will be saved
OUTPUT_FILE = "FIFA_World_Cup_2022_Qatar.csv"

# The header row is only needed while the CSV has no content yet. A missing
# file and an existing-but-empty file are treated the same way, so an empty
# file still receives its header on the next run.
file_exists = os.path.isfile(OUTPUT_FILE) and os.path.getsize(OUTPUT_FILE) > 0

# Seconds to wait for any single API response before giving up on it
REQUEST_TIMEOUT_SECONDS = 30

# Open the CSV file in append mode (add new rows without wiping old ones)
with open(OUTPUT_FILE, mode="a", newline="", encoding="utf-8") as csv_file:
    fieldnames = [
        "Video_ID", "Video_Title", "Video_Category_Type", "Channel_Name",
        "Comment_ID", "Comment", "Author", "Date", "Likes", "Replies_Count",
        "Data_Source"
    ]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Only write the header row if the file did not already exist
    if not file_exists:
        writer.writeheader()

    total_comments = 0  # Comments written across all videos in this run

    # Loop through every video in the list
    for VIDEO_ID in VIDEO_IDS:
        print(f"Fetching metadata for video: {VIDEO_ID}...")

        # Step 1: video-level metadata (title, category ID, channel name).
        # A network error is reported and skipped just like an HTTP error, so
        # one bad request does not abort the remaining videos.
        try:
            video_response = requests.get(
                "https://www.googleapis.com/youtube/v3/videos",
                params={"part": "snippet", "id": VIDEO_ID, "key": API_KEY},
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
        except requests.RequestException as error:
            print(f" Failed to fetch video metadata for {VIDEO_ID}: {error}")
            continue

        if video_response.status_code != 200:
            print(f" Failed to fetch video metadata for {VIDEO_ID}: {video_response.status_code}")
            continue

        video_items = video_response.json().get("items")
        if not video_items:
            # The API answered but returned no entry for this ID
            print(f" No metadata found for video {VIDEO_ID}. Skipping...")
            continue

        video_info = video_items[0]["snippet"]
        video_title = video_info["title"]
        category_id = video_info.get("categoryId", "Unknown")
        video_category_type = CATEGORY_MAPPING.get(category_id, "Unknown Category")
        channel_name = video_info["channelTitle"]

        print(f"Fetching comments for video: {VIDEO_ID}...")

        # Step 2: page through the top-level comment threads, 100 per request.
        # The query is passed as params so requests URL-encodes the page token.
        comment_params = {
            "part": "snippet",
            "videoId": VIDEO_ID,
            "key": API_KEY,
            "maxResults": 100,
        }
        collected_comments = 0  # Comments written for this video

        while True:
            try:
                response = requests.get(
                    "https://www.googleapis.com/youtube/v3/commentThreads",
                    params=comment_params,
                    timeout=REQUEST_TIMEOUT_SECONDS,
                )
            except requests.RequestException as error:
                print(f"Failed to fetch comments for {VIDEO_ID}: {error}")
                break

            if response.status_code != 200:
                # Stop paging this video and move on to the next one
                print(f"Failed to fetch comments for {VIDEO_ID}: {response.status_code} - {response.text}")
                break

            data = response.json()

            # One CSV row per top-level comment, combined with the video metadata
            for item in data.get("items", []):
                snippet = item["snippet"]["topLevelComment"]["snippet"]
                writer.writerow({
                    "Video_ID": VIDEO_ID,
                    "Video_Title": video_title,
                    "Video_Category_Type": video_category_type,
                    "Channel_Name": channel_name,
                    "Comment_ID": item["id"],
                    "Comment": snippet["textDisplay"],
                    "Author": snippet["authorDisplayName"],
                    "Date": snippet["publishedAt"],
                    "Likes": snippet["likeCount"],
                    "Replies_Count": item["snippet"].get("totalReplyCount", 0),
                    "Data_Source": "YouTube"
                })
                total_comments += 1
                collected_comments += 1

            print(f"✅ {collected_comments} comments fetched so far for {VIDEO_ID}")

            # Pause for a second after every page to avoid hitting the rate limit
            time.sleep(1)

            # No token means this was the last page for the video
            next_page_token = data.get("nextPageToken")
            if not next_page_token:
                break
            comment_params["pageToken"] = next_page_token

# Final printout of total comment count and file name
print(f"\n✅ Total Comments Retrieved: {total_comments}")
print(f"📂 All data saved to '{OUTPUT_FILE}'")


Fetching metadata for video: RVnQnCcVtek...
Fetching comments for video: RVnQnCcVtek...
✅ 68 comments fetched so far for RVnQnCcVtek
Fetching metadata for video: AwTaNr5DsfA...
Fetching comments for video: AwTaNr5DsfA...
✅ 100 comments fetched so far for AwTaNr5DsfA
✅ 200 comments fetched so far for AwTaNr5DsfA
✅ 300 comments fetched so far for AwTaNr5DsfA
Fetching metadata for video: Gczo2oc14oY...
Fetching comments for video: Gczo2oc14oY...
✅ 70 comments fetched so far for Gczo2oc14oY
Fetching metadata for video: BlKVfJg4hHE...
Fetching comments for video: BlKVfJg4hHE...
✅ 100 comments fetched so far for BlKVfJg4hHE
✅ 200 comments fetched so far for BlKVfJg4hHE
✅ 300 comments fetched so far for BlKVfJg4hHE
✅ 400 comments fetched so far for BlKVfJg4hHE
✅ 500 comments fetched so far for BlKVfJg4hHE
✅ 600 comments fetched so far for BlKVfJg4hHE
✅ 700 comments fetched so far for BlKVfJg4hHE
✅ 800 comments fetched so far for BlKVfJg4hHE
✅ 900 comments fetched so far for BlKVfJg4hHE
✅ 1000

## Newcastle Takeover

This section collects YouTube comments related to the **Saudi-led takeover of Newcastle United**.  

The script performs the following steps:

- Retrieves video metadata (title, channel, and category)
- Extracts top-level comments using the YouTube Data API
- Saves all relevant comment data into a `.csv` file

In [10]:
# YouTube video IDs scraped for the Newcastle United takeover topic
# (one ID per line so additions and removals are easy to review)
VIDEO_IDS = [
    "X9fdKFfOPck",
    "P_5l7dF6kak",
    "8JjNY_CCosw",
    "E8vssyn8kOg",
    "CtNX1QSztYc",
    "I7fUfqM3N2g",
    "kMI_HucvbUU",
    "y11a2fYjlpQ",
    "svMZFJf_DDo",
    "yNx4BXnHNOw",
    "CjYJa4ZQsZU",
    "r_cSRwCdnsk",
    "kS1wVgl1xSw",
    "1c3BSJHffI0",
    "crhO6qOxQEI",
    "BYgX4YnLvps",
    "m5pVViOwGAc",
    "BvE_U8KANnI",
    "56wWl3Uezjg",
    "LiaxgseTd7M",
    "7BWaCDRzN_Y",
    "7oVTGh-yr-Y",
    "fqWP6Eao8qk",
    "Ss3-fVMOwXU",
    "vLlOEOT3HKM",
]

# Function to get category names based on YouTube's category ID mapping
# Helps convert category IDs to readable labels
def get_category_mapping():
    """Return a dict mapping YouTube category IDs to category titles.

    Queries the ``videoCategories`` endpoint for region ``US`` using the
    module-level ``API_KEY``. If the request cannot be completed or does not
    answer with HTTP 200, a warning is printed and an empty dict is returned,
    so lookups on the result fall back to the caller's default label.
    """
    try:
        response = requests.get(
            "https://www.googleapis.com/youtube/v3/videoCategories",
            params={"part": "snippet", "regionCode": "US", "key": API_KEY},
            timeout=30,  # never wait indefinitely on a stalled connection
        )
    except requests.RequestException as error:
        print(f"Failed to fetch video categories: {error}")
        return {}

    if response.status_code != 200:
        print(f"Failed to fetch video categories: {response.status_code}")
        return {}

    return {
        category["id"]: category["snippet"]["title"]
        for category in response.json().get("items", [])
    }

# Build the full mapping to use later
CATEGORY_MAPPING = get_category_mapping()

# Output CSV file that stores all comments + video metadata for this topic
OUTPUT_FILE = "Newcastle_Takeover_Saudi_Arabia.csv"

# The header row is only needed while the CSV has no content yet. A missing
# file and an existing-but-empty file are treated the same way, so an empty
# file still receives its header on the next run.
file_exists = os.path.isfile(OUTPUT_FILE) and os.path.getsize(OUTPUT_FILE) > 0

# Seconds to wait for any single API response before giving up on it
REQUEST_TIMEOUT_SECONDS = 30

# Open the CSV in append mode so we don't erase old data if rerun
with open(OUTPUT_FILE, mode="a", newline="", encoding="utf-8") as csv_file:
    fieldnames = [
        "Video_ID", "Video_Title", "Video_Category_Type", "Channel_Name",
        "Comment_ID", "Comment", "Author", "Date", "Likes", "Replies_Count",
        "Data_Source"
    ]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Only write the header row if this is a new file
    if not file_exists:
        writer.writeheader()

    total_comments = 0  # Comments written across all videos in this run

    # Loop through each video in the list
    for VIDEO_ID in VIDEO_IDS:
        print(f"Fetching metadata for video: {VIDEO_ID}...")

        # Step 1: video-level metadata (title, category ID, channel name).
        # A network error is reported and skipped just like an HTTP error, so
        # one bad request does not abort the remaining videos.
        try:
            video_response = requests.get(
                "https://www.googleapis.com/youtube/v3/videos",
                params={"part": "snippet", "id": VIDEO_ID, "key": API_KEY},
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
        except requests.RequestException as error:
            print(f" Failed to fetch video metadata for {VIDEO_ID}: {error}")
            continue

        if video_response.status_code != 200:
            print(f" Failed to fetch video metadata for {VIDEO_ID}: {video_response.status_code}")
            continue

        video_items = video_response.json().get("items")
        if not video_items:
            # The API answered but returned no entry for this ID
            print(f" No metadata found for video {VIDEO_ID}. Skipping...")
            continue

        video_info = video_items[0]["snippet"]
        video_title = video_info["title"]
        category_id = video_info.get("categoryId", "Unknown")
        video_category_type = CATEGORY_MAPPING.get(category_id, "Unknown Category")
        channel_name = video_info["channelTitle"]

        print(f"Fetching comments for video: {VIDEO_ID}...")

        # Step 2: page through the top-level comment threads, 100 per request.
        # The query is passed as params so requests URL-encodes the page token.
        comment_params = {
            "part": "snippet",
            "videoId": VIDEO_ID,
            "key": API_KEY,
            "maxResults": 100,
        }
        collected_comments = 0  # Comments written for this video

        while True:
            try:
                response = requests.get(
                    "https://www.googleapis.com/youtube/v3/commentThreads",
                    params=comment_params,
                    timeout=REQUEST_TIMEOUT_SECONDS,
                )
            except requests.RequestException as error:
                print(f"Failed to fetch comments for {VIDEO_ID}: {error}")
                break

            if response.status_code != 200:
                # Stop paging this video and move on to the next one
                print(f"Failed to fetch comments for {VIDEO_ID}: {response.status_code} - {response.text}")
                break

            data = response.json()

            # One CSV row per top-level comment, combined with the video metadata
            for item in data.get("items", []):
                snippet = item["snippet"]["topLevelComment"]["snippet"]
                writer.writerow({
                    "Video_ID": VIDEO_ID,
                    "Video_Title": video_title,
                    "Video_Category_Type": video_category_type,
                    "Channel_Name": channel_name,
                    "Comment_ID": item["id"],
                    "Comment": snippet["textDisplay"],
                    "Author": snippet["authorDisplayName"],
                    "Date": snippet["publishedAt"],
                    "Likes": snippet["likeCount"],
                    "Replies_Count": item["snippet"].get("totalReplyCount", 0),
                    "Data_Source": "YouTube"
                })
                total_comments += 1
                collected_comments += 1

            print(f"✅ {collected_comments} comments fetched so far for {VIDEO_ID}")

            # Pause after every page to avoid hitting API rate limits
            time.sleep(1)

            # No token means this was the last page for the video
            next_page_token = data.get("nextPageToken")
            if not next_page_token:
                break
            comment_params["pageToken"] = next_page_token

# Printing the summary once it's all done
print(f"\n✅ Total Comments Retrieved: {total_comments}")
print(f"📂 All data saved to '{OUTPUT_FILE}'")


Fetching metadata for video: X9fdKFfOPck...
Fetching comments for video: X9fdKFfOPck...
✅ 100 comments fetched so far for X9fdKFfOPck
✅ 200 comments fetched so far for X9fdKFfOPck
✅ 300 comments fetched so far for X9fdKFfOPck
✅ 400 comments fetched so far for X9fdKFfOPck
✅ 500 comments fetched so far for X9fdKFfOPck
✅ 600 comments fetched so far for X9fdKFfOPck
✅ 700 comments fetched so far for X9fdKFfOPck
✅ 800 comments fetched so far for X9fdKFfOPck
✅ 900 comments fetched so far for X9fdKFfOPck
✅ 1000 comments fetched so far for X9fdKFfOPck
✅ 1100 comments fetched so far for X9fdKFfOPck
✅ 1114 comments fetched so far for X9fdKFfOPck
Fetching metadata for video: P_5l7dF6kak...
Fetching comments for video: P_5l7dF6kak...
✅ 59 comments fetched so far for P_5l7dF6kak
Fetching metadata for video: 8JjNY_CCosw...
Fetching comments for video: 8JjNY_CCosw...
✅ 100 comments fetched so far for 8JjNY_CCosw
✅ 200 comments fetched so far for 8JjNY_CCosw
✅ 300 comments fetched so far for 8JjNY_CCos

## All Events

This section collects public YouTube comments from videos that cover **multiple major sportswashing events** together.  
It uses the YouTube Data API to:

- Fetch video metadata (title, category, channel)
- Extract top-level comments and engagement info (likes, replies)
- Save all output into a structured `.csv` file for later processing


In [13]:
# YouTube video IDs of videos that cover several sportswashing events at once
# (one ID per line so additions and removals are easy to review)
VIDEO_IDS = [
    "OMTszkzVSx4",
    "hFw-GYAlJHc",
    "l9XuvDXJlZI",
    "y4vOuRv9jyA",
    "KrmqY9hyCCw",
    "jNdQb-958hA",
    "gHv0Fjq3GTM",
    "YCMIwOvegak",
    "dGEC3CMp0hE",
    "Z_3t4Me-USY",
    "K1LnvaVRwx8",
    "bG33XKc67mw",
    "0TloNP2_f-0",
]

# Get a dictionary of YouTube category IDs mapped to readable names
def get_category_mapping():
    """Return a dict mapping YouTube category IDs to category titles.

    Queries the ``videoCategories`` endpoint for region ``US`` using the
    module-level ``API_KEY``. If the request cannot be completed or does not
    answer with HTTP 200, a warning is printed and an empty dict is returned,
    so lookups on the result fall back to the caller's default label.
    """
    try:
        response = requests.get(
            "https://www.googleapis.com/youtube/v3/videoCategories",
            params={"part": "snippet", "regionCode": "US", "key": API_KEY},
            timeout=30,  # never wait indefinitely on a stalled connection
        )
    except requests.RequestException as error:
        print(f"Failed to fetch video categories: {error}")
        return {}

    if response.status_code != 200:
        print(f"Failed to fetch video categories: {response.status_code}")
        return {}

    return {
        category["id"]: category["snippet"]["title"]
        for category in response.json().get("items", [])
    }

# Building the category map to use later
CATEGORY_MAPPING = get_category_mapping()

# Name of the output CSV file where everything will be stored
OUTPUT_FILE = "All_Events.csv"

# The header row is only needed while the CSV has no content yet. A missing
# file and an existing-but-empty file are treated the same way, so an empty
# file still receives its header on the next run.
file_exists = os.path.isfile(OUTPUT_FILE) and os.path.getsize(OUTPUT_FILE) > 0

# Seconds to wait for any single API response before giving up on it
REQUEST_TIMEOUT_SECONDS = 30

# Open the CSV in append mode so we don't overwrite previous data
with open(OUTPUT_FILE, mode="a", newline="", encoding="utf-8") as csv_file:
    fieldnames = [
        "Video_ID", "Video_Title", "Video_Category_Type", "Channel_Name",
        "Comment_ID", "Comment", "Author", "Date", "Likes", "Replies_Count",
        "Data_Source"
    ]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # If it's a new file, write the column headers
    if not file_exists:
        writer.writeheader()

    total_comments = 0  # Comments written across all videos in this run

    # Loop through each video ID one by one
    for VIDEO_ID in VIDEO_IDS:
        print(f"Fetching metadata for video: {VIDEO_ID}...")

        # Step 1: video-level metadata (title, category ID, channel name).
        # A network error is reported and skipped just like an HTTP error, so
        # one bad request does not abort the remaining videos.
        try:
            video_response = requests.get(
                "https://www.googleapis.com/youtube/v3/videos",
                params={"part": "snippet", "id": VIDEO_ID, "key": API_KEY},
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
        except requests.RequestException as error:
            print(f"Failed to fetch video metadata for {VIDEO_ID}: {error}")
            continue

        if video_response.status_code != 200:
            print(f"Failed to fetch video metadata for {VIDEO_ID}: {video_response.status_code}")
            continue

        video_items = video_response.json().get("items")
        if not video_items:
            # The API answered but returned no entry for this ID
            print(f"No metadata found for video {VIDEO_ID}. Skipping...")
            continue

        video_info = video_items[0]["snippet"]
        video_title = video_info["title"]
        category_id = video_info.get("categoryId", "Unknown")
        video_category_type = CATEGORY_MAPPING.get(category_id, "Unknown Category")
        channel_name = video_info["channelTitle"]

        print(f"Fetching comments for video: {VIDEO_ID}...")

        # Step 2: page through the top-level comment threads, 100 per request.
        # The query is passed as params so requests URL-encodes the page token.
        comment_params = {
            "part": "snippet",
            "videoId": VIDEO_ID,
            "key": API_KEY,
            "maxResults": 100,
        }
        collected_comments = 0  # Comments written for this video

        while True:
            try:
                response = requests.get(
                    "https://www.googleapis.com/youtube/v3/commentThreads",
                    params=comment_params,
                    timeout=REQUEST_TIMEOUT_SECONDS,
                )
            except requests.RequestException as error:
                print(f"Failed to fetch comments for {VIDEO_ID}: {error}")
                break

            if response.status_code != 200:
                # Stop paging this video and move on to the next one
                print(f"Failed to fetch comments for {VIDEO_ID}: {response.status_code} - {response.text}")
                break

            data = response.json()

            # One CSV row per top-level comment, combined with the video metadata
            for item in data.get("items", []):
                snippet = item["snippet"]["topLevelComment"]["snippet"]
                writer.writerow({
                    "Video_ID": VIDEO_ID,
                    "Video_Title": video_title,
                    "Video_Category_Type": video_category_type,
                    "Channel_Name": channel_name,
                    "Comment_ID": item["id"],
                    "Comment": snippet["textDisplay"],
                    "Author": snippet["authorDisplayName"],
                    "Date": snippet["publishedAt"],
                    "Likes": snippet["likeCount"],
                    "Replies_Count": item["snippet"].get("totalReplyCount", 0),
                    "Data_Source": "YouTube"
                })
                total_comments += 1
                collected_comments += 1

            print(f"✅ {collected_comments} comments fetched so far for {VIDEO_ID}")

            # Short delay after every page to stay within API rate limits
            time.sleep(1)

            # No token means this was the last page for the video
            next_page_token = data.get("nextPageToken")
            if not next_page_token:
                break
            comment_params["pageToken"] = next_page_token

# Final output message once all videos are processed
print(f"\n✅ Total Comments Retrieved: {total_comments}")
print(f"📂 All data saved to '{OUTPUT_FILE}'")


Fetching metadata for video: OMTszkzVSx4...
Fetching comments for video: OMTszkzVSx4...
✅ 100 comments fetched so far for OMTszkzVSx4
✅ 200 comments fetched so far for OMTszkzVSx4
✅ 300 comments fetched so far for OMTszkzVSx4
✅ 400 comments fetched so far for OMTszkzVSx4
✅ 500 comments fetched so far for OMTszkzVSx4
✅ 600 comments fetched so far for OMTszkzVSx4
✅ 700 comments fetched so far for OMTszkzVSx4
✅ 707 comments fetched so far for OMTszkzVSx4
Fetching metadata for video: hFw-GYAlJHc...
Fetching comments for video: hFw-GYAlJHc...
✅ 100 comments fetched so far for hFw-GYAlJHc
✅ 200 comments fetched so far for hFw-GYAlJHc
✅ 300 comments fetched so far for hFw-GYAlJHc
✅ 400 comments fetched so far for hFw-GYAlJHc
✅ 500 comments fetched so far for hFw-GYAlJHc
✅ 584 comments fetched so far for hFw-GYAlJHc
Fetching metadata for video: l9XuvDXJlZI...
Fetching comments for video: l9XuvDXJlZI...
✅ 100 comments fetched so far for l9XuvDXJlZI
✅ 200 comments fetched so far for l9XuvDXJlZI


## LIV Golf

This section collects public YouTube comments related to the **LIV Golf Series**.  
It uses the YouTube Data API to:

- Fetch video metadata (title, category, channel)
- Extract top-level comments and engagement info (likes, replies)
- Save all output into a structured `.csv` file for later processing


In [17]:
# YouTube video IDs scraped for the LIV Golf topic
# (one ID per line so additions and removals are easy to review)
VIDEO_IDS = [
    "mYGTNHfgZ8E",
    "Heo_bESl4cc",
    "GSJUgWgtDpU",
    "xKw_YRXTmCs",
    "aHtdkOeOUHs",
    "D9KQDzyheUI",
    "nuGBJOq_aCI",
    "cU9rmNPdn-Q",
    "gspb4DSvgm8",
    "c8AQMWnvj_Q",
    "zv9jyz3lauo",
    "ZQwFsV3vk54",
    "dqqgE85y6rM",
    "f6cmTZQ5CUw",
]

# Map YouTube Category IDs to Names
# This function queries YouTube API to build a dictionary like {"24": "Entertainment"}
def get_category_mapping():
    """Return a dict mapping YouTube category IDs to category titles.

    Queries the ``videoCategories`` endpoint for region ``US`` using the
    module-level ``API_KEY``. If the request cannot be completed or does not
    answer with HTTP 200, a warning is printed and an empty dict is returned,
    so lookups on the result fall back to the caller's default label.
    """
    try:
        response = requests.get(
            "https://www.googleapis.com/youtube/v3/videoCategories",
            params={"part": "snippet", "regionCode": "US", "key": API_KEY},
            timeout=30,  # never wait indefinitely on a stalled connection
        )
    except requests.RequestException as error:
        print(f"Failed to fetch video categories: {error}")
        return {}

    if response.status_code != 200:
        print(f"Failed to fetch video categories: {response.status_code}")
        return {}

    return {
        category["id"]: category["snippet"]["title"]
        for category in response.json().get("items", [])
    }

# Create full mapping of category IDs to readable names
CATEGORY_MAPPING = get_category_mapping()

# CSV file to store all comments
OUTPUT_FILE = "LIV_Golf.csv"

# The header row is only needed while the CSV has no content yet. A missing
# file and an existing-but-empty file are treated the same way, so an empty
# file still receives its header on the next run.
file_exists = os.path.isfile(OUTPUT_FILE) and os.path.getsize(OUTPUT_FILE) > 0

# Seconds to wait for any single API response before giving up on it
REQUEST_TIMEOUT_SECONDS = 30

# Opening CSV file in "append mode" to keep adding data
with open(OUTPUT_FILE, mode="a", newline="", encoding="utf-8") as csv_file:
    fieldnames = [
        "Video_ID", "Video_Title", "Video_Category_Type", "Channel_Name",
        "Comment_ID", "Comment", "Author", "Date", "Likes", "Replies_Count",
        "Data_Source"
    ]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Write header only if the file is newly created
    if not file_exists:
        writer.writeheader()

    total_comments = 0  # Comments written across all videos in this run

    # Loop through each video in the list
    for VIDEO_ID in VIDEO_IDS:
        print(f"Fetching metadata for video: {VIDEO_ID}...")

        # Step 1: video-level metadata (title, category ID, channel name).
        # A network error is reported and skipped just like an HTTP error, so
        # one bad request does not abort the remaining videos.
        try:
            video_response = requests.get(
                "https://www.googleapis.com/youtube/v3/videos",
                params={"part": "snippet", "id": VIDEO_ID, "key": API_KEY},
                timeout=REQUEST_TIMEOUT_SECONDS,
            )
        except requests.RequestException as error:
            print(f"Failed to fetch video metadata for {VIDEO_ID}: {error}")
            continue

        if video_response.status_code != 200:
            print(f"Failed to fetch video metadata for {VIDEO_ID}: {video_response.status_code}")
            continue

        video_items = video_response.json().get("items")
        if not video_items:
            # The API answered but returned no entry for this ID
            print(f"No metadata found for video {VIDEO_ID}. Skipping...")
            continue

        video_info = video_items[0]["snippet"]
        video_title = video_info["title"]
        category_id = video_info.get("categoryId", "Unknown")
        video_category_type = CATEGORY_MAPPING.get(category_id, "Unknown Category")
        channel_name = video_info["channelTitle"]

        print(f"Fetching comments for video: {VIDEO_ID}...")

        # Step 2: page through the top-level comment threads, 100 per request.
        # The query is passed as params so requests URL-encodes the page token.
        comment_params = {
            "part": "snippet",
            "videoId": VIDEO_ID,
            "key": API_KEY,
            "maxResults": 100,
        }
        collected_comments = 0  # Comments written for this video

        while True:
            try:
                response = requests.get(
                    "https://www.googleapis.com/youtube/v3/commentThreads",
                    params=comment_params,
                    timeout=REQUEST_TIMEOUT_SECONDS,
                )
            except requests.RequestException as error:
                print(f"Failed to fetch comments for {VIDEO_ID}: {error}")
                break

            if response.status_code != 200:
                # Stop paging this video and move on to the next one
                print(f"Failed to fetch comments for {VIDEO_ID}: {response.status_code} - {response.text}")
                break

            data = response.json()

            # One CSV row per top-level comment, combined with the video metadata
            for item in data.get("items", []):
                snippet = item["snippet"]["topLevelComment"]["snippet"]
                writer.writerow({
                    "Video_ID": VIDEO_ID,
                    "Video_Title": video_title,
                    "Video_Category_Type": video_category_type,
                    "Channel_Name": channel_name,
                    "Comment_ID": item["id"],
                    "Comment": snippet["textDisplay"],
                    "Author": snippet["authorDisplayName"],
                    "Date": snippet["publishedAt"],
                    "Likes": snippet["likeCount"],
                    "Replies_Count": item["snippet"].get("totalReplyCount", 0),
                    "Data_Source": "YouTube"
                })
                total_comments += 1
                collected_comments += 1

            print(f"✅ {collected_comments} comments fetched so far for {VIDEO_ID}")

            # Sleep after every page to stay within API rate limits
            time.sleep(1)

            # No token means this was the last page for the video
            next_page_token = data.get("nextPageToken")
            if not next_page_token:
                break
            comment_params["pageToken"] = next_page_token

# Summary printout
print(f"\n✅ Total Comments Retrieved: {total_comments}")
print(f"📂 All data saved to '{OUTPUT_FILE}'")

Fetching metadata for video: mYGTNHfgZ8E...
Fetching comments for video: mYGTNHfgZ8E...
✅ 100 comments fetched so far for mYGTNHfgZ8E
✅ 200 comments fetched so far for mYGTNHfgZ8E
✅ 300 comments fetched so far for mYGTNHfgZ8E
✅ 400 comments fetched so far for mYGTNHfgZ8E
✅ 500 comments fetched so far for mYGTNHfgZ8E
✅ 600 comments fetched so far for mYGTNHfgZ8E
✅ 700 comments fetched so far for mYGTNHfgZ8E
✅ 800 comments fetched so far for mYGTNHfgZ8E
✅ 803 comments fetched so far for mYGTNHfgZ8E
Fetching metadata for video: Heo_bESl4cc...
Fetching comments for video: Heo_bESl4cc...
✅ 100 comments fetched so far for Heo_bESl4cc
✅ 200 comments fetched so far for Heo_bESl4cc
✅ 214 comments fetched so far for Heo_bESl4cc
Fetching metadata for video: GSJUgWgtDpU...
Fetching comments for video: GSJUgWgtDpU...
✅ 100 comments fetched so far for GSJUgWgtDpU
✅ 200 comments fetched so far for GSJUgWgtDpU
✅ 300 comments fetched so far for GSJUgWgtDpU
✅ 400 comments fetched so far for GSJUgWgtDpU


## Manchester City Ownership

This section collects public YouTube comments related to **Manchester City's ownership and financial backing**.  
It uses the YouTube Data API to:

- Retrieve video metadata (title, category, channel)
- Extract top-level comments along with engagement info (likes, replies)
- Save all output into a structured `.csv` file for later processing


In [20]:
# YouTube video IDs scraped for the Manchester City ownership topic
# (one ID per line so additions and removals are easy to review)
VIDEO_IDS = [
    "_VXzl5S0sUA",
    "t9IK5MDb9zk",
    "mWJXuDTaeX4",
    "9LMNKJO8bH8",
    "Z0HWQatIXK8",
    "-ugz3j9I-80",
    "WGyomoSnP-A",
    "yDjISzW4QDk",
    "hbkCDZcMeLA",
    "mBgDXoB2cms",
    "Bs9l0SaXJhs",
    "Fifh6otxBII",
    "hPmf0oSGZtM",
    "CF_0c3hRV9w",
    "AwC1J9o9Wxk",
    "t1dRiZJsXDY",
    "tNdOw-yNIUM",
    "lhiwDz4k6g4",
    "1o7i2jrQiZY",
    "7jVQRBO3QC0",
    "WkU3mtm1ygI",
]

# Function to retrieve a mapping from category IDs to human-readable names
def get_category_mapping():
    """Return a dict mapping YouTube category IDs to category titles.

    Queries the ``videoCategories`` endpoint of the YouTube Data API for the
    US region, authenticating with the module-level ``API_KEY``.

    Returns:
        dict: ``{category_id: category_title}`` built from the ``items`` of
        the response. Empty when the API answers with a non-200 status; a
        message with the status code is printed in that case.
    """
    url = f"https://www.googleapis.com/youtube/v3/videoCategories?part=snippet&regionCode=US&key={API_KEY}"
    # Without a timeout a stalled connection would block the cell forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        # An empty mapping makes every lookup miss, so say why it is empty.
        print(f"Failed to fetch category mapping: {response.status_code}")
        return {}
    return {
        category["id"]: category["snippet"]["title"]
        for category in response.json().get("items", [])
    }

# Build the mapping once so it can be reused for every video below
CATEGORY_MAPPING = get_category_mapping()

# Output file for all comments in this section
OUTPUT_FILE = "Manchester_City_Ownership.csv"

# Seconds to wait on a single API call; without a timeout a stalled
# connection would block the run indefinitely.
REQUEST_TIMEOUT = 30

# The header row is needed when the file is missing OR present but empty.
# Reading a single byte tells an empty file apart from one that has data.
try:
    with open(OUTPUT_FILE, "rb") as existing_file:
        file_exists = bool(existing_file.read(1))
except FileNotFoundError:
    file_exists = False

# Open the CSV file in append mode so rows from previous runs are kept
with open(OUTPUT_FILE, mode="a", newline="", encoding="utf-8") as csv_file:
    fieldnames = [
        "Video_ID", "Video_Title", "Video_Category_Type", "Channel_Name",
        "Comment_ID", "Comment", "Author", "Date", "Likes", "Replies_Count",
        "Data_Source"
    ]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # If the file is new (or empty), write the header row
    if not file_exists:
        writer.writeheader()

    total_comments = 0  # Total number of comments collected across all videos

    # Loop over each video
    for VIDEO_ID in VIDEO_IDS:
        print(f"Fetching metadata for video: {VIDEO_ID}...")

        # Step 1: Get video-level metadata. Query values go through `params`
        # so that requests URL-encodes them.
        video_url = "https://www.googleapis.com/youtube/v3/videos"
        video_params = {"part": "snippet", "id": VIDEO_ID, "key": API_KEY}
        try:
            video_response = requests.get(video_url, params=video_params, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as error:
            # Only the exception type is printed: the full message can contain
            # the request URL, which includes the API key.
            print(f" Failed to fetch video metadata for {VIDEO_ID}: {type(error).__name__}")
            continue

        if video_response.status_code != 200:
            print(f" Failed to fetch video metadata for {VIDEO_ID}: {video_response.status_code}")
            continue

        video_items = video_response.json().get("items")
        if not video_items:
            print(f" No metadata found for video {VIDEO_ID}. Skipping...")
            continue

        video_info = video_items[0]["snippet"]
        video_title = video_info["title"]
        category_id = video_info.get("categoryId", "Unknown")
        video_category_type = CATEGORY_MAPPING.get(category_id, "Unknown Category")
        channel_name = video_info["channelTitle"]

        print(f"Fetching comments for video: {VIDEO_ID}...")

        # Step 2: Fetch top-level comments, up to 100 per request
        url = "https://www.googleapis.com/youtube/v3/commentThreads"
        comment_params = {
            "part": "snippet",
            "videoId": VIDEO_ID,
            "key": API_KEY,
            "maxResults": 100,
        }
        collected_comments = 0

        # Keep requesting pages until the API stops returning a nextPageToken
        while True:
            try:
                response = requests.get(url, params=comment_params, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as error:
                # Give up on this video only; the remaining videos still run.
                print(f"Failed to fetch comments for {VIDEO_ID}: {type(error).__name__}")
                break

            if response.status_code != 200:
                print(f"Failed to fetch comments for {VIDEO_ID}: {response.status_code} - {response.text}")
                break

            data = response.json()
            for item in data.get("items", []):
                snippet = item["snippet"]["topLevelComment"]["snippet"]

                # Save extracted comment and metadata to CSV
                writer.writerow({
                    "Video_ID": VIDEO_ID,
                    "Video_Title": video_title,
                    "Video_Category_Type": video_category_type,
                    "Channel_Name": channel_name,
                    "Comment_ID": item["id"],
                    "Comment": snippet["textDisplay"],
                    "Author": snippet["authorDisplayName"],
                    "Date": snippet["publishedAt"],
                    "Likes": snippet["likeCount"],
                    "Replies_Count": item["snippet"].get("totalReplyCount", 0),
                    "Data_Source": "YouTube"
                })

                total_comments += 1
                collected_comments += 1

            print(f"✅ {collected_comments} comments fetched so far for {VIDEO_ID}")

            next_page_token = data.get("nextPageToken")

            # Slight delay after every successful page to avoid rate limiting
            time.sleep(1)

            if not next_page_token:
                break
            comment_params["pageToken"] = next_page_token

# Final log output
print(f"\n✅ Total Comments Retrieved: {total_comments}")
print(f"📂 All data saved to '{OUTPUT_FILE}'")

Fetching metadata for video: _VXzl5S0sUA...
Fetching comments for video: _VXzl5S0sUA...
✅ 100 comments fetched so far for _VXzl5S0sUA
✅ 121 comments fetched so far for _VXzl5S0sUA
Fetching metadata for video: t9IK5MDb9zk...
Fetching comments for video: t9IK5MDb9zk...
✅ 100 comments fetched so far for t9IK5MDb9zk
✅ 200 comments fetched so far for t9IK5MDb9zk
✅ 300 comments fetched so far for t9IK5MDb9zk
✅ 384 comments fetched so far for t9IK5MDb9zk
Fetching metadata for video: mWJXuDTaeX4...
Fetching comments for video: mWJXuDTaeX4...
✅ 100 comments fetched so far for mWJXuDTaeX4
✅ 200 comments fetched so far for mWJXuDTaeX4
✅ 300 comments fetched so far for mWJXuDTaeX4
✅ 400 comments fetched so far for mWJXuDTaeX4
✅ 500 comments fetched so far for mWJXuDTaeX4
✅ 600 comments fetched so far for mWJXuDTaeX4
✅ 700 comments fetched so far for mWJXuDTaeX4
✅ 727 comments fetched so far for mWJXuDTaeX4
Fetching metadata for video: 9LMNKJO8bH8...
Fetching comments for video: 9LMNKJO8bH8...
✅ 70

## PSG Ownership

This section collects public YouTube comments related to **Paris Saint-Germain's ownership and its links to Qatar**.  
It uses the YouTube Data API to:

- Fetch video metadata (title, category, channel)
- Extract top-level comments and engagement details (likes, replies)
- Save all output into a structured `.csv` file for later processing

In [23]:
# YouTube video IDs (8) whose comments are collected for the PSG ownership topic.
VIDEO_IDS = [
    "Dvh2M8lONLk", "_tYH4_tmktA", "224pbl296dI", "aUpfkl_rWGs",
    "7TJrf1OtsJA", "4q0hpZD3Y8Q", "KKgxVwMtsZc", "lBRMtyrW184",
]

# Map YouTube Category IDs to Names
# Pulls a dictionary that maps category IDs (e.g. 17) to actual names (e.g. Sports).
def get_category_mapping():
    """Return a dict mapping YouTube category IDs to category titles.

    Queries the ``videoCategories`` endpoint of the YouTube Data API for the
    US region, authenticating with the module-level ``API_KEY``.

    Returns:
        dict: ``{category_id: category_title}`` built from the ``items`` of
        the response. Empty when the API answers with a non-200 status; a
        message with the status code is printed in that case.
    """
    url = f"https://www.googleapis.com/youtube/v3/videoCategories?part=snippet&regionCode=US&key={API_KEY}"
    # Without a timeout a stalled connection would block the cell forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        # An empty mapping makes every lookup miss, so say why it is empty.
        print(f"Failed to fetch category mapping: {response.status_code}")
        return {}
    return {
        category["id"]: category["snippet"]["title"]
        for category in response.json().get("items", [])
    }

# Category ID -> name lookup, fetched once for the whole section
CATEGORY_MAPPING = get_category_mapping()

# Everything gets saved here for later analysis.
OUTPUT_FILE = "PSG_Ownership.csv"

# Seconds to wait on a single API call; without a timeout a stalled
# connection would block the run indefinitely.
REQUEST_TIMEOUT = 30

# The header row is needed when the file is missing OR present but empty.
# Reading a single byte tells an empty file apart from one that has data.
try:
    with open(OUTPUT_FILE, "rb") as existing_file:
        file_exists = bool(existing_file.read(1))
except FileNotFoundError:
    file_exists = False

# Open CSV file in append mode so existing data stays intact
with open(OUTPUT_FILE, mode="a", newline="", encoding="utf-8") as csv_file:
    fieldnames = [
        "Video_ID", "Video_Title", "Video_Category_Type", "Channel_Name",
        "Comment_ID", "Comment", "Author", "Date", "Likes", "Replies_Count",
        "Data_Source"
    ]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Add the header only when the file is new (or empty)
    if not file_exists:
        writer.writeheader()

    total_comments = 0  # Track how many comments we collect

    for VIDEO_ID in VIDEO_IDS:
        print(f"Fetching metadata for video: {VIDEO_ID}...")

        # Get the title, category, and channel for the video. Query values go
        # through `params` so that requests URL-encodes them.
        video_url = "https://www.googleapis.com/youtube/v3/videos"
        video_params = {"part": "snippet", "id": VIDEO_ID, "key": API_KEY}
        try:
            video_response = requests.get(video_url, params=video_params, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as error:
            # Only the exception type is printed: the full message can contain
            # the request URL, which includes the API key.
            print(f"Failed to fetch video metadata for {VIDEO_ID}: {type(error).__name__}")
            continue

        if video_response.status_code != 200:
            print(f"Failed to fetch video metadata for {VIDEO_ID}: {video_response.status_code}")
            continue

        video_items = video_response.json().get("items")
        if not video_items:
            print(f"No metadata found for video {VIDEO_ID}. Skipping...")
            continue

        video_info = video_items[0]["snippet"]
        video_title = video_info["title"]
        category_id = video_info.get("categoryId", "Unknown")
        video_category_type = CATEGORY_MAPPING.get(category_id, "Unknown Category")
        channel_name = video_info["channelTitle"]

        print(f"Fetching comments for video: {VIDEO_ID}...")

        # Pull top-level comments and basic engagement info, 100 per request
        url = "https://www.googleapis.com/youtube/v3/commentThreads"
        comment_params = {
            "part": "snippet",
            "videoId": VIDEO_ID,
            "key": API_KEY,
            "maxResults": 100,
        }
        collected_comments = 0

        # Keep requesting pages until the API stops returning a nextPageToken
        while True:
            try:
                response = requests.get(url, params=comment_params, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as error:
                # Give up on this video only; the remaining videos still run.
                print(f"Failed to fetch comments for {VIDEO_ID}: {type(error).__name__}")
                break

            if response.status_code != 200:
                print(f"Failed to fetch comments for {VIDEO_ID}: {response.status_code} - {response.text}")
                break

            data = response.json()
            for item in data.get("items", []):
                snippet = item["snippet"]["topLevelComment"]["snippet"]

                # Add comment to CSV with all metadata
                writer.writerow({
                    "Video_ID": VIDEO_ID,
                    "Video_Title": video_title,
                    "Video_Category_Type": video_category_type,
                    "Channel_Name": channel_name,
                    "Comment_ID": item["id"],
                    "Comment": snippet["textDisplay"],
                    "Author": snippet["authorDisplayName"],
                    "Date": snippet["publishedAt"],
                    "Likes": snippet["likeCount"],
                    "Replies_Count": item["snippet"].get("totalReplyCount", 0),
                    "Data_Source": "YouTube"
                })

                total_comments += 1
                collected_comments += 1

            print(f"✅ {collected_comments} comments fetched so far for {VIDEO_ID}")

            next_page_token = data.get("nextPageToken")

            # Short pause after every successful page to avoid rate limiting
            time.sleep(1)

            if not next_page_token:
                break
            comment_params["pageToken"] = next_page_token

print(f"\n✅ Total Comments Retrieved: {total_comments}")
print(f"📂 All data saved to '{OUTPUT_FILE}'")

Fetching metadata for video: Dvh2M8lONLk...
Fetching comments for video: Dvh2M8lONLk...
✅ 100 comments fetched so far for Dvh2M8lONLk
✅ 193 comments fetched so far for Dvh2M8lONLk
Fetching metadata for video: _tYH4_tmktA...
Fetching comments for video: _tYH4_tmktA...
✅ 100 comments fetched so far for _tYH4_tmktA
✅ 200 comments fetched so far for _tYH4_tmktA
✅ 231 comments fetched so far for _tYH4_tmktA
Fetching metadata for video: 224pbl296dI...
Fetching comments for video: 224pbl296dI...
✅ 100 comments fetched so far for 224pbl296dI
✅ 200 comments fetched so far for 224pbl296dI
✅ 300 comments fetched so far for 224pbl296dI
✅ 375 comments fetched so far for 224pbl296dI
Fetching metadata for video: aUpfkl_rWGs...
Fetching comments for video: aUpfkl_rWGs...
✅ 14 comments fetched so far for aUpfkl_rWGs
Fetching metadata for video: 7TJrf1OtsJA...
Fetching comments for video: 7TJrf1OtsJA...
✅ 55 comments fetched so far for 7TJrf1OtsJA
Fetching metadata for video: 4q0hpZD3Y8Q...
Fetching com

## Saudi Pro League

This section collects public YouTube comments related to the **Saudi Pro League and its recent high-profile investments**.  
It uses the YouTube Data API to:

- Fetch video metadata (title, category, channel)  
- Extract top-level comments and engagement details (likes, replies)  
- Save all output into a structured `.csv` file for later processing

In [26]:
# List of YouTube Video IDs related to the Saudi Pro League.
# Each ID appears exactly once: a repeated ID would have its comments
# scraped and written to the CSV a second time.
VIDEO_IDS = [
    "LZn3i2YZ1BA", "_mE9K20hqSQ", "B9OZWppefsA", "21RnbXFTMQ0", "5slVNMFT0_g",
    "PmRQlxjUcOI", "KVFterzsmOg", "hhDaKszN-zA", "wvEw-ozrZNU", "MJxsx9CcnZc",
    "GUuDJIr3FUs", "wtQ4K7zcPkw", "ETaR3PvusVI", "IRV4LW_V8tk", "dIfke7u5V7M",
    "UNRQCksZmLo", "Eiovy98OMxk", "2S5OnnuOIos", "ffAU4eXE67c", "Al8QYC2I4qE"
]

# This maps YouTube category IDs to actual category names (e.g. Sports, News)
def get_category_mapping():
    """Return a dict mapping YouTube category IDs to category titles.

    Queries the ``videoCategories`` endpoint of the YouTube Data API for the
    US region, authenticating with the module-level ``API_KEY``.

    Returns:
        dict: ``{category_id: category_title}`` built from the ``items`` of
        the response. Empty when the API answers with a non-200 status; a
        message with the status code is printed in that case.
    """
    url = f"https://www.googleapis.com/youtube/v3/videoCategories?part=snippet&regionCode=US&key={API_KEY}"
    # Without a timeout a stalled connection would block the cell forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        # An empty mapping makes every lookup miss, so say why it is empty.
        print(f"Failed to fetch category mapping: {response.status_code}")
        return {}
    return {
        category["id"]: category["snippet"]["title"]
        for category in response.json().get("items", [])
    }

# Category ID -> name lookup, fetched once for the whole section
CATEGORY_MAPPING = get_category_mapping()

# Output CSV where all extracted comments will be saved
OUTPUT_FILE = "Saudi_Pro_League.csv"

# Seconds to wait on a single API call; without a timeout a stalled
# connection would block the run indefinitely.
REQUEST_TIMEOUT = 30

# The header row is needed when the file is missing OR present but empty.
# Reading a single byte tells an empty file apart from one that has data.
try:
    with open(OUTPUT_FILE, "rb") as existing_file:
        file_exists = bool(existing_file.read(1))
except FileNotFoundError:
    file_exists = False

# Open the CSV in append mode to keep adding new comments
with open(OUTPUT_FILE, mode="a", newline="", encoding="utf-8") as csv_file:
    fieldnames = [
        "Video_ID", "Video_Title", "Video_Category_Type", "Channel_Name",
        "Comment_ID", "Comment", "Author", "Date", "Likes", "Replies_Count",
        "Data_Source"
    ]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Only write the column names if the file is new (or empty)
    if not file_exists:
        writer.writeheader()

    total_comments = 0  # Keep track of total comments pulled

    for VIDEO_ID in VIDEO_IDS:
        print(f"Fetching metadata for video: {VIDEO_ID}...")

        # Video-level metadata. Query values go through `params` so that
        # requests URL-encodes them.
        video_url = "https://www.googleapis.com/youtube/v3/videos"
        video_params = {"part": "snippet", "id": VIDEO_ID, "key": API_KEY}
        try:
            video_response = requests.get(video_url, params=video_params, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as error:
            # Only the exception type is printed: the full message can contain
            # the request URL, which includes the API key.
            print(f"Failed to fetch video metadata for {VIDEO_ID}: {type(error).__name__}")
            continue

        if video_response.status_code != 200:
            print(f"Failed to fetch video metadata for {VIDEO_ID}: {video_response.status_code}")
            continue

        video_items = video_response.json().get("items")
        if not video_items:
            print(f"No metadata found for video {VIDEO_ID}. Skipping...")
            continue

        video_info = video_items[0]["snippet"]
        video_title = video_info["title"]
        category_id = video_info.get("categoryId", "Unknown")
        video_category_type = CATEGORY_MAPPING.get(category_id, "Unknown Category")
        channel_name = video_info["channelTitle"]

        print(f"Fetching comments for video: {VIDEO_ID}...")

        # Top-level comment threads, 100 per request
        url = "https://www.googleapis.com/youtube/v3/commentThreads"
        comment_params = {
            "part": "snippet",
            "videoId": VIDEO_ID,
            "key": API_KEY,
            "maxResults": 100,
        }
        collected_comments = 0

        # Keep requesting pages until the API stops returning a nextPageToken
        while True:
            try:
                response = requests.get(url, params=comment_params, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as error:
                # Give up on this video only; the remaining videos still run.
                print(f"Failed to fetch comments for {VIDEO_ID}: {type(error).__name__}")
                break

            if response.status_code != 200:
                print(f"Failed to fetch comments for {VIDEO_ID}: {response.status_code} - {response.text}")
                break

            data = response.json()
            for item in data.get("items", []):
                snippet = item["snippet"]["topLevelComment"]["snippet"]

                # One CSV row per top-level comment
                writer.writerow({
                    "Video_ID": VIDEO_ID,
                    "Video_Title": video_title,
                    "Video_Category_Type": video_category_type,
                    "Channel_Name": channel_name,
                    "Comment_ID": item["id"],
                    "Comment": snippet["textDisplay"],
                    "Author": snippet["authorDisplayName"],
                    "Date": snippet["publishedAt"],
                    "Likes": snippet["likeCount"],
                    "Replies_Count": item["snippet"].get("totalReplyCount", 0),
                    "Data_Source": "YouTube"
                })

                total_comments += 1
                collected_comments += 1

            print(f"✅ {collected_comments} comments fetched so far for {VIDEO_ID}")

            next_page_token = data.get("nextPageToken")

            # Short pause after every successful page to avoid rate limiting
            time.sleep(1)

            if not next_page_token:
                break
            comment_params["pageToken"] = next_page_token

print(f"\n✅ Total Comments Retrieved: {total_comments}")
print(f"📂 All data saved to '{OUTPUT_FILE}'")

Fetching metadata for video: LZn3i2YZ1BA...
Fetching comments for video: LZn3i2YZ1BA...
✅ 100 comments fetched so far for LZn3i2YZ1BA
✅ 200 comments fetched so far for LZn3i2YZ1BA
✅ 300 comments fetched so far for LZn3i2YZ1BA
✅ 400 comments fetched so far for LZn3i2YZ1BA
✅ 500 comments fetched so far for LZn3i2YZ1BA
✅ 600 comments fetched so far for LZn3i2YZ1BA
✅ 691 comments fetched so far for LZn3i2YZ1BA
Fetching metadata for video: _mE9K20hqSQ...
Fetching comments for video: _mE9K20hqSQ...
✅ 100 comments fetched so far for _mE9K20hqSQ
✅ 200 comments fetched so far for _mE9K20hqSQ
✅ 300 comments fetched so far for _mE9K20hqSQ
✅ 400 comments fetched so far for _mE9K20hqSQ
✅ 500 comments fetched so far for _mE9K20hqSQ
✅ 600 comments fetched so far for _mE9K20hqSQ
✅ 700 comments fetched so far for _mE9K20hqSQ
✅ 712 comments fetched so far for _mE9K20hqSQ
Fetching metadata for video: B9OZWppefsA...
Fetching comments for video: B9OZWppefsA...
✅ 42 comments fetched so far for B9OZWppefsA
F

## Formula 1

This section collects public YouTube comments related to **Formula 1 and the Middle East's role in hosting Grand Prix events**.  
It uses the YouTube Data API to:

- Fetch video metadata (title, category, channel)
- Extract top-level comments and engagement details (likes, replies)
- Save all output into a structured `.csv` file for later processing

In [29]:
# YouTube video IDs (10) whose comments are collected for the Formula 1 topic.
VIDEO_IDS = [
    "DLgNW9lCAaU", "r4K8V8btCtY",
    "kRhg1dPS4Gw", "Cj8wtEFNVog",
    "_CLZf58vzbc", "dbVki3gPYZs",
    "C9sH8AD4jys", "6zEf9o-tTpc",
    "_tSI_JV5lZY", "ReDGjoFTn58",
]

# Map YouTube Category IDs to Names
def get_category_mapping():
    """Return a dict mapping YouTube category IDs to category titles.

    Queries the ``videoCategories`` endpoint of the YouTube Data API for the
    US region, authenticating with the module-level ``API_KEY``.

    Returns:
        dict: ``{category_id: category_title}`` built from the ``items`` of
        the response. Empty when the API answers with a non-200 status; a
        message with the status code is printed in that case.
    """
    url = f"https://www.googleapis.com/youtube/v3/videoCategories?part=snippet&regionCode=US&key={API_KEY}"
    # Without a timeout a stalled connection would block the cell forever.
    response = requests.get(url, timeout=30)
    if response.status_code != 200:
        # An empty mapping makes every lookup miss, so say why it is empty.
        print(f"Failed to fetch category mapping: {response.status_code}")
        return {}
    return {
        category["id"]: category["snippet"]["title"]
        for category in response.json().get("items", [])
    }

# Category ID -> name lookup, fetched once for the whole section
CATEGORY_MAPPING = get_category_mapping()

# CSV file to store all comments
OUTPUT_FILE = "Formula_1.csv"

# Seconds to wait on a single API call; without a timeout a stalled
# connection would block the run indefinitely.
REQUEST_TIMEOUT = 30

# The header row is needed when the file is missing OR present but empty.
# Reading a single byte tells an empty file apart from one that has data.
try:
    with open(OUTPUT_FILE, "rb") as existing_file:
        file_exists = bool(existing_file.read(1))
except FileNotFoundError:
    file_exists = False

# Open CSV file in "append mode" to keep adding data
with open(OUTPUT_FILE, mode="a", newline="", encoding="utf-8") as csv_file:
    fieldnames = [
        "Video_ID", "Video_Title", "Video_Category_Type", "Channel_Name",
        "Comment_ID", "Comment", "Author", "Date", "Likes", "Replies_Count",
        "Data_Source"
    ]
    writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Write header only if the file is new (or empty)
    if not file_exists:
        writer.writeheader()

    total_comments = 0  # Track total comments collected

    for VIDEO_ID in VIDEO_IDS:
        print(f"Fetching metadata for video: {VIDEO_ID}...")

        # Video-level metadata. Query values go through `params` so that
        # requests URL-encodes them.
        video_url = "https://www.googleapis.com/youtube/v3/videos"
        video_params = {"part": "snippet", "id": VIDEO_ID, "key": API_KEY}
        try:
            video_response = requests.get(video_url, params=video_params, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as error:
            # Only the exception type is printed: the full message can contain
            # the request URL, which includes the API key.
            print(f"Failed to fetch video metadata for {VIDEO_ID}: {type(error).__name__}")
            continue

        if video_response.status_code != 200:
            print(f"Failed to fetch video metadata for {VIDEO_ID}: {video_response.status_code}")
            continue

        video_items = video_response.json().get("items")
        if not video_items:
            print(f"No metadata found for video {VIDEO_ID}. Skipping...")
            continue

        video_info = video_items[0]["snippet"]
        video_title = video_info["title"]
        category_id = video_info.get("categoryId", "Unknown")
        video_category_type = CATEGORY_MAPPING.get(category_id, "Unknown Category")
        channel_name = video_info["channelTitle"]

        print(f"Fetching comments for video: {VIDEO_ID}...")

        # Top-level comment threads, 100 per request
        url = "https://www.googleapis.com/youtube/v3/commentThreads"
        comment_params = {
            "part": "snippet",
            "videoId": VIDEO_ID,
            "key": API_KEY,
            "maxResults": 100,
        }
        collected_comments = 0

        # Keep requesting pages until the API stops returning a nextPageToken
        while True:
            try:
                response = requests.get(url, params=comment_params, timeout=REQUEST_TIMEOUT)
            except requests.RequestException as error:
                # Give up on this video only; the remaining videos still run.
                print(f"Failed to fetch comments for {VIDEO_ID}: {type(error).__name__}")
                break

            if response.status_code != 200:
                print(f"Failed to fetch comments for {VIDEO_ID}: {response.status_code} - {response.text}")
                break

            data = response.json()
            for item in data.get("items", []):
                snippet = item["snippet"]["topLevelComment"]["snippet"]

                # One CSV row per top-level comment
                writer.writerow({
                    "Video_ID": VIDEO_ID,
                    "Video_Title": video_title,
                    "Video_Category_Type": video_category_type,
                    "Channel_Name": channel_name,
                    "Comment_ID": item["id"],
                    "Comment": snippet["textDisplay"],
                    "Author": snippet["authorDisplayName"],
                    "Date": snippet["publishedAt"],
                    "Likes": snippet["likeCount"],
                    "Replies_Count": item["snippet"].get("totalReplyCount", 0),
                    "Data_Source": "YouTube"
                })

                total_comments += 1
                collected_comments += 1

            print(f"✅ {collected_comments} comments fetched so far for {VIDEO_ID}")

            next_page_token = data.get("nextPageToken")

            # Short pause after every successful page to avoid rate limiting
            time.sleep(1)

            if not next_page_token:
                break
            comment_params["pageToken"] = next_page_token

print(f"\n✅ Total Comments Retrieved: {total_comments}")
print(f"📂 All data saved to '{OUTPUT_FILE}'")


Fetching metadata for video: DLgNW9lCAaU...
Fetching comments for video: DLgNW9lCAaU...
✅ 100 comments fetched so far for DLgNW9lCAaU
✅ 200 comments fetched so far for DLgNW9lCAaU
Fetching metadata for video: r4K8V8btCtY...
Fetching comments for video: r4K8V8btCtY...
✅ 56 comments fetched so far for r4K8V8btCtY
Fetching metadata for video: kRhg1dPS4Gw...
Fetching comments for video: kRhg1dPS4Gw...
✅ 100 comments fetched so far for kRhg1dPS4Gw
✅ 200 comments fetched so far for kRhg1dPS4Gw
✅ 300 comments fetched so far for kRhg1dPS4Gw
✅ 400 comments fetched so far for kRhg1dPS4Gw
✅ 450 comments fetched so far for kRhg1dPS4Gw
Fetching metadata for video: Cj8wtEFNVog...
Fetching comments for video: Cj8wtEFNVog...
✅ 91 comments fetched so far for Cj8wtEFNVog
Fetching metadata for video: _CLZf58vzbc...
Fetching comments for video: _CLZf58vzbc...
✅ 79 comments fetched so far for _CLZf58vzbc
Fetching metadata for video: dbVki3gPYZs...
Fetching comments for video: dbVki3gPYZs...
✅ 100 comments 

## Merging All CSV Files to Create One Dataset

In [35]:
import os
import pandas as pd

# Directory containing the CSV files
directory = "./"  # Adjust if needed

# Names of the per-topic CSV files to merge (looked up inside `directory`)
# NOTE(review): if All_Events.csv already combines some of the other files,
# its rows will appear twice in the merged output — confirm.
csv_files = [
    "All_Events.csv",
    "FIFA_World_Cup_2022_Qatar.csv",
    "Formula_1.csv",
    "LIV_Golf.csv",
    "Manchester_City_Ownership.csv",
    "Newcastle_Takeover_Saudi_Arabia.csv",
    "PSG_Ownership.csv",
    "Saudi_Pro_League.csv",
    "Qatar_Sports_Sponsorships.csv"
]

# DataFrames that were actually loaded, and the names of files not found
df_list = []
missing_files = []

# Read each CSV file that exists; remember the ones that do not
for file in csv_files:
    file_path = os.path.join(directory, file)
    if not os.path.exists(file_path):
        missing_files.append(file)
        continue
    df = pd.read_csv(file_path)
    df["Source_File"] = file  # Add a column to indicate the original file
    df_list.append(df)

# Number of files that really contributed rows (may be fewer than requested)
merged_count = len(df_list)

if missing_files:
    print(f"Skipped {len(missing_files)} missing file(s): {', '.join(missing_files)}")

# Concatenate all DataFrames
if df_list:
    merged_df = pd.concat(df_list, ignore_index=True)

    # Save merged file
    merged_file = "All_YouTube_Comments.csv"
    merged_df.to_csv(merged_file, index=False)

    # Report the files actually merged, not the length of the requested list
    print(f"✅ Merged {merged_count} files into {merged_file}")
else:
    print("No files found for merging.")


✅ Merged 9 files into All_YouTube_Comments.csv
