In [22]:
import pandas as pd
from googleapiclient.discovery import build

# Read the source dataset of channels into a DataFrame.
df = pd.read_csv("top_100_canadian_youtuber_data.csv")

# The channel ID is whatever follows the first '@' in NAME, with surrounding
# whitespace removed. Splitting at most once keeps any later '@' in the ID.
name_parts = df['NAME'].str.split('@', n=1)
df['ChannelID'] = name_parts.str[1].str.strip()

# Build the YouTube Data API v3 client.
# NOTE(review): "API KEY" looks like a redacted placeholder — a real key
# presumably has to be substituted before this cell can reach the API.
API_KEY = "API KEY"
youtube = build('youtube', 'v3', developerKey=API_KEY)

# Pre-create the result columns (all None) that the fetch loop fills in;
# 'category' holds the simplified broad category.
for new_column in ('total_subscribers', 'total_views', 'total_videos', 'category'):
    df[new_column] = None

# Mapping Wikipedia topics to broad categories.
# Keys are Wikipedia article slugs (the last path segment of a topic URL);
# values are the simplified category names written to the output.
CATEGORY_MAP = {
    "Music": "Music",
    "Entertainment": "Entertainment",
    "Video_game_culture": "Gaming",
    "Sport": "Sports",
    "Lifestyle": "Lifestyle",
    "Technology": "Technology",
    "Food": "Food",
    "Education": "Education",
    "Film": "Entertainment",
    "Television_program": "Entertainment",
    "Humour": "Entertainment",
    "Society": "Lifestyle",
    "Health": "Lifestyle",
    "Fashion": "Lifestyle",
}

def map_to_category(urls):
    """Map YouTube topic URLs to a single broad category.

    Args:
        urls: Iterable of Wikipedia topic URLs (strings), or None/empty.

    Returns:
        The category of the first URL whose article slug is a key of
        CATEGORY_MAP, or "Other" when no URL matches or *urls* is empty.
    """
    if not urls:
        return "Other"
    for url in urls:
        # Ignore a trailing slash so the slug is never the empty string.
        topic = url.rstrip("/").rsplit("/", 1)[-1]
        # Wikipedia disambiguates some articles with a parenthesised suffix,
        # e.g. ".../wiki/Lifestyle_(sociology)"; fall back to the bare slug
        # so such URLs still hit their CATEGORY_MAP key. The exact slug is
        # tried first, so anything that matched before still matches.
        base_topic = topic.partition("_(")[0]
        for key in (topic, base_topic):
            if key in CATEGORY_MAP:
                return CATEGORY_MAP[key]
    return "Other"

# Loop through rows and fetch stats + mapped category.
# Only ChannelID is read per row, so iterate that column directly instead of
# building a full row Series with iterrows(); `i` is still the index label
# used for the df.at writes below.
for i, channel_id in df['ChannelID'].items():
    # A NAME without '@' leaves ChannelID missing; skip it rather than spend
    # an API call on a request that cannot match a channel.
    if pd.isna(channel_id) or not channel_id:
        print(f"Error fetching {channel_id}: missing channel ID")
        continue

    # Keep the try body to the network call only, so that genuine programming
    # errors in the processing below are not reported as fetch failures.
    try:
        response = youtube.channels().list(
            part="snippet,statistics,topicDetails",
            id=channel_id
        ).execute()
    except Exception as e:
        # Best-effort enrichment: report the failure and carry on with the
        # remaining channels; this row keeps its None placeholders.
        print(f"Error fetching {channel_id}: {e}")
        continue

    # A lookup that matches nothing may come back without an 'items' list;
    # say so explicitly instead of failing with a bare KeyError/IndexError.
    items = response.get('items') or []
    if not items:
        print(f"Error fetching {channel_id}: channel not found")
        continue

    channel = items[0]
    stats = channel.get('statistics', {})
    topics = channel.get('topicDetails', {}).get('topicCategories', [])

    # Store stats (0 when the API response omits a counter)
    df.at[i, 'total_subscribers'] = stats.get('subscriberCount', 0)
    df.at[i, 'total_views'] = stats.get('viewCount', 0)
    df.at[i, 'total_videos'] = stats.get('videoCount', 0)

    # Assign one broad category
    df.at[i, 'category'] = map_to_category(topics)

# Save updated CSV (rows that failed above keep empty result columns)
df.to_csv("updated_top_100_canadian_youtuber_data.csv", index=False)
print("CSV created.")


CSV created.
