In [1]:
import requests
import pandas as pd
import time

In [8]:
API_KEY = "AIzaSyC3kQgtXNeK8mvDHdjOG7nGe52hpFZBCSA"  
BASE_URL = "https://www.googleapis.com/youtube/v3"

In [10]:
# Channel handles (can be handles or channel names)
channel_handles = [
    #"3blue1brown",            
    #"krishnaik06",              
    #"freecodecamp",                               
    #"TechWithTim",             
    "sentdex",                 
    "codebasics",              
    "simplilearn",              
    "coreyms",                  
    "TheCodingTrain",        
    "CodeWithHarry",            
    "realpython",              
    "statquest",               
    "LukeBarousse",             
    "TheNetNinja",             
    "mlstreettalk",             
    "aiExplained",              
    "WelchLabs",                
    "GoogleDevelopers",        
    "JomaTech"                  
]

In [11]:

# 1. Get Channel ID from handle
def get_channel_id_by_handle(handle):
    url = f"{BASE_URL}/search?part=snippet&type=channel&q={handle}&key={API_KEY}"
    res = requests.get(url)
    res.raise_for_status()
    data = res.json()
    if data['items']:
        return data['items'][0]['snippet']['channelId']
    return None

In [12]:
# 2. Get all video IDs from a channel
def get_all_video_ids(channel_id):
    video_ids = []
    next_page_token = ""
    while True:
        url = f"{BASE_URL}/search?key={API_KEY}&channelId={channel_id}&part=id&order=date&maxResults=50&pageToken={next_page_token}&type=video"
        res = requests.get(url)
        res.raise_for_status()
        data = res.json()

        for item in data["items"]:
            if item["id"]["kind"] == "youtube#video":
                video_ids.append(item["id"]["videoId"])

        if "nextPageToken" in data:
            next_page_token = data["nextPageToken"]
        else:
            break

        time.sleep(0.1)  # slight pause to avoid hitting rate limit

    return video_ids


In [13]:

# 3. Get video data for a batch of video IDs
def get_video_data(channel_title, video_ids):
    data = []
    for i in range(0, len(video_ids), 50):
        batch = video_ids[i:i+50]
        stats_url = f"{BASE_URL}/videos?part=snippet,statistics&id={','.join(batch)}&key={API_KEY}"
        stats_res = requests.get(stats_url)
        stats_res.raise_for_status()
        stats_data = stats_res.json()

        for video in stats_data["items"]:
            snippet = video["snippet"]
            stats = video["statistics"]
            data.append({
                "channel_title": channel_title,
                "video_title": snippet.get("title"),
                "views": int(stats.get("viewCount", 0)),
                "likes": int(stats.get("likeCount", 0)),
                "comments": int(stats.get("commentCount", 0)),
                "published_date": snippet.get("publishedAt").split("T")[0]
            })

        time.sleep(0.1)
    return data



In [15]:
# 4. Main script
all_data = []

for handle in channel_handles:
    try:
        print(f"Fetching channel ID for: {handle}")
        channel_id = get_channel_id_by_handle(handle)
        if not channel_id:
            print(f"Channel ID not found for {handle}")
            continue

        print(f"Fetching videos for: {handle} (ID: {channel_id})")
        video_ids = get_all_video_ids(channel_id)
        print(f"Found {len(video_ids)} videos for {handle}")

        # Get channel title from the first video metadata
        sample_video_data = get_video_data(handle, video_ids[:1])
        if sample_video_data:
            channel_title = sample_video_data[0]["channel_title"]
        else:
            channel_title = handle

        channel_data = get_video_data(channel_title, video_ids)
        all_data.extend(channel_data)

    except Exception as e:
        print(f"Error fetching data for {handle}: {e}")
        continue


Fetching channel ID for: sentdex
Fetching videos for: sentdex (ID: UCfzlCWGWYyIQ0aLC5w48gBQ)
Found 150 videos for sentdex
Fetching channel ID for: codebasics
Fetching videos for: codebasics (ID: UCh9nVJoWXmFb7sLApWGcLPQ)
Found 248 videos for codebasics
Fetching channel ID for: simplilearn
Fetching videos for: simplilearn (ID: UCsvqVGtbbyHaMoevxPAq9Fg)
Found 26 videos for simplilearn
Fetching channel ID for: coreyms
Fetching videos for: coreyms (ID: UCCezIgC97PvUuR4_gbFUs5g)
Found 242 videos for coreyms
Fetching channel ID for: TheCodingTrain
Fetching videos for: TheCodingTrain (ID: UCvjgXvBlbQiydffZU7m1_aw)
Found 165 videos for TheCodingTrain
Fetching channel ID for: CodeWithHarry
Fetching videos for: CodeWithHarry (ID: UCeVMnSShP_Iviwkknt83cww)
Found 3 videos for CodeWithHarry
Fetching channel ID for: realpython
Fetching videos for: realpython (ID: UCI0vQvr9aFn27yR6Ej6n5UA)
Found 431 videos for realpython
Fetching channel ID for: statquest
Fetching videos for: statquest (ID: UCtYLUTtg

In [16]:
# 5. Save to CSV
df = pd.DataFrame(all_data)
df.to_csv("youtube_multi_channel_data1.csv", index=False)
print("Dataset saved as youtube_multi_channel_data.csv")

Dataset saved as youtube_multi_channel_data.csv
