In [None]:
import csv
import os
import re
import time
from datetime import datetime

from googleapiclient.discovery import build

# API key: taken from the YOUTUBE_API_KEY environment variable when it is set,
# so the real key does not have to be saved inside the notebook. Falls back to
# the placeholder string when the variable is absent.
API_KEY = os.environ.get('YOUTUBE_API_KEY', 'your_API_KEY')

# Display label -> channel ID. The values are passed as the `id` argument of
# channels().list(), so they must be channel IDs rather than usernames.
CHANNELS = {
    'ABC News': 'UCBi2mrWuNuyYy4gbM6fU18Q',
    'CNN': 'UCupvZG-5ko_eiXAupbDfxWw',
    'Fox News': 'UCXIJgqnII2ZOINSWNOGFThA',
    'NBC News': 'UCeY0bbntWzzVIaj2z3QigXg',
    'CBS News': 'UC8p1vwvWtl6T73JiExfWs1g',
    'The Young Turks': 'UCuMo0RRtnNDuMB8DV5stEag',
    'USA Today': 'UCP6HGa63sBC7-KHtkme-p-g',
    # NOTE(review): the recorded runs in this notebook got no data back for
    # this ID (stats lookup failed, 0 videos retrieved) - verify the ID.
    'TMZ': 'UCVHJesse7CQkYggLKAqBpSg',
    'The Wall Street Journal': 'UCK7tptUDHh-RYDsdxO1-5QQ',
    'The New York Times': 'UCqnbDFdCpuN8CMEg0VuEBqA'
}





In [None]:
def get_youtube_service():
    """Build and return a YouTube Data API v3 client that uses API_KEY."""
    service = build('youtube', 'v3', developerKey=API_KEY)
    return service


In [None]:
def get_channel_stats(youtube, channel_id):
    """Fetch headline statistics for a single channel.

    Args:
        youtube: API client exposing ``channels().list(...).execute()``.
        channel_id: Channel ID passed as the ``id`` filter of the request.

    Returns:
        A dict with the keys ``channel_name``, ``subscribers``,
        ``total_views``, ``total_videos``, ``published_at`` and
        ``description`` (first 200 characters), or ``None`` when the request
        raised or the response contained no items. ``subscribers`` is the
        string ``'Hidden'`` when the count is missing or not an integer;
        the other two counters are ``None`` in that situation.
    """
    try:
        request = youtube.channels().list(
            part='statistics,snippet,contentDetails',
            id=channel_id
        )
        response = request.execute()
    except Exception as e:
        # The error text can embed the request URL, including `key=<API key>`;
        # mask the key so it never ends up in saved notebook output.
        safe_message = re.sub(r'([?&]key=)[^&\s"\'<>]+', r'\1REDACTED', str(e))
        print(f"API request failed: {safe_message}")
        return None

    items = response.get('items')
    if not items:
        return None

    channel = items[0]
    stats = channel.get('statistics', {})
    snippet = channel.get('snippet', {})

    def cast_int(val):
        """Return ``int(val)``, or ``None`` when val is missing or malformed."""
        try:
            return int(val)
        except (ValueError, TypeError):
            return None

    subscribers = cast_int(stats.get('subscriberCount'))
    total_views = cast_int(stats.get('viewCount'))
    total_videos = cast_int(stats.get('videoCount'))

    return {
        'channel_name': snippet.get('title', 'Unknown'),
        'subscribers': subscribers if subscribers is not None else 'Hidden',
        'total_views': total_views,
        'total_videos': total_videos,
        'published_at': snippet.get('publishedAt', None),
        # `or ''` guards against an explicit null description before slicing
        'description': (snippet.get('description') or '')[:200]
    }


In [None]:
def save_channel_stats_to_csv(stats_list, filename="channel_stats.csv"):
    """Write a list of channel-stats dictionaries to a CSV file.

    The header is the union of the keys of all rows, in first-seen order, so
    a row that carries a key the first row lacks no longer makes the writer
    raise ``ValueError``. Cells for keys a row does not have are left empty.

    Args:
        stats_list: List of dicts, one per channel. When empty (or None) a
            notice is printed and no file is created.
        filename: Destination path; an existing file is overwritten.

    Returns:
        None.
    """
    if not stats_list:
        print("No data to save.")
        return

    # dict.fromkeys de-duplicates while preserving insertion order, giving a
    # stable column order identical to the first row's when rows are uniform.
    fieldnames = list(dict.fromkeys(key for row in stats_list for key in row))

    with open(filename, mode="w", newline="", encoding="utf-8") as file:
        writer = csv.DictWriter(file, fieldnames=fieldnames, restval="")
        writer.writeheader()
        writer.writerows(stats_list)

    print(f"Saved {len(stats_list)} records to {filename}")

In [None]:
# --- MAIN SCRIPT ---
# Fetch statistics for every configured channel, trying each one a bounded
# number of times, then write the successful results to a CSV file.
if __name__ == "__main__":
    MAX_ATTEMPTS = 3  # total tries per channel, including the first one
    youtube = get_youtube_service()
    stats_list = []
    failed_channels = []

    for name, channel_id in CHANNELS.items():
        stats = None
        for attempt in range(1, MAX_ATTEMPTS + 1):
            stats = get_channel_stats(youtube, channel_id)
            if stats:
                break
            # Announce a retry and back off only when another attempt will
            # actually follow; the final failure goes straight to the
            # "Failed to fetch" message below without a pointless sleep.
            if attempt < MAX_ATTEMPTS:
                print(f"Retrying {name} ({attempt}/{MAX_ATTEMPTS - 1})...")
                time.sleep(2)

        if stats:
            stats["label"] = name  # human-friendly channel name
            stats_list.append(stats)
        else:
            print(f"Failed to fetch data for: {name}")
            failed_channels.append(name)

        time.sleep(1)  # pause between channels to avoid quota issues

    save_channel_stats_to_csv(stats_list, filename="news_channel_stats.csv")

    if failed_channels:
        print("Channels that could not be fetched:", failed_channels)

Retrying TMZ (1/3)...
Retrying TMZ (2/3)...
Retrying TMZ (3/3)...
Failed to fetch data for: TMZ
Saved 9 records to news_channel_stats.csv
Channels that could not be fetched: ['TMZ']


In [None]:
def get_recent_videos(youtube, channel_id, year, month, max_results=100,
                      stop_when_older=True):
    """Collect a channel's uploads that were published in one calendar month.

    The channel's uploads playlist is paged through; items whose
    ``publishedAt`` falls in ``year``/``month`` are kept (at most
    ``max_results``) and then enriched with statistics via ``videos().list``
    in batches of up to 50 IDs.

    Args:
        youtube: API client exposing ``channels()``, ``playlistItems()`` and
            ``videos()``, each with ``list(...).execute()``.
        channel_id: Channel ID whose uploads are scanned.
        year: Four-digit year of the target month.
        month: Month number (1-12) of the target month.
        max_results: Upper bound on the number of matching videos collected.
        stop_when_older: When True (default), paging stops after the first
            page on which every item predates the target month, instead of
            walking the channel's entire upload history. This assumes the
            uploads playlist is returned newest-first - pass False if that
            assumption does not hold for a channel.

    Returns:
        A list of dicts with the keys ``video_id``, ``title``,
        ``published_at``, ``views``, ``likes``, ``comments`` and
        ``duration``. An empty list is returned when the channel has no
        items or when any step raises (the error is printed).
    """
    try:
        # Get uploads playlist ID
        response = youtube.channels().list(
            part='contentDetails',
            id=channel_id
        ).execute()
        items = response.get('items')
        if not items:
            return []

        uploads_playlist = items[0]['contentDetails']['relatedPlaylists']['uploads']

        target_period = (year, month)
        videos = []
        next_page_token = None

        while len(videos) < max_results:
            playlist_response = youtube.playlistItems().list(
                part='snippet',
                playlistId=uploads_playlist,
                # never request more than is still needed, capped at 50 per page
                maxResults=min(50, max_results - len(videos)),
                pageToken=next_page_token
            ).execute()

            page_items = playlist_response.get('items', [])
            # Stays True only if the page is non-empty and every item on it
            # was published before the target month.
            page_all_older = bool(page_items)

            for item in page_items:
                video_id = item['snippet']['resourceId']['videoId']
                published_at = item['snippet']['publishedAt']
                # fromisoformat does not accept a trailing 'Z' on older
                # Python versions, so spell out the UTC offset instead
                published_dt = datetime.fromisoformat(published_at.replace('Z', '+00:00'))
                period = (published_dt.year, published_dt.month)

                if period >= target_period:
                    page_all_older = False

                # Only include videos from the specified month/year
                if period == target_period:
                    videos.append({
                        'video_id': video_id,
                        'title': item['snippet']['title'],
                        'published_at': published_at
                    })

            next_page_token = playlist_response.get('nextPageToken')
            if not next_page_token:
                break
            if stop_when_older and page_all_older:
                # Everything from here on is older still; further pages would
                # only spend quota without adding results.
                break
            time.sleep(0.1)

        # Get detailed statistics for filtered videos
        video_details = []
        for i in range(0, len(videos), 50):
            batch_ids = [v['video_id'] for v in videos[i:i+50]]
            video_response = youtube.videos().list(
                part='statistics,contentDetails,snippet',
                id=','.join(batch_ids)
            ).execute()
            for video in video_response.get('items', []):
                stats = video.get('statistics', {})
                video_details.append({
                    'video_id': video['id'],
                    'title': video['snippet']['title'],
                    'published_at': video['snippet']['publishedAt'],
                    'views': stats.get('viewCount', 0),
                    'likes': stats.get('likeCount', 0),
                    'comments': stats.get('commentCount', 0),
                    'duration': video['contentDetails']['duration']
                })

        return video_details

    except Exception as e:
        # The error text can embed the request URL, including `key=<API key>`;
        # mask the key so it never ends up in saved notebook output.
        safe_message = re.sub(r'([?&]key=)[^&\s"\'<>]+', r'\1REDACTED', str(e))
        print(f"Error fetching videos for channel {channel_id}: {safe_message}")
        return []


In [None]:
def scrape_all_channels(year, month):
    """Gather channel stats and one month's videos for each entry in CHANNELS.

    Returns a list with one dict per channel holding the keys ``channel``
    (display label), ``stats`` and ``videos``.
    """
    client = get_youtube_service()
    collected = []

    for label, cid in CHANNELS.items():
        print(f"Scraping {label}...")
        channel_stats = get_channel_stats(client, cid)
        month_videos = get_recent_videos(client, cid, year, month)
        print(f"  Retrieved {len(month_videos)} videos for {year}-{month:02d}")

        entry = {'channel': label, 'stats': channel_stats, 'videos': month_videos}
        collected.append(entry)

        # one-second pause before moving on to the next channel
        time.sleep(1)

    return collected

In [None]:
def save_videos_to_csv(data, filename='october_videos.csv'):
    """Flatten per-channel video lists into a single CSV file.

    ``data`` is an iterable of dicts carrying a ``channel`` label and a
    ``videos`` list; each video becomes one row prefixed by its channel.
    """
    header = ['Channel', 'Video ID', 'Title', 'Published', 'Views', 'Likes', 'Comments', 'Duration']
    # video dict keys, listed in the same order as the header columns after "Channel"
    video_keys = ('video_id', 'title', 'published_at', 'views', 'likes', 'comments', 'duration')

    with open(filename, 'w', newline='', encoding='utf-8') as out:
        sheet = csv.writer(out)
        sheet.writerow(header)
        for entry in data:
            label = entry['channel']
            for clip in entry['videos']:
                sheet.writerow([label] + [clip[key] for key in video_keys])

    print(f"Data saved to {filename}")

In [None]:
# --- MAIN ---
# Scrape one month of uploads for every configured channel and store them.
if __name__ == '__main__':
    YEAR, MONTH = 2025, 10  # October 2025
    data = scrape_all_channels(YEAR, MONTH)
    save_videos_to_csv(data)

Scraping ABC News...
  Retrieved 100 videos for 2025-10
Scraping CNN...
  Retrieved 100 videos for 2025-10
Scraping Fox News...
  Retrieved 100 videos for 2025-10
Scraping NBC News...
  Retrieved 100 videos for 2025-10
Scraping CBS News...
  Retrieved 100 videos for 2025-10
Scraping The Young Turks...
  Retrieved 100 videos for 2025-10
Scraping USA Today...
  Retrieved 100 videos for 2025-10
Scraping TMZ...
  Retrieved 0 videos for 2025-10
Scraping The Wall Street Journal...
  Retrieved 37 videos for 2025-10
Scraping The New York Times...
  Retrieved 81 videos for 2025-10
Data saved to october_videos.csv


In [None]:
# Module-level API client used by get_video_comments(); built through the
# same factory as the rest of the notebook.
YOUTUBE_SERVICE = get_youtube_service()

In [None]:
def get_video_comments(video_id, youtube=None):
    """Fetch the comment threads of one video, following pagination.

    Each top-level comment is returned as a row, followed by the replies
    that the thread response carries under ``replies.comments``.

    NOTE(review): only the replies embedded in the ``commentThreads``
    response are collected; the API reference describes that list as
    possibly partial - confirm whether complete reply coverage is needed.

    Args:
        video_id: ID of the video whose comments are fetched.
        youtube: Optional API client exposing
            ``commentThreads().list(...).execute()``. Defaults to the
            module-level ``YOUTUBE_SERVICE``.

    Returns:
        A list of dicts with the keys ``video_id``, ``comment_id``,
        ``author``, ``text``, ``published_at`` and ``like_count``. When a
        request raises (for example because comments are disabled), the
        error is printed and whatever was collected so far is returned.
    """
    service = YOUTUBE_SERVICE if youtube is None else youtube

    def _row(comment_id, snippet):
        """Build one output row from a comment's ID and snippet."""
        return {
            'video_id': video_id,
            'comment_id': comment_id,
            'author': snippet.get('authorDisplayName'),
            'text': snippet.get('textDisplay'),
            'published_at': snippet.get('publishedAt'),
            'like_count': snippet.get('likeCount', 0)
        }

    comments = []
    next_page_token = None

    while True:
        try:
            response = service.commentThreads().list(
                part='snippet,replies',
                videoId=video_id,
                maxResults=100,
                pageToken=next_page_token,
                textFormat='plainText'
            ).execute()
        except Exception as e:
            # The error text embeds the request URL, including
            # `key=<API key>`; mask the key so it never ends up in saved
            # notebook output.
            safe_message = re.sub(r'([?&]key=)[^&\s"\'<>]+', r'\1REDACTED', str(e))
            print(f"Error fetching comments for video {video_id}: {safe_message}")
            break

        for item in response.get('items', []):
            top_level = item['snippet']['topLevelComment']
            comments.append(_row(top_level['id'], top_level['snippet']))

            # Include replies if any
            for reply in item.get('replies', {}).get('comments', []):
                comments.append(_row(reply['id'], reply['snippet']))

        next_page_token = response.get('nextPageToken')
        if not next_page_token:
            break
        time.sleep(0.1)  # avoid hitting quota limits

    return comments

In [None]:
def scrape_comments_from_csv(input_csv='october_videos.csv'):
    """Read the 'Video ID' column of a CSV and fetch comments for each ID.

    Returns one flat list containing the comment rows of every video, in
    the order the IDs appear in the file.
    """
    with open(input_csv, 'r', encoding='utf-8') as source:
        ids_to_fetch = [record['Video ID'] for record in csv.DictReader(source)]

    collected = []
    for current_id in ids_to_fetch:
        print(f"Fetching comments for video {current_id}...")
        batch = get_video_comments(current_id)
        collected.extend(batch)
        print(f"  Retrieved {len(batch)} comments.")
        # brief pause before requesting the next video's comments
        time.sleep(0.5)

    return collected

In [None]:
def save_comments_to_csv(comments, output_csv='october_video_comments.csv'):
    """Write comment rows (dicts) to a CSV file with a fixed column order."""
    columns = ['video_id', 'comment_id', 'author', 'text', 'published_at', 'like_count']

    with open(output_csv, 'w', newline='', encoding='utf-8') as handle:
        sink = csv.DictWriter(handle, fieldnames=columns)
        sink.writeheader()
        for record in comments:
            sink.writerow(record)

    print(f"Saved {len(comments)} comments to {output_csv}")

In [None]:
# --- MAIN ---
# Fetch comments for every video listed in the videos CSV, then save them.
if __name__ == '__main__':
    comments_data = scrape_comments_from_csv(input_csv='october_videos.csv')
    save_comments_to_csv(comments_data, output_csv='october_video_comments.csv')

Fetching comments for video QWhBHJfVaN0...
  Retrieved 204 comments.
Fetching comments for video pHIbuFQapLc...
  Retrieved 7 comments.
Fetching comments for video fOU8qXMoUaw...
  Retrieved 152 comments.
Fetching comments for video LnxWSjNeC4s...
  Retrieved 254 comments.
Fetching comments for video NbwWV1yA--E...
  Retrieved 1413 comments.
Fetching comments for video Ha8TU_W4UAI...
  Retrieved 58 comments.
Fetching comments for video zXhpJnR5_3k...
  Retrieved 1524 comments.
Fetching comments for video xl1-2aqghsE...
  Retrieved 63 comments.
Fetching comments for video UqPaVXMUFyY...
  Retrieved 963 comments.
Fetching comments for video iV-YUVl_xUU...
  Retrieved 3 comments.
Fetching comments for video n6soopF2Cs8...
  Retrieved 41 comments.
Fetching comments for video qyfZfot-SD8...
  Retrieved 20 comments.
Fetching comments for video NUwTyWGNWeU...
  Retrieved 17 comments.
Fetching comments for video kSTpOWMC10I...
  Retrieved 66 comments.
Fetching comments for video 29tcTJfzgU4...



Fetching comments for video wDoHzU7yFec...
Error fetching comments for video wDoHzU7yFec: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet%2Creplies&videoId=wDoHzU7yFec&maxResults=100&textFormat=plainText&key=AIzaSyDJhhiLeaDbxO8vQnWR6ROtMw8jrbvzDRo&alt=json returned "The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.". Details: "[{'message': 'The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.', 'domain': 'youtube.commentThread', 'reason': 'commentsDisabled', 'location': 'videoId', 'locationType': 'parameter'}]">
  Retrieved 0 comments.
Fetching comments for video ZSYhdiN5iRc...
  Retrieved 198 comments.
Fetching comments for video xYIGBs1FSdE...
  Retrieved 825 comments.
Fetching comments for video co7Oh0hHIkw...
  Retrieved 175 comments.
Fetching comments f



Fetching comments for video 9NbD9pPxzY0...
Error fetching comments for video 9NbD9pPxzY0: <HttpError 403 when requesting https://youtube.googleapis.com/youtube/v3/commentThreads?part=snippet%2Creplies&videoId=9NbD9pPxzY0&maxResults=100&textFormat=plainText&key=AIzaSyDJhhiLeaDbxO8vQnWR6ROtMw8jrbvzDRo&alt=json returned "The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.". Details: "[{'message': 'The video identified by the <code><a href="/youtube/v3/docs/commentThreads/list#videoId">videoId</a></code> parameter has disabled comments.', 'domain': 'youtube.commentThread', 'reason': 'commentsDisabled', 'location': 'videoId', 'locationType': 'parameter'}]">
  Retrieved 0 comments.
Fetching comments for video E9_vEi24o4M...
  Retrieved 50 comments.
Fetching comments for video vf1P-ksBW44...
  Retrieved 66 comments.
Fetching comments for video cnSpNpMjEuU...
  Retrieved 5 comments.
Fetching comments for v