In [1]:
import csv
import os

import pandas as pd
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError

In [1]:
# Read the key from the YOUTUBE_API_KEY environment variable so the real
# credential does not have to be saved inside the notebook; the placeholder
# below is used only when that variable is unset.
api_key = os.environ.get("YOUTUBE_API_KEY", "Your-api-key-here")

# Get Channel Video Details

In [3]:
# Upper bound on how many videos to collect; None means no limit.
max_videos = None

In [2]:
# Text used to search for the channel; the first matching channel is used.
channel_name = "youtube channel name"

In [5]:
def _list_uploaded_video_ids(youtube, channel_id, max_videos=None):
    """Return video IDs from the channel's uploads playlist, at most ``max_videos`` of them."""
    # The uploads playlist is the canonical listing of a channel's videos.
    # Paging through it with playlistItems.list is far cheaper on quota than
    # search.list and does not mix playlists/channels into the results.
    channel_response = youtube.channels().list(
        id=channel_id,
        part='contentDetails'
    ).execute()
    channel_items = channel_response.get('items', [])
    if not channel_items:
        return []
    uploads_playlist_id = channel_items[0]['contentDetails']['relatedPlaylists']['uploads']

    video_ids = []
    next_page_token = None

    # The loop condition also covers max_videos <= 0: no request is issued.
    while max_videos is None or len(video_ids) < max_videos:
        page_size = 50  # API maximum per page
        if max_videos is not None:
            page_size = min(50, max_videos - len(video_ids))

        playlist_response = youtube.playlistItems().list(
            playlistId=uploads_playlist_id,
            part='contentDetails',
            maxResults=page_size,
            pageToken=next_page_token
        ).execute()

        video_ids.extend(
            item['contentDetails']['videoId']
            for item in playlist_response.get('items', [])
        )

        next_page_token = playlist_response.get('nextPageToken')
        if not next_page_token:
            break

    return video_ids if max_videos is None else video_ids[:max_videos]


def get_channel_videos(channel_name, api_key, max_videos=None):
    """Look up a channel by name and save details of its videos to a CSV file.

    The first channel returned by a search for ``channel_name`` is used. Its
    uploaded videos are listed, their snippet and statistics are fetched in
    batches of 50, and the result is written to ``"<channel title>_videos.csv"``
    in the current working directory (title reduced to alphanumerics and
    ``._- ``).

    Args:
        channel_name: Search text used to find the channel. This same text is
            stored in the ``channel_name`` column of the CSV.
        api_key: YouTube Data API v3 developer key.
        max_videos: Maximum number of videos to include; ``None`` for no limit.

    Returns:
        None. Prints ``"Channel not found."`` and returns early when the search
        yields no channel; otherwise prints the name of the CSV written.
    """
    # Initialize the YouTube API client
    youtube = build('youtube', 'v3', developerKey=api_key)

    # Search for the channel by name to get the channel ID
    search_response = youtube.search().list(
        q=channel_name,
        type='channel',
        part='id,snippet',
        maxResults=1
    ).execute()

    matches = search_response.get('items', [])
    if not matches:
        print("Channel not found.")
        return

    channel_id = matches[0]['id']['channelId']
    channel_title = matches[0]['snippet']['title']

    videos = _list_uploaded_video_ids(youtube, channel_id, max_videos)

    # Fetch details and stats for each video
    video_details = []

    for i in range(0, len(videos), 50):  # videos.list accepts up to 50 IDs per call
        video_ids = videos[i:i + 50]
        video_response = youtube.videos().list(
            id=','.join(video_ids),
            part='id,snippet,statistics'
        ).execute()

        for video in video_response.get('items', []):
            snippet = video['snippet']
            # Individual counters (or the whole statistics part) can be absent;
            # missing values become empty cells instead of raising KeyError.
            statistics = video.get('statistics', {})
            video_details.append({
                'channel_id': channel_id,
                'channel_name': channel_name,
                'video_id': video['id'],
                'title': snippet['title'],
                'description': snippet['description'],
                'published_at': snippet['publishedAt'],
                'view_count': statistics.get('viewCount'),
                'like_count': statistics.get('likeCount'),
                # Kept for a stable CSV schema; YouTube no longer exposes
                # public dislike counts, so this is normally empty.
                'dislike_count': statistics.get('dislikeCount'),
                'comment_count': statistics.get('commentCount'),
            })

    # Fixing the columns keeps the CSV header present even when no videos were found.
    columns = [
        'channel_id', 'channel_name', 'video_id', 'title', 'description',
        'published_at', 'view_count', 'like_count', 'dislike_count', 'comment_count',
    ]
    df = pd.DataFrame(video_details, columns=columns)

    # Generate a safe file name from the channel title
    safe_channel_name = ''.join(e for e in channel_title if e.isalnum() or e in "._- ")

    # Save DataFrame to CSV
    csv_file_name = f"{safe_channel_name}_videos.csv"
    df.to_csv(csv_file_name, index=False)

    print(f"Video details saved to {csv_file_name}")


In [6]:
# Runs the channel lookup and writes "<channel title>_videos.csv".
get_channel_videos(channel_name, api_key, max_videos=max_videos)

Video details saved to Aik News Digital_videos.csv


# Get Video Comments

In [3]:
# Path to the videos CSV produced by get_channel_videos above; it must have
# the 'video_id', 'title' and 'channel_name' columns.
input_csv = 'yourcsv.csv'

In [4]:
def _fetch_all_replies(youtube, parent_id):
    """Return every reply resource under ``parent_id`` using ``comments.list``, following pagination."""
    replies = []
    page_token = None

    while True:
        response = youtube.comments().list(
            part='snippet',
            parentId=parent_id,
            maxResults=100,
            pageToken=page_token,
            textFormat='plainText'
        ).execute()

        replies.extend(response.get('items', []))

        page_token = response.get('nextPageToken')
        if not page_token:
            return replies


def get_video_comments(video_id, api_key):
    """Collect the top-level comments and replies of one video.

    Args:
        video_id: ID of the video whose comment threads are fetched.
        api_key: YouTube Data API v3 developer key.

    Returns:
        A list of dicts. Each top-level comment has the keys ``comment_id``,
        ``author``, ``text``, ``like_count``, ``published_at`` and
        ``reply_count``; it is followed immediately by its replies, which
        carry ``reply_to`` (the parent comment ID) instead of ``reply_count``.

    Errors raised by the API client (for example when comments are disabled
    on the video) are not caught here and propagate to the caller.
    """
    # Initialize the YouTube API client
    youtube = build('youtube', 'v3', developerKey=api_key)

    # List to store comments and replies
    comments_data = []

    next_page_token = None

    while True:
        # Fetch one page of comment threads (top-level comments)
        comment_threads_response = youtube.commentThreads().list(
            part='snippet,replies',
            videoId=video_id,
            maxResults=100,  # Maximum number of comments per page (API limit is 100)
            pageToken=next_page_token,
            textFormat='plainText'
        ).execute()

        for item in comment_threads_response.get('items', []):
            thread = item['snippet']
            top_level = thread['topLevelComment']
            top_id = top_level['id']
            top_comment = top_level['snippet']
            reply_total = thread['totalReplyCount']

            comments_data.append({
                'comment_id': top_id,
                'author': top_comment['authorDisplayName'],
                'text': top_comment['textDisplay'],
                'like_count': top_comment['likeCount'],
                'published_at': top_comment['publishedAt'],
                'reply_count': reply_total
            })

            if reply_total > 0:
                replies = item.get('replies', {}).get('comments', [])
                if reply_total > len(replies):
                    # The replies embedded in a thread can be only a subset;
                    # fetch the full list, keeping the embedded ones as a
                    # fallback if that call returns nothing.
                    replies = _fetch_all_replies(youtube, top_id) or replies

                for reply in replies:
                    reply_snippet = reply['snippet']
                    comments_data.append({
                        'comment_id': reply['id'],
                        'author': reply_snippet['authorDisplayName'],
                        'text': reply_snippet['textDisplay'],
                        'like_count': reply_snippet['likeCount'],
                        'published_at': reply_snippet['publishedAt'],
                        'reply_to': top_id  # Indicates which comment this is a reply to
                    })

        # Check if there is a next page of comments
        next_page_token = comment_threads_response.get('nextPageToken')
        if not next_page_token:
            break

    return comments_data

def scrape_comments_from_csv(input_csv, api_key):
    """Fetch comments for every video listed in a CSV and append them to a per-channel CSV.

    Args:
        input_csv: Path to a CSV with the columns ``video_id``, ``title`` and
            ``channel_name``.
        api_key: YouTube Data API v3 developer key.

    For each input row the comments are appended to
    ``"<channel_name>_comments.csv"`` in the current working directory, with
    the channel name reduced to alphanumerics and ``._- ``. The header row is
    written only when the output file is empty. Because the file is opened in
    append mode, running this twice on the same input appends the rows twice.

    Videos whose comments are disabled, or which no longer exist, are reported
    with a printed message and skipped; any other API error propagates.
    """
    header = ['video_id', 'video_title', 'comment_id', 'author', 'text', 'like_count', 'published_at', 'reply_to']

    # Open the input CSV file
    with open(input_csv, 'r', newline='', encoding='utf-8') as file:
        reader = csv.DictReader(file)

        # The CSV must have 'video_id', 'title' and 'channel_name' columns
        for row in reader:
            video_id = row['video_id']
            video_title = row['title']
            channel_name = row['channel_name']

            # Scrape comments for the video
            try:
                comments = get_video_comments(video_id, api_key)
            except HttpError as exc:
                # Only per-video conditions are skipped; anything else (quota,
                # bad key, ...) would fail for every row and must surface.
                error_body = getattr(exc, 'content', b'') or b''
                if isinstance(error_body, bytes):
                    error_body = error_body.decode('utf-8', errors='replace')
                if not any(reason in error_body for reason in ('commentsDisabled', 'videoNotFound')):
                    raise
                print(f"Skipping video {video_id}: comments unavailable ({exc})")
                continue

            # Prepare the output CSV file name, dropping characters that are
            # unsafe in file names (same rule as the videos CSV).
            safe_channel_name = ''.join(ch for ch in channel_name if ch.isalnum() or ch in "._- ")
            output_csv = f"{safe_channel_name}_comments.csv"

            # Write comments to the output CSV file
            with open(output_csv, 'a', newline='', encoding='utf-8') as output_file:
                writer = csv.writer(output_file)

                # Write the header only if the file is empty
                if output_file.tell() == 0:
                    writer.writerow(header)

                for comment in comments:
                    writer.writerow([
                        video_id,
                        video_title,
                        comment.get('comment_id'),
                        comment.get('author'),
                        comment.get('text'),
                        comment.get('like_count'),
                        comment.get('published_at'),
                        comment.get('reply_to', '')  # Empty for top-level comments
                    ])



In [5]:
# Appends the comments of every video listed in input_csv to "<channel_name>_comments.csv".
scrape_comments_from_csv(input_csv=input_csv, api_key=api_key)