<a href="https://colab.research.google.com/github/Shibhaditya2/YouTube-Scrapper/blob/main/YouTubeSearch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install google-api-python-client
!pip install --upgrade google-api-python-client

from googleapiclient.discovery import build
import csv
import re
from google.colab import userdata

# YouTube Data API key, fetched from Colab user data under the name 'youtubeAPI'
api_key = userdata.get('youtubeAPI')

# Client object for version 3 of the YouTube Data API
youtube = build('youtube', 'v3', developerKey=api_key)

# Define the list of labels and number of results per label
labels = ['crypto', 'bitcoin', 'btc', 'ethereum', 'cryptocurrency']
x = 1000  # Maximum number of non-live results to keep for each label
# Sent as `maxResults` to search.list. The API documents the acceptable range
# as 0 to 50 inclusive, so the previous value of 2000 was out of range.
search_max_results = 50

# ISO 8601 duration as returned in contentDetails.duration. Besides the usual
# "PT#H#M#S" form, week and day components are accepted ("P1DT2H3M4S") and the
# time part may be absent altogether ("P0D").
_ISO_DURATION_RE = re.compile(
    r'P(?:(\d+)W)?(?:(\d+)D)?(?:T(?:(\d+)H)?(?:(\d+)M)?(?:(\d+)S)?)?'
)


# Function to convert ISO 8601 duration to a readable format
def format_duration(iso_duration) -> str:
    """Convert an ISO 8601 duration into ``H:MM:SS`` or ``M:SS``.

    Args:
        iso_duration: Duration string such as ``"PT1H2M3S"`` or
            ``"P1DT2H"``. Weeks and days are folded into the hour count.

    Returns:
        ``"H:MM:SS"`` when the duration has a non-zero hour count, otherwise
        ``"M:SS"``. ``'N/A'`` is returned when the value is not a string or
        does not parse, so one odd record cannot abort a whole scrape.
    """
    if not isinstance(iso_duration, str):
        return 'N/A'

    match = _ISO_DURATION_RE.fullmatch(iso_duration)
    if match is None:
        return 'N/A'

    # Components that are absent from the string come back as None -> 0.
    weeks, days, hours, minutes, seconds = (int(part) if part else 0 for part in match.groups())
    hours += 24 * (days + 7 * weeks)

    if hours > 0:
        return f"{hours}:{minutes:02d}:{seconds:02d}"
    return f"{minutes}:{seconds:02d}"

# List to store all video data
results = []

# Per-request ceiling of the YouTube Data API v3: search.list accepts a
# maxResults of 0 to 50 and videos.list takes at most 50 ids per call.
api_batch_limit = 50

# Process each label
for label in labels:
    # search_max_results is sent as the page size, so clamp it into the
    # range the API accepts.
    page_size = max(1, min(search_max_results, api_batch_limit))

    # Collect up to x non-live videos for this label, following the result
    # pages until enough are gathered or the API reports no further page.
    # NOTE: every page is a separate search.list call and counts against the
    # project's API quota, so a large x can be expensive.
    selected_items = []
    seen_ids = set()
    request = youtube.search().list(
        q=label,
        part='id,snippet',
        maxResults=page_size,
        type='video',
        order='relevance'
    )
    while request is not None and len(selected_items) < x:
        search_response = request.execute()

        for item in search_response.get('items', []):
            # Filter out currently live videos
            if item['snippet']['liveBroadcastContent'] == 'live':
                continue
            # Skip ids already collected so a video repeated on a later page
            # is not written twice for the same label.
            video_id = item['id']['videoId']
            if video_id in seen_ids:
                continue
            seen_ids.add(video_id)
            selected_items.append(item)

        # list_next returns None once there is no further page
        request = youtube.search().list_next(request, search_response)

    # Select the top x non-live videos (the last page may overshoot)
    selected_items = selected_items[:x]

    # Extract video IDs
    video_ids = [item['id']['videoId'] for item in selected_items]

    if not video_ids:
        print(f"No non-live videos found for label '{label}'")
        continue

    # Fetch detailed information for the selected videos, one batch of ids
    # per request so the per-call id limit is respected.
    for start in range(0, len(video_ids), api_batch_limit):
        batch_ids = video_ids[start:start + api_batch_limit]
        videos_response = youtube.videos().list(
            part='snippet,contentDetails,statistics',
            id=','.join(batch_ids)
        ).execute()

        # Extract and store details for each video
        for video in videos_response.get('items', []):
            snippet = video['snippet']
            # Fall back to an empty mapping so a record without statistics
            # yields 'N/A' values rather than a KeyError.
            statistics = video.get('statistics', {})
            video_data = {
                'Label': label,
                'Title': snippet['title'],
                'Link': f"https://www.youtube.com/watch?v={video['id']}",
                'Author': snippet['channelTitle'],
                'Publish Date': snippet['publishedAt'],
                'View Count': statistics.get('viewCount', 'N/A'),
                'Likes': statistics.get('likeCount', 'N/A'),
                'Language': snippet.get('defaultAudioLanguage', 'Unknown'),
                'Duration': format_duration(video['contentDetails']['duration']),
                'Comment Count': statistics.get('commentCount', 'N/A'),
                'Tags': ', '.join(snippet.get('tags', [])) or 'N/A'
            }
            results.append(video_data)

# Output file and the column order used for its header and rows
csv_filename = 'youtube_search_results.csv'
fieldnames = [
    'Label',
    'Title',
    'Link',
    'Author',
    'Publish Date',
    'View Count',
    'Likes',
    'Language',
    'Duration',
    'Comment Count',
    'Tags',
]

# Dump every collected record to the CSV file: header first, then one row per video
with open(csv_filename, 'w', newline='', encoding='utf-8') as out_handle:
    row_writer = csv.DictWriter(out_handle, fieldnames=fieldnames)
    row_writer.writeheader()
    for record in results:
        row_writer.writerow(record)

print(f"Results have been written to {csv_filename}")

Results have been written to youtube_search_results.csv
