In [None]:
import re
from getpass import getpass
import os
import requests
import pandas as pd
import datetime
import os
from google.colab import files

In [None]:


def extract_video_id(youtube_url):
    """Extract the video ID from a YouTube URL.

    Recognises the ``v`` query parameter of watch URLs as well as the
    path-based forms ``youtu.be/<id>``, ``/shorts/<id>``, ``/embed/<id>``
    and ``/live/<id>``.

    Args:
        youtube_url: URL text, typically typed or pasted by the user.

    Returns:
        The video ID as a string, or None when no ID can be found
        (including when the input is empty or not a string).
    """
    if not youtube_url or not isinstance(youtube_url, str):
        return None

    patterns = (
        # Anchor on '?' or '&' so a parameter that merely ends in "v"
        # (e.g. "rv=") is not mistaken for the video parameter.
        r"[?&]v=([a-zA-Z0-9_-]+)",
        r"youtu\.be/([a-zA-Z0-9_-]+)",
        r"/(?:shorts|embed|live)/([a-zA-Z0-9_-]+)",
        # Fallback: the original loose match, kept so every input that used
        # to yield an ID still does.
        r"v=([a-zA-Z0-9_-]+)",
    )
    for pattern in patterns:
        match = re.search(pattern, youtube_url)
        if match:
            return match.group(1)
    return None

# Every successfully parsed video ID is accumulated here, in entry order
video_ids = []

# Prompt repeatedly; the sentinel word 'done' (any letter case) ends the session
while True:
    url = input("Enter a YouTube video URL (or type 'done' to finish): ").strip()
    if url.lower() == "done":
        break

    video_id = extract_video_id(url)

    # Guard clause: report unusable input and ask again
    if not video_id:
        print("❌ Invalid URL. Please enter a valid YouTube video URL.")
        continue

    video_ids.append(video_id)
    print(f"✅ Video ID '{video_id}' added to the list.")

# Summary of everything that was collected
print("\nCollected Video IDs:", video_ids)


Enter a YouTube video URL (or type 'done' to finish): https://www.youtube.com/watch?v=V-Fla5hxMRg&ab_channel=CNBCTelevision
✅ Video ID 'V-Fla5hxMRg' added to the list.
Enter a YouTube video URL (or type 'done' to finish): https://www.youtube.com/watch?v=jxZ915PvdbU&t=27s&ab_channel=Firstpost
✅ Video ID 'jxZ915PvdbU' added to the list.
Enter a YouTube video URL (or type 'done' to finish): https://www.youtube.com/watch?v=x52g-BpjGHA&ab_channel=BloombergPodcasts
✅ Video ID 'x52g-BpjGHA' added to the list.
Enter a YouTube video URL (or type 'done' to finish): done

Collected Video IDs: ['V-Fla5hxMRg', 'jxZ915PvdbU', 'x52g-BpjGHA']


In [None]:


# Securely enter API Key (hidden when typed) and keep it in the process
# environment rather than in the notebook source.
# Surrounding whitespace from copy/paste is stripped so it does not end up
# inside the key that is later sent to the API.
os.environ["YOUTUBE_API_KEY"] = getpass("Enter your YouTube API Key (It will be hidden): ").strip()

# Fail here, at the prompt, instead of letting an empty key surface later as
# failed API requests.
if not os.environ["YOUTUBE_API_KEY"]:
    raise ValueError("No YouTube API key was entered.")


Enter your YouTube API Key (It will be hidden): ··········


In [None]:
# Read the key back from the environment; None when the variable is unset
API_KEY = os.environ.get("YOUTUBE_API_KEY")

In [None]:


# Video IDs to check: prefer the list already present in the notebook
# namespace (e.g. collected by the URL-entry cell) and fall back to the sample
# IDs only when that list is missing or empty. Previously the sample list
# unconditionally overwrote whatever the user had entered.
video_ids = globals().get("video_ids") or ['V-Fla5hxMRg', 'jxZ915PvdbU', 'x52g-BpjGHA']

# Ask the videos endpoint which IDs exist. Passing the query through `params`
# lets requests URL-encode the values and keeps the API key out of a
# hand-built URL string; the timeout prevents the cell from hanging forever.
response = requests.get(
    "https://www.googleapis.com/youtube/v3/videos",
    params={"key": API_KEY, "id": ",".join(video_ids), "part": "id"},
    timeout=30,
).json()

# An error payload carries no "items", so without this check every ID would
# be reported as invalid with no hint about the real cause.
if "error" in response:
    error_info = response["error"]
    detail = error_info.get("message", error_info) if isinstance(error_info, dict) else error_info
    raise RuntimeError(f"YouTube API error while validating video IDs: {detail}")

# IDs echoed back by the API are valid; anything not echoed back is invalid
valid_ids = {item['id'] for item in response.get("items", [])}
invalid_ids = set(video_ids) - valid_ids

# Output results
print(f"✅ Valid Video IDs: {list(valid_ids)}")
print(f"❌ Invalid Video IDs: {list(invalid_ids)}")


✅ Valid Video IDs: ['x52g-BpjGHA', 'V-Fla5hxMRg', 'jxZ915PvdbU']
❌ Invalid Video IDs: []


In [None]:


# ✅ Fetch Video Stats Function
def fetch_video_stats(ids=None):
    """Fetch title, channel and engagement counters for a list of videos.

    One request is sent to the YouTube Data API ``videos`` endpoint per ID,
    authenticated with the module-level ``API_KEY``.

    Args:
        ids: Iterable of video IDs. Defaults to the module-level
            ``video_ids`` list when omitted.

    Returns:
        A list of dicts, one per video returned by the API, with the keys
        "Timestamp", "Video ID", "Video Title", "Channel Name", "Views",
        "Likes" and "Comment Count". IDs for which the API returns no item
        produce no row.

    Raises:
        RuntimeError: If the API answers with an error payload.
    """
    if ids is None:
        ids = video_ids

    video_data = []
    # A single timestamp is shared by all rows of one fetch
    timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")

    for video_id in ids:
        # `params` lets requests URL-encode the values; the timeout keeps a
        # stalled connection from blocking the notebook indefinitely.
        response = requests.get(
            "https://www.googleapis.com/youtube/v3/videos",
            params={"key": API_KEY, "id": video_id, "part": "snippet,statistics"},
            timeout=30,
        ).json()

        # Surface API failures instead of silently returning fewer rows
        if "error" in response:
            error_info = response["error"]
            detail = error_info.get("message", error_info) if isinstance(error_info, dict) else error_info
            raise RuntimeError(f"YouTube API error for video {video_id}: {detail}")

        for video in response.get("items", []):
            statistics = video.get("statistics", {})
            video_data.append({
                "Timestamp": timestamp,
                "Video ID": video_id,
                "Video Title": video["snippet"]["title"],
                "Channel Name": video["snippet"]["channelTitle"],
                # Every counter is read with a default of 0 so an absent
                # field does not raise KeyError (viewCount used to be
                # indexed directly, unlike its siblings).
                "Views": int(statistics.get("viewCount", 0)),
                "Likes": int(statistics.get("likeCount", 0)),
                "Comment Count": int(statistics.get("commentCount", 0))
            })

    return video_data


In [None]:
# ✅ Save data locally in Colab
def save_data_to_csv(data, filename):
    """Write `data` to `filename` as CSV and print a confirmation line.

    Args:
        data: Anything accepted by the ``pd.DataFrame`` constructor
            (the notebook passes a list of row dicts).
        filename: Destination path of the CSV file.

    Returns:
        None. The file is written without the DataFrame index column.
    """
    pd.DataFrame(data).to_csv(filename, index=False)
    print(f"✅ Data saved: {filename}")

In [None]:
# Fixed output name: each run overwrites the previous metrics file
csv_filename = "youtube_video_metrics.csv"

# Pull the current statistics, then persist them to the CSV
video_stats = fetch_video_stats()
save_data_to_csv(data=video_stats, filename=csv_filename)

# ✅ Hand the saved file to the Colab download helper
files.download(csv_filename)

✅ Data saved: youtube_video_metrics.csv


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [None]:
import requests
import pandas as pd
import datetime
import os
from google.colab import files  # ✅ For downloading CSV in Colab

# ✅ Output CSV name, stamped to the minute with the current local time
csv_filename = f"youtube_comments_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')}.csv"

# ✅ Videos whose comments will be exported
VIDEO_IDS = [
    "V-Fla5hxMRg",
    "jxZ915PvdbU",
    "x52g-BpjGHA",
]  # Replace with actual video IDs

# ✅ Function to Fetch All Top-Level Comments Using Pagination
def fetch_all_comments(video_id):
    """Fetch every top-level comment of one video, following pagination.

    Requests the ``commentThreads`` endpoint with ``maxResults=100`` and keeps
    requesting the next page for as long as the response carries a
    ``nextPageToken``. Authenticates with the module-level ``API_KEY``.

    Args:
        video_id: ID of the video whose comment threads are read.

    Returns:
        A list of dicts with the keys "Video ID", "Channel Name",
        "Comment Author", "Comment Text", "Published At" and "Like Count".
        If the API reports an error, the error is printed and the comments
        collected up to that point are returned.
    """
    endpoint = "https://www.googleapis.com/youtube/v3/commentThreads"
    # Sent via `params` so requests URL-encodes every value, including the
    # page token that is added for follow-up pages.
    params = {"key": API_KEY, "videoId": video_id, "part": "snippet", "maxResults": 100}
    comments_data = []

    while True:
        response = requests.get(endpoint, params=params, timeout=30).json()

        # An error payload has neither items nor a page token; report it
        # instead of silently returning an empty or truncated list.
        if "error" in response:
            error_info = response["error"]
            reasons = error_info.get("errors") or [{}]
            reason = reasons[0].get("reason") or error_info.get("message", "unknown error")
            print(f"❌ API Error for Video {video_id}: {reason}")
            break

        # ✅ Extract comments
        for item in response.get("items", []):
            comment = item["snippet"]["topLevelComment"]["snippet"]
            comments_data.append({
                "Video ID": video_id,
                # NOTE: this column holds the thread's channelId, not a
                # display name (can be mapped later).
                "Channel Name": item["snippet"]["channelId"],
                "Comment Author": comment["authorDisplayName"],
                "Comment Text": comment["textDisplay"],
                "Published At": comment["publishedAt"],
                "Like Count": comment["likeCount"]
            })

        # ✅ Pagination: stop once the API no longer returns a next-page token
        next_page = response.get("nextPageToken")
        if not next_page:
            break
        params["pageToken"] = next_page

    return comments_data

# ✅ Gather the comments of every tracked video into one flat list
all_comments = []
for video_id in VIDEO_IDS:
    all_comments += fetch_all_comments(video_id)

# ✅ Build the table and write it to disk without the index column
df = pd.DataFrame(all_comments)
df.to_csv(path_or_buf=csv_filename, index=False)

# ✅ Hand the CSV to the Colab download helper
files.download(csv_filename)

print(f"✅ All YouTube comments saved and downloading: {csv_filename}")


In [None]:


# ✅ Output files: the CSV name is stamped to the minute with the current
# local time; the log file name is fixed and collects status/error messages
csv_filename = f"youtube_comments_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M')}.csv"
log_filename = "youtube_comments_log.txt"

# ✅ Videos whose comments will be exported
VIDEO_IDS = [
    "V-Fla5hxMRg",
    "jxZ915PvdbU",
    "x52g-BpjGHA",
]  # Replace with actual video IDs

# ✅ Function to Log Messages (Debugging & API Limit Tracking)
def log_message(message):
    """Append a timestamped line to the log file and echo the message.

    The line written to the module-level ``log_filename`` has the form
    ``<current datetime> - <message>``; the bare message is then printed so
    it also shows up in the cell output.

    Args:
        message: Text to record.
    """
    with open(log_filename, "a", encoding="utf-8") as log_file:
        # print() supplies the trailing newline of the log line
        print(f"{datetime.datetime.now()} - {message}", file=log_file)
    print(message)

# ✅ Function to Fetch All Top-Level Comments with API Limit Check
def fetch_all_comments(video_id):
    """Fetch every top-level comment of one video, logging the outcome.

    Requests the ``commentThreads`` endpoint with ``maxResults=100`` and keeps
    requesting the next page for as long as the response carries a
    ``nextPageToken``. Authenticates with the module-level ``API_KEY`` and
    reports progress through ``log_message``.

    Args:
        video_id: ID of the video whose comment threads are read.

    Returns:
        A list of dicts with the keys "Video ID", "Channel Name",
        "Comment Author", "Comment Text", "Published At" and "Like Count".
        If the API reports an error, the error is logged and the comments
        collected up to that point are returned; the success message is
        logged only when pagination finished without an error.
    """
    endpoint = "https://www.googleapis.com/youtube/v3/commentThreads"
    # Sent via `params` so requests URL-encodes every value, including the
    # page token that is added for follow-up pages.
    params = {"key": API_KEY, "videoId": video_id, "part": "snippet", "maxResults": 100}
    comments_data = []

    while True:
        response = requests.get(endpoint, params=params, timeout=30).json()

        # ✅ Check for API errors
        if "error" in response:
            error_info = response["error"]
            # Tolerate error payloads without an "errors" list or "reason"
            reasons = error_info.get("errors") or [{}]
            error_message = reasons[0].get("reason") or error_info.get("message", "unknown error")
            log_message(f"❌ API Error for Video {video_id}: {error_message}")
            if error_message == "quotaExceeded":
                log_message("⚠️ API quota limit reached. Stopping script.")
            # Return straight away on any error so the success message below
            # is never logged for a failed or truncated fetch.
            return comments_data

        # ✅ Extract comments
        for item in response.get("items", []):
            comment = item["snippet"]["topLevelComment"]["snippet"]
            comments_data.append({
                "Video ID": video_id,
                # NOTE: this column holds the thread's channelId, not a
                # display name (can be mapped later).
                "Channel Name": item["snippet"]["channelId"],
                "Comment Author": comment["authorDisplayName"],
                "Comment Text": comment["textDisplay"],
                "Published At": comment["publishedAt"],
                "Like Count": comment["likeCount"]
            })

        # ✅ Pagination: stop once the API no longer returns a next-page token
        next_page = response.get("nextPageToken")
        if not next_page:
            break
        params["pageToken"] = next_page

    # One row is appended per comment, so the list length is the total
    log_message(f"✅ Successfully fetched {len(comments_data)} comments for Video {video_id}")
    return comments_data

# ✅ Gather the comments of every tracked video into one flat list
all_comments = []
for video_id in VIDEO_IDS:
    all_comments += fetch_all_comments(video_id)

# ✅ Build the table and write it to disk without the index column
df = pd.DataFrame(all_comments)
df.to_csv(path_or_buf=csv_filename, index=False)

# ✅ Hand the CSV to the Colab download helper
files.download(csv_filename)

# Announce completion in the cell output, then record it in the log file too
print(f"✅ All YouTube comments saved and downloading: {csv_filename}")
log_message(f"✅ Script completed successfully. Comments saved in {csv_filename}")


✅ Successfully fetched 5444 comments for Video V-Fla5hxMRg
✅ Successfully fetched 1841 comments for Video jxZ915PvdbU
✅ Successfully fetched 341 comments for Video x52g-BpjGHA


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

✅ All YouTube comments saved and downloading: youtube_comments_2025-01-30_17-41.csv
✅ Script completed successfully. Comments saved in youtube_comments_2025-01-30_17-41.csv
