In [7]:
import logging
import os
import re

import pandas as pd
import requests

# Set up logging to display status messages with timestamps.
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")

# YouTube Data API key and Channel ID for '@amitvarma'.
# The key is read from the YOUTUBE_API_KEY environment variable so that it is
# never stored in the notebook (notebooks and their outputs are routinely
# shared and committed). Any key that was ever hardcoded here should be
# revoked and replaced.
API_KEY = os.environ.get("YOUTUBE_API_KEY", "")
CHANNEL_ID = "UCs8a-hjf6X4pa-O0orSoC8w"

if not API_KEY:
    # Warn instead of raising so the module can still be imported; API calls
    # made without a key will fail and be reported by the request helpers.
    logging.warning("YOUTUBE_API_KEY is not set; YouTube Data API requests will fail.")

def get_uploads_playlist_id(channel_id, api_key, session, timeout=10):
    """
    Retrieves the playlist ID of the 'uploads' playlist for the specified channel.

    Args:
        channel_id: Channel ID sent as the ``id`` query parameter.
        api_key: API key sent as the ``key`` query parameter.
        session: Object with a requests-style ``get(url, params=..., timeout=...)``
            method (e.g. ``requests.Session``).
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block forever.

    Returns:
        The uploads playlist ID, or None when the request fails, the channel
        is not found, or the response does not contain the expected fields.
    """
    url = "https://www.googleapis.com/youtube/v3/channels"
    params = {
        "part": "contentDetails",
        "id": channel_id,
        "key": api_key
    }
    try:
        response = session.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except Exception as e:
        # HTTP error messages can embed the full request URL, which carries
        # the ``key`` parameter; scrub the key before it reaches the log.
        detail = str(e).replace(str(api_key), "[REDACTED]") if api_key else str(e)
        logging.error(f"Error retrieving uploads playlist ID: {detail}")
        return None

    items = data.get("items")
    if not items:
        return None

    # Walk the nested structure defensively: a partial response yields None
    # instead of an uncaught KeyError.
    content_details = items[0].get("contentDetails") or {}
    related_playlists = content_details.get("relatedPlaylists") or {}
    return related_playlists.get("uploads")

def get_videos_from_playlist(playlist_id, api_key, session, timeout=10):
    """
    Fetches all videos from the specified playlist (typically the 'uploads' playlist of a channel).

    Pages through the playlistItems endpoint, 50 items per request, following
    ``nextPageToken`` until the API stops returning one.

    Args:
        playlist_id: Playlist ID sent as the ``playlistId`` query parameter.
        api_key: API key sent as the ``key`` query parameter.
        session: Object with a requests-style ``get(url, params=..., timeout=...)``
            method (e.g. ``requests.Session``).
        timeout: Seconds to wait for each page before giving up.

    Returns:
        A list of dicts with keys ``title``, ``video_id`` and ``link``, in the
        order the API returned them. If a page request fails, the error is
        logged and the videos collected so far are returned.
    """
    base_url = "https://www.googleapis.com/youtube/v3/playlistItems"
    videos = []
    next_page_token = None

    while True:
        params = {
            "part": "snippet",
            "playlistId": playlist_id,
            "maxResults": 50,
            "key": api_key
        }
        # Only send a page token once the API has handed one back.
        if next_page_token:
            params["pageToken"] = next_page_token

        try:
            response = session.get(base_url, params=params, timeout=timeout)
            response.raise_for_status()
            data = response.json()
        except Exception as e:
            # HTTP error messages can embed the full request URL, which carries
            # the ``key`` parameter; scrub the key before it reaches the log.
            detail = str(e).replace(str(api_key), "[REDACTED]") if api_key else str(e)
            logging.error(f"Error retrieving videos: {detail}")
            break

        for item in data.get("items", []):
            # ``or {}`` also covers fields that are present but null.
            snippet = item.get("snippet") or {}
            resource = snippet.get("resourceId") or {}
            video_id = resource.get("videoId")
            if video_id:
                videos.append({
                    "title": snippet.get("title", ""),
                    "video_id": video_id,
                    "link": f"https://www.youtube.com/watch?v={video_id}"
                })

        next_page_token = data.get("nextPageToken")
        if not next_page_token:
            break

    return videos

def get_video_description(video_id, api_key, session, timeout=10):
    """
    Retrieves the description of a specific YouTube video.

    Args:
        video_id: Video ID sent as the ``id`` query parameter.
        api_key: API key sent as the ``key`` query parameter.
        session: Object with a requests-style ``get(url, params=..., timeout=...)``
            method (e.g. ``requests.Session``).
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The description text, or an empty string when the request fails, the
        video is not found, or the response carries no description.
    """
    url = "https://www.googleapis.com/youtube/v3/videos"
    params = {
        "part": "snippet",
        "id": video_id,
        "key": api_key
    }
    try:
        response = session.get(url, params=params, timeout=timeout)
        response.raise_for_status()
        data = response.json()
    except Exception as e:
        # HTTP error messages can embed the full request URL, which carries
        # the ``key`` parameter; scrub the key before it reaches the log.
        detail = str(e).replace(str(api_key), "[REDACTED]") if api_key else str(e)
        logging.error(f"Error retrieving description for video {video_id}: {detail}")
        return ""

    items = data.get("items")
    if not items:
        return ""

    # Always hand back a string so callers can pass the result straight to
    # the regex helpers, even if the snippet or description is missing/null.
    snippet = items[0].get("snippet") or {}
    return snippet.get("description") or ""

def extract_resources(description):
    """
    Pulls numbered "title: link" entries out of a description's resources section.

    The section starts right after the literal text ``USEFUL RESOURCES:`` and
    runs up to the first blank line (two consecutive newlines) or the end of
    the text. Inside it, every entry shaped like ``<number>. <title>: <http(s) URL>``
    is collected.

    Returns:
        A list of ``(title, link)`` tuples in order of appearance; an empty
        list when the description has no resources section.
    """
    section_match = re.search(
        r"USEFUL RESOURCES:\s*(.*?)\s*(?:\n\n|\Z)",
        description,
        re.DOTALL,
    )
    # No header means there is nothing to parse.
    if section_match is None:
        return []

    section_text = section_match.group(1)
    return re.findall(r"\d+\.\s*(.*?)\s*:\s*(https?://\S+)", section_text)

def main():
    """
    Collects every upload of CHANNEL_ID and the resources listed in each description.

    Looks up the channel's uploads playlist, lists its videos, fetches each
    video's description, extracts its resource links, and writes the results
    to 'YouTube_Channel_Data.xlsx' with one sheet of videos ("Video Links")
    and one sheet of resources ("Resources"). Returns early, after logging an
    error, when the uploads playlist ID cannot be obtained.
    """
    with requests.Session() as session:
        logging.info("Retrieving uploads playlist ID...")
        playlist_id = get_uploads_playlist_id(CHANNEL_ID, API_KEY, session)
        if not playlist_id:
            logging.error("Failed to retrieve the uploads playlist ID.")
            return

        logging.info("Fetching videos from the playlist...")
        videos = get_videos_from_playlist(playlist_id, API_KEY, session)
        total_videos = len(videos)
        logging.info(f"Total number of videos found: {total_videos}")

        all_resources = []
        for position, entry in enumerate(videos, start=1):
            logging.info(f"Processing video {position}/{total_videos}: {entry['title']}")
            text = get_video_description(entry["video_id"], API_KEY, session)
            # One output row per (resource title, resource link) pair found.
            all_resources.extend(
                {
                    "video_title": entry["title"],
                    "video_link": entry["link"],
                    "resource_title": resource_title,
                    "resource_link": resource_link,
                }
                for resource_title, resource_link in extract_resources(text)
            )

    # Build both tables before opening the workbook; dict order fixes sheet order.
    sheets = {
        "Video Links": pd.DataFrame(videos),
        "Resources": pd.DataFrame(all_resources),
    }

    workbook_path = "YouTube_Channel_Data.xlsx"
    with pd.ExcelWriter(workbook_path, engine="openpyxl") as writer:
        for sheet_name, frame in sheets.items():
            frame.to_excel(writer, sheet_name=sheet_name, index=False)

    logging.info(f"Data has been successfully written to '{workbook_path}'.")

# Entry point: run the scrape only when this code is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
    main()


2025-02-07 15:25:18,811 - INFO - Retrieving uploads playlist ID...
2025-02-07 15:25:19,115 - INFO - Fetching videos from the playlist...
2025-02-07 15:25:21,553 - INFO - Total number of videos found: 85
2025-02-07 15:25:21,555 - INFO - Processing video 1/85: Underrated, Overrated, Complicated | Episode 85 | Everything is Everything
2025-02-07 15:25:21,670 - INFO - Processing video 2/85: The Age of Hybrid Warfare | Episode 84 | Everything is Everything
2025-02-07 15:25:21,789 - INFO - Processing video 3/85: The Atheism Episode | Episode 83 | Everything is Everything
2025-02-07 15:25:21,900 - INFO - Processing video 4/85: Invisible Infrastructure | Episode 82 | Everything is Everything
2025-02-07 15:25:22,013 - INFO - Processing video 5/85: Life Lessons From Chess in 2024 | Episode 81 | Everything is Everything
2025-02-07 15:25:22,146 - INFO - Processing video 6/85: Going Bankrupt in India | Episode 80 | Everything is Everything
2025-02-07 15:25:22,260 - INFO - Processing video 7/85: MOR

In [6]:
pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 250.9/250.9 kB 5.7 MB/s eta 0:00:00
Downloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.1[0m[39;49m -> [0m[32;49m25.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip3 install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
