In [3]:
pip install beautifulsoup4 python-docx
pip install docx

Collecting python-docx
  Downloading python_docx-1.1.2-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.1.2-py3-none-any.whl (244 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m244.3/244.3 kB[0m [31m3.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.1.2


In [4]:
import io
import os
import time
from datetime import datetime, timedelta, timezone

import requests
from bs4 import BeautifulSoup
from docx import Document
from docx.image.exceptions import UnrecognizedImageError
from docx.shared import Inches, Pt

# Function to fetch new videos from a YouTube channel without using the API
def fetch_new_videos_scrape(channel_id):
    """Read a channel's public Atom feed and collect videos from the last 7 days.

    Args:
        channel_id: YouTube channel ID, interpolated into the feed URL.

    Returns:
        A 4-tuple ``(videos, elapsed_time, channel_title, channel_logo_url)``:

        * ``videos`` -- list of dicts with keys ``title``, ``link``,
          ``thumbnail``, ``published`` and ``description``.
        * ``elapsed_time`` -- seconds spent fetching and parsing.
        * ``channel_title`` -- text of the feed's first ``<title>`` element
          (``"Unknown Channel"`` if the feed has none).
        * ``channel_logo_url`` -- text of the feed's ``<logo>`` element, or
          ``None`` when the feed has none.

        When the request fails the tuple is ``([], elapsed_time, None, None)``
        so callers can always unpack four values.
    """
    start_time = time.time()
    url = f"https://www.youtube.com/feeds/videos.xml?channel_id={channel_id}"

    # A timeout keeps a stalled connection from hanging the notebook forever.
    try:
        response = requests.get(url, timeout=30)
    except requests.RequestException as exc:
        print(f"Failed to fetch video feed: {exc}")
        return [], time.time() - start_time, None, None
    if response.status_code != 200:
        print("Failed to fetch video feed.")
        return [], time.time() - start_time, None, None

    soup = BeautifulSoup(response.content, 'xml')
    videos = []
    # Timezone-aware cutoff so it can be compared with the feed's aware timestamps.
    one_week_ago = datetime.now(timezone.utc) - timedelta(days=7)

    title_tag = soup.find('title')
    channel_title = title_tag.text if title_tag is not None else "Unknown Channel"
    logo_tag = soup.find('logo')
    channel_logo_url = logo_tag.text if logo_tag is not None else None

    for entry in soup.find_all('entry'):
        video_published = datetime.strptime(entry.published.text, "%Y-%m-%dT%H:%M:%S%z")
        if video_published > one_week_ago:
            # Swap the feed's default thumbnail for the max-resolution variant.
            high_quality_thumbnail = entry.find('media:thumbnail')['url'].replace('hqdefault.jpg', 'maxresdefault.jpg')
            description_tag = entry.find('media:description')
            video = {
                'title': entry.title.text,
                'link': entry.link['href'],
                'thumbnail': high_quality_thumbnail,
                # Convert before formatting so the "UTC" label is always true.
                'published': video_published.astimezone(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC'),
                'description': description_tag.text if description_tag is not None else "No description available."
            }
            videos.append(video)

    elapsed_time = time.time() - start_time
    print(f"Scraping completed in {elapsed_time:.2f} seconds with {len(videos)} videos fetched.")
    return videos, elapsed_time, channel_title, channel_logo_url

# Function to create a Word document with video details
def create_word_doc(videos, elapsed_time, output_path, channel_id, channel_title, channel_logo_url):
    """Write a Word report listing the given videos and save it under ``output_path``.

    Args:
        videos: list of dicts with keys ``title``, ``link``, ``published``,
            ``description`` and ``thumbnail`` (an image URL).
        elapsed_time: seconds the fetch took; printed in the report.
        output_path: directory for ``YouTube_Videos_This_Week.docx``; created
            if it does not exist.
        channel_id: channel ID shown in the report.
        channel_title: channel name used as a heading.
        channel_logo_url: optional logo image URL; skipped when falsy.

    Images are embedded on a best-effort basis: a failed download or an image
    format python-docx cannot read is reported and skipped, never fatal.
    """
    if output_path:
        os.makedirs(output_path, exist_ok=True)

    document = Document()

    # Add a sci-fi styled title
    title = document.add_heading(level=1)
    run = title.add_run('YouTube Weekly Video Report')
    run.font.name = 'Orbitron'  # Sci-fi font (use a similar available font if Orbitron isn't installed)
    run.font.size = Pt(24)

    # Channel details
    if channel_logo_url:
        _add_remote_picture(document, channel_logo_url, Inches(1.5))

    document.add_heading(channel_title or "", level=2)
    document.add_paragraph(f"Channel ID: {channel_id}")
    document.add_paragraph(f"Report Generated On: {datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S UTC')}")
    document.add_paragraph(f"Time Taken to Fetch Videos: {elapsed_time:.2f} seconds")

    # Add a separator
    separator = document.add_paragraph()
    separator_run = separator.add_run("=" * 50)
    separator_run.bold = True

    for idx, video in enumerate(videos, 1):
        document.add_heading(f"{idx}. {video['title']}", level=2)
        document.add_paragraph(f"Video Link: {video['link']}")
        document.add_paragraph(f"Published On: {video['published']}")
        document.add_paragraph(f"Description: {video['description']}")

        # Embed the thumbnail; if the max-resolution variant is unavailable,
        # retry with the standard hqdefault image instead of dropping it.
        thumbnail_url = video.get('thumbnail')
        if thumbnail_url and not _add_remote_picture(document, thumbnail_url, Inches(3)):
            fallback_url = thumbnail_url.replace('maxresdefault.jpg', 'hqdefault.jpg')
            if fallback_url != thumbnail_url:
                _add_remote_picture(document, fallback_url, Inches(3))

        # Add a separator between videos
        separator = document.add_paragraph()
        separator_run = separator.add_run("-" * 50)
        separator_run.bold = True

    # Add logs at the end
    document.add_page_break()
    document.add_heading("Debug Insights", level=1)
    document.add_paragraph(f"Total Videos Fetched: {len(videos)}")
    document.add_paragraph(f"Time Taken: {elapsed_time:.2f} seconds")
    document.add_paragraph(f"Channel: {channel_title}")
    document.add_paragraph(f"Channel ID: {channel_id}")

    # Save the Word document
    doc_path = os.path.join(output_path, 'YouTube_Videos_This_Week.docx')
    document.save(doc_path)
    print(f"Document saved at: {doc_path}")


def _add_remote_picture(document, image_url, width, timeout=30):
    """Download ``image_url`` and embed it in ``document``; return True on success.

    The image is passed to python-docx from memory, so no temporary file is
    written (and none can be left behind if embedding fails).
    """
    try:
        response = requests.get(image_url, timeout=timeout)
    except requests.RequestException as exc:
        print(f"Could not download image {image_url}: {exc}")
        return False
    if response.status_code != 200:
        return False
    try:
        document.add_picture(io.BytesIO(response.content), width=width)
    except UnrecognizedImageError:
        print(f"Skipping image in an unsupported format: {image_url}")
        return False
    return True

# Main execution
def main(channel_id='UCeoSGHaFePbHGtJjn0xUPrw', output_path='.'):
    """Fetch the past week's videos for a channel and write the Word report.

    Args:
        channel_id: YouTube channel ID to report on.
        output_path: directory the report is saved in (default: current directory).
    """
    result = fetch_new_videos_scrape(channel_id)

    # The fetch may hand back an empty result when the feed request fails, so
    # inspect it before unpacking instead of assuming four values.
    videos = result[0] if result else []
    if not videos:
        print("No new videos found for the past week.")
        return

    _, elapsed_time, channel_title, channel_logo_url = result
    create_word_doc(videos, elapsed_time, output_path, channel_id, channel_title, channel_logo_url)

# Generate the report only when this code is executed directly, not when it is imported.
if __name__ == "__main__":
    main()


Scraping completed in 0.18 seconds with 1 videos fetched.
Document saved at: ./YouTube_Videos_This_Week.docx
