In [12]:
import os
import sys
from datetime import datetime, timedelta, timezone
from xml.sax.saxutils import escape

import pandas as pd
from googleapiclient.discovery import build
from reportlab.lib import colors
from reportlab.lib.pagesizes import letter
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.platypus import Paragraph, Spacer, Table, TableStyle, SimpleDocTemplate

# ... (The rest of your script follows) ...

# --- CONFIGURATION ---
# The API key is read from the YOUTUBE_API_KEY environment variable so that no
# credential is stored in the source file. When the variable is unset, the
# placeholder below is used, which the main block treats as "not configured".
# NOTE(review): a real-looking key was previously hard-coded here; if it was
# ever shared or committed it should be revoked and regenerated.
API_KEY = os.environ.get("YOUTUBE_API_KEY", "YOUR_YOUTUBE_API_KEY")
MAX_VIEWS = 5000  # Upper bound (inclusive) on a video's view count
MIN_VIEWS = 0     # Lower bound (inclusive) on a video's view count
DAYS_BACK = 7     # Size of the search window, in days

# --- 1. INITIALIZATION & UTILITIES ---

def get_youtube_client(api_key):
    """Build a 'youtube' v3 API service object authenticated with ``api_key``.

    Args:
        api_key: Developer key forwarded to ``build`` as ``developerKey``.

    Returns:
        Whatever ``build`` returns for the 'youtube' / 'v3' service.
    """
    service_name, api_version = 'youtube', 'v3'
    client = build(service_name, api_version, developerKey=api_key)
    return client

# --- 2. STEP 1: Search and Filter by Date ---

def get_recent_video_ids(youtube, search_query, days_back):
    """Fetch videos matching a query that were published in the last N days.

    Pages through ``search().list`` results (ordered by date) until the
    response no longer carries a ``nextPageToken``.

    Args:
        youtube: API client exposing ``search().list(...).execute()``.
        search_query: Free-text query passed as the ``q`` parameter.
        days_back: Number of days before the current UTC time used as the
            ``publishedAfter`` cutoff.

    Returns:
        A DataFrame with the columns ``video_id``, ``channel_id`` and
        ``channel_title``, one row per returned search item. The columns are
        present even when no item was returned.
    """
    # The trailing "Z" declares the timestamp to be UTC, so the cutoff has to
    # be derived from UTC time rather than from the machine's local clock.
    cutoff = datetime.now(timezone.utc) - timedelta(days=days_back)
    time_cutoff = cutoff.strftime("%Y-%m-%dT%H:%M:%SZ")

    print(f"Searching for videos in niche '{search_query}' published after: {time_cutoff}...")

    columns = ['video_id', 'channel_id', 'channel_title']
    video_data = []
    next_page_token = None

    # The search.list method costs 100 quota units per call, use judiciously.
    while True:
        response = youtube.search().list(
            part="snippet",
            q=search_query,
            type="video",
            order="date",
            publishedAfter=time_cutoff,
            maxResults=50,
            pageToken=next_page_token,
        ).execute()

        for item in response.get("items", []):
            snippet = item['snippet']
            video_data.append({
                'video_id': item['id']['videoId'],
                'channel_id': snippet['channelId'],
                'channel_title': snippet['channelTitle'],
            })

        next_page_token = response.get("nextPageToken")
        if not next_page_token:
            break

    # Passing the column list keeps the schema stable for empty results.
    return pd.DataFrame(video_data, columns=columns)

# --- 3. STEP 2: Retrieve Statistics (View Count) ---

def get_video_stats(youtube, video_ids):
    """Fetch the view count for each of the given video IDs.

    Args:
        youtube: API client exposing ``videos().list(...).execute()``.
        video_ids: List of video ID strings; it is sliced into batches of 50.

    Returns:
        A DataFrame with the columns ``video_id`` and ``view_count`` (int),
        one row per item returned by the API. The columns are present even
        when no item was returned, so the result can always be merged on
        ``video_id``.
    """
    print(f"Fetching statistics for {len(video_ids)} videos...")

    batch_size = 50  # videos.list allows max 50 IDs per request (1 quota unit per call).
    stats_data = []
    for start in range(0, len(video_ids), batch_size):
        batch_ids = video_ids[start:start + batch_size]
        response = youtube.videos().list(
            part="statistics",
            id=",".join(batch_ids),
        ).execute()

        for item in response.get("items", []):
            stats = item.get('statistics', {})
            stats_data.append({
                'video_id': item['id'],
                # Default to 0 when the response carries no viewCount.
                'view_count': int(stats.get('viewCount', 0)),
            })

    # Passing the column list keeps the schema stable for empty results.
    return pd.DataFrame(stats_data, columns=['video_id', 'view_count'])

# --- 4. STEP 3: Process, Group, and Report ---

from reportlab.lib.units import inch 
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib import colors

# Base URL for YouTube channels
YOUTUBE_CHANNEL_BASE_URL = "https://www.youtube.com/channel/"

def generate_pdf_report(df_final, search_term, pdf_filename='YouTube_Creator_Report.pdf'):
    """Write a PDF creator report with ReportLab's SimpleDocTemplate (Platypus).

    The document contains a title, the active filters, a note about creator
    names, and one table row per creator with video count, average views,
    channel name and channel link.

    Args:
        df_final: DataFrame whose rows provide ``videos_this_week``,
            ``average_views``, ``channel_title`` and ``channel_id``.
        search_term: Niche/query text shown in the report header.
        pdf_filename: Path of the PDF file to write.

    Returns:
        None. Success or failure of the build is reported via ``print``;
        build errors are caught and not re-raised.
    """
    doc = SimpleDocTemplate(pdf_filename, pagesize=letter)
    styles = getSampleStyleSheet()
    story = []

    # Define custom styles
    styles.add(ParagraphStyle(name='ReportTitle', fontSize=16, fontName='Helvetica-Bold', spaceAfter=12))
    styles.add(ParagraphStyle(name='ReportHeader', fontSize=12, fontName='Helvetica'))
    styles.add(ParagraphStyle(name='Note', fontSize=10, fontName='Helvetica-Oblique', textColor=colors.gray))
    styles.add(ParagraphStyle(name='SmallFont', fontSize=7.5, fontName='Helvetica'))  # For the long URL

    # 1. Title and Filters
    # Paragraph text is parsed as XML-like markup, so the user-supplied search
    # term is escaped to keep characters such as '&' and '<' literal.
    safe_term = escape(str(search_term))
    story.append(Paragraph("<b>YouTube Low-View Creator Report</b>", styles['ReportTitle']))
    story.append(Paragraph(f"<b>Niche:</b> {safe_term}", styles['ReportHeader']))
    story.append(Paragraph(f"<b>Filter:</b> Last {DAYS_BACK} Days | Views: {MIN_VIEWS:,}-{MAX_VIEWS:,}", styles['ReportHeader']))
    story.append(Spacer(1, 0.3 * inch))

    # 2. Limitation Note
    story.append(Paragraph(
        "NOTE on 'Original Creator Name': The YouTube Data API v3 does not expose the creator's real name; the <b>Channel Title</b> has been used as a substitute for both fields.",
        styles['Note']
    ))
    story.append(Spacer(1, 0.2 * inch))

    # 3. Table Data Preparation: header row first, then one row per creator.
    data = [
        [Paragraph("<b>Videos/Week</b>", styles['Normal']),
         Paragraph("<b>Avg. Views (0-5k)</b>", styles['Normal']),
         Paragraph("<b>Channel Name</b>", styles['Normal']),
         Paragraph("<b>Channel Link</b>", styles['Normal'])]
    ]
    for _, row in df_final.iterrows():
        avg_views = f"{int(row['average_views']):,}"
        # Construct the URL using the channel_id
        channel_link = YOUTUBE_CHANNEL_BASE_URL + row['channel_id']

        # The URL is long, so it is rendered as a small-font Paragraph.
        url_paragraph = Paragraph(channel_link, styles['SmallFont'])

        data.append([
            str(row['videos_this_week']),
            avg_views,
            row['channel_title'],
            url_paragraph,
        ])

    # 4. Table Setup
    table = Table(data, colWidths=[1.1*inch, 1.1*inch, 2.0*inch, 3.5*inch])

    # 5. Table Style
    table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), colors.HexColor('#2E86C1')),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('ALIGN', (1, 1), (1, -1), 'RIGHT'),  # Align Avg Views to the right
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('GRID', (0, 0), (-1, -1), 0.5, colors.grey),
        ('FONTSIZE', (0, 1), (-1, -1), 10),
        # Tighter horizontal padding for the link column's data rows.
        ('LEFTPADDING', (3, 1), (3, -1), 2),
        ('RIGHTPADDING', (3, 1), (3, -1), 2),
    ]))

    story.append(table)

    # 6. Build the PDF (best-effort: a failure is reported, not raised)
    try:
        doc.build(story)
        print(f"\nSUCCESS: PDF report generated: {pdf_filename}")
    except Exception as e:
        print(f"ERROR generating PDF: {e}")
        
# ... (The rest of the script remains unchanged) ...

# --- 4. STEP 3: Process, Group, and Report ---

# ... (Keep the generate_pdf_report function updated as in the previous fix) ...

# Base URL for YouTube channels (Ensure this constant is defined near the top with your other constants)
YOUTUBE_CHANNEL_BASE_URL = "https://www.youtube.com/channel/"

# --- 4. STEP 3: Process, Group, and Report ---

# ... (generate_pdf_report function here) ...

def process_and_report(df_videos, df_stats, max_views, min_views, search_term):
    """Join, filter, group and sort the video data, then generate the PDF.

    Args:
        df_videos: DataFrame with ``video_id``, ``channel_id`` and
            ``channel_title`` columns.
        df_stats: DataFrame with ``video_id`` and ``view_count`` columns.
        max_views: Inclusive upper bound on ``view_count``.
        min_views: Inclusive lower bound on ``view_count``.
        search_term: Niche/query text forwarded to ``generate_pdf_report``.

    Returns:
        A DataFrame with the columns ``channel_title``, ``videos_this_week``,
        ``average_views`` and ``channel_link``, sorted by
        ``videos_this_week`` descending. An empty DataFrame is returned (and
        no PDF is generated) when either input is empty or no video falls
        inside the view-count range.
    """
    # An empty input may not even carry a 'video_id' column, in which case the
    # merge below would raise KeyError; report it as "nothing to do" instead.
    if df_videos.empty or df_stats.empty:
        print("No video data available to process.")
        return pd.DataFrame()

    # The same video can appear more than once in the search results; keep a
    # single row per video so it is not counted twice for its channel.
    df_unique_videos = df_videos.drop_duplicates(subset='video_id')

    # Merge video details with statistics
    df_combined = df_unique_videos.merge(df_stats, on='video_id')

    # Filter by view count (inclusive on both ends)
    in_range = df_combined['view_count'].between(min_views, max_views)
    df_filtered = df_combined[in_range].copy()

    if df_filtered.empty:
        print("No videos found that meet the view count criteria.")
        return pd.DataFrame()

    # Group by channel ID to count videos and calculate average views
    df_grouped = df_filtered.groupby('channel_id').agg(
        channel_title=('channel_title', 'first'),
        videos_this_week=('video_id', 'count'),
        average_views=('view_count', 'mean')
    ).reset_index()

    # Derive the channel link from the channel ID
    df_grouped['channel_link'] = YOUTUBE_CHANNEL_BASE_URL + df_grouped['channel_id']

    # Sort by videos posted this week (descending)
    df_final = df_grouped.sort_values(by='videos_this_week', ascending=False)

    print(f"Found {len(df_final)} creators meeting all criteria.")

    # Generate the PDF
    generate_pdf_report(df_final, search_term)

    # Return the display columns in presentation order
    return df_final[['channel_title', 'videos_this_week', 'average_views', 'channel_link']]
# --- MAIN EXECUTION ---
def main():
    """Prompt for a niche, query the API, and print/export the creator report.

    Exits with status 1 when the API key is still the placeholder or when the
    entered niche is empty. Errors raised while talking to the API or while
    processing the results are caught and reported via ``print``.
    """
    if API_KEY == "YOUR_YOUTUBE_API_KEY":
        print("ERROR: Please replace 'YOUR_YOUTUBE_API_KEY' with your actual API key in the script.")
        sys.exit(1)

    # Get Niche Type from User; surrounding whitespace is dropped so that a
    # blank-looking entry is rejected and the query is sent without padding.
    niche_type = input("Enter the niche/topic to search for (e.g., 'baking tutorial', 'indie game review'): ").strip()
    if not niche_type:
        print("Niche cannot be empty. Exiting.")
        sys.exit(1)

    try:
        youtube_client = get_youtube_client(API_KEY)

        # Step 1: Search videos in the niche published in the last DAYS_BACK days
        recent_videos_df = get_recent_video_ids(youtube_client, niche_type, DAYS_BACK)

        if recent_videos_df.empty:
            print(f"No videos found in the last {DAYS_BACK} days matching the niche query.")
            return

        video_ids_list = recent_videos_df['video_id'].unique().tolist()

        # Step 2: Get video statistics
        video_stats_df = get_video_stats(youtube_client, video_ids_list)

        # Step 3: Process, filter, group, and generate report
        final_report_df = process_and_report(recent_videos_df, video_stats_df, MAX_VIEWS, MIN_VIEWS, niche_type)

        if not final_report_df.empty:
            print("\n--- Top Creators Found ---")
            print(final_report_df.to_string(index=False))

    except Exception as e:
        # Top-level boundary: report the failure instead of showing a traceback.
        print(f"\nAn API or processing error occurred: {e}")
        print("Ensure your API key is correct and you have not exceeded your daily 10,000 quota units.")


if __name__ == "__main__":
    main()





Enter the niche/topic to search for (e.g., 'baking tutorial', 'indie game review'):  cooking


Searching for videos in niche 'cooking' published after: 2025-12-03T15:15:57.067023Z...
Fetching statistics for 125 videos...
Found 61 creators meeting all criteria.

SUCCESS: PDF report generated: YouTube_Creator_Report.pdf

--- Top Creators Found ---
                  channel_title  videos_this_week  average_views                                             channel_link
Sanju's Gardening and Lifestyle                 2         1340.5 https://www.youtube.com/channel/UCRGYKGGh9fljSZRcpIqCwbQ
            NavaNavi's kitchen                  1         1180.0 https://www.youtube.com/channel/UC1H2be7eriUsIFgbKDLIVPA
          pratidiner ranna ghar                 1         1290.0 https://www.youtube.com/channel/UC1PQY9rUwqhZTmXsQJG4Mrw
                    All in one                  1          446.0 https://www.youtube.com/channel/UC1h3-xEqCaG9On9IixG-0Zw
                    Afs Channel                 1         1836.0 https://www.youtube.com/channel/UC-pDseraLNxhVf1DNJN6EIA
               