In [1]:
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from openpyxl import load_workbook
from openpyxl.chart import BarChart, DoughnutChart, PieChart, Reference
from openpyxl.chart.series import DataPoint
from openpyxl.drawing.fill import ColorChoice

In [None]:
def analyze_tv_programs(input_file, output_path):
    """
    Load the TV programs workbook and report basic data-quality information.

    Reads ``input_file`` with ``pd.read_excel``, strips surrounding whitespace
    from string column headers, prints the dataset shape and column list, and
    prints the number of missing values for each key column. A key column
    that is absent from the sheet is reported explicitly instead of being
    skipped silently.

    Args:
        input_file: Workbook to load; passed straight to ``pd.read_excel``.
        output_path: Not used by this function; kept in the signature so
            existing callers that pass it keep working.

    Returns:
        The loaded DataFrame with cleaned column names, or ``None`` when
        loading or inspecting the data raised an exception (the error is
        printed rather than propagated).
    """
    try:
        # Read the Excel file
        print("Reading Excel file...")
        df = pd.read_excel(input_file)

        # Clean column names (remove any extra spaces). Only string headers
        # are stripped: the ``.str`` accessor raises on numeric/date headers,
        # which would otherwise make the whole load be reported as failed.
        df = df.rename(
            columns=lambda name: name.strip() if isinstance(name, str) else name
        )

        # Display basic info about the dataset
        print(f"Dataset shape: {df.shape}")
        print(f"Columns: {list(df.columns)}")

        # Check for missing values in key columns
        print("\nChecking for missing values...")
        key_columns = ['series_name', 'airing_data', 'delivery_date']
        for col in key_columns:
            if col in df.columns:
                missing_count = df[col].isna().sum()
                print(f"{col}: {missing_count} missing values")
            else:
                # Surface the problem here instead of leaving it to show up
                # later as a KeyError in the analysis steps.
                print(f"{col}: column not found in dataset")

        return df  # Return for inspection in notebook

    except Exception as e:
        # Best-effort loader: callers check for None instead of catching.
        print(f"Error during data loading: {str(e)}")
        return None

In [None]:
def analyze_programs(df):
    """Compute and print program-level delivery counts.

    A program counts as delivered when its ``delivery_date`` value is not
    missing.

    Args:
        df: DataFrame with one row per program and a ``delivery_date`` column.

    Returns:
        Tuple ``(total_programs, delivered_programs, remaining_programs)``.
    """
    print("\n=== PROGRAMS ANALYSIS ===")

    # Every row is one program.
    n_total = df.shape[0]
    print(f"Total programs: {n_total}")

    # Rows that carry a delivery date are considered delivered.
    has_delivery_date = df['delivery_date'].notna()
    n_delivered = has_delivery_date.sum()
    print(f"Delivered programs: {n_delivered}")

    # Whatever is not delivered is still outstanding.
    n_remaining = n_total - n_delivered
    print(f"Remaining programs: {n_remaining}")

    return n_total, n_delivered, n_remaining

In [None]:
def extract_channels(airing_data):
    """Split an ``airing_data`` value into a list of channel names.

    The value is converted to text and runs of whitespace are collapsed to
    single spaces. The text is then split on the first delimiter, in the
    priority order ``, ; | / \\``, that occurs in it; if none occurs the whole
    text is treated as one channel. Pieces shorter than two characters are
    dropped, surrounding punctuation/brackets are trimmed from the rest, and
    pieces that end up empty are dropped.

    Args:
        airing_data: Raw cell value; missing values yield an empty list.

    Returns:
        List of cleaned channel strings (possibly empty).
    """
    if pd.isna(airing_data):
        return []

    # str.split() with no argument both trims and collapses whitespace.
    normalized = ' '.join(str(airing_data).split())
    if not normalized:
        return []

    # Only the highest-priority delimiter present is used for splitting.
    separator = next(
        (sep for sep in (',', ';', '|', '/', '\\') if sep in normalized),
        None,
    )
    pieces = [normalized] if separator is None else normalized.split(separator)

    result = []
    for piece in pieces:
        token = piece.strip()
        # The length check is applied before punctuation is trimmed.
        if len(token) < 2:
            continue
        token = token.strip('.,;|/\\()[]{}')
        if token:
            result.append(token)

    return result

In [None]:
def analyze_channels(df):
    """Analyze channel-level delivery metrics.

    Every row of ``df`` is expanded into one record per channel found in its
    ``airing_data`` value (via ``extract_channels``). Channel names are
    upper-cased; names shorter than two characters or made only of digits are
    ignored. Records are then aggregated per channel.

    Args:
        df: DataFrame with ``airing_data``, ``series_name`` and
            ``delivery_date`` columns.

    Returns:
        Always an 8-tuple::

            (channels_df, channel_summary, total_channels,
             completed_channels, pending_channels, near_completion,
             focus_needed, focus_channels)

        * ``channels_df``: one row per channel/program combination.
        * ``channel_summary``: one row per channel with ``total_programs``,
          ``delivered_programs``, ``unique_series`` and
          ``completion_percentage`` (rounded to 2 decimals).
        * ``focus_channels``: up to 20 incomplete channels with the lowest
          completion percentage.

        When no valid channel is found the three frames are ``None`` and the
        five counts are ``0``, so callers can unpack the result the same way
        in both cases.
    """
    print("\n=== CHANNELS ANALYSIS ===")
    channel_records = []

    print("Processing channels from airing_data...")
    for idx, row in df.iterrows():
        for channel in extract_channels(row['airing_data']):
            channel = channel.strip().upper()
            # Skip fragments that cannot be a channel name.
            if len(channel) < 2 or channel.isdigit():
                continue

            channel_records.append({
                'channel': channel,
                'series_name': row['series_name'],
                'delivered': pd.notna(row['delivery_date']),
                'delivery_date': row['delivery_date'],
                'original_row': idx
            })

    # Create DataFrame
    channels_df = pd.DataFrame(channel_records)

    if channels_df.empty:
        print("No valid channels found in airing_data")
        # Same arity as the normal return below; counts are zero, frames absent.
        return None, None, 0, 0, 0, 0, 0, None

    print(f"Total channel-program combinations: {len(channels_df)}")

    # Group by channel
    channel_summary = channels_df.groupby('channel').agg({
        'delivered': ['count', 'sum'],
        'series_name': 'nunique'
    }).reset_index()

    channel_summary.columns = ['channel', 'total_programs', 'delivered_programs', 'unique_series']

    # Classify on the exact values, not on the rounded percentage: rounding
    # first would report e.g. 99.996% as complete and 89.996% as ">= 90%".
    exact_pct = channel_summary['delivered_programs'] / channel_summary['total_programs'] * 100
    is_complete = channel_summary['delivered_programs'] == channel_summary['total_programs']
    channel_summary['completion_percentage'] = exact_pct.round(2)

    # Calculate metrics
    total_channels = len(channel_summary)
    completed_channels = is_complete.sum()
    pending_channels = total_channels - completed_channels
    near_completion = ((exact_pct >= 90.0) & ~is_complete).sum()
    focus_needed = (exact_pct < 90.0).sum()
    # nsmallest already caps the result at 20 rows.
    focus_channels = channel_summary[~is_complete].nsmallest(20, 'completion_percentage')

    print(f"Total unique channels: {total_channels}")
    print(f"Completed channels (100%): {completed_channels}")
    print(f"Pending channels: {pending_channels}")
    print(f"Near completion (≥90%): {near_completion}")
    print(f"Focus needed (<90%): {focus_needed}")
    print(f"Next top channels to focus: {len(focus_channels)}")

    return (channels_df, channel_summary, total_channels, completed_channels,
            pending_channels, near_completion, focus_needed, focus_channels)

In [None]:
def compile_results(total_programs, delivered_programs, remaining_programs,
                   total_channels, completed_channels, pending_channels,
                   near_completion, focus_needed, focus_channels):
    """Assemble the headline metrics into a three-column summary table.

    Args:
        total_programs, delivered_programs, remaining_programs: Program counts.
        total_channels, completed_channels, pending_channels: Channel counts.
        near_completion, focus_needed: Counts of channels at or above / below
            the 90 % completion mark.
        focus_channels: Sized collection of channels to focus on, or ``None``;
            only its length (capped at 20) is reported.

    Returns:
        DataFrame with columns ``Section``, ``Metric`` and ``Value``.
    """
    print("\n=== CREATING OUTPUT ===")

    # A missing focus list is reported as zero channels.
    focus_count = 0 if focus_channels is None else len(focus_channels)
    focus_count = min(20, focus_count)

    # Metrics grouped by section; insertion order defines the row order.
    sections = {
        'Programs': [
            ('Total', total_programs),
            ('Delivered', delivered_programs),
            ('Remaining', remaining_programs),
        ],
        'Channels': [
            ('Total', total_channels),
            ('Completed', completed_channels),
            ('Pending', pending_channels),
        ],
        'Focus': [
            ('Near-Completion (≥90 %)', near_completion),
            ('Focus (<90 %)', focus_needed),
            ('Next Top Channels to focus', focus_count),
        ],
    }

    rows = [
        [section, metric, value]
        for section, metrics in sections.items()
        for metric, value in metrics
    ]

    return pd.DataFrame(rows, columns=['Section', 'Metric', 'Value'])

In [None]:
def export_to_excel(results_df, channel_summary, focus_channels, df, output_path):
    """Export the analysis to ``analysis_results.xlsx`` and add a donut chart.

    Sheets written:
        * ``Summary``: ``results_df``.
        * ``Channel_Details``: ``channel_summary`` sorted by completion
          percentage (only when ``channel_summary`` is not ``None``).
        * ``Focus_Channels``: selected columns of ``focus_channels`` (only
          when it is non-empty and ``channel_summary`` is not ``None``).
        * ``Original_Data_Sample``: the first 100 rows of ``df``.

    After the workbook is saved it is reopened and a delivered/remaining
    donut chart is added to ``Summary``. The chart figures are taken from the
    ``Programs`` rows of ``results_df`` so the function does not depend on
    variables defined outside it. Chart creation is best-effort: a failure is
    printed and the data-only workbook is left in place.

    Args:
        results_df: Summary table with ``Section``, ``Metric`` and ``Value``
            columns.
        channel_summary: Per-channel summary DataFrame, or ``None``.
        focus_channels: DataFrame of channels to focus on, or ``None``.
        df: Original program data.
        output_path: Directory for the output file; created if missing.

    Returns:
        Path of the written workbook.
    """
    os.makedirs(output_path, exist_ok=True)
    output_file = os.path.join(output_path, 'analysis_results.xlsx')

    # First save with pandas
    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        results_df.to_excel(writer, sheet_name='Summary', index=False)

        if channel_summary is not None:
            channel_summary.sort_values('completion_percentage').to_excel(
                writer, sheet_name='Channel_Details', index=False
            )

            if focus_channels is not None and len(focus_channels) > 0:
                focus_columns = ['channel', 'total_programs', 'delivered_programs', 'completion_percentage']
                focus_channels[focus_columns].sort_values('completion_percentage').to_excel(
                    writer, sheet_name='Focus_Channels', index=False
                )

        df.head(100).to_excel(writer, sheet_name='Original_Data_Sample', index=False)

    # Add charts
    print("Adding charts to Excel file...")
    try:
        # Read the program counts from the summary table that was just written.
        program_values = (
            results_df.loc[results_df['Section'] == 'Programs']
            .set_index('Metric')['Value']
        )
        total_programs = int(program_values['Total'])
        delivered_programs = int(program_values['Delivered'])
        remaining_programs = int(program_values['Remaining'])
        # An empty dataset has nothing delivered; avoid dividing by zero.
        delivered_pct = delivered_programs / total_programs * 100 if total_programs else 0.0

        wb = load_workbook(output_file)
        ws = wb['Summary']

        # Donut chart data, written to columns E:F next to the summary table.
        chart_data = [
            ['Status', 'Count'],
            ['Remaining', remaining_programs],
            ['Delivered', delivered_programs]
        ]

        for row_idx, row_data in enumerate(chart_data, start=1):
            for col_idx, value in enumerate(row_data, start=5):
                ws.cell(row=row_idx, column=col_idx, value=value)

        # Create donut chart. holeSize only takes effect on a DoughnutChart;
        # on a PieChart it is ignored and a plain pie is drawn.
        donut_chart = DoughnutChart()
        donut_chart.add_data(Reference(ws, min_col=6, min_row=2, max_row=3))
        donut_chart.set_categories(Reference(ws, min_col=5, min_row=2, max_row=3))
        donut_chart.title = f"{delivered_pct:.1f}% Delivered"
        donut_chart.height = 10
        donut_chart.width = 15
        donut_chart.holeSize = 50
        ws.add_chart(donut_chart, "H2")

        # Save workbook
        wb.save(output_file)
        print("Charts added successfully!")

    except Exception as e:
        # Best-effort: the workbook without charts is still a valid result.
        print(f"Error adding charts: {e}")

    return output_file

In [None]:
if __name__ == "__main__":
    # File paths. The defaults are the original locations; set the
    # TV_PROGRAMS_INPUT_FILE / TV_PROGRAMS_OUTPUT_DIR environment variables to
    # run the analysis on another machine without editing this cell.
    input_file = os.environ.get(
        "TV_PROGRAMS_INPUT_FILE",
        r"C:\Users\Aaryan\Documents\combined\combined.xlsx",
    )
    output_path = os.environ.get(
        "TV_PROGRAMS_OUTPUT_DIR",
        r"C:\Users\Aaryan\Documents\combined\output",
    )

    # Check if input file exists
    if not os.path.exists(input_file):
        print(f"Error: Input file not found at {input_file}")
    else:
        print(f"Processing file: {input_file}")

        # Step 1: Load data (returns None when loading failed)
        df = analyze_tv_programs(input_file, output_path)

        if df is not None:
            # Step 2: Program analysis
            total_programs, delivered_programs, remaining_programs = analyze_programs(df)

            # Step 3: Channel analysis
            channel_results = analyze_channels(df)
            if channel_results[0] is None:
                # analyze_channels signals "no valid channels" with a leading
                # None. Fall back to an empty channel section instead of
                # unpacking, so a programs-only report is still produced.
                channels_df = channel_summary = focus_channels = None
                total_channels = completed_channels = pending_channels = 0
                near_completion = focus_needed = 0
            else:
                (channels_df, channel_summary, total_channels, completed_channels,
                 pending_channels, near_completion, focus_needed, focus_channels) = channel_results

            # Step 4: Compile results
            results_df = compile_results(
                total_programs, delivered_programs, remaining_programs,
                total_channels, completed_channels, pending_channels,
                near_completion, focus_needed, focus_channels
            )

            # Step 5: Export to Excel
            output_file = export_to_excel(
                results_df, channel_summary, focus_channels, df, output_path
            )

            print("\n=== FINAL RESULTS ===")
            print(results_df.to_string(index=False))
            print(f"\n✅ Analysis completed! Results saved to: {output_file}")