In [12]:
# Cell 1: Setup and Test Subject Selection for Individual Sleep Reports
# notebooks/04_sleep_report.ipynb

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
from datetime import datetime
import json

# Report generation libraries
from reportlab.lib.pagesizes import letter, A4
from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, PageBreak, Table, TableStyle
from reportlab.lib.styles import getSampleStyleSheet, ParagraphStyle
from reportlab.lib.units import inch
from reportlab.lib import colors
from reportlab.lib.enums import TA_CENTER, TA_LEFT, TA_RIGHT, TA_JUSTIFY

warnings.filterwarnings('ignore')

print("🚀 Individual Sleep Report Generation System")
print("=" * 60)

# Setup paths
PROJECT_ROOT = Path.cwd()
DATA_PATH = PROJECT_ROOT / "datasets"
RESULTS_PATH = PROJECT_ROOT / "results" / "sleep-edf"
REPORTS_PATH = RESULTS_PATH / "individual_reports"
VISUALIZATIONS_PATH = RESULTS_PATH / "visualizations"

# Create directories
REPORTS_PATH.mkdir(parents=True, exist_ok=True)
print(f"📁 Reports directory: {REPORTS_PATH}")

# Load the complete dataset
print("\n📊 Loading Dataset...")
df = pd.read_csv(
    '../results/sleep-edf/features/combined_sleep_edf_features_with_meta.csv')
print(f"✅ Dataset loaded: {df.shape[0]} recordings × {df.shape[1]} features")

# Verify unique subjects
unique_subjects = df.drop_duplicates(["study_type", "subject_id"])
n_unique_people = unique_subjects.shape[0]
print(f"👥 Unique subjects: {n_unique_people}")

# Breakdown by study type
sc_subjects = df[df['study_type'] == 'SC'].drop_duplicates(
    ['study_type', 'subject_id'])
st_subjects = df[df['study_type'] == 'ST'].drop_duplicates(
    ['study_type', 'subject_id'])

print(f"📈 Study breakdown:")
print(f"    └─ SC Study: {len(sc_subjects)} subjects")
print(f"    └─ ST Study: {len(st_subjects)} subjects")

# Analyze recording patterns
print(f"\n🔍 Recording Patterns Analysis:")
subject_nights = df.groupby(['study_type', 'subject_id']).size()
print(f"    └─ Subjects with 1 night: {(subject_nights == 1).sum()}")
print(f"    └─ Subjects with 2 nights: {(subject_nights == 2).sum()}")
print(f"    └─ Subjects with >2 nights: {(subject_nights > 2).sum()}")

# Select test subjects for initial development
print(f"\n🎯 Selecting Test Subjects for Report Development...")

# Find SC subject with multiple nights (if any)
sc_multiple_nights = df[df['study_type'] == 'SC'].groupby('subject_id').size()
sc_multi_subjects = sc_multiple_nights[sc_multiple_nights > 1]

if len(sc_multi_subjects) > 0:
    test_sc_subject = sc_multi_subjects.index[0]
    print(
        f"    └─ SC Test Subject: {test_sc_subject} ({sc_multi_subjects.iloc[0]} nights)")
else:
    # Fallback to single night SC subject
    test_sc_subject = sc_subjects.iloc[0]['subject_id']
    print(
        f"    └─ SC Test Subject: {test_sc_subject} (1 night - single recording)")

# Find ST subject with multiple nights (should all have 2 nights)
st_multiple_nights = df[df['study_type'] == 'ST'].groupby('subject_id').size()
st_multi_subjects = st_multiple_nights[st_multiple_nights > 1]

if len(st_multi_subjects) > 0:
    test_st_subject = st_multi_subjects.index[0]
    print(
        f"    └─ ST Test Subject: {test_st_subject} ({st_multi_subjects.iloc[0]} nights)")
else:
    # Fallback to any ST subject
    test_st_subject = st_subjects.iloc[0]['subject_id']
    print(f"    └─ ST Test Subject: {test_st_subject} (fallback)")

# Extract test subject data
print(f"\n📋 Test Subject Data Extraction...")

# SC Test Subject Data
sc_test_data = df[(df['study_type'] == 'SC') & (
    df['subject_id'] == test_sc_subject)]
print(f"    └─ SC Subject {test_sc_subject}:")
print(f"        ├─ Recordings: {len(sc_test_data)}")
print(f"        ├─ Age: {sc_test_data.iloc[0]['age']}")
print(f"        ├─ Sex: {sc_test_data.iloc[0]['sex']}")
print(f"        └─ Nights: {sc_test_data['night'].tolist()}")

# ST Test Subject Data
st_test_data = df[(df['study_type'] == 'ST') & (
    df['subject_id'] == test_st_subject)]
print(f"    └─ ST Subject {test_st_subject}:")
print(f"        ├─ Recordings: {len(st_test_data)}")
print(f"        ├─ Age: {st_test_data.iloc[0]['age']}")
print(f"        ├─ Sex: {st_test_data.iloc[0]['sex']}")
print(f"        ├─ Nights: {st_test_data['night'].tolist()}")
print(f"        └─ Conditions: {st_test_data['condition'].tolist()}")

# Store test subject information for next cells
test_subjects_info = {
    'sc_subject': {
        'subject_id': test_sc_subject,
        'data': sc_test_data,
        'n_nights': len(sc_test_data)
    },
    'st_subject': {
        'subject_id': test_st_subject,
        'data': st_test_data,
        'n_nights': len(st_test_data)
    }
}

print(f"\n✅ Cell 1 Complete - Ready for Report Structure Design")
print(f"📋 Next: Define report template and layout structure")

🚀 Individual Sleep Report Generation System
📁 Reports directory: /Users/rishabh/Documents/BDA-course/HDA3/notebooks/results/sleep-edf/individual_reports

📊 Loading Dataset...
✅ Dataset loaded: 197 recordings × 57 features
👥 Unique subjects: 100
📈 Study breakdown:
    └─ SC Study: 78 subjects
    └─ ST Study: 22 subjects

🔍 Recording Patterns Analysis:
    └─ Subjects with 1 night: 3
    └─ Subjects with 2 nights: 97
    └─ Subjects with >2 nights: 0

🎯 Selecting Test Subjects for Report Development...
    └─ SC Test Subject: 0 (2 nights)
    └─ ST Test Subject: 1 (2 nights)

📋 Test Subject Data Extraction...
    └─ SC Subject 0:
        ├─ Recordings: 2
        ├─ Age: 33
        ├─ Sex: F
        └─ Nights: [2, 1]
    └─ ST Subject 1:
        ├─ Recordings: 2
        ├─ Age: 60
        ├─ Sex: M
        ├─ Nights: [2, 1]
        └─ Conditions: ['temazepam', 'placebo']

✅ Cell 1 Complete - Ready for Report Structure Design
📋 Next: Define report template and layout structure


In [13]:
# Cell 2: Report Template Design and Structure
# Professional Individual Sleep Analysis Report Template

from reportlab.lib.colors import HexColor
from reportlab.graphics.shapes import Drawing, Rect, String
from reportlab.graphics.charts.barcharts import VerticalBarChart
from reportlab.graphics.charts.linecharts import HorizontalLineChart
from io import BytesIO
import matplotlib.pyplot as plt

print("📋 Designing Professional Sleep Report Template")
print("=" * 55)

# Define professional color scheme
COLORS = {
    'primary': HexColor('#2E4057'),      # Dark blue-gray
    'secondary': HexColor('#048A81'),     # Teal
    'accent': HexColor('#54C6EB'),       # Light blue
    'success': HexColor('#16A085'),      # Green
    'warning': HexColor('#F39C12'),      # Orange
    'danger': HexColor('#E74C3C'),       # Red
    'light_gray': HexColor('#ECF0F1'),   # Very light gray
    'dark_gray': HexColor('#7F8C8D'),    # Medium gray
    'text': HexColor('#2C3E50'),         # Dark text
    'bg_light': HexColor('#F8F9FA')      # Background light
}

# Define report styles


def create_report_styles():
    """Create custom styles for the sleep report"""

    styles = getSampleStyleSheet()

    # Title style
    styles.add(ParagraphStyle(
        name='ReportTitle',
        parent=styles['Title'],
        fontSize=24,
        fontName='Helvetica-Bold',
        textColor=COLORS['primary'],
        alignment=TA_CENTER,
        spaceAfter=24
    ))

    # Subtitle style
    styles.add(ParagraphStyle(
        name='ReportSubtitle',
        parent=styles['Heading1'],
        fontSize=16,
        fontName='Helvetica',
        textColor=COLORS['secondary'],
        alignment=TA_CENTER,
        spaceAfter=18
    ))

    # Section header style
    styles.add(ParagraphStyle(
        name='SectionHeader',
        parent=styles['Heading2'],
        fontSize=14,
        fontName='Helvetica-Bold',
        textColor=COLORS['primary'],
        alignment=TA_LEFT,
        spaceBefore=20,
        spaceAfter=10,
        borderWidth=0,
        borderColor=COLORS['primary']
    ))

    # Subsection header style
    styles.add(ParagraphStyle(
        name='SubsectionHeader',
        parent=styles['Heading3'],
        fontSize=12,
        fontName='Helvetica-Bold',
        textColor=COLORS['secondary'],
        alignment=TA_LEFT,
        spaceBefore=12,
        spaceAfter=6
    ))

    # Body text style
    styles.add(ParagraphStyle(
        name='ReportBody',
        parent=styles['Normal'],
        fontSize=11,
        fontName='Helvetica',
        textColor=COLORS['text'],
        alignment=TA_JUSTIFY,
        spaceBefore=6,
        spaceAfter=6,
        leftIndent=0,
        rightIndent=0
    ))

    # Metrics style (for key values)
    styles.add(ParagraphStyle(
        name='MetricValue',
        parent=styles['Normal'],
        fontSize=16,
        fontName='Helvetica-Bold',
        textColor=COLORS['primary'],
        alignment=TA_CENTER,
        spaceBefore=6,
        spaceAfter=6
    ))

    # Clinical interpretation style
    styles.add(ParagraphStyle(
        name='ClinicalNote',
        parent=styles['Normal'],
        fontSize=10,
        fontName='Helvetica-Oblique',
        textColor=COLORS['dark_gray'],
        alignment=TA_JUSTIFY,
        spaceBefore=4,
        spaceAfter=8,
        leftIndent=20,
        rightIndent=20
    ))

    # Footer style
    styles.add(ParagraphStyle(
        name='Footer',
        parent=styles['Normal'],
        fontSize=9,
        fontName='Helvetica',
        textColor=COLORS['dark_gray'],
        alignment=TA_CENTER,
        spaceBefore=6,
        spaceAfter=6
    ))

    return styles

# Define report structure template


def define_report_template():
    """Define the structure of individual sleep reports"""

    template = {
        'header': {
            'title': 'Individual Sleep Analysis Report',
            'subtitle': 'Comprehensive Polysomnographic Assessment',
            'date': datetime.now().strftime('%B %d, %Y'),
            'logo_space': True
        },

        'subject_info': {
            'title': 'Subject Information',
            'fields': ['subject_id', 'age', 'sex', 'study_type', 'n_nights', 'conditions']
        },

        'executive_summary': {
            'title': 'Executive Summary',
            'key_metrics': [
                'sleep_efficiency', 'sleep_latency_min', 'rem_latency_min',
                'REM_percentage', 'waso_min'
            ],
            'interpretation': 'overall_assessment'
        },

        'sleep_architecture': {
            'title': 'Sleep Architecture Analysis',
            'subsections': [
                'Sleep Efficiency & Quality',
                'Sleep Stage Distribution',
                'Sleep Latencies',
                'Night-by-Night Comparison (if multiple nights)'
            ]
        },

        'neurophysiological_analysis': {
            'title': 'Neurophysiological Analysis',
            'subsections': [
                'EEG Power Spectral Analysis',
                'Brain Wave Patterns',
                'Frequency Band Analysis'
            ]
        },

        'signal_quality': {
            'title': 'Signal Quality Assessment',
            'subsections': [
                'EMG Activity Patterns',
                'EOG Movement Analysis',
                'Technical Quality Metrics'
            ]
        },

        'comparative_analysis': {
            'title': 'Comparative Analysis',
            'subsections': [
                'Population Comparison',
                'Age-Matched Norms',
                'Study Group Classification'
            ]
        },

        'clinical_interpretation': {
            'title': 'Clinical Interpretation',
            'subsections': [
                'Sleep Health Assessment',
                'Risk Factors',
                'Recommendations'
            ]
        },

        'appendix': {
            'title': 'Technical Appendix',
            'subsections': [
                'Methodology',
                'Data Quality',
                'Feature Definitions'
            ]
        }
    }

    return template

# Create helper functions for report sections


def create_subject_demographics_table(subject_data):
    """Create demographics table for subject info section"""

    # Get subject info (use first row if multiple nights)
    subject_info = subject_data.iloc[0]

    # Create demographics data
    demo_data = [
        ['Subject ID', str(subject_info['subject_id'])],
        ['Age', f"{subject_info['age']} years"],
        ['Sex', subject_info['sex']],
        ['Study Type', 'Healthy Controls' if subject_info['study_type']
            == 'SC' else 'Sleep Difficulty'],
        ['Number of Nights', str(len(subject_data))],
        ['Recording Dates', 'Multiple nights' if len(
            subject_data) > 1 else 'Single night']
    ]

    # Add conditions for ST subjects
    if subject_info['study_type'] == 'ST' and len(subject_data) > 1:
        conditions = subject_data['condition'].tolist()
        demo_data.append(['Study Conditions', ', '.join(conditions)])

    return demo_data


def calculate_key_metrics(subject_data):
    """Calculate key sleep metrics for executive summary"""

    if len(subject_data) == 1:
        # Single night
        metrics = subject_data.iloc[0]
        return {
            'sleep_efficiency': f"{metrics['sleep_efficiency']:.1f}%",
            'sleep_latency': f"{metrics['sleep_latency_min']:.1f} min",
            'rem_latency': f"{metrics['rem_latency_min']:.1f} min",
            'rem_percentage': f"{metrics['REM_percentage']:.1f}%",
            'waso': f"{metrics['waso_min']:.1f} min",
            'nights_analyzed': 1
        }
    else:
        # Multiple nights - calculate averages
        avg_metrics = subject_data.mean(numeric_only=True)
        return {
            'sleep_efficiency': f"{avg_metrics['sleep_efficiency']:.1f}%",
            'sleep_latency': f"{avg_metrics['sleep_latency_min']:.1f} min",
            'rem_latency': f"{avg_metrics['rem_latency_min']:.1f} min",
            'rem_percentage': f"{avg_metrics['REM_percentage']:.1f}%",
            'waso': f"{avg_metrics['waso_min']:.1f} min",
            'nights_analyzed': len(subject_data)
        }


# Initialize report configuration
REPORT_CONFIG = {
    'page_size': A4,
    'margins': {
        'top': 72,    # 1 inch
        'bottom': 72,
        'left': 72,
        'right': 72
    },
    'styles': create_report_styles(),
    'template': define_report_template(),
    'colors': COLORS
}

print("✅ Report Template Design Complete")
print(f"📊 Template Structure:")
print(f"    ├─ Header & Subject Info")
print(f"    ├─ Executive Summary")
print(f"    ├─ Sleep Architecture Analysis")
print(f"    ├─ Neurophysiological Analysis")
print(f"    ├─ Signal Quality Assessment")
print(f"    ├─ Comparative Analysis")
print(f"    ├─ Clinical Interpretation")
print(f"    └─ Technical Appendix")

print(f"\n🎨 Professional Styling:")
print(f"    ├─ Color Scheme: Clinical blue/teal theme")
print(f"    ├─ Typography: Helvetica family")
print(f"    ├─ Layout: A4 with 1-inch margins")
print(f"    └─ Sections: 8 main sections with subsections")

print(f"\n📋 Report Features:")
print(f"    ├─ Demographics table")
print(f"    ├─ Key metrics calculation")
print(f"    ├─ Multi-night handling")
print(f"    ├─ Comparative analysis")
print(f"    └─ Clinical interpretations")

print(f"\n✅ Cell 2 Complete - Ready for Visualization Functions")
print(f"📊 Next: Create visualization functions for charts and graphs")

📋 Designing Professional Sleep Report Template
✅ Report Template Design Complete
📊 Template Structure:
    ├─ Header & Subject Info
    ├─ Executive Summary
    ├─ Sleep Architecture Analysis
    ├─ Neurophysiological Analysis
    ├─ Signal Quality Assessment
    ├─ Comparative Analysis
    ├─ Clinical Interpretation
    └─ Technical Appendix

🎨 Professional Styling:
    ├─ Color Scheme: Clinical blue/teal theme
    ├─ Typography: Helvetica family
    ├─ Layout: A4 with 1-inch margins
    └─ Sections: 8 main sections with subsections

📋 Report Features:
    ├─ Demographics table
    ├─ Key metrics calculation
    ├─ Multi-night handling
    ├─ Comparative analysis
    └─ Clinical interpretations

✅ Cell 2 Complete - Ready for Visualization Functions
📊 Next: Create visualization functions for charts and graphs


In [14]:
# Cell 3: Fixed Visualization Functions with Data Sanitization
# Create robust charts and graphs for individual sleep reports

import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.patches import Rectangle
import matplotlib.patches as mpatches
from matplotlib.gridspec import GridSpec
import io
import numpy as np

print("📊 Creating Fixed Visualization Functions (Data Sanitization)")
print("=" * 65)

# Set matplotlib backend to Agg to prevent display issues
matplotlib.use('Agg')

# Set consistent matplotlib parameters
plt.rcParams.update({
    'figure.max_open_warning': 50,
    'figure.figsize': [12, 10],
    'figure.dpi': 100,
    'savefig.dpi': 300,
    'font.size': 10,
    'axes.titlesize': 12,
    'axes.labelsize': 10,
    'xtick.labelsize': 9,
    'ytick.labelsize': 9
})

# Define consistent color scheme
VIZ_COLORS = {
    'primary': '#2E4057',
    'secondary': '#048A81',
    'accent': '#54C6EB',
    'success': '#16A085',
    'warning': '#F39C12',
    'danger': '#E74C3C',
    'healthy': '#27AE60',
    'impaired': '#E74C3C',
    'neutral': '#95A5A6'
}


def sanitize_data(data, min_val=1e-10, max_val=1e6):
    """Sanitize data for plotting by removing inf, nan, and extreme values"""

    if isinstance(data, (list, np.ndarray, pd.Series)):
        # Convert to numpy array for easier processing
        data_array = np.array(data, dtype=float)

        # Replace inf and -inf with NaN
        data_array = np.where(np.isinf(data_array), np.nan, data_array)

        # Replace values outside reasonable range
        data_array = np.where(data_array < min_val, min_val, data_array)
        data_array = np.where(data_array > max_val, max_val, data_array)

        # Replace remaining NaN with min_val
        data_array = np.where(np.isnan(data_array), min_val, data_array)

        return data_array.tolist()

    elif isinstance(data, (int, float)):
        if np.isinf(data) or np.isnan(data):
            return min_val
        elif data < min_val:
            return min_val
        elif data > max_val:
            return max_val
        else:
            return data

    return data


def create_sleep_architecture_chart(subject_data, save_path=None):
    """Create sleep architecture visualization with data sanitization"""

    try:
        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
        fig.suptitle('Sleep Architecture Analysis', fontsize=16,
                     fontweight='bold', color=VIZ_COLORS['primary'])

        # Sleep stages data
        sleep_stages = ['Wake_percentage', 'N1_percentage', 'N2_percentage',
                        'N3_percentage', 'N4_percentage', 'REM_percentage']
        stage_labels = [
            'Wake', 'N1 (Light)', 'N2 (Light)', 'N3 (Deep)', 'N4 (Deep)', 'REM']

        if len(subject_data) == 1:
            # Single night analysis
            night_data = subject_data.iloc[0]

            # Sanitize sleep stage values
            stage_values = []
            for stage in sleep_stages:
                val = night_data.get(stage, 0)
                val = sanitize_data(val, min_val=0, max_val=100)
                stage_values.append(val)

            # Ensure values sum to reasonable total
            total_stages = sum(stage_values)
            if total_stages > 100:
                stage_values = [v * 100 / total_stages for v in stage_values]

            colors_stages = ['#FF6B6B', '#4ECDC4',
                             '#45B7D1', '#96CEB4', '#FFEAA7', '#DDA0DD']

            # Pie chart with error handling
            try:
                axes[0, 0].pie(stage_values, labels=stage_labels,
                               colors=colors_stages, autopct='%1.1f%%', startangle=90)
                axes[0, 0].set_title(
                    'Sleep Stage Distribution', fontweight='bold')
            except:
                axes[0, 0].text(0.5, 0.5, 'Sleep Stages\nData Unavailable',
                                ha='center', va='center', transform=axes[0, 0].transAxes)
                axes[0, 0].set_title(
                    'Sleep Stage Distribution', fontweight='bold')

            # Sleep efficiency with bounds checking
            efficiency = sanitize_data(night_data.get(
                'sleep_efficiency', 0), min_val=0, max_val=100)
            axes[0, 1].bar(['Sleep Efficiency'], [
                           efficiency], color=VIZ_COLORS['success'] if efficiency >= 85 else VIZ_COLORS['warning'])
            axes[0, 1].set_ylim(0, 100)
            axes[0, 1].set_ylabel('Percentage (%)')
            axes[0, 1].set_title('Sleep Efficiency', fontweight='bold')
            axes[0, 1].axhline(y=85, color='red', linestyle='--',
                               alpha=0.7, label='Normal threshold (85%)')
            axes[0, 1].legend()

            # Sleep latencies with sanitization
            sleep_lat = sanitize_data(night_data.get(
                'sleep_latency_min', 0), min_val=0, max_val=300)
            rem_lat = sanitize_data(night_data.get(
                'rem_latency_min', 0), min_val=0, max_val=300)
            latencies = [sleep_lat, rem_lat]
            latency_labels = ['Sleep Onset', 'REM Onset']

            bars = axes[1, 0].bar(latency_labels, latencies, color=[
                                  VIZ_COLORS['accent'], VIZ_COLORS['secondary']])
            axes[1, 0].set_ylabel('Minutes')
            axes[1, 0].set_title('Sleep Latencies', fontweight='bold')
            axes[1, 0].set_ylim(0, max(latencies) *
                                1.2 if max(latencies) > 0 else 60)

            # Add value labels
            for bar, value in zip(bars, latencies):
                axes[1, 0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
                                f'{value:.1f}', ha='center', va='bottom', fontweight='bold')

            # Quality metrics
            waso = sanitize_data(night_data.get(
                'waso_min', 0), min_val=0, max_val=300)
            total_sleep_min = sum([
                night_data.get('REM_duration_min', 0),
                night_data.get('N1_duration_min', 0),
                night_data.get('N2_duration_min', 0),
                night_data.get('N3_duration_min', 0),
                night_data.get('N4_duration_min', 0)
            ])
            total_sleep_hr = sanitize_data(
                total_sleep_min / 60, min_val=0, max_val=12)

            quality_metrics = [waso, total_sleep_hr]
            quality_labels = ['WASO (min)', 'Total Sleep (hrs)']

            bars = axes[1, 1].bar(quality_labels, quality_metrics, color=[
                                  VIZ_COLORS['danger'], VIZ_COLORS['success']])
            axes[1, 1].set_title('Sleep Quality Metrics', fontweight='bold')
            axes[1, 1].set_ylim(0, max(quality_metrics) *
                                1.2 if max(quality_metrics) > 0 else 10)

            # Add value labels
            for bar, value in zip(bars, quality_metrics):
                axes[1, 1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                                f'{value:.1f}', ha='center', va='bottom', fontweight='bold')

        else:
            # Multiple nights comparison
            nights = list(range(1, len(subject_data) + 1))

            # Sleep efficiency across nights
            efficiencies = [sanitize_data(eff, min_val=0, max_val=100)
                            for eff in subject_data['sleep_efficiency'].values]
            axes[0, 0].plot(nights, efficiencies, marker='o',
                            linewidth=2, markersize=8, color=VIZ_COLORS['primary'])
            axes[0, 0].set_xlabel('Night')
            axes[0, 0].set_ylabel('Sleep Efficiency (%)')
            axes[0, 0].set_title(
                'Sleep Efficiency Across Nights', fontweight='bold')
            axes[0, 0].set_ylim(0, 100)
            axes[0, 0].axhline(y=85, color='red', linestyle='--',
                               alpha=0.7, label='Normal threshold')
            axes[0, 0].legend()
            axes[0, 0].grid(True, alpha=0.3)

            # REM percentage comparison
            rem_percentages = [sanitize_data(
                rem, min_val=0, max_val=50) for rem in subject_data['REM_percentage'].values]
            axes[0, 1].bar(nights, rem_percentages,
                           color=VIZ_COLORS['accent'], alpha=0.7)
            axes[0, 1].set_xlabel('Night')
            axes[0, 1].set_ylabel('REM (%)')
            axes[0, 1].set_title('REM Sleep Across Nights', fontweight='bold')
            axes[0, 1].set_ylim(0, 40)
            axes[0, 1].axhline(y=20, color='green',
                               linestyle='--', alpha=0.7, label='Normal range')
            axes[0, 1].axhline(y=25, color='green', linestyle='--', alpha=0.7)
            axes[0, 1].legend()

            # Sleep latencies comparison
            sleep_latencies = [sanitize_data(
                lat, min_val=0, max_val=300) for lat in subject_data['sleep_latency_min'].values]
            rem_latencies = [sanitize_data(
                lat, min_val=0, max_val=300) for lat in subject_data['rem_latency_min'].values]

            x_pos = range(len(nights))
            width = 0.35
            axes[1, 0].bar([x - width/2 for x in x_pos], sleep_latencies,
                           width, label='Sleep Onset', color=VIZ_COLORS['accent'])
            axes[1, 0].bar([x + width/2 for x in x_pos], rem_latencies,
                           width, label='REM Onset', color=VIZ_COLORS['secondary'])
            axes[1, 0].set_xlabel('Night')
            axes[1, 0].set_ylabel('Minutes')
            axes[1, 0].set_title(
                'Sleep Latencies Comparison', fontweight='bold')
            axes[1, 0].set_xticks(x_pos)
            axes[1, 0].set_xticklabels([f'Night {i}' for i in nights])
            axes[1, 0].set_ylim(
                0, max(max(sleep_latencies), max(rem_latencies)) * 1.2)
            axes[1, 0].legend()

            # Overall quality summary
            avg_efficiency = np.mean([sanitize_data(x, 0, 100)
                                     for x in subject_data['sleep_efficiency'].values])
            avg_rem = np.mean([sanitize_data(x, 0, 50)
                              for x in subject_data['REM_percentage'].values])
            avg_deep = np.mean([sanitize_data(x, 0, 50) for x in (
                subject_data['N3_percentage'] + subject_data['N4_percentage']).values])
            avg_waso = np.mean([sanitize_data(x, 0, 300)
                               for x in subject_data['waso_min'].values])

            metrics = [avg_efficiency, avg_rem,
                       avg_deep, 100 - min(avg_waso, 100)]
            metric_labels = ['Sleep\nEfficiency',
                             'REM\nSleep', 'Deep\nSleep', 'Sleep\nContinuity']

            bars = axes[1, 1].bar(metric_labels, metrics, color=[
                                  VIZ_COLORS['success'], VIZ_COLORS['accent'], VIZ_COLORS['primary'], VIZ_COLORS['secondary']])
            axes[1, 1].set_title(
                'Average Sleep Quality Metrics', fontweight='bold')
            axes[1, 1].set_ylabel('Percentage / Score')
            axes[1, 1].set_ylim(0, 100)

            # Add value labels
            for bar, value in zip(bars, metrics):
                axes[1, 1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 1,
                                f'{value:.1f}', ha='center', va='bottom', fontweight='bold')

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300,
                        bbox_inches='tight', facecolor='white')
            plt.close()
            return save_path
        else:
            img_buffer = io.BytesIO()
            plt.savefig(img_buffer, format='png', dpi=300,
                        bbox_inches='tight', facecolor='white')
            img_buffer.seek(0)
            plt.close()
            return img_buffer

    except Exception as e:
        print(f"❌ Error in create_sleep_architecture_chart: {str(e)}")
        plt.close()
        # Return a simple placeholder
        fig, ax = plt.subplots(1, 1, figsize=(12, 10))
        ax.text(0.5, 0.5, 'Sleep Architecture\nVisualization Error',
                ha='center', va='center', fontsize=16)
        ax.set_title('Sleep Architecture Analysis', fontweight='bold')

        if save_path:
            plt.savefig(save_path, dpi=300,
                        bbox_inches='tight', facecolor='white')
            plt.close()
            return save_path
        else:
            img_buffer = io.BytesIO()
            plt.savefig(img_buffer, format='png', dpi=300,
                        bbox_inches='tight', facecolor='white')
            img_buffer.seek(0)
            plt.close()
            return img_buffer


def create_eeg_power_analysis(subject_data, save_path=None):
    """Create EEG power spectral analysis with proper scaling"""

    try:
        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
        fig.suptitle('EEG Power Spectral Analysis', fontsize=16,
                     fontweight='bold', color=VIZ_COLORS['primary'])

        # EEG frequency bands
        eeg_bands = ['delta', 'theta', 'alpha', 'beta', 'gamma']
        band_labels = ['Delta\n(0.5-4 Hz)', 'Theta\n(4-8 Hz)',
                       'Alpha\n(8-12 Hz)', 'Beta\n(12-30 Hz)', 'Gamma\n(30-50 Hz)']
        band_colors = ['#8E44AD', '#3498DB', '#2ECC71', '#F39C12', '#E74C3C']

        # Get EEG channels
        eeg_channels = ['EEG Fpz-Cz', 'EEG Pz-Oz']

        for idx, channel in enumerate(eeg_channels):
            ax_row = idx

            if len(subject_data) == 1:
                # Single night
                night_data = subject_data.iloc[0]

                # Absolute power with sanitization
                power_values = []
                for band in eeg_bands:
                    col_name = f'{channel}_{band}_power'
                    power_val = night_data.get(col_name, 1e-6)
                    # Reasonable EEG power range
                    power_val = sanitize_data(
                        power_val, min_val=1e-8, max_val=1e-2)
                    power_values.append(power_val)

                axes[ax_row, 0].bar(band_labels, power_values,
                                    color=band_colors, alpha=0.7)
                axes[ax_row, 0].set_title(
                    f'{channel} - Absolute Power', fontweight='bold')
                axes[ax_row, 0].set_ylabel('Power (μV²/Hz)')
                axes[ax_row, 0].set_yscale('log')
                axes[ax_row, 0].set_ylim(
                    min(power_values) * 0.1, max(power_values) * 10)

                # Relative power with sanitization
                rel_power_values = []
                for band in eeg_bands:
                    col_name = f'{channel}_{band}_rel_power'
                    rel_power_val = night_data.get(col_name, 1)
                    rel_power_val = sanitize_data(
                        rel_power_val, min_val=0.1, max_val=99)
                    rel_power_values.append(rel_power_val)

                axes[ax_row, 1].bar(
                    band_labels, rel_power_values, color=band_colors, alpha=0.7)
                axes[ax_row, 1].set_title(
                    f'{channel} - Relative Power', fontweight='bold')
                axes[ax_row, 1].set_ylabel('Relative Power (%)')
                axes[ax_row, 1].set_ylim(0, 100)

            else:
                # Multiple nights
                nights = list(range(1, len(subject_data) + 1))

                # Average powers with sanitization
                avg_powers = []
                for band in eeg_bands:
                    col_name = f'{channel}_{band}_power'
                    powers = [sanitize_data(
                        val, 1e-8, 1e-2) for val in subject_data[col_name].values if col_name in subject_data.columns]
                    avg_power = np.mean(powers) if powers else 1e-6
                    avg_powers.append(avg_power)

                axes[ax_row, 0].bar(band_labels, avg_powers,
                                    color=band_colors, alpha=0.7)
                axes[ax_row, 0].set_title(
                    f'{channel} - Average Absolute Power', fontweight='bold')
                axes[ax_row, 0].set_ylabel('Power (μV²/Hz)')
                axes[ax_row, 0].set_yscale('log')
                axes[ax_row, 0].set_ylim(
                    min(avg_powers) * 0.1, max(avg_powers) * 10)

                # Night variation for key bands
                delta_col = f'{channel}_delta_power'
                alpha_col = f'{channel}_alpha_power'

                if delta_col in subject_data.columns and alpha_col in subject_data.columns:
                    delta_powers = [sanitize_data(
                        val, 1e-8, 1e-2) for val in subject_data[delta_col].values]
                    alpha_powers = [sanitize_data(
                        val, 1e-8, 1e-2) for val in subject_data[alpha_col].values]

                    axes[ax_row, 1].plot(
                        nights, delta_powers, marker='o', label='Delta', color=band_colors[0], linewidth=2)
                    axes[ax_row, 1].plot(
                        nights, alpha_powers, marker='s', label='Alpha', color=band_colors[2], linewidth=2)
                    axes[ax_row, 1].set_title(
                        f'{channel} - Night Variation', fontweight='bold')
                    axes[ax_row, 1].set_xlabel('Night')
                    axes[ax_row, 1].set_ylabel('Power (μV²/Hz)')
                    axes[ax_row, 1].set_yscale('log')
                    axes[ax_row, 1].legend()
                    axes[ax_row, 1].grid(True, alpha=0.3)
                else:
                    axes[ax_row, 1].text(0.5, 0.5, 'EEG Power\nData Unavailable',
                                         ha='center', va='center', transform=axes[ax_row, 1].transAxes)

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300,
                        bbox_inches='tight', facecolor='white')
            plt.close()
            return save_path
        else:
            img_buffer = io.BytesIO()
            plt.savefig(img_buffer, format='png', dpi=300,
                        bbox_inches='tight', facecolor='white')
            img_buffer.seek(0)
            plt.close()
            return img_buffer

    except Exception as e:
        print(f"❌ Error in create_eeg_power_analysis: {str(e)}")
        plt.close()
        # Return placeholder
        fig, ax = plt.subplots(1, 1, figsize=(12, 10))
        ax.text(0.5, 0.5, 'EEG Power Analysis\nVisualization Error',
                ha='center', va='center', fontsize=16)
        ax.set_title('EEG Power Spectral Analysis', fontweight='bold')

        if save_path:
            plt.savefig(save_path, dpi=300,
                        bbox_inches='tight', facecolor='white')
            plt.close()
            return save_path
        else:
            img_buffer = io.BytesIO()
            plt.savefig(img_buffer, format='png', dpi=300,
                        bbox_inches='tight', facecolor='white')
            img_buffer.seek(0)
            plt.close()
            return img_buffer


def create_signal_quality_analysis(subject_data, save_path=None):
    """Create signal quality analysis with bounds checking"""

    try:
        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
        fig.suptitle('Signal Quality & Physiological Analysis',
                     fontsize=16, fontweight='bold', color=VIZ_COLORS['primary'])

        # EMG analysis with sanitization
        emg_metrics = ['EMG submental_mean',
                       'EMG submental_std', 'EMG submental_rms']
        emg_labels = ['Mean', 'Std Dev', 'RMS']

        if len(subject_data) == 1:
            night_data = subject_data.iloc[0]

            # EMG metrics
            emg_values = []
            for metric in emg_metrics:
                val = night_data.get(metric, 0)
                # Reasonable EMG range
                val = sanitize_data(val, min_val=0, max_val=100)
                emg_values.append(val)

            bars = axes[0, 0].bar(emg_labels, emg_values,
                                  color=VIZ_COLORS['secondary'], alpha=0.7)
            axes[0, 0].set_title('EMG Activity Metrics', fontweight='bold')
            axes[0, 0].set_ylabel('Amplitude (μV)')
            axes[0, 0].set_ylim(0, max(emg_values) *
                                1.2 if max(emg_values) > 0 else 10)

            # Add value labels
            for bar, value in zip(bars, emg_values):
                axes[0, 0].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.1,
                                f'{value:.2f}', ha='center', va='bottom', fontweight='bold')

            # EOG analysis
            eog_metrics = ['EOG horizontal_mean',
                           'EOG horizontal_std', 'EOG horizontal_rms']
            eog_values = []
            for metric in eog_metrics:
                val = night_data.get(metric, 0)
                # Reasonable EOG range
                val = sanitize_data(val, min_val=0, max_val=200)
                eog_values.append(val)

            bars = axes[0, 1].bar(emg_labels, eog_values,
                                  color=VIZ_COLORS['accent'], alpha=0.7)
            axes[0, 1].set_title('EOG Activity Metrics', fontweight='bold')
            axes[0, 1].set_ylabel('Amplitude (μV)')
            axes[0, 1].set_ylim(0, max(eog_values) *
                                1.2 if max(eog_values) > 0 else 10)

            for bar, value in zip(bars, eog_values):
                axes[0, 1].text(bar.get_x() + bar.get_width()/2, bar.get_height() + 0.5,
                                f'{value:.2f}', ha='center', va='bottom', fontweight='bold')

        else:
            # Multiple nights comparison
            nights = list(range(1, len(subject_data) + 1))

            # EMG across nights
            if 'EMG submental_mean' in subject_data.columns and 'EMG submental_std' in subject_data.columns:
                emg_means = [sanitize_data(
                    val, 0, 100) for val in subject_data['EMG submental_mean'].values]
                emg_stds = [sanitize_data(
                    val, 0, 100) for val in subject_data['EMG submental_std'].values]

                axes[0, 0].plot(nights, emg_means, marker='o', label='Mean',
                                color=VIZ_COLORS['secondary'], linewidth=2)
                axes[0, 0].plot(nights, emg_stds, marker='s', label='Std Dev',
                                color=VIZ_COLORS['warning'], linewidth=2)
                axes[0, 0].set_title(
                    'EMG Activity Across Nights', fontweight='bold')
                axes[0, 0].set_xlabel('Night')
                axes[0, 0].set_ylabel('Amplitude (μV)')
                axes[0, 0].set_ylim(
                    0, max(max(emg_means), max(emg_stds)) * 1.2)
                axes[0, 0].legend()
                axes[0, 0].grid(True, alpha=0.3)
            else:
                axes[0, 0].text(0.5, 0.5, 'EMG Data\nUnavailable',
                                ha='center', va='center', transform=axes[0, 0].transAxes)
                axes[0, 0].set_title(
                    'EMG Activity Across Nights', fontweight='bold')

            # EOG across nights
            if 'EOG horizontal_mean' in subject_data.columns and 'EOG horizontal_std' in subject_data.columns:
                eog_means = [sanitize_data(
                    val, 0, 200) for val in subject_data['EOG horizontal_mean'].values]
                eog_stds = [sanitize_data(
                    val, 0, 200) for val in subject_data['EOG horizontal_std'].values]

                axes[0, 1].plot(nights, eog_means, marker='o',
                                label='Mean', color=VIZ_COLORS['accent'], linewidth=2)
                axes[0, 1].plot(nights, eog_stds, marker='s', label='Std Dev',
                                color=VIZ_COLORS['danger'], linewidth=2)
                axes[0, 1].set_title(
                    'EOG Activity Across Nights', fontweight='bold')
                axes[0, 1].set_xlabel('Night')
                axes[0, 1].set_ylabel('Amplitude (μV)')
                axes[0, 1].set_ylim(
                    0, max(max(eog_means), max(eog_stds)) * 1.2)
                axes[0, 1].legend()
                axes[0, 1].grid(True, alpha=0.3)
            else:
                axes[0, 1].text(0.5, 0.5, 'EOG Data\nUnavailable',
                                ha='center', va='center', transform=axes[0, 1].transAxes)
                axes[0, 1].set_title(
                    'EOG Activity Across Nights', fontweight='bold')

        # Recording quality indicators
        sampling_rates = [sanitize_data(val, 50, 1000)
                          for val in subject_data['sampling_rate'].values]
        durations = [sanitize_data(val, 1, 24)
                     for val in subject_data['duration_hours'].values]

        axes[1, 0].bar(range(len(subject_data)), sampling_rates,
                       color=VIZ_COLORS['success'], alpha=0.7)
        axes[1, 0].set_title('Sampling Rate Quality', fontweight='bold')
        axes[1, 0].set_ylabel('Sampling Rate (Hz)')
        axes[1, 0].set_xlabel('Recording')
        axes[1, 0].set_ylim(0, max(sampling_rates) * 1.1)

        axes[1, 1].bar(range(len(subject_data)), durations,
                       color=VIZ_COLORS['primary'], alpha=0.7)
        axes[1, 1].set_title('Recording Duration', fontweight='bold')
        axes[1, 1].set_ylabel('Duration (hours)')
        axes[1, 1].set_xlabel('Recording')
        axes[1, 1].set_ylim(0, max(durations) * 1.1)

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300,
                        bbox_inches='tight', facecolor='white')
            plt.close()
            return save_path
        else:
            img_buffer = io.BytesIO()
            plt.savefig(img_buffer, format='png', dpi=300,
                        bbox_inches='tight', facecolor='white')
            img_buffer.seek(0)
            plt.close()
            return img_buffer

    except Exception as e:
        print(f"❌ Error in create_signal_quality_analysis: {str(e)}")
        plt.close()
        # Return placeholder
        fig, ax = plt.subplots(1, 1, figsize=(12, 10))
        ax.text(0.5, 0.5, 'Signal Quality\nVisualization Error',
                ha='center', va='center', fontsize=16)
        ax.set_title('Signal Quality & Physiological Analysis',
                     fontweight='bold')

        if save_path:
            plt.savefig(save_path, dpi=300,
                        bbox_inches='tight', facecolor='white')
            plt.close()
            return save_path
        else:
            img_buffer = io.BytesIO()
            plt.savefig(img_buffer, format='png', dpi=300,
                        bbox_inches='tight', facecolor='white')
            img_buffer.seek(0)
            plt.close()
            return img_buffer


def create_comparative_analysis_chart(subject_data, population_data, save_path=None):
    """Create comparative analysis with robust error handling"""

    try:
        fig, axes = plt.subplots(2, 2, figsize=(12, 10))
        fig.suptitle('Comparative Population Analysis', fontsize=16,
                     fontweight='bold', color=VIZ_COLORS['primary'])

        # Subject's average values
        if len(subject_data) == 1:
            subject_avg = subject_data.iloc[0]
        else:
            subject_avg = subject_data.mean(numeric_only=True)

        # Population comparisons for key metrics
        key_metrics = ['sleep_efficiency', 'REM_percentage',
                       'sleep_latency_min', 'waso_min']
        metric_labels = [
            'Sleep Efficiency (%)', 'REM Sleep (%)', 'Sleep Latency (min)', 'WASO (min)']

        # Get population stats by study type
        study_type = subject_data.iloc[0]['study_type']
        same_study = population_data[population_data['study_type'] == study_type]
        other_study = population_data[population_data['study_type'] != study_type]

        for idx, (metric, label) in enumerate(zip(key_metrics, metric_labels)):
            ax = axes[idx//2, idx % 2]

            if metric in same_study.columns and metric in other_study.columns:
                # Population distributions with sanitization
                same_values = [sanitize_data(
                    val, 0, 1000) for val in same_study[metric].values if not pd.isna(val)]
                other_values = [sanitize_data(
                    val, 0, 1000) for val in other_study[metric].values if not pd.isna(val)]
                subject_value = sanitize_data(
                    subject_avg.get(metric, 0), 0, 1000)

                if same_values and other_values:
                    # Create box plots instead of violin plots for better reliability
                    box_data = [same_values, other_values]
                    box1 = ax.boxplot(box_data, positions=[
                                      1, 2], widths=0.6, patch_artist=True)

                    # Color the boxes
                    colors = [VIZ_COLORS['healthy'] if study_type == 'SC' else VIZ_COLORS['impaired'],
                              VIZ_COLORS['impaired'] if study_type == 'SC' else VIZ_COLORS['healthy']]

                    for patch, color in zip(box1['boxes'], colors):
                        patch.set_facecolor(color)
                        patch.set_alpha(0.6)

                    # Mark subject's value
                    subject_pos = 1 if study_type == same_study.iloc[0]['study_type'] else 2
                    ax.scatter([subject_pos], [subject_value], color='red',
                               s=100, zorder=10, marker='*', label='Subject')

                    ax.set_title(label, fontweight='bold')
                    ax.set_xticks([1, 2])
                    study_labels = ['Healthy Controls', 'Sleep Difficulty'] if study_type == 'SC' else [
                        'Sleep Difficulty', 'Healthy Controls']
                    ax.set_xticklabels(study_labels)
                    ax.grid(True, alpha=0.3)

                    if idx == 0:  # Add legend to first plot
                        ax.legend()
                else:
                    ax.text(0.5, 0.5, f'{label}\nData Unavailable',
                            ha='center', va='center', transform=ax.transAxes)
                    ax.set_title(label, fontweight='bold')
            else:
                ax.text(0.5, 0.5, f'{label}\nData Unavailable',
                        ha='center', va='center', transform=ax.transAxes)
                ax.set_title(label, fontweight='bold')

        plt.tight_layout()

        if save_path:
            plt.savefig(save_path, dpi=300,
                        bbox_inches='tight', facecolor='white')
            plt.close()
            return save_path
        else:
            img_buffer = io.BytesIO()
            plt.savefig(img_buffer, format='png', dpi=300,
                        bbox_inches='tight', facecolor='white')
            img_buffer.seek(0)
            plt.close()
            return img_buffer

    except Exception as e:
        print(f"❌ Error in create_comparative_analysis_chart: {str(e)}")
        plt.close()
        # Return placeholder
        fig, ax = plt.subplots(1, 1, figsize=(12, 10))
        ax.text(0.5, 0.5, 'Comparative Analysis\nVisualization Error',
                ha='center', va='center', fontsize=16)
        ax.set_title('Comparative Population Analysis', fontweight='bold')

        if save_path:
            plt.savefig(save_path, dpi=300,
                        bbox_inches='tight', facecolor='white')
            plt.close()
            return save_path
        else:
            img_buffer = io.BytesIO()
            plt.savefig(img_buffer, format='png', dpi=300,
                        bbox_inches='tight', facecolor='white')
            img_buffer.seek(0)
            plt.close()
            return img_buffer


print("✅ Fixed Visualization Functions Created with Data Sanitization")
print(f"📊 Key Improvements:")
print(f"    ├─ Data sanitization for extreme values")
print(f"    ├─ Proper bounds checking for plot dimensions")
print(f"    ├─ Error handling with placeholder generation")
print(f"    ├─ Safe logarithmic scaling")
print(f"    ├─ Matplotlib backend optimization")
print(f"    └─ Memory-efficient plot closure")

print(f"\n🔧 Data Sanitization Features:")
print(f"    ├─ Removes inf and NaN values")
print(f"    ├─ Bounds extreme values to reasonable ranges")
print(f"    ├─ Uses box plots instead of violin plots for reliability")
print(f"    ├─ Safe logarithmic scaling with minimum values")
print(f"    └─ Graceful error handling with informative placeholders")

print(f"\n✅ Cell 3 (Fixed) Complete - Ready for Testing")
print(f"🚀 Run this version and then proceed with Cell 5 robust generation")

📊 Creating Fixed Visualization Functions (Data Sanitization)
✅ Fixed Visualization Functions Created with Data Sanitization
📊 Key Improvements:
    ├─ Data sanitization for extreme values
    ├─ Proper bounds checking for plot dimensions
    ├─ Error handling with placeholder generation
    ├─ Safe logarithmic scaling
    ├─ Matplotlib backend optimization
    └─ Memory-efficient plot closure

🔧 Data Sanitization Features:
    ├─ Removes inf and NaN values
    ├─ Bounds extreme values to reasonable ranges
    ├─ Uses box plots instead of violin plots for reliability
    ├─ Safe logarithmic scaling with minimum values
    └─ Graceful error handling with informative placeholders

✅ Cell 3 (Fixed) Complete - Ready for Testing
🚀 Run this version and then proceed with Cell 5 robust generation


In [15]:
# Cell 4: PDF Report Assembly Functions
# Complete PDF report generation system for individual sleep analysis

from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer, Image, PageBreak, Table, TableStyle, KeepTogether
from reportlab.lib.utils import ImageReader
import tempfile
import os

print("📄 Creating PDF Report Assembly Functions")
print("=" * 50)


def create_report_header(subject_data, styles):
    """Create report header with title and subject info"""

    elements = []
    subject_info = subject_data.iloc[0]

    # Main title
    title = Paragraph("Individual Sleep Analysis Report",
                      styles['ReportTitle'])
    elements.append(title)

    # Subtitle with subject ID
    subtitle = Paragraph(f"Subject ID: {subject_info['subject_id']} | " +
                         f"{'Healthy Control' if subject_info['study_type'] == 'SC' else 'Sleep Difficulty'} Study",
                         styles['ReportSubtitle'])
    elements.append(subtitle)

    # Date and analysis info
    analysis_info = Paragraph(f"Analysis Date: {datetime.now().strftime('%B %d, %Y')} | " +
                              f"Nights Analyzed: {len(subject_data)} | " +
                              f"Report Generated by: Sleep-EDF Analysis System",
                              styles['Footer'])
    elements.append(analysis_info)
    elements.append(Spacer(1, 20))

    return elements


def create_subject_info_section(subject_data, styles):
    """Create subject demographics and study information section"""

    elements = []
    subject_info = subject_data.iloc[0]

    # Section header
    header = Paragraph("Subject Information", styles['SectionHeader'])
    elements.append(header)

    # Demographics table
    demo_data = create_subject_demographics_table(subject_data)

    # Create table
    demo_table = Table(demo_data, colWidths=[2*inch, 3*inch])
    demo_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (0, -1), COLORS['light_gray']),
        ('TEXTCOLOR', (0, 0), (-1, -1), COLORS['text']),
        ('ALIGN', (0, 0), (-1, -1), 'LEFT'),
        ('FONTNAME', (0, 0), (0, -1), 'Helvetica-Bold'),
        ('FONTNAME', (1, 0), (1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 0), (-1, -1), 11),
        ('GRID', (0, 0), (-1, -1), 1, COLORS['dark_gray']),
        ('VALIGN', (0, 0), (-1, -1), 'MIDDLE'),
        ('ROWBACKGROUNDS', (0, 0), (-1, -1), [COLORS['bg_light'], None])
    ]))

    elements.append(demo_table)
    elements.append(Spacer(1, 20))

    return elements


def create_executive_summary_section(subject_data, population_data, styles):
    """Create executive summary with key metrics"""

    elements = []

    # Section header
    header = Paragraph("Executive Summary", styles['SectionHeader'])
    elements.append(header)

    # Calculate key metrics
    metrics = calculate_key_metrics(subject_data)
    subject_info = subject_data.iloc[0]

    # Key findings paragraph
    if len(subject_data) == 1:
        summary_text = f"""
        This report presents a comprehensive analysis of a single night polysomnographic recording for 
        Subject {subject_info['subject_id']}, a {subject_info['age']}-year-old {subject_info['sex']} participant 
        from the {'Sleep Cassette (healthy controls)' if subject_info['study_type'] == 'SC' else 'Sleep Telemetry (sleep difficulty)'} study.
        """
    else:
        conditions = subject_data['condition'].unique()
        condition_text = f" under {' and '.join(conditions)} conditions" if len(
            conditions) > 1 else f" under {conditions[0]} condition"
        summary_text = f"""
        This report presents a comprehensive analysis of {len(subject_data)} night polysomnographic recordings for 
        Subject {subject_info['subject_id']}, a {subject_info['age']}-year-old {subject_info['sex']} participant 
        from the {'Sleep Cassette (healthy controls)' if subject_info['study_type'] == 'SC' else 'Sleep Telemetry (sleep difficulty)'} study{condition_text}.
        """

    summary_para = Paragraph(summary_text, styles['ReportBody'])
    elements.append(summary_para)
    elements.append(Spacer(1, 12))

    # Key metrics table
    metrics_data = [
        ['Metric', 'Value', 'Clinical Interpretation'],
        ['Sleep Efficiency', metrics['sleep_efficiency'],
         'Normal (≥85%)' if float(metrics['sleep_efficiency'].rstrip('%')) >= 85 else 'Below Normal (<85%)'],
        ['Sleep Latency', metrics['sleep_latency'],
         'Normal (≤30min)' if float(metrics['sleep_latency'].rstrip(' min')) <= 30 else 'Prolonged (>30min)'],
        ['REM Latency', metrics['rem_latency'],
         'Normal (60-120min)' if 60 <= float(metrics['rem_latency'].rstrip(' min')) <= 120 else 'Atypical'],
        ['REM Sleep', metrics['rem_percentage'],
         'Normal (20-25%)' if 20 <= float(metrics['rem_percentage'].rstrip('%')) <= 25 else 'Atypical'],
        ['Wake After Sleep Onset', metrics['waso'],
         'Normal (≤30min)' if float(metrics['waso'].rstrip(' min')) <= 30 else 'Elevated (>30min)']
    ]

    metrics_table = Table(metrics_data, colWidths=[2*inch, 1.5*inch, 2*inch])
    metrics_table.setStyle(TableStyle([
        ('BACKGROUND', (0, 0), (-1, 0), COLORS['primary']),
        ('TEXTCOLOR', (0, 0), (-1, 0), colors.whitesmoke),
        ('ALIGN', (0, 0), (-1, -1), 'CENTER'),
        ('FONTNAME', (0, 0), (-1, 0), 'Helvetica-Bold'),
        ('FONTNAME', (0, 1), (-1, -1), 'Helvetica'),
        ('FONTSIZE', (0, 0), (-1, -1), 10),
        ('GRID', (0, 0), (-1, -1), 1, COLORS['dark_gray']),
        ('ROWBACKGROUNDS', (0, 1), (-1, -1), [COLORS['bg_light'], None])
    ]))

    elements.append(metrics_table)
    elements.append(Spacer(1, 20))

    return elements


def create_visualization_section(subject_data, population_data, section_title, viz_function, styles):
    """Generic function to create a section with visualization"""

    elements = []

    # Section header
    header = Paragraph(section_title, styles['SectionHeader'])
    elements.append(header)

    # Generate visualization
    img_buffer = viz_function(subject_data) if 'comparative' not in viz_function.__name__ else viz_function(
        subject_data, population_data)

    # Use ImageReader directly with BytesIO buffer (no temporary file needed)
    img_buffer.seek(0)  # Reset buffer position
    img_reader = ImageReader(img_buffer)
    img = Image(img_reader, width=6.5*inch, height=5.2*inch)
    elements.append(img)

    elements.append(Spacer(1, 15))

    return elements


def create_clinical_interpretation_section(subject_data, population_data, styles):
    """Create clinical interpretation and recommendations section"""

    elements = []
    subject_info = subject_data.iloc[0]

    # Section header
    header = Paragraph(
        "Clinical Interpretation & Recommendations", styles['SectionHeader'])
    elements.append(header)

    # Calculate metrics for interpretation
    if len(subject_data) == 1:
        metrics = subject_data.iloc[0]
    else:
        metrics = subject_data.mean(numeric_only=True)

    # Sleep quality assessment
    sleep_efficiency = metrics['sleep_efficiency']
    rem_percentage = metrics['REM_percentage']
    deep_sleep = metrics['N3_percentage'] + metrics['N4_percentage']
    waso = metrics['waso_min']

    # Overall assessment
    assessment_header = Paragraph(
        "Overall Sleep Health Assessment", styles['SubsectionHeader'])
    elements.append(assessment_header)

    # Determine overall sleep quality
    quality_score = 0
    if sleep_efficiency >= 85:
        quality_score += 1
    if 20 <= rem_percentage <= 25:
        quality_score += 1
    if deep_sleep >= 15:
        quality_score += 1
    if waso <= 30:
        quality_score += 1

    if quality_score >= 3:
        overall_assessment = "Excellent to Good sleep quality with most metrics within normal ranges."
        quality_level = "GOOD"
        quality_color = COLORS['success']
    elif quality_score == 2:
        overall_assessment = "Fair sleep quality with some metrics outside normal ranges."
        quality_level = "FAIR"
        quality_color = COLORS['warning']
    else:
        overall_assessment = "Poor sleep quality with multiple metrics outside normal ranges."
        quality_level = "POOR"
        quality_color = COLORS['danger']

    assessment_text = f"""
    <b>Sleep Quality Level: <font color='{quality_color}'>{quality_level}</font></b><br/><br/>
    {overall_assessment} The subject's sleep architecture shows:
    <br/><br/>
    • Sleep Efficiency: {sleep_efficiency:.1f}% ({'Normal' if sleep_efficiency >= 85 else 'Below normal'})<br/>
    • REM Sleep: {rem_percentage:.1f}% ({'Normal' if 20 <= rem_percentage <= 25 else 'Atypical'})<br/>
    • Deep Sleep: {deep_sleep:.1f}% ({'Adequate' if deep_sleep >= 15 else 'Reduced'})<br/>
    • Sleep Continuity: {'Good' if waso <= 30 else 'Fragmented'} (WASO: {waso:.1f} min)
    """

    assessment_para = Paragraph(assessment_text, styles['ReportBody'])
    elements.append(assessment_para)
    elements.append(Spacer(1, 15))

    # Specific findings
    findings_header = Paragraph("Key Findings", styles['SubsectionHeader'])
    elements.append(findings_header)

    findings = []

    # Sleep efficiency findings
    if sleep_efficiency < 85:
        findings.append(
            f"• <b>Reduced Sleep Efficiency</b>: At {sleep_efficiency:.1f}%, sleep efficiency is below the normal threshold of 85%, indicating potential sleep quality issues.")
    else:
        findings.append(
            f"• <b>Good Sleep Efficiency</b>: At {sleep_efficiency:.1f}%, sleep efficiency is within normal range, indicating good sleep quality.")

    # REM findings
    if rem_percentage < 20:
        findings.append(
            f"• <b>Reduced REM Sleep</b>: REM sleep comprises {rem_percentage:.1f}% of total sleep, which is below the normal range of 20-25%.")
    elif rem_percentage > 25:
        findings.append(
            f"• <b>Elevated REM Sleep</b>: REM sleep comprises {rem_percentage:.1f}% of total sleep, which is above the typical range of 20-25%.")
    else:
        findings.append(
            f"• <b>Normal REM Sleep</b>: REM sleep comprises {rem_percentage:.1f}% of total sleep, which is within the normal range.")

    # Deep sleep findings
    if deep_sleep < 15:
        findings.append(
            f"• <b>Reduced Deep Sleep</b>: Deep sleep stages (N3+N4) comprise {deep_sleep:.1f}% of sleep, which may indicate reduced sleep restoration.")
    else:
        findings.append(
            f"• <b>Adequate Deep Sleep</b>: Deep sleep stages comprise {deep_sleep:.1f}% of sleep, indicating good restorative sleep.")

    # Multiple nights specific findings
    if len(subject_data) > 1:
        night_variability = subject_data['sleep_efficiency'].std()
        if night_variability > 10:
            findings.append(
                f"• <b>High Night-to-Night Variability</b>: Sleep efficiency varies significantly across nights (SD: {night_variability:.1f}%), suggesting inconsistent sleep patterns.")

        # ST study specific - condition comparison
        if subject_info['study_type'] == 'ST':
            placebo_data = subject_data[subject_data['condition'] == 'placebo']
            temazepam_data = subject_data[subject_data['condition']
                                          == 'temazepam']

            if len(placebo_data) > 0 and len(temazepam_data) > 0:
                placebo_eff = placebo_data['sleep_efficiency'].iloc[0]
                temazepam_eff = temazepam_data['sleep_efficiency'].iloc[0]
                diff = temazepam_eff - placebo_eff

                if abs(diff) > 5:
                    findings.append(
                        f"• <b>Medication Effect</b>: Temazepam {'improved' if diff > 0 else 'reduced'} sleep efficiency by {abs(diff):.1f}% compared to placebo night.")

    findings_text = "<br/>".join(findings)
    findings_para = Paragraph(findings_text, styles['ReportBody'])
    elements.append(findings_para)
    elements.append(Spacer(1, 15))

    # Recommendations
    recommendations_header = Paragraph(
        "Recommendations", styles['SubsectionHeader'])
    elements.append(recommendations_header)

    recommendations = []

    if sleep_efficiency < 85:
        recommendations.append(
            "• Consider sleep hygiene counseling and evaluation of factors affecting sleep quality")

    if rem_percentage < 20:
        recommendations.append(
            "• Evaluate for potential REM sleep disorders or medications affecting REM sleep")

    if deep_sleep < 15:
        recommendations.append(
            "• Assess sleep environment and factors that may be disrupting deep sleep stages")

    if waso > 30:
        recommendations.append(
            "• Investigation of factors causing sleep fragmentation may be beneficial")

    if len(subject_data) > 1 and subject_data['sleep_efficiency'].std() > 10:
        recommendations.append(
            "• Sleep diary and lifestyle factor assessment recommended due to high night-to-night variability")

    if not recommendations:  # If sleep is good
        recommendations.append(
            "• Continue current sleep practices as sleep quality metrics are within normal ranges")
        recommendations.append(
            "• Maintain good sleep hygiene for continued sleep health")

    recommendations_text = "<br/>".join(recommendations)
    recommendations_para = Paragraph(
        recommendations_text, styles['ReportBody'])
    elements.append(recommendations_para)
    elements.append(Spacer(1, 20))

    return elements


def generate_individual_report(subject_data, population_data, output_path, styles):
    """Generate complete individual sleep report PDF"""

    print(
        f"📄 Generating report for Subject {subject_data.iloc[0]['subject_id']}...")

    # Create PDF document
    doc = SimpleDocTemplate(str(output_path),
                            pagesize=REPORT_CONFIG['page_size'],
                            rightMargin=REPORT_CONFIG['margins']['right'],
                            leftMargin=REPORT_CONFIG['margins']['left'],
                            topMargin=REPORT_CONFIG['margins']['top'],
                            bottomMargin=REPORT_CONFIG['margins']['bottom'])

    elements = []

    # 1. Header and Subject Info
    elements.extend(create_report_header(subject_data, styles))
    elements.extend(create_subject_info_section(subject_data, styles))

    # 2. Executive Summary
    elements.extend(create_executive_summary_section(
        subject_data, population_data, styles))
    elements.append(PageBreak())

    # 3. Sleep Architecture Analysis
    elements.extend(create_visualization_section(
        subject_data, population_data,
        "Sleep Architecture Analysis",
        create_sleep_architecture_chart, styles))

    # 4. EEG Power Analysis
    elements.extend(create_visualization_section(
        subject_data, population_data,
        "Neurophysiological Analysis - EEG Power Spectrum",
        create_eeg_power_analysis, styles))
    elements.append(PageBreak())

    # 5. Signal Quality Analysis
    elements.extend(create_visualization_section(
        subject_data, population_data,
        "Signal Quality & Physiological Assessment",
        create_signal_quality_analysis, styles))

    # 6. Comparative Analysis
    elements.extend(create_visualization_section(
        subject_data, population_data,
        "Population Comparative Analysis",
        create_comparative_analysis_chart, styles))
    elements.append(PageBreak())

    # 7. Clinical Interpretation
    elements.extend(create_clinical_interpretation_section(
        subject_data, population_data, styles))

    # Build PDF
    doc.build(elements)
    print(f"✅ Report generated: {output_path}")

    return output_path


def generate_test_reports(test_subjects_info, population_data, styles):
    """Generate test reports for selected subjects"""

    print(f"\n🧪 Generating Test Reports...")
    print("=" * 35)

    generated_reports = []

    for study_type, subject_info in test_subjects_info.items():
        subject_id = subject_info['subject_id']
        subject_data = subject_info['data']

        # Create output filename
        study_label = 'SC' if study_type == 'sc_subject' else 'ST'
        output_filename = f"Individual_Sleep_Report_{study_label}_{subject_id}.pdf"
        output_path = REPORTS_PATH / output_filename

        try:
            # Generate report
            report_path = generate_individual_report(
                subject_data, population_data, output_path, styles)
            generated_reports.append(report_path)

            print(f"✅ {study_label} Subject {subject_id}: {output_filename}")

        except Exception as e:
            print(
                f"❌ Error generating report for {study_label} Subject {subject_id}: {str(e)}")

    return generated_reports


print("✅ PDF Report Assembly Functions Created")
print(f"📄 Available Functions:")
print(f"    ├─ create_report_header()")
print(f"    ├─ create_subject_info_section()")
print(f"    ├─ create_executive_summary_section()")
print(f"    ├─ create_visualization_section()")
print(f"    ├─ create_clinical_interpretation_section()")
print(f"    ├─ generate_individual_report()")
print(f"    └─ generate_test_reports()")

print(f"\n📋 Report Structure:")
print(f"    ├─ Professional header with subject info")
print(f"    ├─ Executive summary with key metrics")
print(f"    ├─ Sleep architecture visualizations")
print(f"    ├─ EEG power spectrum analysis")
print(f"    ├─ Signal quality assessment")
print(f"    ├─ Population comparative analysis")
print(f"    ├─ Clinical interpretation & recommendations")
print(f"    └─ Multi-page PDF with professional formatting")

print(f"\n✅ Cell 4 Complete - Ready to Generate Test Reports")
print(f"🚀 Next: Execute test report generation")

📄 Creating PDF Report Assembly Functions
✅ PDF Report Assembly Functions Created
📄 Available Functions:
    ├─ create_report_header()
    ├─ create_subject_info_section()
    ├─ create_executive_summary_section()
    ├─ create_visualization_section()
    ├─ create_clinical_interpretation_section()
    ├─ generate_individual_report()
    └─ generate_test_reports()

📋 Report Structure:
    ├─ Professional header with subject info
    ├─ Executive summary with key metrics
    ├─ Sleep architecture visualizations
    ├─ EEG power spectrum analysis
    ├─ Signal quality assessment
    ├─ Population comparative analysis
    ├─ Clinical interpretation & recommendations
    └─ Multi-page PDF with professional formatting

✅ Cell 4 Complete - Ready to Generate Test Reports
🚀 Next: Execute test report generation


In [16]:
# Cell 5: Test Report Generation (Robust Image Handling)
# Generate individual sleep reports with reliable image handling

print("🚀 Executing Test Report Generation (Robust Version)")
print("=" * 55)

# Create a robust image handling function


def create_visualization_section_robust(subject_data, population_data, section_title, viz_function, styles, temp_dir):
    """Robust function to create a section with visualization using saved files"""

    elements = []

    # Section header
    header = Paragraph(section_title, styles['SectionHeader'])
    elements.append(header)

    # Create unique filename for this visualization
    subject_id = subject_data.iloc[0]['subject_id']
    study_type = subject_data.iloc[0]['study_type']
    viz_name = viz_function.__name__.replace('create_', '').replace(
        '_chart', '').replace('_analysis', '')

    img_filename = f"{study_type}_subject_{subject_id}_{viz_name}.png"
    img_path = temp_dir / img_filename

    try:
        # Generate visualization and save to file
        if 'comparative' in viz_function.__name__:
            viz_function(subject_data, population_data, save_path=img_path)
        else:
            viz_function(subject_data, save_path=img_path)

        # Verify file was created
        if img_path.exists():
            # Add image to PDF
            img = Image(str(img_path), width=6.5*inch, height=5.2*inch)
            elements.append(img)
            print(f"    ✅ Added visualization: {img_filename}")
        else:
            print(f"    ❌ Failed to create visualization: {img_filename}")
            # Add placeholder text
            placeholder = Paragraph(
                f"[Visualization: {section_title}]", styles['ReportBody'])
            elements.append(placeholder)

    except Exception as e:
        print(f"    ❌ Error creating visualization {viz_name}: {str(e)}")
        # Add error placeholder
        placeholder = Paragraph(
            f"[Error generating {section_title} visualization]", styles['ReportBody'])
        elements.append(placeholder)

    elements.append(Spacer(1, 15))

    return elements


def generate_individual_report_robust(subject_data, population_data, output_path, styles):
    """Generate complete individual sleep report PDF with robust image handling"""

    subject_id = subject_data.iloc[0]['subject_id']
    study_type = subject_data.iloc[0]['study_type']
    print(f"📄 Generating report for {study_type} Subject {subject_id}...")

    # Create temporary directory for images
    temp_images_dir = REPORTS_PATH / f"temp_images_{study_type}_{subject_id}"
    temp_images_dir.mkdir(exist_ok=True)

    try:
        # Create PDF document
        doc = SimpleDocTemplate(str(output_path),
                                pagesize=REPORT_CONFIG['page_size'],
                                rightMargin=REPORT_CONFIG['margins']['right'],
                                leftMargin=REPORT_CONFIG['margins']['left'],
                                topMargin=REPORT_CONFIG['margins']['top'],
                                bottomMargin=REPORT_CONFIG['margins']['bottom'])

        elements = []

        print(f"    📝 Creating report sections...")

        # 1. Header and Subject Info
        elements.extend(create_report_header(subject_data, styles))
        elements.extend(create_subject_info_section(subject_data, styles))
        print(f"    ✅ Added header and subject info")

        # 2. Executive Summary
        elements.extend(create_executive_summary_section(
            subject_data, population_data, styles))
        elements.append(PageBreak())
        print(f"    ✅ Added executive summary")

        # 3. Sleep Architecture Analysis
        elements.extend(create_visualization_section_robust(
            subject_data, population_data,
            "Sleep Architecture Analysis",
            create_sleep_architecture_chart, styles, temp_images_dir))

        # 4. EEG Power Analysis
        elements.extend(create_visualization_section_robust(
            subject_data, population_data,
            "Neurophysiological Analysis - EEG Power Spectrum",
            create_eeg_power_analysis, styles, temp_images_dir))
        elements.append(PageBreak())

        # 5. Signal Quality Analysis
        elements.extend(create_visualization_section_robust(
            subject_data, population_data,
            "Signal Quality & Physiological Assessment",
            create_signal_quality_analysis, styles, temp_images_dir))

        # 6. Comparative Analysis
        elements.extend(create_visualization_section_robust(
            subject_data, population_data,
            "Population Comparative Analysis",
            create_comparative_analysis_chart, styles, temp_images_dir))
        elements.append(PageBreak())

        # 7. Clinical Interpretation
        elements.extend(create_clinical_interpretation_section(
            subject_data, population_data, styles))
        print(f"    ✅ Added clinical interpretation")

        # Build PDF
        print(f"    📄 Building PDF document...")
        doc.build(elements)
        print(f"    ✅ PDF generated successfully")

        # Clean up temporary images
        import shutil
        if temp_images_dir.exists():
            shutil.rmtree(temp_images_dir)
            print(f"    🗑️  Cleaned up temporary images")

        return output_path

    except Exception as e:
        print(f"    ❌ Error during PDF generation: {str(e)}")
        # Clean up on error
        import shutil
        if temp_images_dir.exists():
            shutil.rmtree(temp_images_dir)
        raise e


def generate_test_reports_robust(test_subjects_info, population_data, styles):
    """Generate test reports with robust error handling"""

    print(f"\n🧪 Generating Test Reports (Robust Method)...")
    print("=" * 45)

    generated_reports = []

    for study_type, subject_info in test_subjects_info.items():
        subject_id = subject_info['subject_id']
        subject_data = subject_info['data']

        # Create output filename
        study_label = 'SC' if study_type == 'sc_subject' else 'ST'
        output_filename = f"Individual_Sleep_Report_{study_label}_{subject_id}.pdf"
        output_path = REPORTS_PATH / output_filename

        try:
            # Generate report using robust method
            report_path = generate_individual_report_robust(
                subject_data, population_data, output_path, styles)
            generated_reports.append(report_path)

            # Verify file size
            file_size_kb = report_path.stat().st_size / 1024
            print(
                f"✅ {study_label} Subject {subject_id}: {output_filename} ({file_size_kb:.1f} KB)")

        except Exception as e:
            print(
                f"❌ Error generating report for {study_label} Subject {subject_id}: {str(e)}")
            import traceback
            traceback.print_exc()

    return generated_reports


# Run the robust report generation
print("🔍 Pre-flight Check...")

# Verify required components
required_components = [
    ('test_subjects_info', 'Test subjects data'),
    ('df', 'Population dataset'),
    ('REPORT_CONFIG', 'Report configuration'),
    ('create_sleep_architecture_chart', 'Sleep architecture visualization'),
    ('create_eeg_power_analysis', 'EEG power analysis visualization'),
    ('create_signal_quality_analysis', 'Signal quality visualization'),
    ('create_comparative_analysis_chart', 'Comparative analysis visualization')
]

all_ready = True
for component, description in required_components:
    if component in globals():
        print(f"✅ {description} available")
    else:
        print(f"❌ Missing: {description}")
        all_ready = False

if not all_ready:
    print("❌ Cannot proceed - missing required components")
else:
    print(f"\n📊 Test Subjects Summary:")
    print("-" * 30)

    for study_key, subject_info in test_subjects_info.items():
        subject_id = subject_info['subject_id']
        subject_data = subject_info['data']
        n_nights = subject_info['n_nights']
        study_type = 'SC (Healthy)' if study_key == 'sc_subject' else 'ST (Sleep Difficulty)'

        print(f"{study_type} Subject:")
        print(f"  └─ ID: {subject_id}")
        print(f"  └─ Nights: {n_nights}")
        print(f"  └─ Age: {subject_data.iloc[0]['age']}")
        print(f"  └─ Sex: {subject_data.iloc[0]['sex']}")

        if n_nights > 1 and 'condition' in subject_data.columns:
            conditions = subject_data['condition'].dropna().tolist()
            if conditions:
                print(f"  └─ Conditions: {conditions}")

    print(f"\n🎯 Starting Robust Report Generation...")
    print("=" * 45)

    try:
        # Generate reports using robust method
        generated_reports = generate_test_reports_robust(
            test_subjects_info,
            df,  # Population data for comparisons
            REPORT_CONFIG['styles']
        )

        if generated_reports:
            print(f"\n🎉 Report Generation Successful!")
            print(f"📁 Generated {len(generated_reports)} reports:")

            total_size_kb = 0
            for report_path in generated_reports:
                file_size = report_path.stat().st_size / 1024  # Size in KB
                total_size_kb += file_size
                print(f"  └─ {report_path.name} ({file_size:.1f} KB)")

            print(f"\n📂 Reports saved to: {REPORTS_PATH}")

            # Quality assessment
            print(f"\n🔍 Report Quality Assessment:")
            print("-" * 30)

            excellent_reports = 0
            good_reports = 0

            for report_path in generated_reports:
                file_size_kb = report_path.stat().st_size / 1024

                if file_size_kb > 200:  # Lowered threshold for realistic expectations
                    quality_status = "✅ EXCELLENT (with visualizations)"
                    excellent_reports += 1
                elif file_size_kb > 50:
                    quality_status = "✅ GOOD (proper content)"
                    good_reports += 1
                else:
                    quality_status = "⚠️  BASIC (minimal content)"

                print(
                    f"  {report_path.name}: {file_size_kb:.1f} KB - {quality_status}")

            print(f"\n📊 Generation Statistics:")
            print(f"  ├─ Total reports: {len(generated_reports)}")
            print(f"  ├─ Excellent quality: {excellent_reports}")
            print(f"  ├─ Good quality: {good_reports}")
            print(
                f"  ├─ Average size: {total_size_kb/len(generated_reports):.1f} KB")
            print(f"  └─ Total size: {total_size_kb:.1f} KB")

            # Store results
            TEST_GENERATION_RESULTS = {
                'generated_reports': generated_reports,
                'generation_success': True,
                'total_reports': len(generated_reports),
                'reports_path': REPORTS_PATH,
                'average_file_size_kb': total_size_kb / len(generated_reports),
                'excellent_count': excellent_reports,
                'good_count': good_reports
            }

            print(f"\n🎯 Next Steps:")
            print("=" * 15)
            print(f"1. ✅ Open and review the generated PDF reports")
            print(f"2. ✅ Verify all sections and visualizations appear correctly")
            print(f"3. ✅ Check clinical interpretations for accuracy")
            print(f"4. 🚀 If satisfied, proceed to Cell 6 for full-scale generation")

        else:
            print("❌ No reports were generated successfully.")
            TEST_GENERATION_RESULTS = {
                'generated_reports': [],
                'generation_success': False,
                'error_message': "No reports generated"
            }

    except Exception as e:
        print(f"❌ Critical error during report generation: {str(e)}")
        import traceback
        traceback.print_exc()

        TEST_GENERATION_RESULTS = {
            'generated_reports': [],
            'generation_success': False,
            'error_message': str(e)
        }

print(f"\n✅ Cell 5 Complete!")

if TEST_GENERATION_RESULTS.get('generation_success', False):
    print(f"🎉 SUCCESS! Professional individual sleep reports generated")
    print(f"📁 Location: {REPORTS_PATH}")
else:
    print(f"❌ Generation failed. Please review error details above.")

🚀 Executing Test Report Generation (Robust Version)
🔍 Pre-flight Check...
✅ Test subjects data available
✅ Population dataset available
✅ Report configuration available
✅ Sleep architecture visualization available
✅ EEG power analysis visualization available
✅ Signal quality visualization available
✅ Comparative analysis visualization available

📊 Test Subjects Summary:
------------------------------
SC (Healthy) Subject:
  └─ ID: 0
  └─ Nights: 2
  └─ Age: 33
  └─ Sex: F
ST (Sleep Difficulty) Subject:
  └─ ID: 1
  └─ Nights: 2
  └─ Age: 60
  └─ Sex: M
  └─ Conditions: ['temazepam', 'placebo']

🎯 Starting Robust Report Generation...

🧪 Generating Test Reports (Robust Method)...
📄 Generating report for SC Subject 0...
    📝 Creating report sections...
    ✅ Added header and subject info
    ✅ Added executive summary
    ✅ Added visualization: SC_subject_0_sleep_architecture.png
    ✅ Added visualization: SC_subject_0_eeg_power.png
    ✅ Added visualization: SC_subject_0_signal_quality.p

In [17]:
# Cell 6: Full-Scale Report Generation for All 100 Subjects
# Generate individual sleep reports for all unique subjects in the dataset

import time
from datetime import datetime

print("🚀 Full-Scale Individual Report Generation System")
print("=" * 60)


def create_academic_footer(styles):
    """Create academic footer with student information"""

    current_time = datetime.now().strftime('%B %d, %Y at %I:%M %p')

    footer_text = f"""
    <br/><br/>
    <b>Report Analysis and Generation:</b><br/>
    Report Analysed and created by the following students of IIIT Allahabad,<br/>
    Part of Big Data Analytics Course:<br/>
    • Aditya Singh Mertia (IIT2022125) - [iit2022125@iiita.ac.in]<br/>
    • Rishabh Kumar (IIT2022131) - [iit2022131@iiita.ac.in]<br/>
    • Karan Singh (IIT2022132) - [iit2022132@iiita.ac.in]<br/>
    • Tejas Sharma (IIT2022161) - [iit2022161@iiita.ac.in]<br/><br/>
    Report Version: 1.0 | Generated: {current_time}
    """

    footer_paragraph = Paragraph(footer_text, styles['Footer'])
    return footer_paragraph


def generate_individual_report_with_footer(subject_data, population_data, output_path, styles):
    """Generate complete individual sleep report PDF with academic footer"""

    subject_id = subject_data.iloc[0]['subject_id']
    study_type = subject_data.iloc[0]['study_type']

    # Create temporary directory for images
    temp_images_dir = REPORTS_PATH / f"temp_images_{study_type}_{subject_id}"
    temp_images_dir.mkdir(exist_ok=True)

    try:
        # Create PDF document
        doc = SimpleDocTemplate(str(output_path),
                                pagesize=REPORT_CONFIG['page_size'],
                                rightMargin=REPORT_CONFIG['margins']['right'],
                                leftMargin=REPORT_CONFIG['margins']['left'],
                                topMargin=REPORT_CONFIG['margins']['top'],
                                bottomMargin=REPORT_CONFIG['margins']['bottom'])

        elements = []

        # 1. Header and Subject Info
        elements.extend(create_report_header(subject_data, styles))
        elements.extend(create_subject_info_section(subject_data, styles))

        # 2. Executive Summary
        elements.extend(create_executive_summary_section(
            subject_data, population_data, styles))
        elements.append(PageBreak())

        # 3. Sleep Architecture Analysis
        elements.extend(create_visualization_section_robust(
            subject_data, population_data,
            "Sleep Architecture Analysis",
            create_sleep_architecture_chart, styles, temp_images_dir))

        # 4. EEG Power Analysis
        elements.extend(create_visualization_section_robust(
            subject_data, population_data,
            "Neurophysiological Analysis - EEG Power Spectrum",
            create_eeg_power_analysis, styles, temp_images_dir))
        elements.append(PageBreak())

        # 5. Signal Quality Analysis
        elements.extend(create_visualization_section_robust(
            subject_data, population_data,
            "Signal Quality & Physiological Assessment",
            create_signal_quality_analysis, styles, temp_images_dir))

        # 6. Comparative Analysis
        elements.extend(create_visualization_section_robust(
            subject_data, population_data,
            "Population Comparative Analysis",
            create_comparative_analysis_chart, styles, temp_images_dir))
        elements.append(PageBreak())

        # 7. Clinical Interpretation
        elements.extend(create_clinical_interpretation_section(
            subject_data, population_data, styles))

        # 8. Academic Footer
        elements.append(Spacer(1, 30))
        elements.append(create_academic_footer(styles))

        # Build PDF
        doc.build(elements)

        # Clean up temporary images
        import shutil
        if temp_images_dir.exists():
            shutil.rmtree(temp_images_dir)

        return output_path

    except Exception as e:
        # Clean up on error
        import shutil
        if temp_images_dir.exists():
            shutil.rmtree(temp_images_dir)
        raise e


def get_all_unique_subjects(df):
    """Get all unique subjects from the dataset"""

    print("🔍 Identifying all unique subjects...")

    # Get unique subjects by (study_type, subject_id)
    unique_subjects = df.drop_duplicates(['study_type', 'subject_id'])

    subjects_list = []

    for _, subject_row in unique_subjects.iterrows():
        study_type = subject_row['study_type']
        subject_id = subject_row['subject_id']

        # Get all recordings for this subject
        subject_data = df[(df['study_type'] == study_type) &
                          (df['subject_id'] == subject_id)]

        subjects_list.append({
            'study_type': study_type,
            'subject_id': subject_id,
            'data': subject_data,
            'n_nights': len(subject_data),
            'age': subject_data.iloc[0]['age'],
            'sex': subject_data.iloc[0]['sex']
        })

    # Sort subjects for organized processing
    subjects_list.sort(key=lambda x: (x['study_type'], x['subject_id']))

    print(f"✅ Found {len(subjects_list)} unique subjects:")

    sc_count = len([s for s in subjects_list if s['study_type'] == 'SC'])
    st_count = len([s for s in subjects_list if s['study_type'] == 'ST'])

    print(f"    ├─ SC (Healthy): {sc_count} subjects")
    print(f"    └─ ST (Sleep Difficulty): {st_count} subjects")

    return subjects_list


def generate_all_individual_reports(df, styles, batch_size=10):
    """Generate individual reports for all unique subjects"""

    print(f"\n📊 Full-Scale Report Generation")
    print("=" * 40)

    # Get all unique subjects
    all_subjects = get_all_unique_subjects(df)
    total_subjects = len(all_subjects)

    print(f"\n🎯 Processing {total_subjects} subjects...")
    print(f"📁 Output directory: {REPORTS_PATH}")

    # Create summary tracking
    generation_summary = {
        'total_subjects': total_subjects,
        'successful_reports': [],
        'failed_reports': [],
        'start_time': datetime.now(),
        'batch_size': batch_size
    }

    # Process subjects in batches
    for batch_start in range(0, total_subjects, batch_size):
        batch_end = min(batch_start + batch_size, total_subjects)
        current_batch = all_subjects[batch_start:batch_end]

        batch_num = (batch_start // batch_size) + 1
        total_batches = (total_subjects + batch_size - 1) // batch_size

        print(
            f"\n📦 Processing Batch {batch_num}/{total_batches} ({len(current_batch)} subjects)")
        print("-" * 50)

        for i, subject_info in enumerate(current_batch):
            subject_id = subject_info['subject_id']
            study_type = subject_info['study_type']
            subject_data = subject_info['data']

            # Progress indicator
            overall_progress = batch_start + i + 1
            progress_pct = (overall_progress / total_subjects) * 100

            print(f"[{overall_progress:3d}/{total_subjects}] ({progress_pct:5.1f}%) Processing {study_type} Subject {subject_id}...", end=" ")

            # Create output filename
            output_filename = f"Individual_Sleep_Report_{study_type}_{subject_id:03d}.pdf"
            output_path = REPORTS_PATH / output_filename

            try:
                # Generate report
                start_time = time.time()
                report_path = generate_individual_report_with_footer(
                    subject_data, df, output_path, styles)
                generation_time = time.time() - start_time

                # Check file size
                file_size_kb = report_path.stat().st_size / 1024

                # Record success
                generation_summary['successful_reports'].append({
                    'subject_id': subject_id,
                    'study_type': study_type,
                    'filename': output_filename,
                    'file_size_kb': file_size_kb,
                    'generation_time': generation_time,
                    'nights': subject_info['n_nights']
                })

                print(f"✅ {file_size_kb:6.1f}KB ({generation_time:.1f}s)")

            except Exception as e:
                # Record failure
                generation_summary['failed_reports'].append({
                    'subject_id': subject_id,
                    'study_type': study_type,
                    'error': str(e)
                })

                print(f"❌ ERROR: {str(e)[:50]}...")

        # Batch completion summary
        successful_in_batch = len([r for r in generation_summary['successful_reports'] if r in [
                                  {'subject_id': s['subject_id'], 'study_type': s['study_type']} for s in current_batch]])
        print(f"📊 Batch {batch_num} Complete: {len(current_batch)} processed")

        # Memory cleanup between batches
        import gc
        gc.collect()

    # Final processing summary
    generation_summary['end_time'] = datetime.now()
    generation_summary['total_time'] = generation_summary['end_time'] - \
        generation_summary['start_time']

    return generation_summary


def display_generation_summary(summary):
    """Display comprehensive generation summary"""

    print(f"\n🎉 FULL-SCALE REPORT GENERATION COMPLETE!")
    print("=" * 55)

    successful = len(summary['successful_reports'])
    failed = len(summary['failed_reports'])
    total = summary['total_subjects']
    success_rate = (successful / total) * 100 if total > 0 else 0

    print(f"📊 Generation Statistics:")
    print(f"    ├─ Total Subjects: {total}")
    print(f"    ├─ Successful Reports: {successful}")
    print(f"    ├─ Failed Reports: {failed}")
    print(f"    ├─ Success Rate: {success_rate:.1f}%")
    print(f"    └─ Total Time: {summary['total_time']}")

    if summary['successful_reports']:
        # File size statistics
        sizes = [r['file_size_kb'] for r in summary['successful_reports']]
        avg_size = sum(sizes) / len(sizes)
        total_size_mb = sum(sizes) / 1024

        # Generation time statistics
        times = [r['generation_time'] for r in summary['successful_reports']]
        avg_time = sum(times) / len(times)

        print(f"\n📁 File Statistics:")
        print(f"    ├─ Average Size: {avg_size:.1f} KB")
        print(f"    ├─ Total Size: {total_size_mb:.1f} MB")
        print(f"    ├─ Largest File: {max(sizes):.1f} KB")
        print(f"    ├─ Smallest File: {min(sizes):.1f} KB")
        print(f"    └─ Average Generation Time: {avg_time:.1f} seconds")

        # Study type breakdown
        sc_reports = [r for r in summary['successful_reports']
                      if r['study_type'] == 'SC']
        st_reports = [r for r in summary['successful_reports']
                      if r['study_type'] == 'ST']

        print(f"\n🏥 Study Type Breakdown:")
        print(f"    ├─ SC (Healthy) Reports: {len(sc_reports)}")
        print(f"    └─ ST (Sleep Difficulty) Reports: {len(st_reports)}")

        # Quality assessment
        excellent = len(
            [r for r in summary['successful_reports'] if r['file_size_kb'] > 200])
        good = len([r for r in summary['successful_reports']
                   if 50 < r['file_size_kb'] <= 200])
        basic = len([r for r in summary['successful_reports']
                    if r['file_size_kb'] <= 50])

        print(f"\n⭐ Quality Assessment:")
        print(f"    ├─ Excellent (>200KB): {excellent} reports")
        print(f"    ├─ Good (50-200KB): {good} reports")
        print(f"    └─ Basic (<50KB): {basic} reports")

    if summary['failed_reports']:
        print(f"\n❌ Failed Reports:")
        for failure in summary['failed_reports'][:5]:  # Show first 5 failures
            print(
                f"    ├─ {failure['study_type']} Subject {failure['subject_id']}: {failure['error'][:50]}...")
        if len(summary['failed_reports']) > 5:
            print(
                f"    └─ ... and {len(summary['failed_reports']) - 5} more failures")

    print(f"\n📂 All reports saved to: {REPORTS_PATH}")
    print(f"🎓 Academic footer included in all reports with student information")


# Execute full-scale generation
if __name__ == "__main__" or True:  # Always run when cell is executed

    print("🔍 Pre-execution Verification...")

    # Check required components
    required_items = [
        ('df', 'Population dataset'),
        ('REPORT_CONFIG', 'Report configuration'),
        ('REPORTS_PATH', 'Output directory'),
        ('create_sleep_architecture_chart', 'Visualization functions')
    ]

    all_ready = True
    for item, description in required_items:
        if item in globals():
            print(f"✅ {description}")
        else:
            print(f"❌ Missing: {description}")
            all_ready = False

    if all_ready:
        # Confirm with user before starting
        print(f"\n⚠️  ABOUT TO GENERATE ~100 INDIVIDUAL SLEEP REPORTS")
        print(f"📊 This will create professional PDF reports for all unique subjects")
        print(f"📁 Output location: {REPORTS_PATH}")
        print(f"⏱️  Estimated time: 10-20 minutes")
        print(f"💾 Estimated disk space: 50-100 MB")

        proceed = input(
            f"\n🚀 Proceed with full-scale generation? (yes/no): ").lower().strip()

        if proceed in ['yes', 'y']:
            print(f"\n🎯 Starting Full-Scale Report Generation...")

            try:
                # Generate all reports
                generation_summary = generate_all_individual_reports(
                    df,
                    REPORT_CONFIG['styles'],
                    batch_size=10  # Process 10 subjects at a time
                )

                # Display final summary
                display_generation_summary(generation_summary)

                print(f"\n✅ MISSION ACCOMPLISHED!")
                print(
                    f"🎓 All individual sleep reports generated with academic attribution")

            except KeyboardInterrupt:
                print(f"\n⚠️  Generation interrupted by user")
                print(f"📁 Partial results may be available in: {REPORTS_PATH}")

            except Exception as e:
                print(f"\n❌ Critical error during generation: {str(e)}")
                import traceback
                traceback.print_exc()
        else:
            print(f"⏹️  Generation cancelled by user")

    else:
        print(f"\n❌ Cannot proceed - missing required components")
        print(f"Please ensure all previous cells have been run successfully")

print(f"\n✅ Cell 6 Complete!")
print(f"🎓 Individual sleep reports with academic attribution ready!")

🚀 Full-Scale Individual Report Generation System
🔍 Pre-execution Verification...
✅ Population dataset
✅ Report configuration
✅ Output directory
✅ Visualization functions

⚠️  ABOUT TO GENERATE ~100 INDIVIDUAL SLEEP REPORTS
📊 This will create professional PDF reports for all unique subjects
📁 Output location: /Users/rishabh/Documents/BDA-course/HDA3/notebooks/results/sleep-edf/individual_reports
⏱️  Estimated time: 10-20 minutes
💾 Estimated disk space: 50-100 MB

🎯 Starting Full-Scale Report Generation...

📊 Full-Scale Report Generation
🔍 Identifying all unique subjects...
✅ Found 100 unique subjects:
    ├─ SC (Healthy): 78 subjects
    └─ ST (Sleep Difficulty): 22 subjects

🎯 Processing 100 subjects...
📁 Output directory: /Users/rishabh/Documents/BDA-course/HDA3/notebooks/results/sleep-edf/individual_reports

📦 Processing Batch 1/10 (10 subjects)
--------------------------------------------------
[  1/100] (  1.0%) Processing SC Subject 0...     ✅ Added visualization: SC_subject_0_sle