In [None]:
import pandas as pd
import json
import os
import matplotlib.pyplot as plt
from datetime import datetime
from fpdf import FPDF  # We'll use this for PDF reports

# Announce which pipeline layer this script implements, followed by a rule line.
print("LAYER 5: DISSEMINATION - AUTOMATED REPORTING", "=" * 60, sep="\n")

class ReportGenerator:
    """Layer 5 (dissemination) of the reporting pipeline.

    Loads the metric tables and the summary statistics stored under
    ``metrics_dir`` and renders them as a text report, PNG charts, CSV
    exports, an optional PDF and a JSON audit trail under ``output_dir``.
    """

    def __init__(self, metrics_dir="metrics", reports_dir="reports", output_dir="outputs"):
        """Create the output folders and load the metrics.

        Args:
            metrics_dir: Folder holding ``monthly_metrics.csv``,
                ``partner_metrics.csv``, ``regional_metrics.csv`` and
                ``summary_statistics.json``.
            reports_dir: Stored on the instance; not read by this class.
            output_dir: Folder that receives every generated artefact.

        Raises:
            FileNotFoundError: If one of the metrics files is missing.
        """
        self.metrics_dir = metrics_dir
        self.reports_dir = reports_dir
        self.output_dir = output_dir

        # makedirs creates the parent output_dir as well.
        os.makedirs(f"{self.output_dir}/visualizations", exist_ok=True)

        # Load metrics data
        self.monthly = pd.read_csv(f"{self.metrics_dir}/monthly_metrics.csv")
        self.partner = pd.read_csv(f"{self.metrics_dir}/partner_metrics.csv")
        self.regional = pd.read_csv(f"{self.metrics_dir}/regional_metrics.csv")

        # Load summary (explicit encoding so the result does not depend on the locale)
        with open(f"{self.metrics_dir}/summary_statistics.json", 'r', encoding='utf-8') as f:
            self.summary = json.load(f)

        print("Loaded metrics data:")
        print(f"   • {len(self.monthly)} months of data")
        print(f"   • {len(self.partner)} partners")
        print(f"   • {len(self.regional)} regions")

    def generate_text_report(self):
        """Write the plain-text impact report and return its path.

        The file is named ``impact_report_<YYYYMMDD>.txt`` inside
        ``output_dir`` and is written as UTF-8 because the report uses the
        ``•`` bullet character.

        Returns:
            Path of the written report file.
        """
        print("\n Generating text report")

        generated_at = datetime.now()
        generated_label = generated_at.strftime("%Y-%m-%d %H:%M:%S")
        data_source = self.summary.get('data_source', 'Standardized Data')

        # Collect the pieces and join once instead of growing a string in loops.
        parts = [f"""
IMPACT REPORTING SYSTEM - AUTOMATED OUTPUT
==========================================================
Report Generated: {generated_label}
Data Source: {data_source}

EXECUTIVE SUMMARY
==========================================================
• Total Households Reached: {self.summary['total_households_supported']:,}
• Total Funds Disbursed: ${self.summary['total_amount_disbursed_usd']:,.2f}
• Average Support per Household: ${self.summary['average_amount_per_household']:,.2f}
• Partners Engaged: {self.summary['unique_partners']}
• Regions Covered: {self.summary['unique_regions']}
• Reporting Period: {self.summary['reporting_months']} months

MONTHLY PERFORMANCE
==========================================================
"""]
        parts.extend(
            f"• {row['month']}: {row['total_households']:,} households | "
            f"${row['total_amount_usd']:,.2f} | "
            f"{row['unique_partners']} partners active\n"
            for _, row in self.monthly.iterrows()
        )

        parts.append("""
PARTNER CONTRIBUTIONS
==========================================================
""")
        parts.extend(
            f"• {row['partner_id']}: {row['total_households']:,} households | "
            f"${row['total_amount_usd']:,.2f} | "
            f"Avg: {row['avg_monthly_households']:,.1f} households/month\n"
            for _, row in self.partner.iterrows()
        )

        parts.append("""
REGIONAL DISTRIBUTION
==========================================================
""")
        parts.extend(
            f"• {row['region']}: {row['total_households']:,} households "
            f"({row['percentage_of_total_households']}%) | "
            f"${row['total_amount_usd']:,.2f}\n"
            for _, row in self.regional.iterrows()
        )

        parts.append(f"""
DATA QUALITY & AUDIT TRAIL
==========================================================
• Total Validated Records: {self.summary['total_records']}
• Calculation Timestamp: {self.summary['calculation_timestamp']}
• Raw Data Preserved: Yes (immutable storage)
• Validation Rules Applied: Yes (Layer 3)
• Standardization Documented: Yes (mapping rules)

SYSTEM ARCHITECTURE
==========================================================
1. INGESTION: Raw partner data preserved
2. STANDARDIZATION: Unified schema applied
3. VALIDATION: Business rules as code
4. METRICS: Traceable calculations
5. DISSEMINATION: Automated reporting (this document)

NOTES
==========================================================
This report was automatically generated by the 5-Layer
Audit-Ready Impact Reporting System. All metrics can be
traced back to source data through the audit trail.
""")

        # Save report
        report_path = f"{self.output_dir}/impact_report_{generated_at.strftime('%Y%m%d')}.txt"
        with open(report_path, 'w', encoding='utf-8') as f:
            f.write("".join(parts))

        print(f"Text report saved: {report_path}")
        return report_path

    def _apply_chart_style(self):
        """Select the seaborn dark-grid style under whichever name this matplotlib ships."""
        # matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*".
        for style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
            if style_name in plt.style.available:
                plt.style.use(style_name)
                return

    def _save_figure(self, fig, filename):
        """Save ``fig`` into the visualizations folder and release it."""
        fig.tight_layout()
        fig.savefig(f"{self.output_dir}/visualizations/{filename}", dpi=300, bbox_inches='tight')
        plt.close(fig)

    def generate_visualizations(self):
        """Render the monthly, regional and partner charts as PNG files."""
        print("\n Generating visualizations")

        self._apply_chart_style()

        # 1. Monthly Households Chart
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.bar(self.monthly['month'], self.monthly['total_households'], color='skyblue')
        ax.set_title('Households Supported by Month', fontsize=14, fontweight='bold')
        ax.set_xlabel('Month')
        ax.set_ylabel('Number of Households')
        ax.tick_params(axis='x', rotation=45)
        self._save_figure(fig, "monthly_households.png")

        # 2. Regional Distribution Chart
        fig, ax = plt.subplots(figsize=(10, 6))
        colors = ['#ff9999', '#66b3ff', '#99ff99', '#ffcc99', '#c2c2f0']
        ax.pie(self.regional['total_households'],
               labels=self.regional['region'],
               colors=colors[:len(self.regional)],
               autopct='%1.1f%%',
               startangle=90)
        ax.set_title('Households by Region', fontsize=14, fontweight='bold')
        self._save_figure(fig, "regional_distribution.png")

        # 3. Partner Contribution Chart
        fig, ax = plt.subplots(figsize=(12, 6))
        ax.bar(self.partner['partner_id'], self.partner['total_households'], color='lightgreen')
        ax.set_title('Households Supported by Partner', fontsize=14, fontweight='bold')
        ax.set_xlabel('Partner ID')
        ax.set_ylabel('Number of Households')
        ax.tick_params(axis='x', rotation=45)
        self._save_figure(fig, "partner_contributions.png")

        print(f"Visualizations saved to: {self.output_dir}/visualizations/")

    def generate_csv_outputs(self):
        """Write the donor summary CSV and copies of the three metric tables."""
        print("\n Generating CSV outputs")

        # (label, value) pairs for the donor-ready summary; money is pre-formatted.
        summary_rows = [
            ('Total Households Supported', self.summary['total_households_supported']),
            ('Total Amount Disbursed (USD)', f"${self.summary['total_amount_disbursed_usd']:,.2f}"),
            ('Average per Household (USD)', f"${self.summary['average_amount_per_household']:,.2f}"),
            ('Partners Engaged', self.summary['unique_partners']),
            ('Regions Covered', self.summary['unique_regions']),
            ('Reporting Months', self.summary['reporting_months']),
        ]
        donor_summary = pd.DataFrame({
            'metric': [label for label, _ in summary_rows],
            'value': [value for _, value in summary_rows],
            'calculation_date': [datetime.now().strftime("%Y-%m-%d")] * len(summary_rows),
        })

        # Save all CSV files
        donor_summary.to_csv(f"{self.output_dir}/donor_summary.csv", index=False)
        self.monthly.to_csv(f"{self.output_dir}/monthly_detailed.csv", index=False)
        self.partner.to_csv(f"{self.output_dir}/partner_detailed.csv", index=False)
        self.regional.to_csv(f"{self.output_dir}/regional_detailed.csv", index=False)

        print(f"CSV outputs saved to: {self.output_dir}/")

    @staticmethod
    def _pdf_safe(value):
        """Return ``value`` as text limited to Latin-1, replacing anything else with ``?``.

        The built-in PDF fonts used below cannot encode characters outside
        Latin-1, so data-driven text is sanitised before it reaches the PDF.
        """
        return str(value).encode('latin-1', 'replace').decode('latin-1')

    def generate_pdf_report(self):
        """Generate the PDF report (optional - requires fpdf).

        Returns:
            Path of the written PDF, or ``None`` when fpdf is not installed.
        """
        print("\n Generating PDF report")

        # Import here so the "not installed" fallback is actually reachable.
        # NOTE(review): the file also imports fpdf at module level, which still
        # makes it a hard dependency at load time unless that import is guarded.
        try:
            from fpdf import FPDF
        except ImportError:
            print("FPDF not installed. Skipping PDF generation.")
            print("Install with: pip install fpdf")
            return None

        generated_at = datetime.now()
        generated_label = generated_at.strftime("%Y-%m-%d %H:%M")
        data_source = self._pdf_safe(self.summary.get("data_source", "Standardized Data"))

        pdf = FPDF()
        pdf.add_page()

        # Title
        pdf.set_font('Arial', 'B', 16)
        pdf.cell(0, 10, 'Impact Reporting System - Automated Report', 0, 1, 'C')
        pdf.ln(5)

        # Metadata
        pdf.set_font('Arial', '', 10)
        pdf.cell(0, 10, f'Generated: {generated_label}', 0, 1)
        pdf.cell(0, 10, f'Data Source: {data_source}', 0, 1)
        pdf.ln(10)

        # Summary
        pdf.set_font('Arial', 'B', 12)
        pdf.cell(0, 10, 'Executive Summary', 0, 1)
        pdf.set_font('Arial', '', 10)

        summary_text = f"""
Total Households Reached: {self.summary['total_households_supported']:,}
Total Funds Disbursed: ${self.summary['total_amount_disbursed_usd']:,.2f}
Average Support per Household: ${self.summary['average_amount_per_household']:,.2f}
Partners Engaged: {self.summary['unique_partners']}
Regions Covered: {self.summary['unique_regions']}
            """

        pdf.multi_cell(0, 8, self._pdf_safe(summary_text))
        pdf.ln(10)

        # Monthly Data Table
        pdf.set_font('Arial', 'B', 12)
        pdf.cell(0, 10, 'Monthly Performance', 0, 1)

        # Column widths are shared by the header row and the data rows.
        columns = [(60, 'Month'), (40, 'Households'), (40, 'Amount (USD)'), (30, 'Partners')]

        pdf.set_font('Arial', 'B', 10)
        for width, title in columns:
            pdf.cell(width, 10, title, 1)
        pdf.ln()

        pdf.set_font('Arial', '', 10)
        for _, row in self.monthly.iterrows():
            cells = [
                self._pdf_safe(row['month']),  # cell() needs text, not a raw value
                f"{row['total_households']:,}",
                f"${row['total_amount_usd']:,.2f}",
                str(row['unique_partners']),
            ]
            for (width, _), text in zip(columns, cells):
                pdf.cell(width, 10, text, 1)
            pdf.ln()

        pdf.ln(10)

        # Footer
        pdf.set_font('Arial', 'I', 8)
        pdf.cell(0, 10, 'Automatically generated by Audit-Ready Impact Reporting System', 0, 1, 'C')

        pdf_path = f"{self.output_dir}/impact_report_{generated_at.strftime('%Y%m%d_%H%M')}.pdf"
        pdf.output(pdf_path)

        print(f"PDF report saved: {pdf_path}")
        # Returning the path lets run() report that the PDF was produced.
        return pdf_path

    def generate_audit_trail(self):
        """Write the JSON audit trail and return it as a dict.

        The document combines a static description of the five pipeline
        stages with the loaded summary, the file manifest and the result of
        the integrity check.
        """
        print("\n Generating audit trail")

        generated_at = datetime.now()
        stage_rows = [
            ('Layer 1 - Ingestion', 'Raw data preserved in data_raw/',
             'Immutable storage, no modifications'),
            ('Layer 2 - Standardization', 'Standardized data in data_standardized/',
             'Mapping rules documented in JSON'),
            ('Layer 3 - Validation', 'Accepted/Rejected data in reports/',
             'Business rules as code, traceable rejections'),
            ('Layer 4 - Metrics', 'Calculated metrics in metrics/',
             'Traceable calculations, no black boxes'),
            ('Layer 5 - Dissemination', 'Automated reports in outputs/',
             'No manual assembly, reproducible'),
        ]

        audit_trail = {
            'system_name': '5-Layer Audit-Ready Impact Reporting',
            'report_generation_date': generated_at.isoformat(),
            'data_pipeline_stages': [
                {'stage': stage, 'status': 'Complete', 'output': output, 'audit_note': note}
                for stage, output, note in stage_rows
            ],
            'key_metrics': self.summary,
            'file_manifest': self.get_file_manifest(),
            'system_integrity_check': self.integrity_check(),
        }

        audit_path = f"{self.output_dir}/audit_trail_{generated_at.strftime('%Y%m%d')}.json"
        with open(audit_path, 'w', encoding='utf-8') as f:
            json.dump(audit_trail, f, indent=2)

        print(f" Audit trail saved: {audit_path}")
        return audit_trail

    def get_file_manifest(self, root='..'):
        """List every non-hidden file below ``root``.

        Args:
            root: Directory to scan; defaults to the parent of the working
                directory. NOTE(review): metrics are loaded relative to the
                working directory, so confirm ``..`` is the intended root.

        Returns:
            A list of dicts with ``path`` (relative to ``root``, with a
            leading separator), ``size_bytes`` and ``modified`` (ISO
            timestamp, or ``''`` when the file could not be inspected).
        """
        manifest = []
        for current_dir, dirs, files in os.walk(root):
            # Prune hidden directories in place; sort for a stable manifest order.
            dirs[:] = sorted(d for d in dirs if not d.startswith('.'))

            for file_name in sorted(files):
                if file_name.startswith('.'):  # Skip hidden files
                    continue
                filepath = os.path.join(current_dir, file_name)
                try:
                    info = os.stat(filepath)
                except OSError:
                    # File vanished or is a broken link: keep the entry, without details.
                    size, modified = 0, ''
                else:
                    size = info.st_size
                    modified = datetime.fromtimestamp(info.st_mtime).isoformat()
                manifest.append({
                    # Strip only the root prefix so names containing ".." stay intact.
                    'path': os.sep + os.path.relpath(filepath, root),
                    'size_bytes': size,
                    'modified': modified,
                })
        return manifest

    def integrity_check(self, root='..'):
        """Check that each upstream layer left a non-empty folder under ``root``.

        Args:
            root: Directory expected to contain ``data_raw``,
                ``data_standardized``, ``metrics`` and ``reports``.
                NOTE(review): defaults to the parent directory while metrics
                are loaded relative to the working directory - confirm layout.

        Returns:
            A dict with one boolean per layer, ``all_layers_complete`` and an
            ISO ``timestamp``.
        """
        def has_content(folder):
            path = os.path.join(root, folder)
            # isdir (rather than exists) keeps listdir from failing on a plain file.
            return os.path.isdir(path) and bool(os.listdir(path))

        checks = {
            'raw_data_preserved': has_content('data_raw'),
            'standardized_data_exists': has_content('data_standardized'),
            'metrics_calculated': has_content('metrics'),
            'validation_performed': has_content('reports'),
        }
        checks['all_layers_complete'] = all(checks.values())
        checks['timestamp'] = datetime.now().isoformat()

        return checks

    def run(self):
        """Generate every output, then print a summary of what was produced."""
        print("\n Starting Layer 5: Dissemination")

        # Generate all outputs
        self.generate_text_report()
        self.generate_visualizations()
        self.generate_csv_outputs()
        pdf_report = self.generate_pdf_report()
        audit_trail = self.generate_audit_trail()

        # Summary
        print("\n" + "=" * 60)
        print("LAYER 5 COMPLETE - OUTPUTS GENERATED")
        print("=" * 60)
        print(f"Output Directory: {self.output_dir}/")
        print("impact_report_*.txt    - Comprehensive text report")
        print("visualizations/        - Charts and graphs")
        print("donor_summary.csv      - Donor-ready summary")
        print("*_detailed.csv         - Detailed data files")
        if pdf_report:
            print("impact_report_*.pdf   - Formal PDF report")
        print("audit_trail_*.json    - Complete audit trail")

        print("\n SYSTEM STATUS:")
        # Reuse the check already recorded in the audit trail.
        for check, status in audit_trail['system_integrity_check'].items():
            if check != 'timestamp':
                symbol = ">>" if status else "<<"
                print(f"   {symbol} {check.replace('_', ' ').title()}")

        print("\n KEY METRICS REPORTED:")
        print(f"   • Households: {self.summary['total_households_supported']:,}")
        print(f"   • Amount: ${self.summary['total_amount_disbursed_usd']:,.2f}")
        print(f"   • Partners: {self.summary['unique_partners']}")
        print(f"   • Regions: {self.summary['unique_regions']}")

        print("\n PIPELINE COMPLETE!")
        print("   All 5 layers executed successfully.")
        print("   Reports are now automated and reproducible.")

if __name__ == "__main__":
    # Layer 5 only consumes what Layer 4 wrote, so refuse to start without it.
    if os.path.exists("metrics/"):
        generator = ReportGenerator()
        generator.run()

        # Closing banner: one joined print emits the same lines as separate prints.
        print("\n".join((
            "\n" + "=" * 60,
            "5-LAYER PIPELINE COMPLETE!",
            "=" * 60,
            "Layer 1: Ingestion - Raw data preserved",
            "Layer 2: Standardization - Unified format",
            "Layer 3: Validation - Quality controlled",
            "Layer 4: Metrics - Calculated indicators",
            "Layer 5: Dissemination - Automated reports",
            "\n Project: Audit-Ready Impact Reporting System",
            "   Status: OPERATIONAL",
        )))
    else:
        print("Metrics not found! Run Layer 4 first.")
        print("Run: python calculate_metrics_fixed.py")

In [15]:
pip install fpdf

Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'done'
  Preparing metadata (pyproject.toml): started
  Preparing metadata (pyproject.toml): finished with status 'done'
Building wheels for collected packages: fpdf
  Building wheel for fpdf (pyproject.toml): started
  Building wheel for fpdf (pyproject.toml): finished with status 'done'
  Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40770 sha256=3fdb2d5cf7ce50dd1822a0c9737fb4080f547628ac6dc8178cf197f7cdeacaa7
  Stored in directory: c:\users\user\appdata\local\pip\cache\wheels\aa\da\11\a3189f34ddc13c26a2d0f329eac46b728c7f31c39e4dc26243
Successfully built fpdf
Installing collected packages: fpdf
Successfully installed fpdf-1.7.2
Note: you may need to restart the kernel to use updated packages.
