In [11]:
import pandas as pd
import json
import os
from datetime import datetime

# Announce the pipeline stage, underlined with a 60-character rule.
print("LAYER 4: METRICS CALCULATION", "=" * 60, sep="\n")

class MetricsCalculator:
    """Aggregate partner reporting data into metrics files and donor reports.

    The input CSV is expected to provide the columns ``reporting_month``,
    ``partner_id``, ``region``, ``households_supported`` and
    ``amount_disbursed_usd`` (these are the columns the methods below read).

    Ratios whose denominator is zero are reported as NaN in the metric
    frames (blank in CSV, ``null`` in JSON) and as ``0.0`` in the summary
    dictionary, so that empty or all-zero groups never produce ``inf`` or
    invalid JSON.
    """

    def __init__(
        self,
        input_file="reports/accepted_data.csv",
        fallback_file="data_standardized/all_standardized.csv",
        metrics_dir="metrics",
        reports_dir="reports",
    ):
        """Load the input data and prepare the output directories.

        Args:
            input_file: Preferred input CSV (accepted data from Layer 3).
            fallback_file: CSV used when ``input_file`` does not exist.
            metrics_dir: Directory that receives the metric CSV/JSON files.
            reports_dir: Directory that receives the donor reports.

        Raises:
            FileNotFoundError: If neither input file can be read.
        """
        self.input_file = input_file

        # Fallback: use standardized data if validation wasn't run.
        if not os.path.exists(self.input_file):
            print("No validation output found, using standardized data")
            self.input_file = fallback_file

        self.df = pd.read_csv(self.input_file)
        print(f"Loaded {len(self.df)} records for metrics calculation")

        self.metrics_dir = metrics_dir
        self.reports_dir = reports_dir
        os.makedirs(self.metrics_dir, exist_ok=True)
        os.makedirs(self.reports_dir, exist_ok=True)

        # Recorded once so every output of this run carries the same stamp.
        self.calculation_timestamp = datetime.now().isoformat()

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _ratio(numerator, denominator):
        """Divide two Series element-wise, giving NaN where the denominator is 0."""
        # Masking zero denominators avoids +/-inf for x/0 while keeping 0/0 as NaN.
        return numerator / denominator.where(denominator != 0)

    @staticmethod
    def _share_of_total(values):
        """Return each value as a percentage of the column total, rounded to 1 dp."""
        total = values.sum()
        if not total:
            # No meaningful share exists when the total is zero (or data is empty).
            return pd.Series(float("nan"), index=values.index, dtype="float64")
        return (values / total * 100).round(1)

    @staticmethod
    def _safe_div(numerator, denominator, ndigits):
        """Return ``numerator / denominator`` rounded, or 0.0 for a zero denominator."""
        if not denominator:
            return 0.0
        return round(float(numerator) / float(denominator), ndigits)

    @staticmethod
    def _json_records(frame):
        """Convert a frame to a list of dicts that serialises to strict JSON."""

        def clean(value):
            if hasattr(value, "item"):
                value = value.item()  # NumPy scalar -> plain Python scalar
            if isinstance(value, float) and (
                value != value or value in (float("inf"), float("-inf"))
            ):
                return None  # NaN/inf are not valid JSON; emit null instead
            return value

        return [
            {key: clean(value) for key, value in record.items()}
            for record in frame.to_dict('records')
        ]

    # ------------------------------------------------------------------
    # Metric calculations
    # ------------------------------------------------------------------
    def calculate_monthly_metrics(self) -> pd.DataFrame:
        """Calculate metrics aggregated by month.

        Returns:
            One row per ``reporting_month`` with totals, the number of unique
            partners, and per-partner / per-household averages.
        """
        print("\n Calculating monthly metrics")

        monthly = self.df.groupby('reporting_month').agg({
            'households_supported': 'sum',
            'amount_disbursed_usd': 'sum',
            'partner_id': 'nunique'
        }).reset_index()

        monthly.columns = ['month', 'total_households', 'total_amount_usd', 'unique_partners']

        monthly['avg_households_per_partner'] = self._ratio(
            monthly['total_households'], monthly['unique_partners']
        ).round(1)
        monthly['avg_amount_per_household'] = self._ratio(
            monthly['total_amount_usd'], monthly['total_households']
        ).round(2)

        return monthly

    def calculate_partner_metrics(self) -> pd.DataFrame:
        """Calculate metrics by partner.

        Returns:
            One row per ``partner_id`` with totals, the number of distinct
            months reported, and monthly averages.
        """
        print("Calculating partner metrics")

        partner = self.df.groupby('partner_id').agg({
            'households_supported': 'sum',
            'amount_disbursed_usd': 'sum',
            'reporting_month': 'nunique'
        }).reset_index()

        partner.columns = ['partner_id', 'total_households', 'total_amount_usd', 'months_reported']

        partner['avg_monthly_households'] = self._ratio(
            partner['total_households'], partner['months_reported']
        ).round(1)
        partner['avg_monthly_amount'] = self._ratio(
            partner['total_amount_usd'], partner['months_reported']
        ).round(2)

        return partner

    def calculate_regional_metrics(self) -> pd.DataFrame:
        """Calculate metrics by region.

        Returns:
            One row per ``region`` with totals, unique partners, and each
            region's percentage share of households and of amount disbursed.
        """
        print("Calculating regional metrics")

        regional = self.df.groupby('region').agg({
            'households_supported': 'sum',
            'amount_disbursed_usd': 'sum',
            'partner_id': 'nunique'
        }).reset_index()

        regional.columns = ['region', 'total_households', 'total_amount_usd', 'unique_partners']

        regional['percentage_of_total_households'] = self._share_of_total(
            regional['total_households']
        )
        regional['percentage_of_total_amount'] = self._share_of_total(
            regional['total_amount_usd']
        )

        return regional

    def calculate_cross_tab_metrics(self) -> pd.DataFrame:
        """Calculate partner x region totals of households and amount."""
        print("Calculating partner-region cross tab")

        cross = self.df.groupby(['partner_id', 'region']).agg({
            'households_supported': 'sum',
            'amount_disbursed_usd': 'sum'
        }).reset_index()

        cross.columns = ['partner_id', 'region', 'households', 'amount_usd']

        return cross

    def calculate_summary_statistics(self) -> dict:
        """Calculate overall summary statistics.

        Returns:
            A dictionary of plain Python values (JSON-serialisable). Averages
            fall back to ``0.0`` when their denominator is zero, e.g. for an
            empty data set.
        """
        print("Calculating summary statistics")

        # Compute each aggregate once instead of re-scanning the frame per key.
        households = self.df['households_supported'].sum()
        amount = self.df['amount_disbursed_usd'].sum()
        months = self.df['reporting_month'].nunique()

        summary = {
            'total_households_supported': int(households),
            'total_amount_disbursed_usd': round(float(amount), 2),
            'average_households_per_month': self._safe_div(households, months, 1),
            'average_amount_per_month': self._safe_div(amount, months, 2),
            'average_amount_per_household': self._safe_div(amount, households, 2),
            'unique_partners': int(self.df['partner_id'].nunique()),
            'unique_regions': int(self.df['region'].nunique()),
            'reporting_months': int(months),
            'total_records': int(len(self.df)),
            'calculation_timestamp': self.calculation_timestamp,
            'data_source': self.input_file
        }

        return summary

    # ------------------------------------------------------------------
    # Reporting
    # ------------------------------------------------------------------
    def generate_metrics_report(self, monthly, partner, regional, cross, summary):
        """Write all metric files and the donor reports.

        Args:
            monthly: Frame from ``calculate_monthly_metrics``.
            partner: Frame from ``calculate_partner_metrics``.
            regional: Frame from ``calculate_regional_metrics``.
            cross: Frame from ``calculate_cross_tab_metrics``.
            summary: Dictionary from ``calculate_summary_statistics``.

        Returns:
            The ``summary`` dictionary, unchanged.
        """
        print("\n Generating metrics reports")

        # Each log line reports the path actually written, not a fixed string.
        monthly_path = f"{self.metrics_dir}/monthly_metrics.csv"
        monthly.to_csv(monthly_path, index=False)
        print(f"Monthly metrics: {len(monthly)} months → {monthly_path}")

        partner_path = f"{self.metrics_dir}/partner_metrics.csv"
        partner.to_csv(partner_path, index=False)
        print(f"Partner metrics: {len(partner)} partners → {partner_path}")

        regional_path = f"{self.metrics_dir}/regional_metrics.csv"
        regional.to_csv(regional_path, index=False)
        print(f"Regional metrics: {len(regional)} regions → {regional_path}")

        cross_path = f"{self.metrics_dir}/partner_region_cross.csv"
        cross.to_csv(cross_path, index=False)
        print(f"Partner-region cross tab → {cross_path}")

        # Save summary as JSON
        summary_path = f"{self.metrics_dir}/summary_statistics.json"
        with open(summary_path, 'w', encoding='utf-8') as f:
            json.dump(summary, f, indent=2)
        print(f"Summary statistics → {summary_path}")

        # Create a combined report for donors
        self.create_donor_report(monthly, partner, regional, summary)

        return summary

    def create_donor_report(self, monthly, partner, regional, summary):
        """Create a donor-friendly JSON report and a plain-text summary.

        Writes ``donor_summary_report.json`` and ``donor_summary.txt`` into
        ``self.reports_dir``.

        Args:
            monthly: Frame from ``calculate_monthly_metrics``.
            partner: Frame from ``calculate_partner_metrics``.
            regional: Frame from ``calculate_regional_metrics``.
            summary: Dictionary from ``calculate_summary_statistics``.
        """
        print("\n Creating donor summary report")

        donor_report = {
            'report_generated': datetime.now().strftime("%Y-%m-%d %H:%M"),
            'program_overview': {
                'total_households_reached': summary['total_households_supported'],
                'total_funds_disbursed_usd': summary['total_amount_disbursed_usd'],
                'average_support_per_household_usd': summary['average_amount_per_household'],
                'partners_engaged': summary['unique_partners'],
                'regions_covered': summary['unique_regions']
            },
            # NaN/inf cells become null so the file stays strict JSON.
            'monthly_performance': self._json_records(monthly),
            'partner_contributions': self._json_records(partner),
            'regional_distribution': self._json_records(regional),
            'data_quality_note': f"Based on {summary['total_records']} validated data points"
        }

        donor_path = f"{self.reports_dir}/donor_summary_report.json"
        with open(donor_path, 'w', encoding='utf-8') as f:
            json.dump(donor_report, f, indent=2)

        print(f"Donor report → {donor_path}")

        # Also create a simple text summary, assembled from parts and joined once.
        generated_on = datetime.now().strftime("%Y-%m-%d")
        header = f"""
IMPACT REPORT - GENERATED {generated_on}
==========================================================
OVERVIEW
• Households Reached: {summary['total_households_supported']:,}
• Total Funds Disbursed: ${summary['total_amount_disbursed_usd']:,.2f}
• Average per Household: ${summary['average_amount_per_household']:,.2f}
• Partners Engaged: {summary['unique_partners']}
• Regions Covered: {summary['unique_regions']}

MONTHLY TREND
"""
        footer = f"""
DATA QUALITY
• Based on {summary['total_records']} validated records
• Calculation timestamp: {summary['calculation_timestamp']}
• Raw data preserved for audit trail
"""
        parts = [header]
        # itertuples keeps each column's own dtype (iterrows upcasts per row).
        parts.extend(
            f"• {row.month}: {row.total_households:,} households (${row.total_amount_usd:,.2f})\n"
            for row in monthly.itertuples(index=False)
        )
        parts.append("\nREGIONAL DISTRIBUTION\n")
        parts.extend(
            f"• {row.region}: {row.total_households:,} households ({row.percentage_of_total_households}%)\n"
            for row in regional.itertuples(index=False)
        )
        parts.append(footer)
        text_summary = "".join(parts)

        text_path = f"{self.reports_dir}/donor_summary.txt"
        # Explicit UTF-8: the summary contains non-ASCII bullet characters.
        with open(text_path, 'w', encoding='utf-8') as f:
            f.write(text_summary)

        print(f"📝 Text summary → {text_path}")

    def run(self):
        """Run every calculation, write all reports and print key insights.

        Returns:
            The summary dictionary from ``calculate_summary_statistics``.
        """
        print(f"\n Starting metrics calculation on {len(self.df)} records")

        # Calculate all metrics
        monthly = self.calculate_monthly_metrics()
        partner = self.calculate_partner_metrics()
        regional = self.calculate_regional_metrics()
        cross = self.calculate_cross_tab_metrics()
        summary = self.calculate_summary_statistics()

        # Generate reports
        self.generate_metrics_report(monthly, partner, regional, cross, summary)

        # Print key insights
        print("\n" + "=" * 60)
        print("KEY INSIGHTS")
        print("=" * 60)
        print(f"Total Households Supported: {summary['total_households_supported']:,}")
        print(f"Total Amount Disbursed: ${summary['total_amount_disbursed_usd']:,.2f}")
        print(f"Average per Household: ${summary['average_amount_per_household']:,.2f}")
        print(f"Partners Reporting: {summary['unique_partners']}")
        print(f"Regions Covered: {summary['unique_regions']}")

        # Show monthly trend; groupby sorts its keys, so the last row is the
        # greatest reporting_month value.
        print(f"\n Monthly Trend (Latest Month):")
        if monthly.empty:
            print("   No monthly data available")
        else:
            latest_month = monthly.iloc[-1]
            print(f"   {latest_month['month']}: {latest_month['total_households']:,} households")
            print(f"   ${latest_month['total_amount_usd']:,.2f} disbursed")
            print(f"   {latest_month['unique_partners']} partners active")

        # Show top region (by total households)
        print(f"\n Top Performing Region:")
        if regional.empty:
            print("   No regional data available")
        else:
            top_region = regional.loc[regional['total_households'].idxmax()]
            print(f"   {top_region['region']}: {top_region['total_households']:,} households")
            print(f"   {top_region['percentage_of_total_households']}% of total")

        return summary

if __name__ == "__main__":
    # Build the calculator (this loads the input CSV) and run every step.
    calculator = MetricsCalculator()
    calculator.run()

    # Closing banner that points at the next pipeline stage.
    print(
        "\n" + "=" * 60,
        "Layer 4: Metrics Calculation Complete!",
        "Next: Layer 5 - Dissemination",
        sep="\n",
    )

LAYER 4: METRICS CALCULATION
Loaded 18 records for metrics calculation

 Starting metrics calculation on 18 records

 Calculating monthly metrics
Calculating partner metrics
Calculating regional metrics
Calculating partner-region cross tab
Calculating summary statistics

 Generating metrics reports
Monthly metrics: 3 months → metrics/monthly_metrics.csv
Partner metrics: 8 partners → metrics/partner_metrics.csv
Regional metrics: 5 regions → metrics/regional_metrics.csv
Partner-region cross tab → metrics/partner_region_cross.csv
Summary statistics → metrics/summary_statistics.json

 Creating donor summary report
Donor report → reports/donor_summary_report.json
📝 Text summary → reports/donor_summary.txt

KEY INSIGHTS
Total Households Supported: 2,517
Total Amount Disbursed: $125,850.00
Average per Household: $50.00
Partners Reporting: 8
Regions Covered: 5

 Monthly Trend (Latest Month):
   2024-03-01: 525 households
   $26,250.00 disbursed
   3 partners active

 Top Perfo