# In [None]:  (Jupyter cell prompt left over from the notebook export)
import streamlit as st
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np
import io

# Page configuration.
# The page icon was a mis-decoded byte sequence ("üè•"); it is restored to the
# hospital emoji so the browser tab shows a real icon instead of garbled text.
st.set_page_config(
    page_title="PMTCT Dashboard",
    page_icon="🏥",
    layout="wide",
    initial_sidebar_state="expanded"
)

# Custom CSS used by the HTML snippets rendered further down:
#   .main-header   - the page title
#   .metric-card   - the Actual/Target/Achievement card under each gauge
#   .*-text        - colour coding for the achievement status
st.markdown("""
<style>
    .main-header {
        font-size: 2.5rem;
        color: #1f77b4;
        text-align: center;
        margin-bottom: 2rem;
    }
    .metric-card {
        background-color: #f0f2f6;
        padding: 1rem;
        border-radius: 10px;
        border-left: 4px solid #1f77b4;
        margin: 0.5rem 0;
    }
    .gauge-container {
        text-align: center;
        margin: 1rem 0;
    }
    .success-text { color: #28a745; }
    .warning-text { color: #ffc107; }
    .danger-text { color: #dc3545; }
</style>
""", unsafe_allow_html=True)

class PMTCTDashboard:
    """Target-vs-actual calculations and gauge figures for PMTCT indicators.

    The instance keeps a cleaned private copy of the supplied DataFrame in
    ``self.data`` and a mapping of indicator column name -> target value in
    ``self.targets``.
    """

    # Descriptive columns that must be left untouched by numeric coercion.
    ID_COLUMNS = (
        'periodid', 'periodname', 'periodcode', 'perioddescription',
        'orgunitlevel1', 'orgunitlevel2', 'orgunitlevel3',
        'organisationunitid', 'organisationunitname', 'organisationunitcode',
        'organisationunitdescription',
    )

    # (indicator column, achievement factor) pairs used to derive the targets.
    INDICATOR_FACTORS = (
        ('PMTCT_ANC_1 Number of New ANC clients', 0.8),
        ('PMTCT_HTS_6 Number of  pregnant women HIV tested and received results ANC', 0.85),
        ('PMTCT_HTS_7. Number of pregnant women tested HIV positive_ ANC', 1.0),
        ('PMTCT_ART_15b. Number of HIV positive pregnant women newly started on  ART during ANC  <36wks of pregnancy', 1.0),
        ('PMTCT_ART_15c. Number of HIV positive pregnant women newly started on  ART during ANC >36wks of pregnancy', 1.0),
        ('PMTCT_HTS_6 Number of  pregnant women HIV tested and received results L&D', 0.8),
        ('PMTCT_ART_15d. Number of HIV positive pregnant women newly started on  ART during Labour', 1.0),
        ('PMTCT_EID_31. Number of Infants born to HIV positive women whose blood samples were taken for DNA PCR test between >72 hrs - < 2 months of birth', 0.9),
        ('PMTCT_EID_27. Number of HIV-exposed infants born to HIV positive women who received ARV prophylaxis within 72 hrs of delivery', 0.95),
    )

    def __init__(self, data):
        """Store a cleaned copy of ``data`` and derive the indicator targets.

        Args:
            data: pandas DataFrame with one row per facility.
        """
        # Copy so that cleaning never writes into the caller's DataFrame.
        self.data = data.copy()
        # Clean BEFORE computing targets: targets are column sums, and summing
        # un-coerced text columns either fails or yields nonsense.
        self.clean_data()
        self.setup_targets()

    def clean_data(self):
        """Coerce every non-identifier column to numeric and zero-fill gaps.

        Values that cannot be parsed as numbers become 0. Identifier columns
        (see ``ID_COLUMNS``) are left exactly as they were, including any
        missing values, so facility/state names are never replaced by 0.
        """
        for col in self.data.columns:
            if col in self.ID_COLUMNS:
                continue
            self.data[col] = pd.to_numeric(self.data[col], errors='coerce').fillna(0)

    def setup_targets(self):
        """Populate ``self.targets`` with one target per PMTCT indicator.

        Targets are derived from the data itself via ``calculate_target``;
        replace them with real programme targets when those are available.
        """
        self.targets = {
            indicator: self.calculate_target(indicator, factor)
            for indicator, factor in self.INDICATOR_FACTORS
        }

    def calculate_target(self, column_name, achievement_factor=0.8):
        """Derive a demonstration target from the column total.

        Args:
            column_name: exact name of the indicator column.
            achievement_factor: the target is the total scaled by
                ``1 + (1 - achievement_factor)``, but never less than
                1.1 x the total.

        Returns:
            The derived target, or 1000 when the column is absent.

        NOTE: because the target is at least 1.1 x the current total, an
        indicator measured against its own column cannot reach 100%.
        """
        if column_name in self.data.columns:
            current_total = self.data[column_name].sum()
            return max(current_total * (1 + (1 - achievement_factor)), current_total * 1.1)
        return 1000  # Default target

    def calculate_metrics(self):
        """Compute actual value and achievement rate for every indicator.

        Returns:
            dict mapping indicator name to a dict with keys ``actual``,
            ``target``, ``achievement_rate`` (percent, capped at 100) and
            ``column_name`` (``'Not Found'`` when no column matches).
        """
        metrics = {}

        for indicator, target in self.targets.items():
            col_name = self._find_column(indicator)
            if col_name is None:
                metrics[indicator] = {
                    'actual': 0,
                    'target': target,
                    'achievement_rate': 0,
                    'column_name': 'Not Found'
                }
                continue

            actual_value = self.data[col_name].sum()
            # Guard against a zero target (e.g. an all-zero column).
            achievement_rate = min((actual_value / target) * 100, 100) if target > 0 else 0

            metrics[indicator] = {
                'actual': actual_value,
                'target': target,
                'achievement_rate': achievement_rate,
                'column_name': col_name
            }

        return metrics

    def _find_column(self, indicator):
        """Return the data column for ``indicator`` or None.

        An exact match wins, so the column used for the actual value is the
        same one the target was derived from; otherwise the first column whose
        name contains the indicator text is used.
        """
        if indicator in self.data.columns:
            return indicator
        for col in self.data.columns:
            if isinstance(col, str) and indicator in col:
                return col
        return None

    def create_gauge_chart(self, value, max_value, title, color_scale=None):
        """Build a gauge figure showing ``value`` against ``max_value``.

        Args:
            value: the achieved (actual) number.
            max_value: the target; also the upper end of the gauge axis.
            title: text shown above the gauge.
            color_scale: accepted for compatibility; the gauge bands below
                are fixed and this argument is currently not used.

        Returns:
            A plotly ``go.Figure`` with bands at 60% and 80% of the target and
            a red threshold line at the target.
        """
        if color_scale is None:
            color_scale = [(0, "red"), (0.5, "yellow"), (1, "green")]

        percentage = (value / max_value * 100) if max_value > 0 else 0

        fig = go.Figure(go.Indicator(
            mode = "gauge+number+delta",
            value = value,
            domain = {'x': [0, 1], 'y': [0, 1]},
            title = {'text': f"{title}<br><span style='font-size:0.8em;color:gray'>{percentage:.1f}% of Target</span>",
                    'font': {'size': 14}},
            delta = {'reference': max_value, 'relative': False, 'position': "bottom"},
            gauge = {
                'axis': {'range': [None, max_value], 'tickwidth': 1, 'tickcolor': "darkblue"},
                'bar': {'color': "darkblue"},
                'bgcolor': "white",
                'borderwidth': 2,
                'bordercolor': "gray",
                'steps': [
                    {'range': [0, max_value*0.6], 'color': 'lightcoral'},
                    {'range': [max_value*0.6, max_value*0.8], 'color': 'lightyellow'},
                    {'range': [max_value*0.8, max_value], 'color': 'lightgreen'}],
                'threshold': {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': max_value}
            }
        ))

        fig.update_layout(
            height=300,
            margin=dict(l=20, r=20, t=80, b=20),
            font={'size': 10}
        )

        return fig

    def create_percentage_gauge(self, percentage, title):
        """Build a 0-100% gauge figure for an achievement rate.

        Args:
            percentage: value to display, on a 0-100 scale.
            title: text shown above the gauge.

        Returns:
            A plotly ``go.Figure`` with bands at 60 and 80 and a threshold
            line at 100.
        """
        fig = go.Figure(go.Indicator(
            mode = "gauge+number",
            value = percentage,
            domain = {'x': [0, 1], 'y': [0, 1]},
            title = {'text': f"{title}", 'font': {'size': 14}},
            number = {'suffix': "%"},
            gauge = {
                'axis': {'range': [0, 100]},
                'bar': {'color': "darkblue"},
                'steps': [
                    {'range': [0, 60], 'color': "lightcoral"},
                    {'range': [60, 80], 'color': "lightyellow"},
                    {'range': [80, 100], 'color': "lightgreen"}],
                'threshold': {
                    'line': {'color': "red", 'width': 4},
                    'thickness': 0.75,
                    'value': 100}
            }
        ))

        fig.update_layout(height=250, margin=dict(l=20, r=20, t=60, b=20))
        return fig

def load_and_preprocess_data(uploaded_file):
    """Read the uploaded CSV and show a short overview of its contents.

    Args:
        uploaded_file: file-like object (or path) accepted by ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` after an error message has been
        shown (unreadable file, missing ``orgunitlevel1`` column, or a file
        without data rows).
    """
    # Only the parse itself is wrapped, so a rendering problem is not
    # misreported as a file-loading error. This is the UI boundary, hence the
    # broad catch: any parse failure is shown to the user instead of raised.
    try:
        df = pd.read_csv(uploaded_file)
    except Exception as e:
        st.error(f"❌ Error loading file: {str(e)}")
        return None

    # Validate before announcing success; previously a missing column produced
    # a success banner followed by a cryptic "'orgunitlevel1'" error.
    if 'orgunitlevel1' not in df.columns:
        st.error("❌ Error loading file: required column 'orgunitlevel1' is missing")
        return None
    if df.empty:
        st.error("❌ Error loading file: the CSV contains no data rows")
        return None

    st.success(f"✅ Data loaded successfully: {len(df)} facilities found")

    # Display basic info about the data
    with st.expander("📁 Data Overview"):
        col1, col2, col3 = st.columns(3)
        with col1:
            st.metric("Total Facilities", len(df))
        with col2:
            st.metric("States Covered", df['orgunitlevel1'].nunique())
        with col3:
            # df is known to be non-empty here, so iloc[0] is safe.
            period = df['periodname'].iloc[0] if 'periodname' in df.columns else "Unknown"
            st.metric("Period", period)

        st.write("**First few rows of data:**")
        st.dataframe(df.head(), use_container_width=True)

    return df

def get_indicator_short_name(full_name):
    """Return a compact display label for an indicator column name.

    Known indicators map to fixed labels. Any other name falls back to the
    text after its last 'Number of', cut to 30 characters and stripped of
    surrounding whitespace (the whole name is used when the marker is absent).
    """
    labels = {
        'PMTCT_ANC_1 Number of New ANC clients': 'New ANC Clients',
        'PMTCT_HTS_6 Number of  pregnant women HIV tested and received results ANC': 'HIV Tested (ANC)',
        'PMTCT_HTS_7. Number of pregnant women tested HIV positive_ ANC': 'HIV Positive (ANC)',
        'PMTCT_ART_15b. Number of HIV positive pregnant women newly started on  ART during ANC  <36wks of pregnancy': 'ART Started <36wks',
        'PMTCT_ART_15c. Number of HIV positive pregnant women newly started on  ART during ANC >36wks of pregnancy': 'ART Started >36wks',
        'PMTCT_HTS_6 Number of  pregnant women HIV tested and received results L&D': 'HIV Tested (L&D)',
        'PMTCT_ART_15d. Number of HIV positive pregnant women newly started on  ART during Labour': 'ART Started (Labour)',
        'PMTCT_EID_31. Number of Infants born to HIV positive women whose blood samples were taken for DNA PCR test between >72 hrs - < 2 months of birth': 'Infant DNA PCR',
        'PMTCT_EID_27. Number of HIV-exposed infants born to HIV positive women who received ARV prophylaxis within 72 hrs of delivery': 'Infant ARV Prophylaxis',
    }

    if full_name in labels:
        return labels[full_name]

    # rpartition yields the text after the last marker, or the whole string
    # when the marker does not occur.
    tail = full_name.rpartition('Number of')[2]
    return tail[:30].strip()

# Column used for the facility ranking and the zero-reporting check.
_ANC_CLIENTS_COLUMN = 'PMTCT_ANC_1 Number of New ANC clients'


def _classify_facility(name):
    """Bucket a facility name into Hospital / Clinic / Health Center / Other."""
    text = str(name)
    if 'Hospital' in text:
        return 'Hospital'
    if 'Clinic' in text:
        return 'Clinic'
    if 'Health' in text:
        return 'Health Center'
    return 'Other'


def _apply_sidebar_filters(frame):
    """Render the sidebar filters and return (filtered frame, state, facility type)."""
    st.sidebar.header("🔍 Filters")
    filtered = frame.copy()

    selected_state = 'All States'
    if 'orgunitlevel1' in frame.columns:
        # dropna: a missing state would make sorted() compare NaN with str.
        states = ['All States'] + sorted(frame['orgunitlevel1'].dropna().unique().tolist())
        selected_state = st.sidebar.selectbox("Filter by State", states)
        if selected_state != 'All States':
            filtered = filtered[filtered['orgunitlevel1'] == selected_state]

    selected_facility_type = 'All Facilities'
    if 'organisationunitname' in frame.columns:
        all_types = frame['organisationunitname'].apply(_classify_facility)
        facility_types = ['All Facilities'] + sorted(all_types.unique().tolist())
        selected_facility_type = st.sidebar.selectbox("Filter by Facility Type", facility_types)
        if selected_facility_type != 'All Facilities':
            # Filter with the same classification that built the dropdown, so
            # 'Other' actually filters and a name such as "X Health Hospital"
            # is only ever counted as a Hospital.
            row_types = filtered['organisationunitname'].apply(_classify_facility)
            filtered = filtered[row_types == selected_facility_type]

    return filtered, selected_state, selected_facility_type


def _render_kpis(metrics):
    """Show the four headline indicators as st.metric tiles."""
    st.subheader("📈 Key Performance Indicators")

    key_indicators = [
        'PMTCT_ANC_1 Number of New ANC clients',
        'PMTCT_HTS_6 Number of  pregnant women HIV tested and received results ANC',
        'PMTCT_HTS_7. Number of pregnant women tested HIV positive_ ANC',
        'PMTCT_ART_15b. Number of HIV positive pregnant women newly started on  ART during ANC  <36wks of pregnancy'
    ]

    for column, indicator in zip(st.columns(len(key_indicators)), key_indicators):
        entry = metrics.get(indicator)
        if entry is None:
            continue
        rate = entry['achievement_rate']
        if rate >= 80:
            delta_color = "normal"
        elif rate >= 60:
            delta_color = "off"
        else:
            delta_color = "inverse"
        with column:
            st.metric(
                label=get_indicator_short_name(indicator),
                value=f"{entry['actual']:,.0f}",
                delta=f"{rate:.1f}%",
                delta_color=delta_color
            )


def _render_gauges(dashboard, metrics):
    """Show one gauge plus a detail card per indicator, three per row."""
    st.subheader("🎯 Target vs Achievement - Detailed View")

    entries = list(metrics.items())
    for start in range(0, len(entries), 3):
        for column, (indicator, entry) in zip(st.columns(3), entries[start:start + 3]):
            with column:
                fig = dashboard.create_gauge_chart(
                    value=entry['actual'],
                    max_value=entry['target'],
                    title=get_indicator_short_name(indicator)
                )
                st.plotly_chart(fig, use_container_width=True)

                rate = entry['achievement_rate']
                status_class = "success-text" if rate >= 80 else "warning-text" if rate >= 60 else "danger-text"

                st.markdown(f"""
                <div class="metric-card">
                    <strong>Actual:</strong> {entry['actual']:,.0f}<br>
                    <strong>Target:</strong> {entry['target']:,.0f}<br>
                    <strong class="{status_class}">Achievement: {rate:.1f}%</strong>
                </div>
                """, unsafe_allow_html=True)


def _render_achievement_overview(metrics):
    """Show a horizontal bar chart of achievement rates, lowest first."""
    st.subheader("📊 Achievement Rates Overview")

    achievement_df = pd.DataFrame([
        {
            'Indicator': get_indicator_short_name(indicator),
            'Achievement Rate': entry['achievement_rate'],
            'Actual': entry['actual'],
            'Target': entry['target']
        }
        for indicator, entry in metrics.items()
    ]).sort_values('Achievement Rate', ascending=True)

    fig = px.bar(
        achievement_df,
        y='Indicator',
        x='Achievement Rate',
        orientation='h',
        title='Achievement Rates by Indicator',
        color='Achievement Rate',
        color_continuous_scale=['red', 'yellow', 'green'],
        range_color=[0, 100]
    )
    fig.update_layout(
        yaxis_title="",
        xaxis_title="Achievement Rate (%)",
        height=400
    )
    st.plotly_chart(fig, use_container_width=True)


def _render_summary_table(metrics):
    """Show the per-indicator actual/target/gap table."""
    st.subheader("📋 Comprehensive Metrics Summary")

    rows = []
    for indicator, entry in metrics.items():
        status = '✅ Above Target' if entry['actual'] >= entry['target'] else '⚠️ Below Target'
        rows.append({
            'Indicator': get_indicator_short_name(indicator),
            'Actual': f"{entry['actual']:,.0f}",
            'Target': f"{entry['target']:,.0f}",
            'Achievement Rate': f"{entry['achievement_rate']:.1f}%",
            'Status': status,
            'Gap': f"{max(0, entry['target'] - entry['actual']):,.0f}"
        })

    st.dataframe(pd.DataFrame(rows), use_container_width=True)


def _render_facility_analysis(facilities):
    """Show the top-10 and zero-reporting facility tables for ``facilities``."""
    if 'organisationunitname' not in facilities.columns:
        return

    st.subheader("🏭 Facility Performance Analysis")
    if _ANC_CLIENTS_COLUMN not in facilities.columns:
        st.info(f"Column '{_ANC_CLIENTS_COLUMN}' not found; facility ranking unavailable.")
        return

    col1, col2 = st.columns(2)

    with col1:
        top_facilities = facilities.nlargest(10, _ANC_CLIENTS_COLUMN)[['organisationunitname', _ANC_CLIENTS_COLUMN]]
        st.write("**Top 10 Facilities by ANC Clients:**")
        st.dataframe(top_facilities, use_container_width=True)

    with col2:
        zero_reporting = facilities[facilities[_ANC_CLIENTS_COLUMN] == 0]
        if len(zero_reporting) > 0:
            shown = [c for c in ('organisationunitname', 'orgunitlevel1') if c in zero_reporting.columns]
            st.write(f"**Facilities with Zero Reporting ({len(zero_reporting)}):**")
            st.dataframe(zero_reporting[shown], use_container_width=True)
        else:
            st.success("🎉 All facilities have reported data!")


def _build_summary_report(facilities, metrics, selected_state, selected_facility_type):
    """Return the plain-text summary report for the current selection."""
    has_period = 'periodname' in facilities.columns and len(facilities) > 0
    period = facilities['periodname'].iloc[0] if has_period else 'Unknown'

    lines = [
        "PMTCT DASHBOARD SUMMARY REPORT",
        f"Generated on: {pd.Timestamp.now().strftime('%Y-%m-%d %H:%M')}",
        f"Period: {period}",
        f"Total Facilities: {len(facilities)}",
        f"Filter: {selected_state} | {selected_facility_type}",
        "",
        "KEY METRICS:",
    ]
    lines.extend(
        f"{get_indicator_short_name(indicator)}: "
        f"{entry['actual']:,.0f} / {entry['target']:,.0f} ({entry['achievement_rate']:.1f}%)"
        for indicator, entry in metrics.items()
    )
    return "\n".join(lines)


def main():
    """Render the PMTCT dashboard: upload, filters, KPIs, gauges, tables, export.

    The uploaded frame is kept in ``data`` for the whole function. Loop
    variables no longer reuse that name; previously the metric loops rebound
    ``data`` to a dict, which made the facility section and the report fail.
    """
    # Header
    st.markdown('<h1 class="main-header">🏥 PMTCT Program Dashboard</h1>', unsafe_allow_html=True)

    # Sidebar for file upload and filters
    st.sidebar.header("📊 Data Configuration")

    uploaded_file = st.sidebar.file_uploader(
        "Upload PMTCT Data CSV",
        type=['csv'],
        help="Upload your TESTING VS TARGET.csv file"
    )

    if uploaded_file is None:
        st.info("👆 Please upload your PMTCT CSV file to begin analysis")
        st.markdown("""
        ### Expected Data Format:
        Your CSV should contain columns like:
        - `PMTCT_ANC_1 Number of New ANC clients`
        - `PMTCT_HTS_6 Number of pregnant women HIV tested and received results ANC`
        - `PMTCT_HTS_7. Number of pregnant women tested HIV positive_ ANC`
        - `PMTCT_ART_15b. Number of HIV positive pregnant women newly started on ART during ANC <36wks of pregnancy`
        - And other PMTCT indicators
        """)
        return

    # Load data
    data = load_and_preprocess_data(uploaded_file)
    if data is None:
        return

    filtered_data, selected_state, selected_facility_type = _apply_sidebar_filters(data)
    if filtered_data.empty:
        st.warning("No facilities match the selected filters.")
        return
    if len(filtered_data) < len(data):
        st.sidebar.success(f"📋 Showing {len(filtered_data)} facilities")

    # One dashboard for the active selection; its cleaned frame also drives
    # the facility tables so they honour the filters and treat blanks as zero.
    dashboard = PMTCTDashboard(filtered_data)
    metrics = dashboard.calculate_metrics()

    _render_kpis(metrics)
    _render_gauges(dashboard, metrics)
    _render_achievement_overview(metrics)
    _render_summary_table(metrics)
    _render_facility_analysis(dashboard.data)

    # Export options
    st.sidebar.header("📤 Export")
    if st.sidebar.button("Generate Summary Report"):
        summary_report = _build_summary_report(
            dashboard.data, metrics, selected_state, selected_facility_type
        )
        st.sidebar.download_button(
            label="Download Summary Report",
            data=summary_report,
            file_name=f"pmtct_report_{pd.Timestamp.now().strftime('%Y%m%d')}.txt",
            mime="text/plain"
        )

# Entry point: render the dashboard only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()

# Notebook output captured at export time (not part of the program):
# 2025-10-12 12:38:34.227
#   command:
#
#     streamlit run C:\Users\DELL\AppData\Roaming\Python\Python312\site-packages\ipykernel_launcher.py [ARGUMENTS]
