### Great work team!👏 👏 👏

# Anostep Weekly survey as completed by each CHP (update on a daily basis)

Here are some quick analyses to look at the data so far :)
We start by accessing the data and cleaning it up

In [4]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import numpy as np
import json
import os
import webbrowser
from datetime import datetime, timedelta

def load_cleaned_data(filename="commcare_cleaned_data.csv"):
    """Load the cleaned data from CSV file"""
    try:
        if not os.path.exists(filename):
            print(f"File '{filename}' not found!")
            print("Please run the data loader script first.")
            return None
        
        df = pd.read_csv(filename)
        return df
    
    except Exception as e:
        print(f"Error loading data: {e}")
        return None

def find_columns(df, column_type):
    """Find columns based on type"""
    if column_type == 'username':
        return [col for col in df.columns if 'username' in col.lower()]
    elif column_type == 'anoph':
        return [col for col in df.columns if 'anoph' in col.lower()]
    elif column_type == 'county':
        return [col for col in df.columns if 'county' in col.lower()]
    elif column_type == 'date':
        return [col for col in df.columns if 'collection_date' in col.lower() or 'date' in col.lower()]
    else:
        return []

def prepare_time_data(df):
    """Prepare data with time information"""
    
    # Find date column
    date_cols = find_columns(df, 'date')
    if not date_cols:
        print("No collection date column found")
        return df, None
    
    date_col = date_cols[0]
    print(f"Using date column: {date_col}")
    
    # Convert to datetime
    df_time = df.copy()
    df_time[date_col] = pd.to_datetime(df_time[date_col], errors='coerce')
    
    # Remove rows with invalid dates
    df_time = df_time.dropna(subset=[date_col])
    
    if df_time.empty:
        print("No valid dates found")
        return df, None
    
    # Add week information
    df_time['week_number'] = df_time[date_col].dt.isocalendar().week
    df_time['year'] = df_time[date_col].dt.year
    df_time['year_week'] = df_time[date_col].dt.strftime('%Y-W%U')
    df_time['week_start'] = df_time[date_col].dt.to_period('W').dt.start_time
    
    print(f"Date range: {df_time[date_col].min()} to {df_time[date_col].max()}")
    print(f"Total weeks: {df_time['year_week'].nunique()}")
    
    return df_time, date_col

def create_interactive_username_graph(df):
    """Create interactive username count graph with clickable county filters"""
    
    # Find username, anoph, and county columns
    username_cols = find_columns(df, 'username')
    anoph_cols = find_columns(df, 'anoph')
    county_cols = find_columns(df, 'county')
    
    if not username_cols:
        print("No username column found")
        return None
    
    username_col = username_cols[0]
    anoph_col = anoph_cols[0] if anoph_cols else None
    county_col = county_cols[0] if county_cols else None
    
    print(f"Using columns - Username: {username_col}")
    if anoph_col:
        print(f"Anoph: {anoph_col}")
    if county_col:
        print(f"County: {county_col}")
    
    # Filter out 'an_steph_test' and remove rows with missing usernames
    df_filtered = df[df[username_col] != 'an_steph_test'].copy()
    df_filtered = df_filtered.dropna(subset=[username_col])
    df_filtered = df_filtered[df_filtered[username_col] != '']
    
    if df_filtered.empty:
        print("No data found after filtering")
        return None
    
    # Get unique counties
    unique_counties = []
    if county_col:
        unique_counties = sorted(df_filtered[county_col].dropna().unique())
    
    # Create subplot with buttons for county filtering
    fig = go.Figure()
    
    # Define colors for different anoph values
    color_dict = {}
    if anoph_col:
        anoph_values = df_filtered[anoph_col].unique()
        for col in anoph_values:
            if str(col).lower() == 'yes':
                color_dict[col] = '#2E8B57'  # Sea Green
            elif str(col).lower() == 'no':
                color_dict[col] = '#DC143C'  # Crimson
            elif str(col) == '___' or str(col) == '---' or str(col).strip() == '' or pd.isna(col):
                color_dict[col] = '#8B4513'  # Saddle Brown
            else:
                color_dict[col] = '#708090'  # Slate Gray
    
    def create_traces_for_county(county_filter=None):
        """Create traces for specific county or all counties"""
        if county_filter and county_col:
            df_county = df_filtered[df_filtered[county_col] == county_filter]
        else:
            df_county = df_filtered
        
        if df_county.empty:
            return []
        
        # Get username counts and sort alphabetically
        df_county[username_col] = df_county[username_col].astype(str)
        username_counts = df_county[username_col].value_counts()
        username_counts = username_counts.reindex(sorted(username_counts.index, key=str))
        
        # Create county mapping for each username
        county_mapping = {}
        if county_col:
            for username in username_counts.index:
                user_counties = df_county[df_county[username_col] == username][county_col].dropna()
                if not user_counties.empty:
                    county_mapping[username] = user_counties.mode().iloc[0] if len(user_counties.mode()) > 0 else user_counties.iloc[0]
                else:
                    county_mapping[username] = "Unknown"
        
        traces = []
        
        if anoph_col and anoph_col in df_county.columns:
            # Create stacked bar chart colored by anoph_present
            crosstab = pd.crosstab(df_county[username_col], df_county[anoph_col], dropna=False)
            sorted_index = sorted(crosstab.index, key=str)
            crosstab = crosstab.reindex(sorted_index, fill_value=0)
            
            for anoph_value in crosstab.columns:
                values = crosstab[anoph_value].values
                
                # Create hover text
                hover_text = []
                for i, (site, count) in enumerate(zip(crosstab.index, values)):
                    if count > 0:
                        total_for_site = crosstab.loc[site].sum()
                        percentage = (count / total_for_site) * 100 if total_for_site > 0 else 0
                        county_name = county_mapping.get(site, "Unknown") if county_mapping else "N/A"
                        hover_text.append(
                            f"<b>{site}</b><br>" +
                            f"County: {county_name}<br>" +
                            f"Anoph Present: {anoph_value}<br>" +
                            f"Count: {count}<br>" +
                            f"Percentage: {percentage:.1f}%<br>" +
                            f"Total for site: {total_for_site}"
                        )
                    else:
                        hover_text.append("")
                
                traces.append(go.Bar(
                    name=f'Anoph: {anoph_value}',
                    x=crosstab.index,
                    y=values,
                    marker_color=color_dict[anoph_value],
                    marker_line=dict(width=0.5, color='white'),
                    hovertemplate='%{hovertext}<extra></extra>',
                    hovertext=hover_text,
                    opacity=0.8,
                    visible=True
                ))
        else:
            # Simple bar chart if no anoph data
            hover_text = []
            for site, count in zip(username_counts.index, username_counts.values):
                county_name = county_mapping.get(site, "Unknown") if county_mapping else "N/A"
                hover_text.append(
                    f"<b>{site}</b><br>" +
                    f"County: {county_name}<br>" +
                    f"Total Surveys: {count}"
                )
            
            traces.append(go.Bar(
                x=username_counts.index,
                y=username_counts.values,
                marker_color='steelblue',
                marker_line=dict(width=0.5, color='navy'),
                hovertemplate='%{hovertext}<extra></extra>',
                hovertext=hover_text,
                opacity=0.8,
                visible=True
            ))
        
        return traces, len(df_county)
    
    # Add initial traces (all data)
    initial_traces, initial_count = create_traces_for_county()
    for trace in initial_traces:
        fig.add_trace(trace)
    
    # Create dropdown buttons for county filtering
    dropdown_buttons = []
    
    # Add "All Counties" option
    all_traces, all_count = create_traces_for_county()
    dropdown_buttons.append(
        dict(
            label=f"All Counties ({all_count} surveys)",
            method="restyle",
            args=[
                {"visible": [True] * len(all_traces)},
                list(range(len(all_traces)))
            ]
        )
    )
    
    # Add button for each county
    for county in unique_counties:
        county_traces, county_count = create_traces_for_county(county)
        if county_count > 0:  # Only add if county has data
            dropdown_buttons.append(
                dict(
                    label=f"{county} ({county_count} surveys)",
                    method="restyle", 
                    args=[
                        {"visible": [False] * len(initial_traces)},
                        list(range(len(initial_traces)))
                    ]
                )
            )
    
    # Create county buttons with proper data and JavaScript integration
    county_buttons_html = '<div style="text-align: center; margin: 10px; font-family: Arial, sans-serif;">'
    county_buttons_html += '<b>Filter by County:</b><br><br>'
    
    # Add "All" button
    county_buttons_html += f'<button onclick="filterByCounty(\'all\', {all_count})" id="btn-all" '
    county_buttons_html += 'style="margin: 5px; padding: 8px 12px; background: #007bff; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 11px;">'
    county_buttons_html += f'All Counties ({all_count})</button>'
    
    # Add county buttons in rows
    counties_per_row = 6 if len(unique_counties) <= 12 else 8
    for i, county in enumerate(unique_counties):
        county_data = df_filtered[df_filtered[county_col] == county] if county_col else pd.DataFrame()
        county_count = len(county_data)
        
        if i % counties_per_row == 0 and i > 0:
            county_buttons_html += '<br>'
        
        county_id = county.replace(" ", "-").replace("'", "").replace(".", "")
        county_buttons_html += f'<button onclick="filterByCounty(\'{county}\', {county_count})" id="btn-{county_id}" '
        county_buttons_html += 'style="margin: 5px; padding: 8px 12px; background: #6c757d; color: white; border: none; border-radius: 4px; cursor: pointer; font-size: 11px;">'
        county_buttons_html += f'{county} ({county_count})</button>'
    
    county_buttons_html += '</div>'
    
    # Enhanced JavaScript for county filtering and total update
    js_code = f"""
    <script>
    function updateTotalSurveys(count) {{
        var totalDiv = document.querySelector('[data-total-counter]');
        if (totalDiv) {{
            totalDiv.innerHTML = 'Total Surveys: ' + count.toLocaleString();
        }}
    }}
    
    function filterByCounty(county, count) {{
        // Reset all button styles
        var buttons = document.querySelectorAll('button[id^="btn-"]');
        buttons.forEach(function(btn) {{
            btn.style.background = '#6c757d';
        }});
        
        // Highlight selected button
        var countyId = county === 'all' ? 'all' : county.replace(/[\s'\.]/g, '-');
        var selectedBtn = document.getElementById('btn-' + countyId);
        if (selectedBtn) {{
            selectedBtn.style.background = '#007bff';
        }}
        
        // Update total surveys counter
        updateTotalSurveys(count);
        
        // Update chart title
        var titleElement = document.querySelector('.js-plotly-plot .gtitle');
        if (titleElement && county !== 'all') {{
            var originalTitle = titleElement.textContent.split(' - ')[0];
            titleElement.textContent = originalTitle + ' - Filtered by ' + county;
        }}
        
        console.log('Filtering by county:', county, 'Count:', count);
    }}
    
    // Initialize with all counties selected
    document.addEventListener('DOMContentLoaded', function() {{
        updateTotalSurveys({all_count});
    }});
    </script>
    """
    
    # Update layout
    total_surveys = len(df_filtered)
    unique_sites = df_filtered[username_col].nunique()
    unique_counties_count = len(unique_counties) if unique_counties else 0
    
    title_text = f'Survey Count by Collection Site<br><sub>{unique_sites} sites'
    if unique_counties_count > 0:
        title_text += f' across {unique_counties_count} counties</sub>'
    else:
        title_text += '</sub>'
    
    # Create total surveys counter for top right with data attribute
    total_counter_html = f'''
    <div data-total-counter style="position: fixed; top: 20px; right: 20px; 
                background: rgba(255,255,255,0.95); 
                border: 2px solid #007bff; 
                border-radius: 8px; 
                padding: 15px 20px; 
                font-family: Arial, sans-serif; 
                font-size: 18px; 
                font-weight: bold; 
                color: #007bff;
                box-shadow: 0 4px 8px rgba(0,0,0,0.1);
                z-index: 9999;">
        Total Surveys: {total_surveys:,}
    </div>
    '''
    
    fig.update_layout(
        title={
            'text': title_text,
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 16}
        },
        xaxis_title='Collection Site',
        yaxis_title='Count of Survey',
        barmode='stack' if anoph_col else 'group',
        hovermode='closest',
        width=1200,
        height=850,
        font=dict(size=12),
        showlegend=True if anoph_col else False,
        legend=dict(
            orientation="v",
            yanchor="top",
            y=1,
            xanchor="left",
            x=1.02
        ),
        margin=dict(l=80, r=120, t=100, b=250),
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        annotations=[
            dict(
                text=total_counter_html,
                xref="paper", yref="paper",
                x=1, y=1,
                xanchor="right", yanchor="top",
                showarrow=False,
                bgcolor="rgba(0,0,0,0)",
                borderwidth=0
            ),
            dict(
                text=county_buttons_html + js_code,
                xref="paper", yref="paper",
                x=0.5, y=-0.25,
                xanchor="center", yanchor="top",
                showarrow=False,
                font=dict(size=11),
                bgcolor="rgba(255,255,255,0.9)",
                bordercolor="gray",
                borderwidth=1,
                borderpad=15,
                align="center"
            )
        ]
    )
    
    # Update x-axis
    fig.update_xaxes(
        tickangle=45,
        tickfont=dict(size=10),
        gridcolor='lightgray',
        gridwidth=0.5,
        showgrid=True
    )
    
    # Update y-axis
    fig.update_yaxes(
        gridcolor='lightgray',
        gridwidth=0.5,
        showgrid=True,
        zeroline=True,
        zerolinecolor='gray',
        zerolinewidth=1
    )
    
    return fig, df_filtered, unique_counties
    """Create interactive graph with time slider for collection weeks"""
    
    # Find required columns
    username_cols = find_columns(df, 'username')
    anoph_cols = find_columns(df, 'anoph')
    county_cols = find_columns(df, 'county')
    
    if not username_cols:
        print("No username column found")
        return None
    
    username_col = username_cols[0]
    anoph_col = anoph_cols[0] if anoph_cols else None
    county_col = county_cols[0] if county_cols else None
    
    # Prepare time data
    df_time, date_col = prepare_time_data(df)
    if date_col is None:
        print("Cannot create time slider without date information")
        return None
    
    # Filter out test user
    df_filtered = df_time[df_time[username_col] != 'an_steph_test'].copy()
    df_filtered = df_filtered.dropna(subset=[username_col])
    df_filtered = df_filtered[df_filtered[username_col] != '']
    
    if df_filtered.empty:
        print("No data found after filtering")
        return None
    
    # Get unique weeks sorted
    unique_weeks = sorted(df_filtered['year_week'].unique())
    unique_counties = sorted(df_filtered[county_col].dropna().unique()) if county_col else []
    
    print(f"Creating time slider for {len(unique_weeks)} weeks")
    
    # Create frames for animation/slider
    frames = []
    
    # Color mapping for anoph values
    color_dict = {}
    if anoph_col:
        anoph_values = df_filtered[anoph_col].unique()
        for col in anoph_values:
            if str(col).lower() == 'yes':
                color_dict[col] = '#2E8B57'  # Sea Green
            elif str(col).lower() == 'no':
                color_dict[col] = '#DC143C'  # Crimson
            elif str(col) == '___' or str(col) == '---' or str(col).strip() == '' or pd.isna(col):
                color_dict[col] = '#8B4513'  # Saddle Brown
            else:
                color_dict[col] = '#708090'  # Slate Gray
    
    # Create data for each week
    for week in unique_weeks:
        week_data = df_filtered[df_filtered['year_week'] == week]
        
        if week_data.empty:
            continue
        
        frame_traces = []
        
        if anoph_col and anoph_col in week_data.columns:
            # Create stacked bar chart for this week
            username_counts = week_data[username_col].value_counts()
            username_counts = username_counts.reindex(sorted(username_counts.index, key=str))
            
            crosstab = pd.crosstab(week_data[username_col], week_data[anoph_col], dropna=False)
            sorted_index = sorted(crosstab.index, key=str)
            crosstab = crosstab.reindex(sorted_index, fill_value=0)
            
            # County mapping for this week
            county_mapping = {}
            if county_col:
                for username in crosstab.index:
                    user_counties = week_data[week_data[username_col] == username][county_col].dropna()
                    if not user_counties.empty:
                        county_mapping[username] = user_counties.mode().iloc[0] if len(user_counties.mode()) > 0 else user_counties.iloc[0]
                    else:
                        county_mapping[username] = "Unknown"
            
            for anoph_value in crosstab.columns:
                values = crosstab[anoph_value].values
                
                # Create hover text for this week
                hover_text = []
                for site, count in zip(crosstab.index, values):
                    if count > 0:
                        total_for_site = crosstab.loc[site].sum()
                        percentage = (count / total_for_site) * 100 if total_for_site > 0 else 0
                        county_name = county_mapping.get(site, "Unknown") if county_mapping else "N/A"
                        hover_text.append(
                            f"<b>{site}</b><br>" +
                            f"Week: {week}<br>" +
                            f"County: {county_name}<br>" +
                            f"Anoph Present: {anoph_value}<br>" +
                            f"Count: {count}<br>" +
                            f"Percentage: {percentage:.1f}%<br>" +
                            f"Total for site: {total_for_site}"
                        )
                    else:
                        hover_text.append("")
                
                frame_traces.append(go.Bar(
                    name=f'Anoph: {anoph_value}',
                    x=crosstab.index,
                    y=values,
                    marker_color=color_dict[anoph_value],
                    marker_line=dict(width=0.5, color='white'),
                    hovertemplate='%{hovertext}<extra></extra>',
                    hovertext=hover_text,
                    opacity=0.8
                ))
        else:
            # Simple bar chart for this week
            username_counts = week_data[username_col].value_counts()
            username_counts = username_counts.reindex(sorted(username_counts.index, key=str))
            
            # County mapping for this week
            county_mapping = {}
            if county_col:
                for username in username_counts.index:
                    user_counties = week_data[week_data[username_col] == username][county_col].dropna()
                    if not user_counties.empty:
                        county_mapping[username] = user_counties.mode().iloc[0] if len(user_counties.mode()) > 0 else user_counties.iloc[0]
                    else:
                        county_mapping[username] = "Unknown"
            
            hover_text = []
            for site, count in zip(username_counts.index, username_counts.values):
                county_name = county_mapping.get(site, "Unknown") if county_mapping else "N/A"
                hover_text.append(
                    f"<b>{site}</b><br>" +
                    f"Week: {week}<br>" +
                    f"County: {county_name}<br>" +
                    f"Total Surveys: {count}"
                )
            
            frame_traces.append(go.Bar(
                x=username_counts.index,
                y=username_counts.values,
                marker_color='steelblue',
                marker_line=dict(width=0.5, color='navy'),
                hovertemplate='%{hovertext}<extra></extra>',
                hovertext=hover_text,
                opacity=0.8
            ))
        
        frames.append(go.Frame(
            data=frame_traces,
            name=week,
            layout=go.Layout(
                title=f'Survey Count by Collection Site - Week {week}<br><sub>Total: {len(week_data):,} surveys</sub>'
            )
        ))
    
    # Create initial figure (first week)
    if frames:
        fig = go.Figure(data=frames[0].data, frames=frames)
    else:
        print("No frames created - no data available")
        return None
    
    # Update layout with slider
    total_surveys = len(df_filtered)
    unique_sites = df_filtered[username_col].nunique()
    unique_counties_count = len(unique_counties)
    
    fig.update_layout(
        title={
            'text': f'Survey Count by Collection Site - Interactive Timeline<br><sub>Total: {total_surveys:,} surveys from {unique_sites} sites across {unique_counties_count} counties</sub>',
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 16}
        },
        xaxis_title='Collection Site',
        yaxis_title='Count of Survey',
        barmode='stack' if anoph_col else 'group',
        hovermode='closest',
        width=1200,
        height=700,
        font=dict(size=12),
        showlegend=True if anoph_col else False,
        legend=dict(
            orientation="v",
            yanchor="top",
            y=1,
            xanchor="left",
            x=1.02
        ),
        margin=dict(l=80, r=120, t=120, b=150),
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        updatemenus=[
            dict(
                type="buttons",
                direction="left",
                buttons=list([
                    dict(
                        args=[{"frame": {"duration": 500, "redraw": True},
                               "fromcurrent": True, "transition": {"duration": 300}}],
                        label="Play",
                        method="animate"
                    ),
                    dict(
                        args=[{"frame": {"duration": 0, "redraw": True},
                               "mode": "immediate",
                               "transition": {"duration": 0}}],
                        label="Pause",
                        method="animate"
                    )
                ]),
                pad={"r": 10, "t": 87},
                showactive=False,
                x=0.011,
                xanchor="right",
                y=0,
                yanchor="top"
            ),
        ],
        sliders=[
            dict(
                active=0,
                yanchor="top",
                xanchor="left",
                currentvalue={
                    "font": {"size": 16},
                    "prefix": "Week: ",
                    "visible": True,
                    "xanchor": "right"
                },
                transition={"duration": 300, "easing": "cubic-in-out"},
                pad={"b": 10, "t": 50},
                len=0.9,
                x=0.1,
                y=0,
                steps=[
                    dict(
                        args=[
                            [week],
                            {"frame": {"duration": 300, "redraw": True},
                             "mode": "immediate",
                             "transition": {"duration": 300}}
                        ],
                        label=week,
                        method="animate"
                    ) for week in unique_weeks
                ]
            )
        ]
    )
    
    # Update axes
    fig.update_xaxes(
        tickangle=45,
        tickfont=dict(size=10),
        gridcolor='lightgray',
        gridwidth=0.5,
        showgrid=True
    )
    
    fig.update_yaxes(
        gridcolor='lightgray',
        gridwidth=0.5,
        showgrid=True,
        zeroline=True,
        zerolinecolor='gray',
        zerolinewidth=1
    )
    
    return fig, df_filtered, unique_counties, unique_weeks
    """Create interactive username count graph colored by anoph_present, excluding 'an_steph_test'"""
    
    # Find username, anoph, and county columns
    username_cols = find_columns(df, 'username')
    anoph_cols = find_columns(df, 'anoph')
    county_cols = find_columns(df, 'county')
    
    if not username_cols:
        print("No username column found")
        return None
    
    username_col = username_cols[0]
    anoph_col = anoph_cols[0] if anoph_cols else None
    county_col = county_cols[0] if county_cols else None
    
    print(f"Using columns - Username: {username_col}")
    if anoph_col:
        print(f"Anoph: {anoph_col}")
    if county_col:
        print(f"County: {county_col}")
    
    # Filter out 'an_steph_test' and remove rows with missing usernames
    df_filtered = df[df[username_col] != 'an_steph_test'].copy()
    df_filtered = df_filtered.dropna(subset=[username_col])
    df_filtered = df_filtered[df_filtered[username_col] != '']
    
    if df_filtered.empty:
        print("No data found after filtering")
        return None
    
    # Get username counts and sort alphabetically
    df_filtered[username_col] = df_filtered[username_col].astype(str)
    username_counts = df_filtered[username_col].value_counts()
    username_counts = username_counts.reindex(sorted(username_counts.index, key=str))
    
    # Create county mapping for each username
    county_mapping = {}
    if county_col:
        for username in username_counts.index:
            user_counties = df_filtered[df_filtered[username_col] == username][county_col].dropna()
            if not user_counties.empty:
                # Get the most common county for this username
                county_mapping[username] = user_counties.mode().iloc[0] if len(user_counties.mode()) > 0 else user_counties.iloc[0]
            else:
                county_mapping[username] = "Unknown"
    
    # Get unique counties for the county list
    unique_counties = []
    if county_col:
        unique_counties = sorted(df_filtered[county_col].dropna().unique())
    
    # Create interactive figure
    fig = go.Figure()
    
    if anoph_col and anoph_col in df_filtered.columns:
        # Create stacked bar chart colored by anoph_present
        crosstab = pd.crosstab(df_filtered[username_col], df_filtered[anoph_col], dropna=False)
        sorted_index = sorted(crosstab.index, key=str)
        crosstab = crosstab.reindex(sorted_index, fill_value=0)
        
        # Define colors for different anoph values
        color_dict = {}
        for col in crosstab.columns:
            if str(col).lower() == 'yes':
                color_dict[col] = '#2E8B57'  # Sea Green
            elif str(col).lower() == 'no':
                color_dict[col] = '#DC143C'  # Crimson
            elif str(col) == '___' or str(col) == '---' or str(col).strip() == '' or pd.isna(col):
                color_dict[col] = '#8B4513'  # Saddle Brown
            else:
                color_dict[col] = '#708090'  # Slate Gray
        
        # Create stacked bars
        for anoph_value in crosstab.columns:
            values = crosstab[anoph_value].values
            
            # Create hover text
            hover_text = []
            for i, (site, count) in enumerate(zip(crosstab.index, values)):
                if count > 0:
                    total_for_site = crosstab.loc[site].sum()
                    percentage = (count / total_for_site) * 100 if total_for_site > 0 else 0
                    county_name = county_mapping.get(site, "Unknown") if county_mapping else "N/A"
                    hover_text.append(
                        f"<b>{site}</b><br>" +
                        f"County: {county_name}<br>" +
                        f"Anoph Present: {anoph_value}<br>" +
                        f"Count: {count}<br>" +
                        f"Percentage: {percentage:.1f}%<br>" +
                        f"Total for site: {total_for_site}"
                    )
                else:
                    hover_text.append("")
            
            fig.add_trace(go.Bar(
                name=f'Anoph: {anoph_value}',
                x=crosstab.index,
                y=values,
                marker_color=color_dict[anoph_value],
                marker_line=dict(width=0.5, color='white'),
                hovertemplate='%{hovertext}<extra></extra>',
                hovertext=hover_text,
                opacity=0.8
            ))
            
    else:
        # Simple bar chart if no anoph data
        hover_text = []
        for site, count in zip(username_counts.index, username_counts.values):
            county_name = county_mapping.get(site, "Unknown") if county_mapping else "N/A"
            hover_text.append(
                f"<b>{site}</b><br>" +
                f"County: {county_name}<br>" +
                f"Total Surveys: {count}"
            )
        
        fig.add_trace(go.Bar(
            x=username_counts.index,
            y=username_counts.values,
            marker_color='steelblue',
            marker_line=dict(width=0.5, color='navy'),
            hovertemplate='%{hovertext}<extra></extra>',
            hovertext=hover_text,
            opacity=0.8
        ))
    
    # Create county list text for annotation
    county_list_text = ""
    if unique_counties:
        county_list_text = "<b>Counties in Dataset:</b><br>" + "<br>".join([f"• {county}" for county in unique_counties])
    
    # Update layout
    total_surveys = len(df_filtered)
    unique_sites = len(username_counts)
    unique_counties_count = len(unique_counties) if unique_counties else 0
    
    title_text = f'Survey Count by Collection Site<br><sub>Total: {total_surveys:,} surveys from {unique_sites} sites'
    if unique_counties_count > 0:
        title_text += f' across {unique_counties_count} counties</sub>'
    else:
        title_text += '</sub>'
    
    fig.update_layout(
        title={
            'text': title_text,
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 16}
        },
        xaxis_title='Collection Site',
        yaxis_title='Count of Survey',
        barmode='stack' if anoph_col else 'group',
        hovermode='closest',
        width=1400,
        height=700,
        font=dict(size=12),
        showlegend=True if anoph_col else False,
        legend=dict(
            orientation="v",
            yanchor="top",
            y=1,
            xanchor="left",
            x=1.02
        ),
        margin=dict(l=80, r=200, t=100, b=150),
        plot_bgcolor='rgba(0,0,0,0)',
        paper_bgcolor='rgba(0,0,0,0)',
        annotations=[
            dict(
                text=county_list_text,
                xref="paper", yref="paper",
                x=1.15, y=0.5,
                xanchor="left", yanchor="middle",
                showarrow=False,
                font=dict(size=11),
                bgcolor="rgba(255,255,255,0.8)",
                bordercolor="gray",
                borderwidth=1,
                borderpad=10
            )
        ] if county_list_text else []
    )
    
    # Update x-axis
    fig.update_xaxes(
        tickangle=45,
        tickfont=dict(size=10),
        gridcolor='lightgray',
        gridwidth=0.5,
        showgrid=True
    )
    
    # Update y-axis
    fig.update_yaxes(
        gridcolor='lightgray',
        gridwidth=0.5,
        showgrid=True,
        zeroline=True,
        zerolinecolor='gray',
        zerolinewidth=1
    )
    
    return fig, df_filtered, unique_counties

def create_summary_dashboard(df):
    """Create a comprehensive dashboard with multiple views"""
    
    username_cols = find_columns(df, 'username')
    anoph_cols = find_columns(df, 'anoph')
    
    if not username_cols:
        print("No username column found for dashboard")
        return None
    
    username_col = username_cols[0]
    anoph_col = anoph_cols[0] if anoph_cols else None
    
    # Filter data
    df_filtered = df[df[username_col] != 'an_steph_test'].copy()
    df_filtered = df_filtered.dropna(subset=[username_col])
    df_filtered = df_filtered[df_filtered[username_col] != '']
    
    if df_filtered.empty:
        return None
    
    # Create subplots
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=(
            'Survey Count by Collection Site',
            'Anopheles Present Distribution',
            'Top 10 Most Active Sites',
            'Survey Activity Overview'
        ),
        specs=[[{"secondary_y": False}, {"type": "pie"}],
               [{"type": "bar"}, {"type": "indicator"}]],
        vertical_spacing=0.12,
        horizontal_spacing=0.1
    )
    
    # 1. Main bar chart (top left)
    username_counts = df_filtered[username_col].value_counts()
    username_counts = username_counts.reindex(sorted(username_counts.index, key=str))
    
    if anoph_col and anoph_col in df_filtered.columns:
        crosstab = pd.crosstab(df_filtered[username_col], df_filtered[anoph_col], dropna=False)
        sorted_index = sorted(crosstab.index, key=str)
        crosstab = crosstab.reindex(sorted_index, fill_value=0)
        
        colors = ['#2E8B57', '#DC143C', '#8B4513', '#708090']
        for i, anoph_value in enumerate(crosstab.columns):
            color = colors[i % len(colors)]
            fig.add_trace(
                go.Bar(
                    name=f'Anoph: {anoph_value}',
                    x=crosstab.index,
                    y=crosstab[anoph_value].values,
                    marker_color=color,
                    showlegend=True
                ),
                row=1, col=1
            )
    
    # 2. Pie chart (top right)
    if anoph_col and anoph_col in df_filtered.columns:
        anoph_counts = df_filtered[anoph_col].value_counts()
        fig.add_trace(
            go.Pie(
                labels=anoph_counts.index,
                values=anoph_counts.values,
                marker_colors=['#2E8B57', '#DC143C', '#8B4513', '#708090'][:len(anoph_counts)],
                hovertemplate='<b>%{label}</b><br>Count: %{value}<br>Percentage: %{percent}<extra></extra>'
            ),
            row=1, col=2
        )
    
    # 3. Top 10 sites (bottom left)
    top_10_sites = username_counts.head(10)
    fig.add_trace(
        go.Bar(
            x=top_10_sites.values,
            y=top_10_sites.index,
            orientation='h',
            marker_color='lightblue',
            marker_line=dict(width=1, color='navy'),
            showlegend=False,
            hovertemplate='<b>%{y}</b><br>Surveys: %{x}<extra></extra>'
        ),
        row=2, col=1
    )
    
    # 4. Summary indicators (bottom right)
    total_surveys = len(df_filtered)
    unique_sites = len(username_counts)
    avg_per_site = total_surveys / unique_sites if unique_sites > 0 else 0
    
    fig.add_trace(
        go.Indicator(
            mode="number+delta",
            value=total_surveys,
            title={"text": "Total Surveys"},
            domain={'x': [0, 0.5], 'y': [0.7, 1]},
            number={'font': {'size': 40}},
            delta={'reference': total_surveys * 0.9, 'relative': True}
        ),
        row=2, col=2
    )
    
    # Update layout
    fig.update_layout(
        title={
            'text': 'CommCare Survey Dashboard',
            'x': 0.5,
            'xanchor': 'center',
            'font': {'size': 20}
        },
        height=800,
        width=1400,
        showlegend=True,
        barmode='stack'
    )
    
    return fig

def main():
    try:
        # Load the cleaned data
        df = load_cleaned_data()
        if df is None:
            return
        
        print("Creating interactive survey visualization with county list...")
        
        # Create interactive graph with county list
        result = create_interactive_username_graph(df)
        
        if result is not None:
            if len(result) == 3:
                fig, df_filtered, unique_counties = result
            else:
                print("Unexpected result format")
                return
            
            # Save as HTML
            main_filename = 'interactive_survey_count.html'
            fig.write_html(
                main_filename,
                config={
                    'displayModeBar': True,
                    'displaylogo': False,
                    'modeBarButtonsToAdd': ['drawline', 'drawopenpath', 'drawclosedpath', 'drawcircle', 'drawrect', 'eraseshape']
                }
            )
            
            print(f"Interactive survey count graph saved as: {main_filename}")
            if unique_counties:
                print(f"Counties included: {', '.join(unique_counties)}")
            
            # Open in browser
            try:
                print("Opening interactive visualization in browser...")
                main_path = os.path.abspath(main_filename)
                webbrowser.open(f'file://{main_path}')
                
            except Exception as e:
                print(f"Could not open browser automatically: {e}")
                print(f"Please manually open: {main_filename}")
            
            print("\nInteractive features:")
            print("- Click county buttons below the chart to filter data")
            print("- Hover over bars for detailed information (including county)")
            print("- Click legend items to show/hide categories")
            print("- Use toolbar for zoom, pan, and selection")
            print("- Double-click to reset zoom")
            print("- County buttons show survey count for each county")
            
        else:
            print("Failed to create interactive graph")
    
    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    main()

Creating interactive survey visualization with county list...
Using columns - Username: username
Anoph: anoph_present
County: county
Interactive survey count graph saved as: interactive_survey_count.html
Counties included: Baringo, Bungoma, Garissa, Kakamega, Kisumu, Kitui, Lamu, Machakos, Mandera, Marsabit, Meru, Migori, Nakuru, Narok, Samburu, Tana River, Turkana, Wajir, West Pokot
Opening interactive visualization in browser...




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/


Interactive features:
- Click county buttons below the chart to filter data
- Hover over bars for detailed information (including county)
- Click legend items to show/hide categories
- Use toolbar for zoom, pan, and selection
- Double-click to reset zoom
- County buttons show survey count for each county
