### Great work team!👏 👏 👏

# Anostep results so far (updated daily)

Here are some quick analyses of the data collected so far :)
We start by accessing the data and cleaning it up.

In [5]:
import pandas as pd
import folium
import numpy as np
import os
import webbrowser
from folium import plugins

def load_cleaned_data(filename="commcare_cleaned_data.csv"):
    """Read the cleaned-data CSV into a DataFrame.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The loaded DataFrame, or None when the file is missing or cannot
        be read (a message is printed in either case).
    """
    try:
        if os.path.exists(filename):
            return pd.read_csv(filename)

        # Missing file: point the user at the prerequisite step.
        print(f"File '{filename}' not found!")
        print("Please run the data loader script first.")
    except Exception as load_error:
        print(f"Error loading data: {load_error}")

    return None

def find_gps_columns(df):
    """Locate candidate GPS columns by case-insensitive substring match.

    Returns:
        A 3-tuple of column-name lists, each in DataFrame column order:
        names containing 'site_gps', names containing 'lat', and names
        containing 'lon'.
    """
    def columns_containing(fragment):
        return [name for name in df.columns if fragment in name.lower()]

    return (
        columns_containing('site_gps'),
        columns_containing('lat'),
        columns_containing('lon'),
    )

def extract_coordinates(df, gps_col):
    """Extract validated latitude/longitude pairs from a GPS text column.

    Each non-empty value of ``df[gps_col]`` is parsed as whitespace-separated
    ``"lat lon ..."``, ``"lat,lon"`` or ``"lat;lon"``.  A comma or semicolon
    next to whitespace (``"lat, lon"``) is accepted as well.  Only the first
    two numbers are used.  Values that cannot be parsed, or that fall
    outside the bounding box -5..5 latitude / 33..42 longitude, are skipped
    silently.

    Args:
        df: DataFrame holding the records.
        gps_col: Name of the column containing the GPS strings.

    Returns:
        A list of dicts with keys 'latitude', 'longitude', 'index' (the
        row's index label) and 'data' (the whole row as a dict).
    """
    coordinates = []

    # enumerate() yields the row position so the row can be fetched with
    # iloc; df.loc[label] would return several rows if index labels repeat.
    for position, (idx, gps_data) in enumerate(df[gps_col].items()):
        if pd.isna(gps_data):
            continue

        gps_str = str(gps_data).strip()
        if not gps_str:
            continue

        tokens = gps_str.split()
        if len(tokens) > 1:
            # Whitespace-separated; drop a comma/semicolon glued to a token
            # so that "lat, lon" parses instead of failing in float().
            parts = [token.strip(',;') for token in tokens]
            parts = [part for part in parts if part]
        elif ',' in gps_str:
            parts = gps_str.split(',')
        elif ';' in gps_str:
            parts = gps_str.split(';')
        else:
            continue

        if len(parts) < 2:
            continue

        try:
            lat = float(parts[0].strip())
            lon = float(parts[1].strip())
        except ValueError:
            continue

        # Basic validation for Kenya coordinates
        if -5 <= lat <= 5 and 33 <= lon <= 42:
            coordinates.append({
                'latitude': lat,
                'longitude': lon,
                'index': idx,
                'data': df.iloc[position].to_dict(),
            })

    return coordinates

def create_interactive_kenya_map(df):
    """Create an interactive folium map of Kenya with clustered collection points.

    Coordinates are read from the first 'site_gps'-style column; if that
    yields nothing, the first 'lat'/'lon'-style columns are tried instead.
    Only points inside the bounding box -5..5 latitude / 33..42 longitude
    are plotted.  Markers are coloured by the first column whose name
    contains 'anoph' (when one exists) and grouped in a MarkerCluster.

    Values taken from the data are HTML-escaped before being placed in
    popups and tooltips, so markup inside a field value is displayed as
    text instead of being interpreted by the browser.

    Args:
        df: DataFrame of collection records.

    Returns:
        The folium.Map, or None when no GPS columns or no valid coordinates
        are found (a message is printed in that case).
    """
    # Local import so this block does not rely on a file-level import.
    from html import escape

    # Find GPS columns
    site_gps_cols, lat_cols, lon_cols = find_gps_columns(df)

    if not site_gps_cols and not (lat_cols and lon_cols):
        print("No GPS coordinate columns found")
        print("Looking for columns containing: site_gps, lat, lon")
        return None

    coordinates = []

    # Preferred source: a combined GPS text column
    if site_gps_cols:
        gps_col = site_gps_cols[0]
        print(f"Using GPS column: {gps_col}")
        coordinates = extract_coordinates(df, gps_col)

    # Fallback: separate latitude / longitude columns
    if not coordinates and lat_cols and lon_cols:
        lat_col = lat_cols[0]
        lon_col = lon_cols[0]
        print(f"Using latitude column: {lat_col}")
        print(f"Using longitude column: {lon_col}")

        for idx, row in df.iterrows():
            try:
                lat = float(row[lat_col])
                lon = float(row[lon_col])
            except (ValueError, TypeError):
                continue

            # Basic validation for Kenya coordinates
            if -5 <= lat <= 5 and 33 <= lon <= 42:
                coordinates.append({
                    'latitude': lat,
                    'longitude': lon,
                    'index': idx,
                    'data': row.to_dict(),
                })

    if not coordinates:
        print("No valid GPS coordinates found")
        return None

    print(f"Found {len(coordinates)} valid GPS coordinates")

    # Approximate center of Kenya, used as the initial map view
    kenya_center = [-0.5, 37.5]

    # Create base map
    m = folium.Map(
        location=kenya_center,
        zoom_start=6,
        tiles='OpenStreetMap'
    )

    # Add different tile layers with proper attributions
    # NOTE(review): 'Stamen Terrain' may no longer resolve as a built-in
    # tile name in recent folium releases - confirm this layer still renders.
    folium.TileLayer(
        tiles='Stamen Terrain',
        attr='Map tiles by Stamen Design, under CC BY 3.0. Data by OpenStreetMap, under ODbL.',
        name='Terrain'
    ).add_to(m)

    folium.TileLayer(
        tiles='CartoDB positron',
        attr='&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors &copy; <a href="https://carto.com/attributions">CARTO</a>',
        name='Light'
    ).add_to(m)

    # Values treated as "missing" when classifying the anoph column
    missing_markers = ('---', '___', 'nan', '')

    def get_color(anoph_val):
        """Map an anoph value to a marker colour."""
        anoph_str = str(anoph_val).lower().strip()
        if anoph_str == 'yes':
            return 'green'
        if anoph_str == 'no':
            return 'red'
        if anoph_str in missing_markers:
            return 'orange'
        return 'blue'

    def get_icon(anoph_val):
        """Map an anoph value to a Font Awesome icon name."""
        anoph_str = str(anoph_val).lower().strip()
        if anoph_str == 'yes':
            return 'plus'
        if anoph_str == 'no':
            return 'minus'
        if anoph_str in missing_markers:
            return 'question'
        return 'info'

    # Find anoph column
    anoph_cols = [col for col in df.columns if 'anoph' in col.lower()]
    anoph_col = anoph_cols[0] if anoph_cols else None

    if anoph_col:
        print(f"Coloring markers by: {anoph_col}")

    # Create marker clusters for better performance
    marker_cluster = plugins.MarkerCluster().add_to(m)

    # Fields shown in each popup; built once because it does not depend on
    # the individual point.  anoph_col is dropped when it is None.
    popup_fields = [
        name for name in (
            'username', 'site_code', 'county', 'subcounty',
            'collection_date', anoph_col, 'culicine_present',
        )
        if name
    ]

    # Add markers for each coordinate
    for coord in coordinates:
        lat = coord['latitude']
        lon = coord['longitude']
        data = coord['data']

        # Get anoph value for coloring
        anoph_val = data.get(anoph_col, 'unknown') if anoph_col else 'unknown'

        # One table row per populated field; values are escaped because
        # the popup string is rendered as HTML.
        rows = []
        for field in popup_fields:
            if field in data and not pd.isna(data[field]):
                label = escape(field.replace('_', ' ').title())
                value = escape(str(data[field]))
                rows.append(
                    f"<tr><td><b>{label}:</b></td><td>{value}</td></tr>"
                )
        rows.append(
            f"<tr><td><b>Coordinates:</b></td><td>{lat:.4f}, {lon:.4f}</td></tr>"
        )

        popup_content = (
            "<div style='width:300px'>"
            "<h4><b>Collection Point Details</b></h4>"
            "<table style='width:100%'>"
            + "".join(rows)
            + "</table></div>"
        )

        site_label = escape(str(data.get('username', 'Unknown')))
        anoph_label = escape(str(anoph_val))

        # Create marker
        folium.Marker(
            location=[lat, lon],
            popup=folium.Popup(popup_content, max_width=400),
            tooltip=f"Site: {site_label} | Anoph: {anoph_label}",
            icon=folium.Icon(
                color=get_color(anoph_val),
                icon=get_icon(anoph_val),
                prefix='fa'
            )
        ).add_to(marker_cluster)

    # Add a legend
    legend_html = '''
    <div style="position: fixed; 
                top: 10px; right: 10px; width: 180px; height: 120px; 
                background-color: white; border:2px solid grey; z-index:9999; 
                font-size:14px; padding: 10px">
    <h4>Anopheles Present</h4>
    <p><i class="fa fa-plus" style="color:green"></i> Yes</p>
    <p><i class="fa fa-minus" style="color:red"></i> No</p>
    <p><i class="fa fa-question" style="color:orange"></i> Missing/Unknown</p>
    <p><i class="fa fa-info" style="color:blue"></i> Other</p>
    </div>
    '''
    m.get_root().html.add_child(folium.Element(legend_html))

    # Add layer control
    folium.LayerControl().add_to(m)

    # Add fullscreen button
    plugins.Fullscreen().add_to(m)

    # Add measure tool
    plugins.MeasureControl().add_to(m)

    # Add minimap
    minimap = plugins.MiniMap()
    m.add_child(minimap)

    # Add mouse position
    fmtr = "function(num) {return L.Util.formatNum(num, 4) + ' º '};"
    plugins.MousePosition(
        position='topright',
        separator=' | ',
        empty_string='NaN',
        lng_first=True,
        num_digits=20,
        prefix='Coordinates:',
        lat_formatter=fmtr,
        lng_formatter=fmtr,
    ).add_to(m)

    return m

def create_heatmap(df, coordinates):
    """Build a folium map with a heat layer showing collection-point density.

    Args:
        df: Not used by this function; kept in the signature for callers.
        coordinates: Iterable of dicts carrying 'latitude' and 'longitude'.

    Returns:
        The folium.Map with the heat layer and a layer control attached.
    """
    carto_attribution = (
        '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> '
        'contributors &copy; <a href="https://carto.com/attributions">CARTO</a>'
    )

    # Base map centred on Kenya
    density_map = folium.Map(
        location=[-0.5, 37.5],
        zoom_start=6,
        tiles='OpenStreetMap',
    )

    # Alternative light basemap
    light_layer = folium.TileLayer(
        tiles='CartoDB positron',
        attr=carto_attribution,
        name='Light',
    )
    light_layer.add_to(density_map)

    # HeatMap takes plain [lat, lon] pairs
    points = []
    for entry in coordinates:
        points.append([entry['latitude'], entry['longitude']])

    heat_layer = plugins.HeatMap(points)
    heat_layer.add_to(density_map)

    layer_control = folium.LayerControl()
    layer_control.add_to(density_map)

    return density_map

def main():
    """Build, save and open the interactive Kenya map and its heatmap.

    Loads the cleaned data, writes 'kenya_collection_points_interactive.html'
    and, when coordinates are available, 'kenya_collection_heatmap.html',
    then opens the interactive map in the default browser and asks on stdin
    whether to open the heatmap too.  Any unexpected error is printed
    together with its traceback instead of being raised.
    """
    # Path.as_uri() produces a valid, percent-encoded file:// URL on every
    # platform; 'file://' + absolute path is malformed on Windows.
    from pathlib import Path

    try:
        # Load the cleaned data
        df = load_cleaned_data()
        if df is None:
            return

        print("Interactive Kenya Map with Collection Points")
        print("=" * 45)

        # Create the interactive map
        print("Creating interactive map...")
        map_obj = create_interactive_kenya_map(df)

        if map_obj is None:
            print("Failed to create map")
            return

        # Save the map
        map_filename = 'kenya_collection_points_interactive.html'
        map_obj.save(map_filename)
        print(f"Interactive map saved as: {map_filename}")

        # Re-extract coordinates for the heatmap, using the same order of
        # preference as create_interactive_kenya_map: GPS column first,
        # separate lat/lon columns only when that yields nothing.
        site_gps_cols, lat_cols, lon_cols = find_gps_columns(df)
        coordinates = []

        if site_gps_cols:
            coordinates = extract_coordinates(df, site_gps_cols[0])

        if not coordinates and lat_cols and lon_cols:
            for _, row in df.iterrows():
                try:
                    lat = float(row[lat_cols[0]])
                    lon = float(row[lon_cols[0]])
                except (ValueError, TypeError):
                    continue
                if -5 <= lat <= 5 and 33 <= lon <= 42:
                    coordinates.append({'latitude': lat, 'longitude': lon})

        # Create heatmap
        heatmap_filename = 'kenya_collection_heatmap.html'
        if coordinates:
            print("Creating heatmap...")
            heatmap_obj = create_heatmap(df, coordinates)
            heatmap_obj.save(heatmap_filename)
            print(f"Heatmap saved as: {heatmap_filename}")

        print("\nMap features:")
        print("- Click markers for detailed information")
        print("- Use layer control to switch map styles")
        print("- Fullscreen mode available")
        print("- Measure distances with measure tool")
        print("- Mouse coordinates shown in top-right")
        print("- Markers clustered for better performance")

        # Automatically open the maps in browser
        try:
            print("\nOpening interactive map in browser...")
            webbrowser.open(Path(map_filename).resolve().as_uri())

            if coordinates:
                # Ask if user wants to open heatmap too
                try:
                    open_heatmap = input("Open heatmap in browser too? (y/n, default=n): ").strip().lower()
                    if open_heatmap == 'y':
                        print("Opening heatmap in browser...")
                        webbrowser.open(Path(heatmap_filename).resolve().as_uri())
                except (EOFError, KeyboardInterrupt):
                    print("Skipping heatmap.")

        except Exception as e:
            print(f"Could not automatically open browser: {e}")
            print(f"Please manually open: {map_filename}")

    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()

# Entry point: call main() only when this code runs as the top-level program.
if __name__ == "__main__":
    main()

Interactive Kenya Map with Collection Points
Creating interactive map...
Using GPS column: site_gps
Found 858 valid GPS coordinates
Coloring markers by: anoph_present
Interactive map saved as: kenya_collection_points_interactive.html
Creating heatmap...
Heatmap saved as: kenya_collection_heatmap.html

Map features:
- Click markers for detailed information
- Use layer control to switch map styles
- Fullscreen mode available
- Measure distances with measure tool
- Mouse coordinates shown in top-right
- Markers clustered for better performance

Opening interactive map in browser...


Open heatmap in browser too? (y/n, default=n):  y


Opening heatmap in browser...


In [12]:
import pandas as pd
import folium
import numpy as np
import os
import webbrowser
from folium import plugins
from datetime import datetime, timedelta
import json

def load_cleaned_data(filename="commcare_cleaned_data.csv"):
    """Load the cleaned-data CSV and return it as a DataFrame.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The DataFrame, or None if the file is absent or reading it fails.
        A message is printed in both failure cases.
    """
    frame = None
    try:
        file_present = os.path.exists(filename)
        if file_present:
            frame = pd.read_csv(filename)
        else:
            print(f"File '{filename}' not found!")
            print("Please run the data loader script first.")
    except Exception as problem:
        print(f"Error loading data: {problem}")
        frame = None
    return frame

def find_gps_columns(df):
    """Collect column names that look like GPS fields.

    A column is matched case-insensitively by substring: 'site_gps',
    'lat' and 'lon' each get their own list, in DataFrame column order.

    Returns:
        (site_gps_cols, lat_cols, lon_cols) as three lists of names.
    """
    site_gps_cols, lat_cols, lon_cols = [], [], []

    for name in df.columns:
        lowered = name.lower()
        if 'site_gps' in lowered:
            site_gps_cols.append(name)
        if 'lat' in lowered:
            lat_cols.append(name)
        if 'lon' in lowered:
            lon_cols.append(name)

    return site_gps_cols, lat_cols, lon_cols

def find_date_column(df):
    """Find the column that most likely holds the collection date.

    Column names are searched case-insensitively, in order of preference:
    names containing 'collection_date', then names containing 'date',
    then names containing 'received'.  Within one preference level the
    first column in DataFrame order wins.  Searching by preference keeps a
    looser match that happens to come earlier in the file (for example
    'last_updated', which contains 'date') from shadowing the real
    collection-date column.

    Args:
        df: DataFrame whose columns are inspected.

    Returns:
        The matching column label, or None when nothing matches.
    """
    for fragment in ('collection_date', 'date', 'received'):
        for col in df.columns:
            if fragment in str(col).lower():
                return col
    return None

def extract_coordinates_with_dates(df, gps_col, date_col):
    """Extract validated coordinates together with their collection dates.

    GPS values are parsed as whitespace-separated ``"lat lon ..."``,
    ``"lat,lon"`` or ``"lat;lon"``; a comma or semicolon next to whitespace
    (``"lat, lon"``) is accepted as well.  Rows whose GPS value is empty,
    unparseable or outside the bounding box -5..5 latitude / 33..42
    longitude are skipped.  A date that is missing or cannot be parsed
    does not drop the row; its date fields are left as None / 'Unknown'.

    Args:
        df: DataFrame holding the records.
        gps_col: Name of the column containing the GPS strings.
        date_col: Name of the date column, or a falsy value for "no dates".

    Returns:
        A list of dicts with keys 'latitude', 'longitude', 'index', 'data'
        (the row as a dict), 'collection_date' (parsed timestamp or None),
        'week_number' (ISO week or None) and 'date_string'
        ('YYYY-MM-DD' or 'Unknown').
    """
    coordinates = []

    for idx, row in df.iterrows():
        gps_data = row[gps_col]
        date_data = row[date_col] if date_col else None

        if pd.isna(gps_data):
            continue

        gps_str = str(gps_data).strip()
        if not gps_str:
            continue

        tokens = gps_str.split()
        if len(tokens) > 1:
            # Whitespace-separated; drop a comma/semicolon glued to a token
            # so that "lat, lon" parses instead of failing in float().
            parts = [token.strip(',;') for token in tokens]
            parts = [part for part in parts if part]
        elif ',' in gps_str:
            parts = gps_str.split(',')
        elif ';' in gps_str:
            parts = gps_str.split(';')
        else:
            continue

        if len(parts) < 2:
            continue

        try:
            lat = float(parts[0].strip())
            lon = float(parts[1].strip())
        except ValueError:
            continue

        # Basic validation for Kenya coordinates
        if not (-5 <= lat <= 5 and 33 <= lon <= 42):
            continue

        # Parse date; both fields stay None unless parsing fully succeeds
        collection_date = None
        week_number = None

        if date_col and not pd.isna(date_data):
            try:
                parsed = pd.to_datetime(date_data)
                if not pd.isna(parsed):
                    # ISO week number of the collection date
                    week_number = parsed.isocalendar()[1]
                    collection_date = parsed
            except (ValueError, TypeError, OverflowError):
                # Unparseable date: keep the point, just without a date.
                collection_date = None
                week_number = None

        if collection_date is not None:
            date_string = collection_date.strftime('%Y-%m-%d')
        else:
            date_string = 'Unknown'

        coordinates.append({
            'latitude': lat,
            'longitude': lon,
            'index': idx,
            'data': row.to_dict(),
            'collection_date': collection_date,
            'week_number': week_number,
            'date_string': date_string,
        })

    return coordinates

def create_time_slider_map(df):
    """Create an interactive map with a time slider over collection weeks.

    Points are read from the first 'site_gps'-style column, paired with the
    column chosen by find_date_column, and grouped by ISO year and ISO week
    (so days around New Year land in the correct week bucket).  Each point
    is drawn as a circle marker coloured by the first column whose name
    contains 'anoph'; TimestampedGeoJson only styles point features through
    ``icon`` ('marker' or 'circle') and ``iconstyle``.

    Values taken from the data are HTML-escaped before being placed in
    popups and tooltips.

    Args:
        df: DataFrame of collection records.

    Returns:
        A tuple ``(map, week_groups)`` where ``week_groups`` maps
        'YYYY-Www' keys to the list of points in that week, or None when
        GPS columns, a date column, or dated coordinates cannot be found
        (a message is printed in that case).
    """
    # Local import so this block does not rely on a file-level import.
    from html import escape

    # Find GPS and date columns
    site_gps_cols, lat_cols, lon_cols = find_gps_columns(df)
    date_col = find_date_column(df)

    if not site_gps_cols and not (lat_cols and lon_cols):
        print("No GPS coordinate columns found")
        return None

    if not date_col:
        print("No collection date column found")
        print("Available columns:", df.columns.tolist())
        return None

    coordinates = []

    # Extract coordinates with dates
    if site_gps_cols:
        gps_col = site_gps_cols[0]
        print(f"Using GPS column: {gps_col}")
        print(f"Using date column: {date_col}")
        coordinates = extract_coordinates_with_dates(df, gps_col, date_col)

    if not coordinates:
        print("No valid GPS coordinates with dates found")
        return None

    # Filter out coordinates without dates for time slider
    coordinates_with_dates = [
        c for c in coordinates if c['collection_date'] is not None
    ]

    if not coordinates_with_dates:
        print("No coordinates with valid dates found")
        return None

    print(f"Found {len(coordinates_with_dates)} coordinates with valid dates")

    # Group by ISO year + ISO week.  The ISO year is used instead of the
    # calendar year because e.g. 2024-12-30 belongs to ISO week 2025-W01.
    week_groups = {}
    for coord in coordinates_with_dates:
        iso_year, iso_week = coord['collection_date'].isocalendar()[:2]
        week_key = f"{iso_year}-W{iso_week:02d}"
        week_groups.setdefault(week_key, []).append(coord)

    print(f"Data spans {len(week_groups)} weeks")

    # Approximate center of Kenya, used as the initial map view
    kenya_center = [-0.5, 37.5]

    # Create base map
    m = folium.Map(
        location=kenya_center,
        zoom_start=6,
        tiles='OpenStreetMap'
    )

    # Add different tile layers with proper attributions
    # NOTE(review): 'Stamen Terrain' may no longer resolve as a built-in
    # tile name in recent folium releases - confirm this layer still renders.
    folium.TileLayer(
        tiles='Stamen Terrain',
        attr='Map tiles by Stamen Design, under CC BY 3.0. Data by OpenStreetMap, under ODbL.',
        name='Terrain'
    ).add_to(m)

    folium.TileLayer(
        tiles='CartoDB positron',
        attr='&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors &copy; <a href="https://carto.com/attributions">CARTO</a>',
        name='Light'
    ).add_to(m)

    def get_color(anoph_val):
        """Map an anoph value to a marker colour."""
        anoph_str = str(anoph_val).lower().strip()
        if anoph_str == 'yes':
            return 'green'
        if anoph_str == 'no':
            return 'red'
        if anoph_str in ('---', '___', 'nan', ''):
            return 'orange'
        return 'blue'

    # Find anoph column
    anoph_cols = [col for col in df.columns if 'anoph' in col.lower()]
    anoph_col = anoph_cols[0] if anoph_cols else None

    if anoph_col:
        print(f"Coloring markers by: {anoph_col}")

    # Fields shown in each popup; anoph_col is dropped when it is None.
    popup_fields = [
        name for name in (
            'username', 'site_code', 'county', 'subcounty',
            anoph_col, 'culicine_present',
        )
        if name
    ]

    # Create time-indexed data for TimestampedGeoJson
    features = []

    for week_key, coords_in_week in week_groups.items():
        for coord in coords_in_week:
            data = coord['data']
            anoph_val = data.get(anoph_col, 'unknown') if anoph_col else 'unknown'
            color = get_color(anoph_val)

            # Popup rows; data values are escaped because the popup string
            # is rendered as HTML.
            rows = [
                f"<tr><td><b>Week:</b></td><td>{week_key}</td></tr>",
                f"<tr><td><b>Date:</b></td><td>{coord['date_string']}</td></tr>",
            ]
            for field in popup_fields:
                if field in data and not pd.isna(data[field]):
                    label = escape(field.replace('_', ' ').title())
                    value = escape(str(data[field]))
                    rows.append(
                        f"<tr><td><b>{label}:</b></td><td>{value}</td></tr>"
                    )
            rows.append(
                "<tr><td><b>Coordinates:</b></td>"
                f"<td>{coord['latitude']:.4f}, {coord['longitude']:.4f}</td></tr>"
            )

            popup_content = (
                "<div style='width:300px'>"
                "<h4><b>Collection Point Details</b></h4>"
                "<table style='width:100%'>"
                + "".join(rows)
                + "</table></div>"
            )

            site_label = escape(str(data.get('username', 'Unknown')))
            anoph_label = escape(str(anoph_val))

            # Create feature for TimestampedGeoJson (GeoJSON order: lon, lat)
            feature = {
                'type': 'Feature',
                'geometry': {
                    'type': 'Point',
                    'coordinates': [coord['longitude'], coord['latitude']]
                },
                'properties': {
                    'time': coord['collection_date'].strftime('%Y-%m-%d'),
                    'popup': popup_content,
                    'tooltip': f"Site: {site_label} | Week: {week_key} | Anoph: {anoph_label}",
                    # 'circle' + iconstyle is what the plugin actually
                    # draws; other icon names fall back to a plain marker.
                    'icon': 'circle',
                    'iconstyle': {
                        'color': color,
                        'fillColor': color,
                        'fillOpacity': 0.8,
                        'radius': 7,
                    },
                    'color': color,
                    'anoph': anoph_val
                }
            }
            features.append(feature)

    # Create TimestampedGeoJson
    timestamped_geojson = plugins.TimestampedGeoJson(
        {
            'type': 'FeatureCollection',
            'features': features
        },
        period='P7D',  # 7 days (1 week) periods
        add_last_point=True,
        auto_play=False,
        loop=False,
        max_speed=5,
        loop_button=True,
        date_options='YYYY-MM-DD',
        time_slider_drag_update=True
    )

    timestamped_geojson.add_to(m)

    # Add a legend (coloured dots, matching the circle markers)
    legend_html = '''
    <div style="position: fixed; 
                top: 10px; right: 10px; width: 200px; height: 140px; 
                background-color: white; border:2px solid grey; z-index:9999; 
                font-size:14px; padding: 10px">
    <h4>Collection Points by Week</h4>
    <p><span style="color:green">&#9679;</span> Anoph: Yes</p>
    <p><span style="color:red">&#9679;</span> Anoph: No</p>
    <p><span style="color:orange">&#9679;</span> Missing/Unknown</p>
    <p><span style="color:blue">&#9679;</span> Other</p>
    <p><small>Use time slider to filter by week</small></p>
    </div>
    '''
    m.get_root().html.add_child(folium.Element(legend_html))

    # Add layer control
    folium.LayerControl().add_to(m)

    # Add fullscreen button
    plugins.Fullscreen().add_to(m)

    # Add measure tool
    plugins.MeasureControl().add_to(m)

    # Add minimap
    minimap = plugins.MiniMap()
    m.add_child(minimap)

    return m, week_groups

def main():
    """Build, save and open the time-slider map of collection points.

    Loads the cleaned data, writes 'kenya_time_slider_map.html', prints a
    per-week summary and opens the map in the default browser.  Any
    unexpected error is printed together with its traceback instead of
    being raised.
    """
    # Path.as_uri() produces a valid, percent-encoded file:// URL on every
    # platform; 'file://' + absolute path is malformed on Windows.
    from pathlib import Path

    try:
        # Load the cleaned data
        df = load_cleaned_data()
        if df is None:
            return

        print("Interactive Kenya Map with Time Slider")
        print("=" * 40)

        # Create the time slider map
        print("Creating time-slider map...")
        result = create_time_slider_map(df)

        if result is None:
            print("Failed to create time-slider map")
            return

        map_obj, week_groups = result

        # Save the map
        map_filename = 'kenya_time_slider_map.html'
        map_obj.save(map_filename)
        print(f"Time-slider map saved as: {map_filename}")

        # Print week summary
        print("\nWeek Summary:")
        print("-" * 30)
        for week, coords in sorted(week_groups.items()):
            print(f"{week}: {len(coords)} collection points")

        print("\nMap features:")
        print("- Time slider to filter by collection week")
        print("- Play button for animated timeline")
        print("- Click markers for detailed information")
        print("- Multiple map layer options")
        print("- Fullscreen mode and measurement tools")

        # Automatically open the map in browser
        try:
            print("\nOpening time-slider map in browser...")
            webbrowser.open(Path(map_filename).resolve().as_uri())
        except Exception as e:
            print(f"Could not automatically open browser: {e}")
            print(f"Please manually open: {map_filename}")

    except Exception as e:
        print(f"Error: {e}")
        import traceback
        traceback.print_exc()

# Entry point: call main() only when this code runs as the top-level program.
if __name__ == "__main__":
    main()

Interactive Kenya Map with Time Slider
Creating time-slider map...
Using GPS column: site_gps
Using date column: collection_date
Found 858 coordinates with valid dates
Data spans 8 weeks
Coloring markers by: anoph_present
Time-slider map saved as: kenya_time_slider_map.html

Week Summary:
------------------------------
2025-W31: 25 collection points
2025-W32: 49 collection points
2025-W33: 79 collection points
2025-W34: 151 collection points
2025-W35: 157 collection points
2025-W36: 184 collection points
2025-W37: 118 collection points
2025-W38: 95 collection points

Map features:
- Time slider to filter by collection week
- Play button for animated timeline
- Click markers for detailed information
- Multiple map layer options
- Fullscreen mode and measurement tools

Opening time-slider map in browser...


In [13]:
import pandas as pd
import folium
import numpy as np
import os
import webbrowser
from folium import plugins
from datetime import datetime

def load_cleaned_data(filename="commcare_cleaned_data.csv"):
    """Return the cleaned-data CSV as a DataFrame, or None on failure.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The DataFrame read from ``filename``.  None is returned, after a
        printed message, if the file does not exist or reading it raises.
    """
    try:
        missing = not os.path.exists(filename)
        if missing:
            for message in (
                f"File '{filename}' not found!",
                "Please run the data loader script first.",
            ):
                print(message)
            return None

        return pd.read_csv(filename)

    except Exception as read_error:
        print(f"Error loading data: {read_error}")
        return None

def find_gps_columns(df):
    """Return the column names that may hold GPS information.

    Matching is a case-insensitive substring test on each column name.

    Returns:
        Three lists, in DataFrame column order: names containing
        'site_gps', names containing 'lat', names containing 'lon'.
    """
    # Lower-case every name once, keeping the original alongside it.
    named = [(original, original.lower()) for original in df.columns]

    site_gps_cols = [original for original, low in named if 'site_gps' in low]
    lat_cols = [original for original, low in named if 'lat' in low]
    lon_cols = [original for original, low in named if 'lon' in low]

    return site_gps_cols, lat_cols, lon_cols

def extract_coordinates_for_heatmap(df, gps_col):
    """Extract validated ``[lat, lon]`` pairs for a heatmap layer.

    Each non-empty value of ``df[gps_col]`` is parsed as whitespace-separated
    ``"lat lon ..."``, ``"lat,lon"`` or ``"lat;lon"``.  A comma or semicolon
    next to whitespace (``"lat, lon"``) is accepted as well.  Values that
    cannot be parsed, or that fall outside the bounding box -5..5 latitude /
    33..42 longitude, are skipped silently.

    Args:
        df: DataFrame holding the records.
        gps_col: Name of the column containing the GPS strings.

    Returns:
        A list of ``[latitude, longitude]`` float pairs.
    """
    coordinates = []

    for gps_data in df[gps_col]:
        if pd.isna(gps_data):
            continue

        gps_str = str(gps_data).strip()
        if not gps_str:
            continue

        tokens = gps_str.split()
        if len(tokens) > 1:
            # Whitespace-separated; drop a comma/semicolon glued to a token
            # so that "lat, lon" parses instead of failing in float().
            parts = [token.strip(',;') for token in tokens]
            parts = [part for part in parts if part]
        elif ',' in gps_str:
            parts = gps_str.split(',')
        elif ';' in gps_str:
            parts = gps_str.split(';')
        else:
            continue

        if len(parts) < 2:
            continue

        try:
            lat = float(parts[0].strip())
            lon = float(parts[1].strip())
        except ValueError:
            continue

        # Basic validation for Kenya coordinates
        if -5 <= lat <= 5 and 33 <= lon <= 42:
            coordinates.append([lat, lon])

    return coordinates

def create_density_heatmap(df):
    """Build a folium heat map showing where collection points concentrate.

    Coordinates come from the first 'site_gps'-style column when one
    exists, otherwise from the first 'lat'/'lon'-style columns.  Only
    points inside -5..5 latitude / 33..42 longitude are used.

    Args:
        df: DataFrame of collection records.

    Returns:
        The folium.Map, or None (after printing a message) when no GPS
        columns or no valid coordinates are found.
    """
    site_gps_cols, lat_cols, lon_cols = find_gps_columns(df)
    has_lat_lon = bool(lat_cols and lon_cols)

    if not (site_gps_cols or has_lat_lon):
        print("No GPS coordinate columns found")
        return None

    coordinates = []

    if site_gps_cols:
        source_col = site_gps_cols[0]
        print(f"Using GPS column: {source_col}")
        coordinates = extract_coordinates_for_heatmap(df, source_col)
    else:
        # The guard above guarantees lat/lon columns exist on this branch.
        lat_name, lon_name = lat_cols[0], lon_cols[0]
        print(f"Using latitude column: {lat_name}")
        print(f"Using longitude column: {lon_name}")

        for _, record in df.iterrows():
            try:
                point = [float(record[lat_name]), float(record[lon_name])]
            except (ValueError, TypeError):
                continue
            if -5 <= point[0] <= 5 and 33 <= point[1] <= 42:
                coordinates.append(point)

    if not coordinates:
        print("No valid GPS coordinates found for heatmap")
        return None

    print(f"Found {len(coordinates)} coordinates for heatmap")

    # Base map centred on Kenya
    density_map = folium.Map(
        location=[-0.5, 37.5],
        zoom_start=6,
        tiles='OpenStreetMap',
    )

    # Alternative basemaps, added in this order: (tiles, attribution, name)
    carto_attribution = '&copy; <a href="https://www.openstreetmap.org/copyright">OpenStreetMap</a> contributors &copy; <a href="https://carto.com/attributions">CARTO</a>'
    extra_layers = (
        (
            'Stamen Terrain',
            'Map tiles by Stamen Design, under CC BY 3.0. Data by OpenStreetMap, under ODbL.',
            'Terrain',
        ),
        ('CartoDB positron', carto_attribution, 'Light'),
        ('CartoDB dark_matter', carto_attribution, 'Dark'),
    )
    for tile_name, attribution, label in extra_layers:
        folium.TileLayer(
            tiles=tile_name,
            attr=attribution,
            name=label,
        ).add_to(density_map)

    # Heat layer: blue -> lime -> orange -> red as density rises
    color_stops = {0.4: 'blue', 0.65: 'lime', 0.8: 'orange', 1.0: 'red'}
    plugins.HeatMap(
        coordinates,
        min_opacity=0.2,
        max_zoom=18,
        radius=25,
        blur=15,
        gradient=color_stops,
    ).add_to(density_map)

    # Map controls: layers, fullscreen, measuring, minimap
    folium.LayerControl().add_to(density_map)
    plugins.Fullscreen().add_to(density_map)
    plugins.MeasureControl().add_to(density_map)
    density_map.add_child(plugins.MiniMap())

    # Live mouse coordinates, formatted by the same JS snippet for both axes
    coordinate_formatter = "function(num) {return L.Util.formatNum(num, 4) + ' º '};"
    plugins.MousePosition(
        position='topright',
        separator=' | ',
        empty_string='NaN',
        lng_first=True,
        num_digits=20,
        prefix='Coordinates:',
        lat_formatter=coordinate_formatter,
        lng_formatter=coordinate_formatter,
    ).add_to(density_map)

    # Legend: gradient bar plus the number of plotted points
    total_points = str(len(coordinates))
    legend_html = '''
    <div style="position: fixed; 
                top: 10px; right: 10px; width: 200px; height: 120px; 
                background-color: white; border:2px solid grey; z-index:9999; 
                font-size:14px; padding: 10px">
    <h4>Collection Density</h4>
    <div style="background: linear-gradient(to right, blue, lime, orange, red); height: 20px; width: 100%; margin: 10px 0;"></div>
    <div style="display: flex; justify-content: space-between; font-size: 12px;">
        <span>Low</span>
        <span>High</span>
    </div>
    <p style="font-size: 12px; margin-top: 10px;">
        Total Points: ''' + total_points + '''
    </p>
    </div>
    '''
    density_map.get_root().html.add_child(folium.Element(legend_html))

    return density_map

def _split_gps_text(gps_data):
    """Split a combined GPS cell into text tokens, latitude first.

    Returns None when the cell is missing, blank, or contains none of the
    recognised separators (whitespace, ',' or ';').
    """
    if pd.isna(gps_data):
        return None

    gps_str = str(gps_data).strip()
    if not gps_str:
        return None

    if ' ' in gps_str:
        # Values such as "-1.29, 36.82" mix a comma with a space; strip the
        # stray punctuation from each token so float() does not reject it.
        tokens = [tok.strip(',;') for tok in gps_str.split()]
        return [tok for tok in tokens if tok]
    if ',' in gps_str:
        return gps_str.split(',')
    if ';' in gps_str:
        return gps_str.split(';')
    return None


def _kenya_point(lat_raw, lon_raw):
    """Return (lat, lon) as floats when both parse and lie inside the Kenya box, else None."""
    try:
        lat = float(lat_raw)
        lon = float(lon_raw)
    except (TypeError, ValueError):
        return None

    # Same bounding box as the original inline check; NaN fails both comparisons.
    if -5 <= lat <= 5 and 33 <= lon <= 42:
        return lat, lon
    return None


def create_cluster_heatmap(df):
    """Create a map with clustered collection-point markers plus a heatmap layer.

    Coordinates are taken from the first ``site_gps`` column reported by
    ``find_gps_columns``; when no such column exists, the first latitude and
    first longitude columns are used instead. Only points with
    -5 <= lat <= 5 and 33 <= lon <= 42 are kept.

    Args:
        df: DataFrame of collection records.

    Returns:
        A ``folium.Map`` holding a marker cluster ("Collection Points"), a
        heatmap ("Density Heatmap") and a layer control, or ``None`` when no
        GPS columns or no valid coordinates are found (a message is printed
        in either case).
    """
    import html  # local import: only needed to escape popup text

    # Find GPS columns
    site_gps_cols, lat_cols, lon_cols = find_gps_columns(df)

    if not site_gps_cols and not (lat_cols and lon_cols):
        print("No GPS coordinate columns found")
        return None

    coordinates = []

    if site_gps_cols:
        # Combined "lat lon ..." column
        gps_col = site_gps_cols[0]
        for _, row in df.iterrows():
            parts = _split_gps_text(row[gps_col])
            if parts is None or len(parts) < 2:
                continue
            point = _kenya_point(parts[0], parts[1])
            if point is not None:
                coordinates.append({
                    'lat': point[0],
                    'lon': point[1],
                    'data': row.to_dict(),
                })
    else:
        # Separate latitude / longitude columns: the guard above accepts this
        # case, so actually read them instead of falling through empty-handed.
        lat_col, lon_col = lat_cols[0], lon_cols[0]
        for _, row in df.iterrows():
            point = _kenya_point(row[lat_col], row[lon_col])
            if point is not None:
                coordinates.append({
                    'lat': point[0],
                    'lon': point[1],
                    'data': row.to_dict(),
                })

    if not coordinates:
        print("No valid coordinates for cluster map")
        return None

    # Fixed map centre used for the initial view
    kenya_center = [-0.5, 37.5]

    # Create base map
    m = folium.Map(
        location=kenya_center,
        zoom_start=6,
        tiles='CartoDB positron'
    )

    # Create marker cluster
    marker_cluster = plugins.MarkerCluster(
        name='Collection Points',
        options={
            'disableClusteringAtZoom': 10,
            'maxClusterRadius': 50
        }
    ).add_to(m)

    # Find anoph column (first column whose name contains "anoph")
    anoph_cols = [col for col in df.columns if 'anoph' in col.lower()]
    anoph_col = anoph_cols[0] if anoph_cols else None

    # Add markers to cluster
    for coord in coordinates:
        data = coord['data']
        anoph_val = data.get(anoph_col, 'unknown') if anoph_col else 'unknown'

        # Popup strings are rendered as HTML: escape the CSV-sourced values
        # and use <br> so the three lines do not collapse into one.
        popup_text = (
            f"Site: {html.escape(str(data.get('username', 'Unknown')))}<br>"
            f"Anoph: {html.escape(str(anoph_val))}<br>"
            f"Coordinates: {coord['lat']:.4f}, {coord['lon']:.4f}"
        )

        marker_color = 'red' if str(anoph_val).lower() == 'yes' else 'blue'

        # Documented snake_case path options; fill=True makes the fill explicit.
        folium.CircleMarker(
            location=[coord['lat'], coord['lon']],
            radius=5,
            popup=popup_text,
            color=marker_color,
            fill=True,
            fill_color=marker_color,
            fill_opacity=0.7
        ).add_to(marker_cluster)

    # Add heatmap layer
    heat_data = [[coord['lat'], coord['lon']] for coord in coordinates]
    heat_map = plugins.HeatMap(heat_data, name='Density Heatmap')
    heat_map.add_to(m)

    # Add layer control
    folium.LayerControl().add_to(m)

    return m

def main():
    """Interactively build the Kenya heatmap(s), save them as HTML and open them.

    Loads the cleaned data via ``load_cleaned_data``, asks which map type to
    build (1 = density, 2 = cluster + heatmap, 3 = both; blank, EOF or Ctrl-C
    selects 1), saves each created map to a fixed HTML filename in the current
    directory and tries to open it in the default browser. Any unexpected
    exception is printed together with its traceback instead of propagating.
    """
    import traceback
    from pathlib import Path

    try:
        # Load the cleaned data
        df = load_cleaned_data()
        if df is None:
            return

        print("Kenya Collection Points Heatmap Generator")
        print("=" * 45)

        print("\nChoose heatmap type:")
        print("1. Density Heatmap (heat intensity)")
        print("2. Cluster + Heatmap (markers + density)")
        print("3. Both (creates two files)")

        try:
            choice = input("Enter choice (1-3, default=1): ").strip() or "1"
        except (EOFError, KeyboardInterrupt):
            choice = "1"

        # An unrecognised answer used to fall through silently and produce
        # nothing; say so and use the advertised default instead.
        if choice not in ("1", "2", "3"):
            print(f"Invalid choice '{choice}', using default (1)")
            choice = "1"

        maps_created = []

        if choice in ("1", "3"):
            print("\nCreating density heatmap...")
            density_map = create_density_heatmap(df)

            if density_map is not None:
                filename = 'kenya_density_heatmap.html'
                density_map.save(filename)
                maps_created.append(filename)
                print(f"Density heatmap saved as: {filename}")

        if choice in ("2", "3"):
            print("\nCreating cluster heatmap...")
            cluster_map = create_cluster_heatmap(df)

            if cluster_map is not None:
                filename = 'kenya_cluster_heatmap.html'
                cluster_map.save(filename)
                maps_created.append(filename)
                print(f"Cluster heatmap saved as: {filename}")

        if not maps_created:
            print("No heatmaps were created")
            return

        print("\nHeatmap features:")
        print("- Multiple map layer options")
        print("- Fullscreen mode available")
        print("- Measure distances with measure tool")
        print("- Mouse coordinates display")
        print("- Density visualization of collection activity")

        # Open maps in browser
        for filename in maps_created:
            try:
                print(f"\nOpening {filename} in browser...")
                # as_uri() yields a well-formed file URI: it percent-encodes
                # spaces/special characters and handles Windows drive paths,
                # which plain 'file://' + path does not.
                map_uri = Path(os.path.abspath(filename)).as_uri()
                if not webbrowser.open(map_uri):
                    print(f"Could not open {filename}: no browser available")
            except Exception as e:
                print(f"Could not open {filename}: {e}")

    except Exception as e:
        # Top-level boundary: report the failure with its traceback.
        print(f"Error: {e}")
        traceback.print_exc()

# Run the interactive heatmap generator only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()

Kenya Collection Points Heatmap Generator

Choose heatmap type:
1. Density Heatmap (heat intensity)
2. Cluster + Heatmap (markers + density)
3. Both (creates two files)


Enter choice (1-3, default=1):  3



Creating density heatmap...
Using GPS column: site_gps
Found 858 coordinates for heatmap
Density heatmap saved as: kenya_density_heatmap.html

Creating cluster heatmap...
Cluster heatmap saved as: kenya_cluster_heatmap.html

Heatmap features:
- Multiple map layer options
- Fullscreen mode available
- Measure distances with measure tool
- Mouse coordinates display
- Density visualization of collection activity

Opening kenya_density_heatmap.html in browser...

Opening kenya_cluster_heatmap.html in browser...
