In [1]:
"""
=============================================================================
GEO ANALYTICS - REGIONAL DISTRIBUTION & DEMAND ANALYSIS
Step 13: Comprehensive Geographic Analysis for Gym Expansion Strategy
=============================================================================

This notebook performs:
1. Member distribution mapping and density analysis
2. Demand pattern analysis across regions
3. Accessibility calculations (travel time, distance)
4. Market penetration and competitive analysis
5. Expansion planning with ROI projections
6. Interactive Folium maps for strategic insights

Author: Gym Analytics Team
Date: October 2025
=============================================================================
"""

# ============================================================================
# SECTION 1: ENVIRONMENT SETUP & INSTALLATIONS
# ============================================================================

!pip install folium geopandas geopy scikit-learn pandas numpy matplotlib seaborn plotly -q

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
from folium import plugins
from folium.plugins import HeatMap, MarkerCluster
import geopandas as gpd
from geopy.distance import geodesic
from scipy.spatial import distance_matrix
from sklearn.cluster import KMeans, DBSCAN
from sklearn.preprocessing import StandardScaler
import plotly.express as px
import plotly.graph_objects as go
from datetime import datetime
import json
import warnings
warnings.filterwarnings('ignore')

# Set visualization style
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

print("✅ All libraries imported successfully!")
print(f"📅 Analysis Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# ============================================================================
# SECTION 2: DATA LOADING & VALIDATION
# ============================================================================

print("\n" + "="*80)
print("📂 LOADING GEO ANALYTICS DATASETS")
print("="*80)

# Load member geographic data
member_geo_df = pd.read_csv('member_geo_data.csv')
print(f"\n✅ Loaded member_geo_data.csv: {member_geo_df.shape[0]} members")

# Load facility locations
facility_df = pd.read_csv('facility_locations_data.csv')
print(f"✅ Loaded facility_locations_data.csv: {facility_df.shape[0]} facilities")

# Load market analysis data
market_df = pd.read_csv('market_analysis_data.csv')
print(f"✅ Loaded market_analysis_data.csv: {market_df.shape[0]} zip codes")

# Data validation
print("\n" + "-"*80)
print("🔍 DATA VALIDATION CHECKS")
print("-"*80)

# Check for missing values
print("\n1. Missing Values Check:")
print(f"   Member Data: {member_geo_df.isnull().sum().sum()} missing values")
print(f"   Facility Data: {facility_df.isnull().sum().sum()} missing values")
print(f"   Market Data: {market_df.isnull().sum().sum()} missing values")

# Check coordinate ranges
print("\n2. Coordinate Validation:")
print(f"   Latitude range: [{member_geo_df['latitude'].min():.4f}, {member_geo_df['latitude'].max():.4f}]")
print(f"   Longitude range: [{member_geo_df['longitude'].min():.4f}, {member_geo_df['longitude'].max():.4f}]")

# Display sample data
print("\n3. Sample Member Data:")
print(member_geo_df.head(3))

print("\n4. Sample Facility Data:")
print(facility_df.head(3))

print("\n5. Sample Market Data:")
print(market_df.head(3))

# ============================================================================
# SECTION 3: MEMBER DISTRIBUTION ANALYSIS
# ============================================================================

print("\n" + "="*80)
print("🗺️ TASK 1: MEMBER DISTRIBUTION MAPPING")
print("="*80)

# Calculate members per zip code
zip_distribution = member_geo_df.groupby('zip_code').agg({
    'member_id': 'count',
    'latitude': 'mean',
    'longitude': 'mean',
    'membership_tier': lambda x: x.mode()[0] if len(x) > 0 else 'N/A',
    'distance_to_gym_km': 'mean',
    'travel_time_minutes': 'mean'
}).rename(columns={'member_id': 'member_count'}).reset_index()

print("\n📊 Member Distribution by Zip Code:")
print(zip_distribution.sort_values('member_count', ascending=False))

# Calculate density metrics
total_members = len(member_geo_df)
zip_distribution['member_density'] = zip_distribution['member_count'] / total_members
zip_distribution['density_category'] = pd.cut(
    zip_distribution['member_count'], 
    bins=[0, 2, 4, 100],
    labels=['Low', 'Medium', 'High']
)

print(f"\n📈 Density Distribution:")
print(zip_distribution['density_category'].value_counts())

# Merge with market data for enhanced analysis
distribution_enhanced = zip_distribution.merge(
    market_df[['zip_code', 'population', 'median_income', 'fitness_interest_score']], 
    on='zip_code', 
    how='left'
)

# Calculate penetration rate (members per 1000 population)
distribution_enhanced['penetration_rate'] = (
    distribution_enhanced['member_count'] / 
    distribution_enhanced['population'] * 1000
)

print("\n🎯 Top 5 Zip Codes by Member Penetration Rate:")
print(distribution_enhanced.nlargest(5, 'penetration_rate')[
    ['zip_code', 'member_count', 'population', 'penetration_rate']
])

# ============================================================================
# SECTION 4: DEMAND PATTERN ANALYSIS
# ============================================================================

print("\n" + "="*80)
print("📊 TASK 2: DEMAND ANALYSIS & HEATMAPS")
print("="*80)

# Analyze demand by membership tier
tier_demand = member_geo_df.groupby('membership_tier').agg({
    'member_id': 'count',
    'visit_frequency': lambda x: (x == 'high').sum(),
    'distance_to_gym_km': 'mean',
    'travel_time_minutes': 'mean'
}).rename(columns={
    'member_id': 'total_members',
    'visit_frequency': 'high_frequency_users'
})

print("\n💎 Demand by Membership Tier:")
print(tier_demand)

# Analyze demand by visit frequency
frequency_demand = member_geo_df['visit_frequency'].value_counts()
print("\n📅 Demand by Visit Frequency:")
print(frequency_demand)

# Calculate demand intensity score
demand_by_zip = member_geo_df.groupby('zip_code').agg({
    'member_id': 'count',
    'visit_frequency': lambda x: (x == 'high').sum() / len(x),
    'membership_tier': lambda x: (x == 'premium').sum() / len(x)
}).rename(columns={
    'member_id': 'member_count',
    'visit_frequency': 'high_freq_ratio',
    'membership_tier': 'premium_ratio'
})

# Create demand intensity score (0-100)
demand_by_zip['demand_intensity'] = (
    demand_by_zip['member_count'] * 0.4 +
    demand_by_zip['high_freq_ratio'] * 30 +
    demand_by_zip['premium_ratio'] * 30
)

demand_by_zip = demand_by_zip.reset_index()
print("\n🔥 Top 5 High-Demand Zip Codes:")
print(demand_by_zip.nlargest(5, 'demand_intensity'))

# Analyze transportation mode preferences
transport_analysis = member_geo_df.groupby(['zip_code', 'transportation_mode']).size().unstack(fill_value=0)
print("\n🚗 Transportation Mode Distribution by Zip:")
print(transport_analysis)

# ============================================================================
# SECTION 5: ACCESSIBILITY ANALYSIS
# ============================================================================

print("\n" + "="*80)
print("🚦 TASK 3: ACCESSIBILITY ANALYSIS")
print("="*80)

# Calculate accessibility metrics
accessibility_metrics = member_geo_df.groupby('zip_code').agg({
    'distance_to_gym_km': ['mean', 'min', 'max', 'std'],
    'travel_time_minutes': ['mean', 'min', 'max', 'std'],
    'member_id': 'count'
}).round(2)

accessibility_metrics.columns = ['_'.join(col).strip() for col in accessibility_metrics.columns]
accessibility_metrics = accessibility_metrics.reset_index()

print("\n📍 Accessibility Metrics by Zip Code:")
print(accessibility_metrics)

# Identify underserved areas (high travel time/distance)
underserved_threshold_distance = member_geo_df['distance_to_gym_km'].quantile(0.75)
underserved_threshold_time = member_geo_df['travel_time_minutes'].quantile(0.75)

underserved_members = member_geo_df[
    (member_geo_df['distance_to_gym_km'] > underserved_threshold_distance) |
    (member_geo_df['travel_time_minutes'] > underserved_threshold_time)
]

print(f"\n⚠️ Underserved Areas Analysis:")
print(f"   Distance Threshold: {underserved_threshold_distance:.2f} km")
print(f"   Travel Time Threshold: {underserved_threshold_time:.2f} minutes")
print(f"   Underserved Members: {len(underserved_members)} ({len(underserved_members)/len(member_geo_df)*100:.1f}%)")

underserved_zips = underserved_members['zip_code'].value_counts().head(5)
print(f"\n🚨 Top 5 Underserved Zip Codes:")
print(underserved_zips)

# Calculate facility accessibility scores
for idx, facility in facility_df.iterrows():
    facility_name = facility['facility_name']
    facility_members = member_geo_df[member_geo_df['preferred_facility'] == facility_name]
    
    if len(facility_members) > 0:
        avg_distance = facility_members['distance_to_gym_km'].mean()
        avg_time = facility_members['travel_time_minutes'].mean()
        print(f"\n🏢 {facility_name}:")
        print(f"   Members: {len(facility_members)}")
        print(f"   Avg Distance: {avg_distance:.2f} km")
        print(f"   Avg Travel Time: {avg_time:.2f} minutes")

# ============================================================================
# SECTION 6: MARKET PENETRATION ANALYSIS
# ============================================================================

print("\n" + "="*80)
print("🎯 TASK 4: MARKET PENETRATION ANALYSIS")
print("="*80)

# Merge member data with market data
penetration_analysis = market_df.copy()
member_counts = member_geo_df['zip_code'].value_counts().to_dict()
penetration_analysis['current_members'] = penetration_analysis['zip_code'].map(member_counts).fillna(0)

# Calculate market share and penetration
penetration_analysis['penetration_rate'] = (
    penetration_analysis['current_members'] / 
    penetration_analysis['population'] * 1000
)

penetration_analysis['market_share_potential'] = (
    penetration_analysis['fitness_interest_score'] * 
    penetration_analysis['population'] / 1000
)

penetration_analysis['current_share_pct'] = (
    penetration_analysis['current_members'] / 
    penetration_analysis['market_share_potential'] * 100
).fillna(0)

print("\n📊 Market Penetration Summary:")
print(f"   Total Addressable Market: {penetration_analysis['population'].sum():,} people")
print(f"   Current Members: {penetration_analysis['current_members'].sum():.0f}")
print(f"   Average Penetration Rate: {penetration_analysis['penetration_rate'].mean():.2f} per 1000")
print(f"   Average Market Share: {penetration_analysis['current_share_pct'].mean():.2f}%")

# Identify high-potential, low-penetration areas
penetration_analysis['opportunity_score'] = (
    penetration_analysis['market_share_potential'] * 0.4 +
    (100 - penetration_analysis['current_share_pct']) * 0.3 +
    (1 - penetration_analysis['competitor_density']) * 30
)

print("\n🎯 Top 10 Market Opportunity Zip Codes:")
top_opportunities = penetration_analysis.nlargest(10, 'opportunity_score')[
    ['zip_code', 'population', 'current_members', 'penetration_rate', 
     'competitor_density', 'opportunity_score', 'expansion_priority']
]
print(top_opportunities)

# Competitive analysis
print("\n⚔️ Competitive Landscape:")
competitive_summary = penetration_analysis.groupby('expansion_priority').agg({
    'zip_code': 'count',
    'population': 'sum',
    'current_members': 'sum',
    'competitor_density': 'mean',
    'fitness_interest_score': 'mean'
}).round(2)
print(competitive_summary)

# ============================================================================
# SECTION 7: EXPANSION PLANNING ALGORITHMS
# ============================================================================

print("\n" + "="*80)
print("🚀 TASK 5: EXPANSION PLANNING & RECOMMENDATIONS")
print("="*80)

# Create expansion scoring model
expansion_candidates = penetration_analysis.copy()

# Calculate weighted expansion score
expansion_candidates['expansion_score'] = (
    expansion_candidates['population'] / 10000 * 20 +  # Population weight
    expansion_candidates['median_income'] / 100000 * 15 +  # Income weight
    expansion_candidates['age_18_35_pct'] * 100 * 15 +  # Age demographic weight
    expansion_candidates['fitness_interest_score'] * 100 * 25 +  # Fitness interest weight
    (1 - expansion_candidates['competitor_density']) * 25  # Low competition weight
)

# Adjust for current presence
expansion_candidates.loc[expansion_candidates['current_members'] > 0, 'expansion_score'] *= 0.5

# Filter high-priority candidates
high_priority = expansion_candidates[
    (expansion_candidates['expansion_score'] > expansion_candidates['expansion_score'].quantile(0.7)) &
    (expansion_candidates['current_members'] == 0)
].sort_values('expansion_score', ascending=False)

print("\n🎯 TOP 5 EXPANSION RECOMMENDATIONS:")
print("="*80)
for idx, row in high_priority.head(5).iterrows():
    print(f"\n#{idx+1}. Zip Code: {row['zip_code']}")
    print(f"   City: {row['city']}, {row['state']}")
    print(f"   Population: {row['population']:,}")
    print(f"   Median Income: ${row['median_income']:,}")
    print(f"   Fitness Interest: {row['fitness_interest_score']:.2f}")
    print(f"   Competitor Density: {row['competitor_density']:.2f}")
    print(f"   Expansion Score: {row['expansion_score']:.1f}")
    print(f"   Priority: {row['expansion_priority']}")
    
    # Estimate potential members
    potential_members = row['population'] * row['fitness_interest_score'] * 0.02
    print(f"   Estimated Potential Members: {potential_members:.0f}")
    
    # ROI estimation
    estimated_revenue = potential_members * 600  # Avg annual revenue per member
    facility_cost = 500000  # Estimated facility setup cost
    roi_years = facility_cost / estimated_revenue if estimated_revenue > 0 else 999
    print(f"   Estimated Annual Revenue: ${estimated_revenue:,.0f}")
    print(f"   ROI Payback Period: {roi_years:.1f} years")

# Clustering analysis for optimal facility placement
print("\n" + "-"*80)
print("📍 OPTIMAL FACILITY PLACEMENT ANALYSIS (K-Means Clustering)")
print("-"*80)

# Prepare data for clustering
cluster_data = expansion_candidates[expansion_candidates['current_members'] == 0].copy()
features_for_clustering = cluster_data[['population', 'median_income', 'fitness_interest_score']].copy()

# Normalize features
scaler = StandardScaler()
features_normalized = scaler.fit_transform(features_for_clustering)

# Perform K-Means clustering (3 clusters for 3 potential new facilities)
kmeans = KMeans(n_clusters=3, random_state=42, n_init=10)
cluster_data['cluster'] = kmeans.fit_predict(features_normalized)

print("\n🎯 Recommended Facility Clusters:")
for cluster_id in range(3):
    cluster_zips = cluster_data[cluster_data['cluster'] == cluster_id]
    best_location = cluster_zips.nlargest(1, 'expansion_score').iloc[0]
    
    print(f"\n   Cluster {cluster_id + 1}: Zip Code {best_location['zip_code']}")
    print(f"   Total Population in Cluster: {cluster_zips['population'].sum():,}")
    print(f"   Average Income: ${cluster_zips['median_income'].mean():,.0f}")
    print(f"   Avg Fitness Interest: {cluster_zips['fitness_interest_score'].mean():.2f}")

# ============================================================================
# SECTION 8: INTERACTIVE FOLIUM MAPS
# ============================================================================

print("\n" + "="*80)
print("🗺️ GENERATING INTERACTIVE FOLIUM MAPS")
print("="*80)

# Map 1: Member Distribution Map with Facilities
print("\n1. Creating Member Distribution Map...")
center_lat = member_geo_df['latitude'].mean()
center_lng = member_geo_df['longitude'].mean()

member_map = folium.Map(
    location=[center_lat, center_lng],
    zoom_start=12,
    tiles='OpenStreetMap'
)

# Add facility markers
for idx, facility in facility_df.iterrows():
    folium.Marker(
        location=[facility['latitude'], facility['longitude']],
        popup=f"""
        <b>{facility['facility_name']}</b><br>
        Capacity: {facility['capacity']}<br>
        Members: {facility['current_members']}<br>
        Utilization: {facility['utilization_rate']*100:.0f}%<br>
        Market Share: {facility['market_share']*100:.0f}%
        """,
        icon=folium.Icon(color='red', icon='home', prefix='fa'),
        tooltip=facility['facility_name']
    ).add_to(member_map)

# Add member markers with clustering
member_cluster = MarkerCluster(name='Members').add_to(member_map)

for idx, member in member_geo_df.iterrows():
    folium.CircleMarker(
        location=[member['latitude'], member['longitude']],
        radius=5,
        popup=f"""
        Member ID: {member['member_id']}<br>
        Tier: {member['membership_tier']}<br>
        Distance: {member['distance_to_gym_km']:.1f} km<br>
        Travel Time: {member['travel_time_minutes']} min<br>
        Frequency: {member['visit_frequency']}
        """,
        color='blue',
        fill=True,
        fillOpacity=0.6
    ).add_to(member_cluster)

# Add heatmap layer
heat_data = [[row['latitude'], row['longitude']] for idx, row in member_geo_df.iterrows()]
HeatMap(heat_data, name='Member Density Heatmap', radius=15, blur=25).add_to(member_map)

# Add layer control
folium.LayerControl().add_to(member_map)

# Save map
member_map.save('member_distribution_map.html')
print("   ✅ Saved: member_distribution_map.html")

# Map 2: Expansion Opportunity Map
print("\n2. Creating Expansion Opportunity Map...")
expansion_map = folium.Map(
    location=[center_lat, center_lng],
    zoom_start=11,
    tiles='CartoDB positron'
)

# Add existing facilities
for idx, facility in facility_df.iterrows():
    folium.Marker(
        location=[facility['latitude'], facility['longitude']],
        popup=f"<b>Existing: {facility['facility_name']}</b>",
        icon=folium.Icon(color='green', icon='check', prefix='fa')
    ).add_to(expansion_map)

# Add top expansion opportunities
for idx, location in high_priority.head(10).iterrows():
    # Get approximate coordinates for zip code centroid
    zip_members = member_geo_df[member_geo_df['zip_code'] == location['zip_code']]
    if len(zip_members) > 0:
        lat = zip_members['latitude'].mean()
        lng = zip_members['longitude'].mean()
    else:
        # Use facility average if no members in that zip
        lat = facility_df['latitude'].mean()
        lng = facility_df['longitude'].mean()
    
    folium.CircleMarker(
        location=[lat, lng],
        radius=location['expansion_score'] / 5,
        popup=f"""
        <b>Expansion Opportunity</b><br>
        Zip: {location['zip_code']}<br>
        Population: {location['population']:,}<br>
        Score: {location['expansion_score']:.1f}<br>
        Priority: {location['expansion_priority']}
        """,
        color='orange',
        fill=True,
        fillColor='yellow',
        fillOpacity=0.7
    ).add_to(expansion_map)

expansion_map.save('expansion_opportunities_map.html')
print("   ✅ Saved: expansion_opportunities_map.html")

# Map 3: Accessibility & Underserved Areas
print("\n3. Creating Accessibility Analysis Map...")
accessibility_map = folium.Map(
    location=[center_lat, center_lng],
    zoom_start=12,
    tiles='OpenStreetMap'
)

# Add facilities
for idx, facility in facility_df.iterrows():
    folium.Marker(
        location=[facility['latitude'], facility['longitude']],
        popup=f"<b>{facility['facility_name']}</b>",
        icon=folium.Icon(color='darkblue', icon='home', prefix='fa')
    ).add_to(accessibility_map)

# Color code members by accessibility
for idx, member in member_geo_df.iterrows():
    if member['distance_to_gym_km'] > underserved_threshold_distance:
        color = 'red'
        category = 'Underserved (High Distance)'
    elif member['travel_time_minutes'] > underserved_threshold_time:
        color = 'orange'
        category = 'Moderate Access'
    else:
        color = 'green'
        category = 'Well Served'
    
    folium.CircleMarker(
        location=[member['latitude'], member['longitude']],
        radius=4,
        popup=f"""
        {category}<br>
        Distance: {member['distance_to_gym_km']:.1f} km<br>
        Travel: {member['travel_time_minutes']} min<br>
        Transport: {member['transportation_mode']}
        """,
        color=color,
        fill=True,
        fillOpacity=0.7
    ).add_to(accessibility_map)

accessibility_map.save('accessibility_analysis_map.html')
print("   ✅ Saved: accessibility_analysis_map.html")

# ============================================================================
# SECTION 9: STATISTICAL INSIGHTS & RECOMMENDATIONS
# ============================================================================

print("\n" + "="*80)
print("📈 STATISTICAL INSIGHTS & STRATEGIC RECOMMENDATIONS")
print("="*80)

# Key Performance Indicators
print("\n🎯 KEY PERFORMANCE INDICATORS:")
print(f"   Total Members: {len(member_geo_df)}")
print(f"   Total Facilities: {len(facility_df)}")
print(f"   Average Distance to Gym: {member_geo_df['distance_to_gym_km'].mean():.2f} km")
print(f"   Average Travel Time: {member_geo_df['travel_time_minutes'].mean():.1f} minutes")
print(f"   Member Concentration: {len(member_geo_df['zip_code'].unique())} zip codes")
print(f"   Average Facility Utilization: {facility_df['utilization_rate'].mean()*100:.1f}%")

# Distribution insights
print("\n📊 DISTRIBUTION INSIGHTS:")
tier_dist = member_geo_df['membership_tier'].value_counts()
print(f"   Premium Members: {tier_dist.get('premium', 0)} ({tier_dist.get('premium', 0)/len(member_geo_df)*100:.1f}%)")
print(f"   Standard Members: {tier_dist.get('standard', 0)} ({tier_dist.get('standard', 0)/len(member_geo_df)*100:.1f}%)")
print(f"   Basic Members: {tier_dist.get('basic', 0)} ({tier_dist.get('basic', 0)/len(member_geo_df)*100:.1f}%)")

freq_dist = member_geo_df['visit_frequency'].value_counts()
print(f"\n   High Frequency: {freq_dist.get('high', 0)} ({freq_dist.get('high', 0)/len(member_geo_df)*100:.1f}%)")
print(f"   Medium Frequency: {freq_dist.get('medium', 0)} ({freq_dist.get('medium', 0)/len(member_geo_df)*100:.1f}%)")
print(f"   Low Frequency: {freq_dist.get('low', 0)} ({freq_dist.get('low', 0)/len(member_geo_df)*100:.1f}%)")

# Strategic recommendations
print("\n💡 STRATEGIC RECOMMENDATIONS:")
print("\n1. IMMEDIATE ACTIONS:")
print("   • Focus on underserved zip codes with high travel times")
print("   • Improve transportation options for members >30 min travel time")
print("   • Target premium membership sales in high-income areas")

print("\n2. SHORT-TERM (3-6 months):")
print("   • Launch targeted marketing in top 5 expansion zip codes")
print("   • Analyze facility capacity constraints at gym_main (90% utilization)")
print("   • Implement shuttle service for underserved areas")

print("\n3. LONG-TERM (6-12 months):")
top_expansion = high_priority.head(1).iloc[0]
print(f"   • Open new facility in Zip Code {top_expansion['zip_code']} (highest expansion score)")
print("   • Expand capacity at gym_main to accommodate growing demand")
print("   • Develop strategic partnerships in high-competitor areas")

print("\n4. MARKET PENETRATION STRATEGY:")
print("   • Current market penetration: Low in several high-potential areas")
print("   • Opportunity: 10+ zip codes with <2 members but high fitness interest")
print("   • Recommendation: Launch neighborhood ambassador program")

# ============================================================================
# SECTION 10: EXPORT RESULTS FOR DASHBOARD
# ============================================================================

print("\n" + "="*80)
print("💾 EXPORTING RESULTS FOR STREAMLIT DASHBOARD")
print("="*80)

# Export processed datasets
distribution_enhanced.to_csv('processed_member_distribution.csv', index=False)
print("✅ Exported: processed_member_distribution.csv")

demand_by_zip.to_csv('processed_demand_analysis.csv', index=False)
print("✅ Exported: processed_demand_analysis.csv")

accessibility_metrics.to_csv('processed_accessibility_metrics.csv', index=False)
print("✅ Exported: processed_accessibility_metrics.csv")

penetration_analysis.to_csv('processed_market_penetration.csv', index=False)
print("✅ Exported: processed_market_penetration.csv")

high_priority.to_csv('expansion_recommendations.csv', index=False)
print("✅ Exported: expansion_recommendations.csv")

# Export summary statistics
summary_stats = {
    'total_members': len(member_geo_df),
    'total_facilities': len(facility_df),
    'avg_distance_km': float(member_geo_df['distance_to_gym_km'].mean()),
    'avg_travel_time': float(member_geo_df['travel_time_minutes'].mean()),
    'unique_zip_codes': len(member_geo_df['zip_code'].unique()),
    'avg_utilization': float(facility_df['utilization_rate'].mean()),
    'underserved_members': len(underserved_members),
    'top_expansion_zip': str(top_expansion['zip_code']),
    'analysis_date': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
}

with open('geo_analytics_summary.json', 'w') as f:
    json.dump(summary_stats, f, indent=4)
print("✅ Exported: geo_analytics_summary.json")

print("\n" + "="*80)
print("✅ GEO ANALYTICS ANALYSIS COMPLETE!")
print("="*80)
print("\n📁 Generated Files:")
print("   • member_distribution_map.html")
print("   • expansion_opportunities_map.html")
print("   • accessibility_analysis_map.html")
print("   • processed_member_distribution.csv")
print("   • processed_demand_analysis.csv")
print("   • processed_accessibility_metrics.csv")
print("   • processed_market_penetration.csv")
print("   • expansion_recommendations.csv")
print("   • geo_analytics_summary.json")

print("\n🚀 Next Steps:")
print("   1. Review interactive HTML maps for visual insights")
print("   2. Load processed CSV files into Streamlit dashboard")
print("   3. Present expansion recommendations to stakeholders")
print("   4. Implement strategic actions based on findings")

print("\n" + "="*80)

'pip' is not recognized as an internal or external command,
operable program or batch file.


✅ All libraries imported successfully!
📅 Analysis Date: 2025-10-02 03:29:05

📂 LOADING GEO ANALYTICS DATASETS


FileNotFoundError: [Errno 2] No such file or directory: 'member_geo_data.csv'