# Map Visualization - Geographic Data

**Use Case**: Display geographic/spatial data, regional analysis, location-based insights

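This notebook demonstrates how to create effective map visualizations for geographic data analysis: static matplotlib scatter, bubble, and contour plots drawn on longitude/latitude axes, Plotly code templates for interactive maps, and a short statistical summary of the sample data.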


In [None]:
# Import necessary libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from mpl_toolkits.basemap import Basemap
import folium
from folium import plugins
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import geopandas as gpd
from shapely.geometry import Point
import warnings
warnings.filterwarnings('ignore')

# Set style for better-looking plots.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*".
# Fall back to the pre-3.6 name so this cell does not raise on older installs.
_plot_style = 'seaborn-v0_8-darkgrid'
if _plot_style not in plt.style.available:
    _plot_style = 'seaborn-darkgrid'
plt.style.use(_plot_style)
sns.set_palette("viridis")

# Set random seed for reproducibility
np.random.seed(42)

# NOTE(review): this hint is printed only after the imports above have already
# succeeded, so it will not be shown when a package is actually missing.
print("Note: Some libraries (like basemap, geopandas) may need installation:")
print("pip install basemap geopandas folium plotly")


In [None]:
# Build the three illustrative datasets used by the plotting cells.
# Each dataset is written row-by-row so one entity's attributes sit together.

# 1. US cities: location, population and GDP per capita
city_records = [
    ('New York',     'NY', 40.7589,  -73.9851, 8336817,  65000),
    ('Los Angeles',  'CA', 34.0522, -118.2437, 4000000,  70000),
    ('Chicago',      'IL', 41.8781,  -87.6298, 2693976,  55000),
    ('Houston',      'TX', 29.7604,  -95.3698, 2320268,  60000),
    ('Phoenix',      'AZ', 33.4484, -112.0740, 1680992,  45000),
    ('Philadelphia', 'PA', 39.9526,  -75.1652, 1584064,  58000),
    ('San Antonio',  'TX', 29.4241,  -98.4936, 1547253,  40000),
    ('San Diego',    'CA', 32.7157, -117.1611, 1423851,  68000),
    ('Dallas',       'TX', 32.7767,  -96.7970, 1343573,  62000),
    ('San Jose',     'CA', 37.3387, -121.8914, 1021795, 110000),
    ('Austin',       'TX', 30.2672,  -97.7431,  978908,  48000),
    ('Jacksonville', 'FL', 30.3322,  -81.6557,  911507,  42000),
    ('Fort Worth',   'TX', 32.7555,  -97.3308,  918915,  45000),
    ('Columbus',     'OH', 39.9612,  -82.9988,  898553,  50000),
    ('Charlotte',    'NC', 35.2271,  -80.8431,  885708,  52000),
]
us_cities = pd.DataFrame(
    city_records,
    columns=['city', 'state', 'latitude', 'longitude', 'population', 'gdp_per_capita'],
)

# 2. World countries (simplified): centroid, GDP in trillions, population in millions
country_records = [
    ('United States',  'North America',  39.8283,  -98.5795, 21.4,  331),
    ('China',          'Asia',           35.8617,  104.1954, 14.3, 1439),
    ('Japan',          'Asia',           36.2048,  138.2529,  5.1,  126),
    ('Germany',        'Europe',         51.1657,   10.4515,  3.8,   83),
    ('United Kingdom', 'Europe',         55.3781,   -3.4360,  2.8,   68),
    ('India',          'Asia',           20.5937,   78.9629,  2.9, 1380),
    ('France',         'Europe',         46.2276,    2.2137,  2.6,   65),
    ('Italy',          'Europe',         41.8719,   12.5674,  2.0,   60),
    ('Brazil',         'South America', -14.2350,  -51.9253,  1.8,  213),
    ('Canada',         'North America',  56.1304, -106.3468,  1.7,   38),
    ('South Korea',    'Asia',           35.9078,  127.7669,  1.6,   52),
    ('Spain',          'Europe',         40.4637,   -3.7492,  1.4,   47),
    ('Australia',      'Oceania',       -25.2744,  133.7751,  1.4,   26),
    ('Mexico',         'North America',  23.6345, -102.5528,  1.3,  129),
    ('Indonesia',      'Asia',           -0.7893,  113.9213,  1.1,  274),
]
world_data = pd.DataFrame(
    country_records,
    columns=['country', 'continent', 'latitude', 'longitude',
             'gdp_trillion', 'population_millions'],
)

# 3. Sales regions: region centre, 2023 sales, store count and average income
# The global seed is reset here, exactly as the original cell did.
np.random.seed(42)
region_records = [
    ('Northeast', 41.5,  -74.0, 2500000, 45, 65000),
    ('Southeast', 33.0,  -82.0, 1800000, 38, 48000),
    ('Midwest',   41.0,  -90.0, 2200000, 52, 52000),
    ('Southwest', 31.5,  -99.0, 1600000, 35, 45000),
    ('West',      36.0, -119.0, 3200000, 68, 72000),
    ('Northwest', 47.0, -120.0,  900000, 22, 58000),
]
sales_regions = pd.DataFrame(
    region_records,
    columns=['region', 'center_lat', 'center_lon', 'sales_2023', 'stores', 'avg_income'],
)

# Report how many rows each dataset holds
print("Sample geographic datasets created:")
print(f"US Cities: {len(us_cities.index)} cities")
print(f"World Data: {len(world_data.index)} countries")
print(f"Sales Regions: {len(sales_regions.index)} regions")


In [None]:
# Basic scatter plot maps drawn on plain longitude/latitude axes
fig, axes = plt.subplots(2, 2, figsize=(20, 16))
fig.suptitle('Geographic Data Visualization - Basic Maps', fontsize=16, fontweight='bold')
(ax1, ax2), (ax3, ax4) = axes

# 1. US cities: bubble size = population, colour = GDP per capita
city_bubbles = ax1.scatter(
    us_cities['longitude'], us_cities['latitude'],
    s=us_cities['population'] / 20000,
    c=us_cities['gdp_per_capita'],
    cmap='viridis', alpha=0.7,
)
ax1.set(xlabel='Longitude', ylabel='Latitude',
        title='US Cities: Population Size & GDP per Capita')
plt.colorbar(city_bubbles, ax=ax1, label='GDP per Capita ($)')

# Label only the first five cities to keep the panel readable
labelled = us_cities.head(5)
for name, lon, lat in zip(labelled['city'], labelled['longitude'], labelled['latitude']):
    ax1.annotate(name, (lon, lat),
                 xytext=(5, 5), textcoords='offset points', fontsize=8)

# 2. World countries: bubble size = GDP, colour = population
country_bubbles = ax2.scatter(
    world_data['longitude'], world_data['latitude'],
    s=world_data['gdp_trillion'] * 30,
    c=world_data['population_millions'],
    cmap='plasma', alpha=0.7,
)
ax2.set(xlabel='Longitude', ylabel='Latitude',
        title='World Countries: GDP & Population')
plt.colorbar(country_bubbles, ax=ax2, label='Population (millions)')

# 3. Sales regions: one bubble per region, sized by 2023 sales and annotated in $M
region_palette = ['red', 'blue', 'green', 'orange', 'purple', 'brown']
for shade, region in zip(region_palette, sales_regions.itertuples(index=False)):
    centre = (region.center_lon, region.center_lat)
    ax3.scatter(centre[0], centre[1],
                s=region.sales_2023 / 10000,  # Size by sales
                c=shade, alpha=0.6, label=region.region)
    ax3.annotate(f"${region.sales_2023/1000000:.1f}M", centre,
                 xytext=(5, 5), textcoords='offset points', fontsize=9)
ax3.set(xlabel='Longitude', ylabel='Latitude',
        title='Sales Regions: Revenue by Location')
ax3.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

# 4. World GDP grouped by continent (one colour and legend entry per continent)
continent_colors = {'Asia': 'red', 'Europe': 'blue', 'North America': 'green',
                    'South America': 'orange', 'Oceania': 'purple'}
# sort=False keeps continents in order of first appearance, so the legend order is stable
for continent, members in world_data.groupby('continent', sort=False):
    ax4.scatter(members['longitude'], members['latitude'],
                s=members['gdp_trillion'] * 50,
                c=continent_colors[continent], alpha=0.7, label=continent)
ax4.set(xlabel='Longitude', ylabel='Latitude', title='World GDP by Continent')
ax4.legend()

plt.tight_layout()
plt.show()


In [None]:
# Choropleth-style visualization using matplotlib
# All four panels are drawn on plain longitude/latitude axes (no basemap or projection).
fig, axes = plt.subplots(2, 2, figsize=(18, 14))
fig.suptitle('Advanced Geographic Visualizations', fontsize=16, fontweight='bold')

# 1. Population-weighted density surface around the US cities
ax1 = axes[0, 0]
# Create a grid for the US approximate bounds
lon_range = np.linspace(-125, -65, 50)
lat_range = np.linspace(25, 50, 40)
X, Y = np.meshgrid(lon_range, lat_range)

# Each city adds a Gaussian bump scaled by its population.
# Distances are measured in degrees, so the surface is illustrative only.
Z = np.zeros_like(X)
for _, city in us_cities.iterrows():
    distance = np.sqrt((X - city['longitude'])**2 + (Y - city['latitude'])**2)
    Z += city['population'] * np.exp(-distance**2 / 50)  # Gaussian influence

contour = ax1.contourf(X, Y, Z, levels=20, cmap='YlOrRd', alpha=0.8)
ax1.scatter(us_cities['longitude'], us_cities['latitude'],
            c='black', s=50, marker='o', alpha=0.8)
ax1.set_xlabel('Longitude')
ax1.set_ylabel('Latitude')
ax1.set_title('Population Density Heatmap (US Cities)')
plt.colorbar(contour, ax=ax1, label='Density')

# 2. GDP-weighted influence surface around the country centroids
ax2 = axes[0, 1]
lon_range_world = np.linspace(-180, 180, 60)
lat_range_world = np.linspace(-60, 80, 50)
X_world, Y_world = np.meshgrid(lon_range_world, lat_range_world)

Z_world = np.zeros_like(X_world)
for _, country in world_data.iterrows():
    distance = np.sqrt((X_world - country['longitude'])**2 + (Y_world - country['latitude'])**2)
    Z_world += country['gdp_trillion'] * np.exp(-distance**2 / 400)

contour2 = ax2.contourf(X_world, Y_world, Z_world, levels=15, cmap='viridis', alpha=0.8)
ax2.scatter(world_data['longitude'], world_data['latitude'],
            c='white', s=60, marker='o', edgecolor='black', alpha=0.9)
ax2.set_xlabel('Longitude')
ax2.set_ylabel('Latitude')
ax2.set_title('Global GDP Distribution')
plt.colorbar(contour2, ax=ax2, label='GDP Influence')

# 3. Store distribution analysis
ax3 = axes[1, 0]
# Fixed-radius circles stand in for sales territories (they are not true Voronoi cells)
colors_region = plt.cm.Set3(np.linspace(0, 1, len(sales_regions)))
for territory_color, region in zip(colors_region, sales_regions.itertuples(index=False)):
    center = (region.center_lon, region.center_lat)

    # Territory footprint; the radius is in data (degree) units
    circle = plt.Circle(center, radius=8, color=territory_color, alpha=0.3)
    ax3.add_patch(circle)

    # Region marker sized by store count.
    # `color=` is used instead of `c=` because an RGBA row passed through `c`
    # is ambiguous with value-mapping and makes Matplotlib emit a warning.
    ax3.scatter(center[0], center[1],
                s=region.stores * 10, color=territory_color,
                edgecolor='black', linewidth=2, alpha=0.9)

    # Region label placed below the marker
    ax3.annotate(f"{region.region}\n{region.stores} stores", center,
                 xytext=(0, -30), textcoords='offset points',
                 ha='center', fontsize=9, fontweight='bold')

ax3.set_xlabel('Longitude')
ax3.set_ylabel('Latitude')
ax3.set_title('Sales Territories: Store Distribution')
ax3.set_xlim(-130, -65)
ax3.set_ylim(25, 55)

# 4. GDP per capita map (colour = GDP per capita, size = total GDP)
ax4 = axes[1, 1]
# GDP in trillions * 1000 / population in millions = thousands of dollars per person
gdp_per_capita_k = world_data['gdp_trillion'] * 1000 / world_data['population_millions']

# Draw every country in ONE scatter call so all points share one colour
# normalisation. Calling scatter once per country with a scalar `c` makes each
# call normalise its own single value, which gives every bubble the same colour
# and makes the colorbar meaningless.
scatter_plot = ax4.scatter(world_data['longitude'], world_data['latitude'],
                           s=world_data['gdp_trillion'] * 20,
                           c=gdp_per_capita_k, cmap='RdYlBu_r',
                           alpha=0.7, edgecolor='black', linewidth=0.5)
plt.colorbar(scatter_plot, ax=ax4, label='GDP per Capita (thousands)')

ax4.set_xlabel('Longitude')
ax4.set_ylabel('Latitude')
ax4.set_title('World GDP per Capita (size = total GDP)')
ax4.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Interactive map creation using Plotly (simulated output)
# Nothing here is executed as Plotly code: each example is a printed code
# template followed by a one-line description of what it would render.

scatter_map_code = """
fig = px.scatter_mapbox(us_cities, 
                       lat="latitude", lon="longitude",
                       size="population", color="gdp_per_capita",
                       hover_name="city", hover_data=["state", "population"],
                       color_continuous_scale="viridis",
                       size_max=50, zoom=3,
                       mapbox_style="open-street-map")
fig.update_layout(title="US Cities: Interactive Population & GDP Map")
fig.show()
"""

choropleth_code = """
fig = px.choropleth(world_data,
                   locations="country",
                   locationmode="country names",
                   color="gdp_trillion",
                   hover_name="country",
                   hover_data=["population_millions", "continent"],
                   color_continuous_scale="Blues",
                   title="World GDP Distribution")
fig.show()
"""

animated_map_code = """
# Create time series data
years = list(range(2018, 2024))
animated_data = []
for year in years:
    for _, region in sales_regions.iterrows():
        growth_factor = 1 + (year - 2018) * 0.08 + np.random.normal(0, 0.05)
        animated_data.append({
            'year': year,
            'region': region['region'],
            'lat': region['center_lat'],
            'lon': region['center_lon'],
            'sales': region['sales_2023'] * growth_factor,
            'stores': region['stores']
        })

animated_df = pd.DataFrame(animated_data)

fig = px.scatter_mapbox(animated_df,
                       lat="lat", lon="lon",
                       size="sales", color="region",
                       animation_frame="year",
                       hover_name="region",
                       hover_data=["sales", "stores"],
                       size_max=80, zoom=3,
                       mapbox_style="open-street-map")
fig.show()
"""

# (heading, code template, description of the rendered result)
plotly_examples = (
    ("\n1. Interactive Scatter Map - US Cities",
     scatter_map_code,
     "Output: Interactive map with zoomable, hoverable city markers sized by population"),
    ("\n2. Choropleth World Map",
     choropleth_code,
     "Output: Interactive world map with countries colored by GDP"),
    ("\n3. Animated Time Series Map",
     animated_map_code,
     "Output: Animated map showing sales growth over time"),
)

print("Interactive Map Visualizations (Plotly):")
print("=" * 50)

for heading, code_template, description in plotly_examples:
    print(heading)
    print("Code:")
    print(code_template)
    print(description)


In [None]:
# Statistical analysis of geographic data
# scipy is only needed by this cell; its imports are gathered here so a missing
# dependency fails before any partial report is printed.
from scipy.spatial.distance import pdist, squareform
from scipy.stats import pearsonr

print("Geographic Data Statistical Analysis:")
print("=" * 50)

# 1. Spatial distribution analysis
print("1. US Cities Spatial Analysis:")
print(f"   Latitude range: {us_cities['latitude'].min():.2f} to {us_cities['latitude'].max():.2f}")
print(f"   Longitude range: {us_cities['longitude'].min():.2f} to {us_cities['longitude'].max():.2f}")
print(f"   Geographic center: ({us_cities['latitude'].mean():.2f}, {us_cities['longitude'].mean():.2f})")

# Pairwise distances between cities, computed as plain Euclidean distance on
# (latitude, longitude) degrees. This is a simplification, not a great-circle
# distance, which is why the result is reported in "coordinate units".
coords = us_cities[['latitude', 'longitude']].values
distances = pdist(coords, metric='euclidean')  # Simplified distance
avg_distance = np.mean(distances)
print(f"   Average inter-city distance: {avg_distance:.2f} coordinate units")

# 2. Economic geography analysis
print("\n2. Economic Geography Analysis:")

# US Cities analysis: Pearson correlation between population and GDP per capita
pop_gdp_corr, pop_gdp_p = pearsonr(us_cities['population'], us_cities['gdp_per_capita'])
print(f"   US Cities - Population vs GDP per capita: r={pop_gdp_corr:.3f}, p={pop_gdp_p:.3f}")

# Per-region sales per store, and income relative to the all-region mean
print("\n   Sales Regions Analysis:")
mean_region_income = sales_regions['avg_income'].mean()  # loop-invariant, computed once
for _, region in sales_regions.iterrows():
    efficiency = region['sales_2023'] / region['stores']
    income_ratio = region['avg_income'] / mean_region_income
    print(f"   {region['region']}: ${efficiency/1000:.0f}k per store, income ratio: {income_ratio:.2f}")

# 3. Continental GDP analysis
print("\n3. Continental Economic Analysis:")
world_continental = world_data.groupby('continent').agg({
    'gdp_trillion': ['sum', 'mean', 'count'],
    'population_millions': ['sum', 'mean']
}).round(2)

# Read the per-continent figures from the aggregate above instead of
# re-filtering world_data for every continent. Continents are reported in
# order of first appearance in world_data (groupby itself sorts alphabetically).
for continent in world_data['continent'].unique():
    n_countries = int(world_continental.loc[continent, ('gdp_trillion', 'count')])
    total_gdp = world_continental.loc[continent, ('gdp_trillion', 'sum')]
    total_pop = world_continental.loc[continent, ('population_millions', 'sum')]
    # GDP in trillions * 1000 / population in millions = thousands of dollars per person
    gdp_per_capita = (total_gdp * 1000) / total_pop if total_pop > 0 else 0

    print(f"   {continent}:")
    print(f"     Countries: {n_countries}")
    print(f"     Total GDP: ${total_gdp:.1f}T")
    print(f"     Population: {total_pop:.0f}M")
    print(f"     GDP per capita: ${gdp_per_capita:.1f}k")

# 4. Spatial autocorrelation (simplified)
print("\n4. Spatial Patterns:")
print("   Geographic clustering indicators:")

# Proxy check: linear correlation of GDP per capita with each coordinate.
# This is not a true spatial autocorrelation statistic.
lat_gdp_corr, _ = pearsonr(us_cities['latitude'], us_cities['gdp_per_capita'])
lon_gdp_corr, _ = pearsonr(us_cities['longitude'], us_cities['gdp_per_capita'])
print(f"   Latitude-GDP correlation: {lat_gdp_corr:.3f}")
print(f"   Longitude-GDP correlation: {lon_gdp_corr:.3f}")

# |r| > 0.3 on either axis is the threshold used to report clustering
if abs(lat_gdp_corr) > 0.3 or abs(lon_gdp_corr) > 0.3:
    print("   → Moderate geographic clustering detected")
else:
    print("   → Limited geographic clustering")

print("\nMap Visualization Insights:")
print("✓ Scatter maps show point data with geographic context")
print("✓ Choropleth maps display regional/country-level data")
print("✓ Bubble maps combine multiple variables (size, color)")
print("✓ Heatmaps reveal density and concentration patterns")
print("✓ Interactive maps enable detailed exploration")
print("✓ Time series maps show temporal geographic changes")

print("\nBest Practices:")
print("• Choose appropriate map projections for your region")
print("• Use color scales that are colorblind-friendly")
print("• Include clear legends and scale indicators")
print("• Consider data privacy for precise location data")
print("• Validate geographic coordinates before plotting")
print("• Use clustering for dense point data")
print("• Provide hover information for interactive maps")
