In [105]:
import requests
import pandas as pd
import numpy as np
import os

In [107]:
# Your bounding box coordinates (Shanghai area)
BBOX = {
    'north': 31.24552,
    'south': 31.23750,
    'west': 121.49647,
    'east': 121.51051
}

In [None]:
# Output path for Excel files
OUTPUT_PATH = "/data"

# Create output directory if it doesn't exist
if not os.path.exists(OUTPUT_PATH):
    os.makedirs(OUTPUT_PATH)
    print(f"Created directory: {OUTPUT_PATH}")
else:
    print(f"Directory exists: {OUTPUT_PATH}")

Directory exists: C:\Users\Wanqi.Shi\Desktop\Archive\DF_25\mlp_example


In [111]:
# Cell 3: Helper functions
def calculate_polygon_area(lons, lats):
    """Calculate approximate area of polygon"""
    if len(lons) < 3:
        return 0
    # Simple approximation using shoelace formula
    area = 0
    n = len(lons)
    for i in range(n):
        j = (i + 1) % n
        area += lons[i] * lats[j]
        area -= lons[j] * lats[i]
    return abs(area) / 2 * 111000 * 111000  # Convert to square meters approximately

def calculate_distance_to_center(lon, lat, bbox):
    """Calculate distance to bounding box center"""
    center_lon = (bbox['west'] + bbox['east']) / 2
    center_lat = (bbox['south'] + bbox['north']) / 2
    return np.sqrt((lon - center_lon)**2 + (lat - center_lat)**2)

In [113]:
# Cell 4: Download Building Data
def download_buildings(bbox, output_path):
    """Download and process building data"""
    print("Downloading building data...")
    
    # OpenStreetMap Overpass API query for buildings
    overpass_url = "http://overpass-api.de/api/interpreter"
    query = f"""
    [out:json][timeout:30];
    (
      way["building"]({bbox['south']},{bbox['west']},{bbox['north']},{bbox['east']});
    );
    out geom;
    """
    
    try:
        response = requests.get(overpass_url, params={'data': query})
        data = response.json()
        
        buildings = []
        for element in data.get('elements', []):
            if 'geometry' in element and 'tags' in element:
                # Calculate center point
                lons = [node['lon'] for node in element['geometry']]
                lats = [node['lat'] for node in element['geometry']]
                center_lon = sum(lons) / len(lons)
                center_lat = sum(lats) / len(lats)
                
                # Extract height (estimate if not available)
                height = 10  # default height
                tags = element['tags']
                
                if 'height' in tags:
                    try:
                        height = float(tags['height'].replace('m', ''))
                    except:
                        pass
                elif 'building:levels' in tags:
                    try:
                        levels = int(tags['building:levels'])
                        height = levels * 3.5  # 3.5m per floor
                    except:
                        pass
                
                buildings.append({
                    'longitude': center_lon,
                    'latitude': center_lat,
                    'height': height,
                    'building_type': tags.get('building', 'residential'),
                    'area': calculate_polygon_area(lons, lats)
                })
        
        df = pd.DataFrame(buildings)
        if df.empty:
            print("No buildings found, creating dummy data...")
            df = create_dummy_buildings(bbox)
        
        # Save to Excel
        excel_path = os.path.join(output_path, "building_height.xlsx")
        df.to_excel(excel_path, index=False)
        print(f"✓ Building data saved: {len(df)} records")
        return df
        
    except Exception as e:
        print(f"Error downloading buildings, creating dummy data: {e}")
        df = create_dummy_buildings(bbox)
        excel_path = os.path.join(output_path, "building_height.xlsx")
        df.to_excel(excel_path, index=False)
        return df

In [115]:
# Cell 5: Create dummy building data (fallback)
def create_dummy_buildings(bbox):
    """Create dummy building data if API fails"""
    n_buildings = 30
    lons = np.random.uniform(bbox['west'], bbox['east'], n_buildings)
    lats = np.random.uniform(bbox['south'], bbox['north'], n_buildings)
    
    buildings = []
    for i in range(n_buildings):
        buildings.append({
            'longitude': lons[i],
            'latitude': lats[i],
            'height': np.random.uniform(5, 50),
            'building_type': np.random.choice(['residential', 'commercial', 'industrial']),
            'area': np.random.uniform(100, 2000)
        })
    return pd.DataFrame(buildings)

In [117]:
# Cell 6: Run building data download
buildings_df = download_buildings(BBOX, OUTPUT_PATH)
print(f"Building data shape: {buildings_df.shape}")
buildings_df.head()

Downloading building data...
✓ Building data saved: 178 records
Building data shape: (178, 5)


Unnamed: 0,longitude,latitude,height,building_type,area
0,121.505043,31.238488,196.0,commercial,3818.06607
1,121.498023,31.240448,17.5,yes,2975.677731
2,121.501015,31.242456,157.5,yes,2318.54507
3,121.498885,31.244155,90.0,yes,2924.592907
4,121.498965,31.240464,226.0,yes,1934.754742


In [118]:
# Cell 7: Download Water Data
def download_water(bbox, output_path):
    """Download and process water data"""
    print("Downloading water data...")
    
    overpass_url = "http://overpass-api.de/api/interpreter"
    query = f"""
    [out:json][timeout:30];
    (
      way["natural"="water"]({bbox['south']},{bbox['west']},{bbox['north']},{bbox['east']});
      way["waterway"]({bbox['south']},{bbox['west']},{bbox['north']},{bbox['east']});
    );
    out geom;
    """
    
    try:
        response = requests.get(overpass_url, params={'data': query})
        data = response.json()
        
        water_features = []
        for element in data.get('elements', []):
            if 'geometry' in element:
                lons = [node['lon'] for node in element['geometry']]
                lats = [node['lat'] for node in element['geometry']]
                center_lon = sum(lons) / len(lons)
                center_lat = sum(lats) / len(lats)
                
                tags = element.get('tags', {})
                water_features.append({
                    'longitude': center_lon,
                    'latitude': center_lat,
                    'water_type': tags.get('natural', tags.get('waterway', 'water')),
                    'area': calculate_polygon_area(lons, lats),
                    'distance_to_center': calculate_distance_to_center(center_lon, center_lat, bbox)
                })
        
        df = pd.DataFrame(water_features)
        if df.empty:
            print("No water features found, creating dummy data...")
            df = create_dummy_water(bbox)
        
        excel_path = os.path.join(output_path, "water.xlsx")
        df.to_excel(excel_path, index=False)
        print(f"✓ Water data saved: {len(df)} records")
        return df
        
    except Exception as e:
        print(f"Error downloading water, creating dummy data: {e}")
        df = create_dummy_water(bbox)
        excel_path = os.path.join(output_path, "water.xlsx")
        df.to_excel(excel_path, index=False)
        return df

In [121]:
# Cell 8: Create dummy water data (fallback)
def create_dummy_water(bbox):
    """Create dummy water data if API fails"""
    n_water = 5
    lons = np.random.uniform(bbox['west'], bbox['east'], n_water)
    lats = np.random.uniform(bbox['south'], bbox['north'], n_water)
    
    water = []
    for i in range(n_water):
        water.append({
            'longitude': lons[i],
            'latitude': lats[i],
            'water_type': np.random.choice(['river', 'pond', 'canal']),
            'area': np.random.uniform(50, 500),
            'distance_to_center': calculate_distance_to_center(lons[i], lats[i], bbox)
        })
    return pd.DataFrame(water)

In [123]:
# Cell 9: Run water data download
water_df = download_water(BBOX, OUTPUT_PATH)
print(f"Water data shape: {water_df.shape}")
water_df.head()

# Cell 10: Download Green Area Data
def download_green(bbox, output_path):
    """Download and process green area data"""
    print("Downloading green area data...")
    
    overpass_url = "http://overpass-api.de/api/interpreter"
    query = f"""
    [out:json][timeout:30];
    (
      way["landuse"="forest"]({bbox['south']},{bbox['west']},{bbox['north']},{bbox['east']});
      way["landuse"="grass"]({bbox['south']},{bbox['west']},{bbox['north']},{bbox['east']});
      way["leisure"="park"]({bbox['south']},{bbox['west']},{bbox['north']},{bbox['east']});
      way["natural"="wood"]({bbox['south']},{bbox['west']},{bbox['north']},{bbox['east']});
    );
    out geom;
    """
    
    try:
        response = requests.get(overpass_url, params={'data': query})
        data = response.json()
        
        green_areas = []
        for element in data.get('elements', []):
            if 'geometry' in element:
                lons = [node['lon'] for node in element['geometry']]
                lats = [node['lat'] for node in element['geometry']]
                center_lon = sum(lons) / len(lons)
                center_lat = sum(lats) / len(lats)
                
                tags = element.get('tags', {})
                green_type = tags.get('landuse', tags.get('leisure', tags.get('natural', 'green')))
                
                green_areas.append({
                    'longitude': center_lon,
                    'latitude': center_lat,
                    'green_type': green_type,
                    'area': calculate_polygon_area(lons, lats),
                    'vegetation_density': np.random.uniform(0.3, 1.0)  # Simulated density
                })
        
        df = pd.DataFrame(green_areas)
        if df.empty:
            print("No green areas found, creating dummy data...")
            df = create_dummy_green(bbox)
        
        excel_path = os.path.join(output_path, "green.xlsx")
        df.to_excel(excel_path, index=False)
        print(f"✓ Green area data saved: {len(df)} records")
        return df
        
    except Exception as e:
        print(f"Error downloading green areas, creating dummy data: {e}")
        df = create_dummy_green(bbox)
        excel_path = os.path.join(output_path, "green.xlsx")
        df.to_excel(excel_path, index=False)
        return df

Downloading water data...
✓ Water data saved: 3 records
Water data shape: (3, 5)


In [124]:
# Cell 11: Create dummy green data (fallback)
def create_dummy_green(bbox):
    """Create dummy green data if API fails"""
    n_green = 15
    lons = np.random.uniform(bbox['west'], bbox['east'], n_green)
    lats = np.random.uniform(bbox['south'], bbox['north'], n_green)
    
    green = []
    for i in range(n_green):
        green.append({
            'longitude': lons[i],
            'latitude': lats[i],
            'green_type': np.random.choice(['park', 'forest', 'grass']),
            'area': np.random.uniform(100, 1000),
            'vegetation_density': np.random.uniform(0.3, 1.0)
        })
    return pd.DataFrame(green)

# Cell 12: Run green data download
green_df = download_green(BBOX, OUTPUT_PATH)
print(f"Green data shape: {green_df.shape}")
green_df.head()

Downloading green area data...
✓ Green area data saved: 41 records
Green data shape: (41, 5)


Unnamed: 0,longitude,latitude,green_type,area,vegetation_density
0,121.501764,31.240218,park,98395.15459,0.879564
1,121.503469,31.238118,park,17134.015488,0.873586
2,121.495749,31.242831,park,36654.819881,0.412909
3,121.500498,31.243535,park,7464.435582,0.860499
4,121.498189,31.239613,grass,4263.698872,0.59696


In [126]:
# Cell 12: Run green data download
green_df = download_green(BBOX, OUTPUT_PATH)
print(f"Green data shape: {green_df.shape}")
green_df.head()

Downloading green area data...
✓ Green area data saved: 41 records
Green data shape: (41, 5)


Unnamed: 0,longitude,latitude,green_type,area,vegetation_density
0,121.501764,31.240218,park,98395.15459,0.516228
1,121.503469,31.238118,park,17134.015488,0.820841
2,121.495749,31.242831,park,36654.819881,0.763472
3,121.500498,31.243535,park,7464.435582,0.954708
4,121.498189,31.239613,grass,4263.698872,0.484572


In [127]:
# Cell 13: Create Heat Degree Data
def create_heat_degree(bbox, output_path):
    """Create heat degree data (simulated thermal data)"""
    print("Creating heat degree data...")
    
    # Create grid points within the bounding box
    n_points = 50  # Number of data points
    
    # Generate random points within the bounding box
    lons = np.random.uniform(bbox['west'], bbox['east'], n_points)
    lats = np.random.uniform(bbox['south'], bbox['north'], n_points)
    
    heat_data = []
    for i in range(n_points):
        # Simulate temperature based on location and urban factors
        distance_from_center = calculate_distance_to_center(lons[i], lats[i], bbox)
        
        # Base temperature with urban heat island effect
        base_temp = 28.0  # Base temperature in Celsius
        urban_heat = max(0, 8 - distance_from_center * 1000)  # Urban heat island
        noise = np.random.normal(0, 3)  # Temperature variation
        
        temperature = base_temp + urban_heat + noise
        
        heat_data.append({
            'longitude': lons[i],
            'latitude': lats[i],
            'temperature': round(temperature, 2),
            'heat_index': round(temperature + np.random.uniform(-5, 10), 2),
            'time_of_day': np.random.choice(['morning', 'afternoon', 'evening'])
        })
    
    df = pd.DataFrame(heat_data)
    excel_path = os.path.join(output_path, "heat_degree.xlsx")
    df.to_excel(excel_path, index=False)
    print(f"✓ Heat degree data saved: {len(df)} records")
    return df


In [129]:
# Cell 14: Run heat data creation
heat_df = create_heat_degree(BBOX, OUTPUT_PATH)
print(f"Heat data shape: {heat_df.shape}")
heat_df.head()


Creating heat degree data...
✓ Heat degree data saved: 50 records
Heat data shape: (50, 5)


Unnamed: 0,longitude,latitude,temperature,heat_index,time_of_day
0,121.504834,31.240361,28.61,36.59,afternoon
1,121.496998,31.241446,33.21,31.92,morning
2,121.500228,31.243683,33.75,42.21,evening
3,121.502226,31.239633,35.27,38.59,morning
4,121.505311,31.242418,34.28,34.06,morning


In [None]:
# Cell 15: Summary and verification
print("=" * 60)
print("DATA DOWNLOAD COMPLETE!")
print("=" * 60)
print(f"Output directory: {OUTPUT_PATH}")
print("\nFiles created:")
for filename in ["building_height.xlsx", "water.xlsx", "green.xlsx", "heat_degree.xlsx"]:
    filepath = os.path.join(OUTPUT_PATH, filename)
    if os.path.exists(filepath):
        print(f"✓ {filename}")
    else:
        print(f"✗ {filename} - NOT FOUND")

print(f"\nData summary:")
print(f"Buildings: {len(buildings_df)} records")
print(f"Water features: {len(water_df)} records") 
print(f"Green areas: {len(green_df)} records")
print(f"Heat points: {len(heat_df)} records")

print("\nReady for MLP training!")

DATA DOWNLOAD COMPLETE!
Output directory: C:\Users\Wanqi.Shi\Desktop\Archive\DF_25\mlp_example

Files created:
✓ building_height.xlsx
✓ water.xlsx
✓ green.xlsx
✓ heat_degree.xlsx

Data summary:
Buildings: 178 records
Water features: 3 records
Green areas: 41 records
Heat points: 50 records

Ready for MLP training! 🎯
