In [None]:
!pip install geopandas earthengine-api requests rasterio rasterstats scikit-learn pandas matplotlib statsmodels tensorflow shap

In [None]:
import geopandas as gpd
import pandas as pd
import ee
import requests
import os
import zipfile
from rasterstats import zonal_stats

# Run the Earth Engine authentication flow for this session.
ee.Authenticate()

# Initialise the Earth Engine client library; every later `ee.*` call in the
# notebook relies on this having succeeded.
ee.Initialize()

print("Earth Engine Initialized Successfully!")

In [None]:
# GeoPackage holding the ward boundaries (and, in later cells, the drainage layers).
gpkg_path = '5th cite.gpkg'

print("Defining Area of Interest from ward boundaries...")

# Read the ward polygons and bring them straight into WGS84 lon/lat.
wards_gdf = gpd.read_file(gpkg_path, layer='wards_2022').to_crs("EPSG:4326")
print("  - Step 1: Reprojected data to WGS84 (EPSG:4326).")

# total_bounds is [minx, miny, maxx, maxy] across all wards.
bounds = wards_gdf.total_bounds
print(f"  - Step 2: Calculated total bounds: {bounds}")

# The AOI is the bounding rectangle of all wards, not their exact outline.
west, south, east, north = bounds
aoi = ee.Geometry.Rectangle(west, south, east, north)

print("\n AOI defined successfully using a robust bounding box.")

In [None]:
print("Preparing high-resolution data for download...")

# Elevation clipped to the AOI, and the terrain slope derived from it.
srtm_image = ee.Image('USGS/SRTMGL1_003')
dem = srtm_image.clip(aoi)
slope = ee.Terrain.slope(dem)
print("  - DEM and Slope prepared.")

# Land cover: sort the MODIS collection newest-first and keep the first image.
modis_land_cover = ee.ImageCollection('MODIS/061/MCD12Q1')
newest_first = modis_land_cover.sort('system:time_start', False)
lulc_recent = newest_first.first().select('LC_Type1').clip(aoi)
print("  - Most recent LULC prepared.")

# Local folder that receives every downloaded archive.
download_dir = 'Flood_Prediction_Data_Local'
os.makedirs(download_dir, exist_ok=True)

def download_image(image, filename, scale, directory, aoi_geom, timeout=300):
    """Download an Earth Engine image to ``directory/filename``.

    Parameters
    ----------
    image : object exposing ``getDownloadURL(params)`` (an ``ee.Image`` in this notebook).
    filename : name of the file to create inside ``directory``.
    scale : value passed as ``scale`` to ``getDownloadURL``.
    directory : existing folder that receives the file.
    aoi_geom : value passed as ``region`` to ``getDownloadURL``.
    timeout : seconds to wait on the HTTP connection / each read before giving up
        (new optional argument; previously the request could block forever).

    Returns
    -------
    bool
        True when a non-empty file was written, False otherwise. Failures are
        reported by printing, never by raising.
    """
    path = os.path.join(directory, filename)
    # Stream into a temporary sibling so an interrupted transfer never leaves a
    # truncated file (or clobbers a previous good download) at the final path.
    part_path = path + '.part'
    try:
        url = image.getDownloadURL({'scale': scale, 'crs': 'EPSG:4326', 'region': aoi_geom})
        print(f"Downloading {filename}...")
        with requests.get(url, stream=True, timeout=timeout) as response:
            response.raise_for_status()
            with open(part_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=1 << 20):
                    f.write(chunk)
        # A zero-byte body is the only way the write can "succeed" without data.
        if os.path.getsize(part_path) == 0:
            os.remove(part_path)
            print(f"❌ DOWNLOAD FAILED: {path} was not created.\n")
            return False
        os.replace(part_path, path)
        print(f"✅ Successfully downloaded and verified: {path}\n")
        return True
    except Exception as e:
        # Best-effort by design: report the problem and let the notebook continue.
        if os.path.exists(part_path):
            os.remove(part_path)
        print(f"❌ Failed to download {filename}. Reason: {e}\n")
        return False

# One entry per raster: (image, archive name, scale passed to getDownloadURL).
download_jobs = [
    (dem, 'dem.zip', 30),
    (slope, 'slope.zip', 30),
    (lulc_recent, 'lulc_2023.zip', 500),
]
for job_image, job_filename, job_scale in download_jobs:
    download_image(job_image, job_filename, job_scale, download_dir, aoi)

In [None]:
print("Unzipping files...")
zip_files_to_unzip = ['dem.zip', 'slope.zip', 'lulc_2023.zip']
tif_files_to_check = ['dem.tif', 'slope.tif', 'lulc_2023.tif']

# Each archive is paired with the .tif name the rest of the notebook expects.
# Earth Engine typically names the zipped GeoTIFF after the image/band
# (e.g. "download.elevation.tif"), so a plain extractall() would not produce
# dem.tif / slope.tif / lulc_2023.tif; write the single GeoTIFF member out
# under the expected name instead.
for zip_file, tif_file in zip(zip_files_to_unzip, tif_files_to_check):
    path_to_zip = os.path.join(download_dir, zip_file)
    if not os.path.exists(path_to_zip):
        print(f"  - Skipped (archive not found): {zip_file}")
        continue
    try:
        with zipfile.ZipFile(path_to_zip, 'r') as zip_ref:
            tif_members = [m for m in zip_ref.namelist() if m.lower().endswith('.tif')]
            if len(tif_members) == 1:
                with open(os.path.join(download_dir, tif_file), 'wb') as tif_out:
                    tif_out.write(zip_ref.read(tif_members[0]))
            else:
                # Unexpected layout (no or several GeoTIFFs): fall back to the
                # original behaviour and let the verification below report it.
                zip_ref.extractall(download_dir)
        print(f"  - Extracted: {zip_file}")
    except zipfile.BadZipFile as e:
        # Happens when the download produced something other than a zip archive.
        print(f"  - Could not extract {zip_file}: {e}")

print("\n--- Verifying Extracted Files ---")
all_files_found = True
for tif_file in tif_files_to_check:
    path_to_tif = os.path.join(download_dir, tif_file)
    if os.path.exists(path_to_tif):
        print(f"✅ Found: {path_to_tif}")
    else:
        print(f"❌ MISSING: {path_to_tif}")
        all_files_found = False

if not all_files_found:
    print("\n❌ One or more .tif files are missing. Please re-run the download cell.")
else:
    print("\n✅ All necessary static files are present.")

In [None]:
from sklearn.base import BaseEstimator, ClassifierMixin
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
import numpy as np

class KerasClassifier(BaseEstimator, ClassifierMixin):
    """Small scikit-learn style wrapper around a Keras softmax classifier.

    Parameters
    ----------
    build_fn : callable or None
        Called as ``build_fn(**kwargs)`` to create the (already compiled) model.
        When None, the default dense network from ``build_model`` is used.
    epochs, batch_size, verbose :
        Forwarded unchanged to ``model.fit``.
    **kwargs :
        Extra keyword arguments stored and passed to ``build_fn``.

    Attributes set by ``fit``
    -------------------------
    model : the trained Keras model.
    label_encoder : ``LabelEncoder`` mapping original labels to integer codes.
    classes_ : original class labels, in the column order of ``predict_proba``.
    input_shape_ : number of feature columns seen in ``fit``.
    num_classes_ : number of distinct labels seen in ``fit``.
    """

    def __init__(self, build_fn=None, epochs=100, batch_size=32, verbose=0, **kwargs):
        self.build_fn = build_fn
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose
        self.kwargs = kwargs
        # Populated by fit(); None means "not fitted yet".
        self.model = None
        self.label_encoder = None
        self.classes_ = None

    def build_model(self):
        """Build and compile the default 128-64-32 dense softmax network.

        Relies on ``input_shape_`` and ``num_classes_``, which ``fit`` sets
        before calling this method.
        """
        model = keras.Sequential([
            # An explicit Input layer replaces the `input_shape=` argument on the
            # first Dense layer, which newer Keras releases warn about.
            keras.Input(shape=(self.input_shape_,)),
            layers.Dense(128, activation='relu'),
            layers.Dropout(0.2),
            layers.Dense(64, activation='relu'),
            layers.Dropout(0.2),
            layers.Dense(32, activation='relu'),
            layers.Dense(self.num_classes_, activation='softmax')
        ])
        model.compile(optimizer='adam',
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])
        return model

    def _require_fitted(self):
        """Raise a clear error when predict/predict_proba is called before fit."""
        if self.model is None or self.label_encoder is None:
            # NotFittedError derives from both ValueError and AttributeError, so
            # callers that caught the former opaque AttributeError still work.
            from sklearn.exceptions import NotFittedError
            raise NotFittedError(
                "This KerasClassifier instance is not fitted yet. Call 'fit' first."
            )

    def fit(self, X, y):
        """Encode ``y``, build a fresh model and train it on ``X``.

        ``X`` must expose ``shape[1]`` (the feature count). Returns ``self``.
        """
        self.input_shape_ = X.shape[1]
        self.label_encoder = LabelEncoder()
        y_encoded = self.label_encoder.fit_transform(y)
        self.classes_ = self.label_encoder.classes_
        self.num_classes_ = len(self.classes_)
        # One-hot targets to match the categorical_crossentropy loss.
        y_categorical = to_categorical(y_encoded, num_classes=self.num_classes_)

        # A new model is created on every fit, so refitting starts from scratch.
        if self.build_fn is None:
            self.model = self.build_model()
        else:
            self.model = self.build_fn(**self.kwargs)

        self.model.fit(X, y_categorical,
                       epochs=self.epochs,
                       batch_size=self.batch_size,
                       verbose=self.verbose)
        return self

    def predict(self, X):
        """Return the most probable original label for each row of ``X``."""
        self._require_fitted()
        probabilities = self.model.predict(X, verbose=0)
        predicted_classes = np.argmax(probabilities, axis=1)
        return self.label_encoder.inverse_transform(predicted_classes)

    def predict_proba(self, X):
        """Return the model's per-class probabilities (columns follow ``classes_``)."""
        self._require_fitted()
        return self.model.predict(X, verbose=0)

# Seed NumPy and TensorFlow with the same value so runs are repeatable.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)
import requests

In [None]:
print("Checking drainage sewer data quality...")


def _count_unusable_geometries(layer):
    """Number of rows whose geometry is empty or missing."""
    geometry = layer.geometry
    return int((geometry.is_empty | geometry.isna()).sum())


def _total_length_projected(layer):
    """Sum of geometry lengths, measured in a projected CRS when possible.

    Lengths taken in a geographic CRS are in degrees, which makes the fixed
    thresholds below meaningless, so geographic layers are first projected to
    their estimated UTM zone. Layers without a CRS are measured as-is.
    """
    if layer.empty:
        return 0
    usable = layer[~(layer.geometry.is_empty | layer.geometry.isna())]
    if usable.empty:
        return 0
    if usable.crs is not None and usable.crs.is_geographic:
        usable = usable.to_crs(usable.estimate_utm_crs())
    return usable.length.sum()


drainage_primary = gpd.read_file(gpkg_path, layer='swd_primary')
drainage_secondary = gpd.read_file(gpkg_path, layer='swd_secondary')
drainage_tertiary = gpd.read_file(gpkg_path, layer='swd_tertiary')

print(f"Primary drainage: {len(drainage_primary)} segments")
print(f"Secondary drainage: {len(drainage_secondary)} segments")
print(f"Tertiary drainage: {len(drainage_tertiary)} segments")

empty_primary = _count_unusable_geometries(drainage_primary)
empty_secondary = _count_unusable_geometries(drainage_secondary)
empty_tertiary = _count_unusable_geometries(drainage_tertiary)

print(f"Empty geometries - Primary: {empty_primary}, Secondary: {empty_secondary}, Tertiary: {empty_tertiary}")

total_length_primary = _total_length_projected(drainage_primary)
total_length_secondary = _total_length_projected(drainage_secondary)
total_length_tertiary = _total_length_projected(drainage_tertiary)

print(f"Total lengths - Primary: {total_length_primary:.2f}, Secondary: {total_length_secondary:.2f}, Tertiary: {total_length_tertiary:.2f}")

# Minimum plausible network length per tier (presumably metres - TODO confirm)
# and a zero-tolerance policy on unusable geometries.
lengths_too_short = (
    total_length_primary < 1000
    or total_length_secondary < 5000
    or total_length_tertiary < 10000
)
has_unusable_geometries = empty_primary > 0 or empty_secondary > 0 or empty_tertiary > 0

if lengths_too_short or has_unusable_geometries:
    print("⚠️ Drainage data appears inadequate. Consider extracting from Earth Engine.")
    use_ee_drainage = True
else:
    print("✅ Drainage data looks good.")
    use_ee_drainage = False

In [None]:
print("\nRequesting server-side daily rainfall calculation for each ward...")

# Start from the untouched ward boundaries so re-running this cell is idempotent.
wards = wards_gdf.copy()

ward_identifier_column = 'Name'

start_date = '2010-01-01'
end_date = '2024-12-31'
# filterDate() treats its end argument as exclusive, so advance it by one day
# to keep the rainfall of end_date itself in the collection.
daily_collection = (
    ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY')
    .filterDate(start_date, ee.Date(end_date).advance(1, 'day'))
    .select('precipitation')
)

# Centroids are computed in a projected CRS and converted back to the ward CRS
# before being sent to Earth Engine.
projected_crs = wards.estimate_utm_crs()
wards_centroids = wards.to_crs(projected_crs).centroid.to_crs(wards.crs)

ee_features = []
ward_points = []  # client-side (ward_id, ee.Geometry) pairs used for the downloads
for idx, geom in zip(wards.index, wards_centroids):
    # Wards without a usable geometry cannot be sampled; they simply end up
    # with zero rainfall through the fillna(0) calls further down.
    if geom is None or geom.is_empty:
        print(f"    ⚠️ Ward {idx} has no geometry; skipping rainfall extraction.")
        continue
    geom_json = gpd.GeoSeries([geom]).__geo_interface__
    coords = geom_json['features'][0]['geometry']
    ward_geometry = ee.Geometry(coords)
    # NOTE(review): int(idx) assumes the wards index is integer-like.
    ward_id = int(idx)
    ee_features.append(ee.Feature(ward_geometry, {'original_id': ward_id}))
    ward_points.append((ward_id, ward_geometry))
wards_ee_centroids = ee.FeatureCollection(ee_features)
print("  - Successfully created server-side FeatureCollection of ward centroids.")

print("  - Downloading daily rainfall time series for each ward (this may take several minutes)...")
all_rainfall_rows = []
# The ids and geometries are already known client-side, so there is no need to
# ask the server for the collection size or for each feature's properties.
num_wards = len(ward_points)

for i, (ward_id, ward_geometry) in enumerate(ward_points):
    try:
        region_data = daily_collection.getRegion(
            geometry=ward_geometry,
            scale=5566  # presumably the CHIRPS pixel size in metres - TODO confirm
        ).getInfo()
    except Exception as e:
        print(f"    ❌ Failed to fetch data for ward {ward_id}: {e}")
        continue

    # The first row of getRegion output is the header; nothing else means no data.
    if len(region_data) <= 1:
        print(f"    ⚠️ No rainfall data for ward {ward_id}.")
        continue

    headers = region_data[0]
    data_rows = region_data[1:]
    ward_df = pd.DataFrame(data_rows, columns=headers)
    ward_df = ward_df[['time', 'precipitation']].dropna()
    if ward_df.empty:
        continue
    # 'time' is in epoch milliseconds; keep only the calendar date as a string.
    ward_df['date'] = pd.to_datetime(ward_df['time'], unit='ms').dt.date.astype(str)
    ward_df['rainfall'] = ward_df['precipitation'].astype(float)
    ward_df['original_id'] = ward_id
    all_rainfall_rows.append(ward_df[['original_id', 'date', 'rainfall']])

    if (i + 1) % 10 == 0:
        print(f"    ... processed {i + 1} of {num_wards} wards.")

if not all_rainfall_rows:
    raise RuntimeError("No rainfall data retrieved. Please verify the AOI and date range.")

# Long (ward, date, rainfall) table -> wide table with one column per date.
rainfall_df_long = pd.concat(all_rainfall_rows, ignore_index=True)
rainfall_df_long = rainfall_df_long.drop_duplicates(subset=['original_id', 'date'])
rainfall_df = rainfall_df_long.pivot(index='original_id', columns='date', values='rainfall').sort_index(axis=1)
rainfall_df = rainfall_df.fillna(0)

csv_path = os.path.join(download_dir, 'ward_daily_rainfall.csv')
rainfall_df.to_csv(csv_path)
print(f"\n✅ Successfully processed and saved all rainfall data to '{csv_path}'.")
# rainfall_df is indexed by original_id, i.e. the wards index, so a plain join aligns rows.
wards = wards.join(rainfall_df)

# Daily columns are recognised by their 'YYYY-...' string names.
date_columns = [
    col for col in wards.columns
    if isinstance(col, str) and col[:4].isdigit() and '-' in col
]
wards[date_columns] = wards[date_columns].fillna(0)
wards['average_rainfall'] = wards[date_columns].mean(axis=1)
wards['total_rainfall'] = wards[date_columns].sum(axis=1)
wards['max_daily_rainfall'] = wards[date_columns].max(axis=1)
wards['rainfall_std'] = wards[date_columns].std(axis=1).fillna(0)

print("✅ Feature extraction complete.")

In [None]:
print("\nExtracting static geospatial features from downloaded rasters...")

# Zonal statistics are only computed for wards with a non-empty geometry.
wards_clean = wards[~wards.geometry.is_empty].copy()

dem_path = os.path.join(download_dir, 'dem.tif')
slope_path = os.path.join(download_dir, 'slope.tif')
lulc_path = os.path.join(download_dir, 'lulc_2023.tif')

# One row per output column:
# (column, raster path, zonal statistic, message on success, message when the raster is missing)
raster_features = [
    ('elevation', dem_path, 'mean',
     "  - Elevation extracted.",
     "  - DEM file not found, setting elevation to 0."),
    ('slope', slope_path, 'mean',
     "  - Slope extracted.",
     "  - Slope file not found, setting slope to 0."),
    ('land_cover', lulc_path, 'majority',
     "  - Land cover extracted.",
     "  - LULC file not found, setting land_cover to 0."),
]

for feature_column, raster_path, statistic, done_message, missing_message in raster_features:
    if not os.path.exists(raster_path):
        # No raster available: the whole column falls back to 0.
        wards[feature_column] = 0
        print(missing_message)
        continue

    zone_results = zonal_stats(wards_clean, raster_path, stats=statistic, nodata=-9999)
    feature_values = []
    for zone in zone_results:
        zone_value = zone[statistic]
        # A zone with no valid pixels yields None; store 0 instead.
        feature_values.append(0 if zone_value is None else zone_value)
    wards.loc[wards_clean.index, feature_column] = feature_values
    print(done_message)

# Wards excluded from wards_clean have no value yet; default them to 0 as well.
for feature_column in ('elevation', 'slope', 'land_cover'):
    wards[feature_column] = wards[feature_column].fillna(0)

print("✅ Static geospatial features extracted.")

In [None]:
from collections import defaultdict

# Drainage density per ward = total drain length inside the ward / ward area,
# both measured in the estimated UTM CRS of the wards.
if not use_ee_drainage:
    print("\nCalculating Drainage Density from GPKG...")
    wards_cleaned = wards[~wards.geometry.is_empty].copy()
    drainage_primary = gpd.read_file(gpkg_path, layer='swd_primary').to_crs(wards.crs)
    drainage_secondary = gpd.read_file(gpkg_path, layer='swd_secondary').to_crs(wards.crs)
    drainage_tertiary = gpd.read_file(gpkg_path, layer='swd_tertiary').to_crs(wards.crs)
    all_drainage = pd.concat([drainage_primary, drainage_secondary, drainage_tertiary])

    projected_crs = wards_cleaned.estimate_utm_crs()
    wards_proj = wards_cleaned.to_crs(projected_crs)
    all_drainage_proj = all_drainage.to_crs(projected_crs)

    # Only the ward identifier and the geometries are needed for the overlay.
    # Dropping every other attribute keeps the (potentially thousands of)
    # rainfall columns from being copied onto each drain segment and rules out
    # suffixed duplicates of the identifier column in the result.
    ward_zones = wards_proj[[ward_identifier_column, wards_proj.geometry.name]]
    drainage_lines = all_drainage_proj[[all_drainage_proj.geometry.name]]

    ward_drainage = gpd.overlay(ward_zones, drainage_lines, how='intersection', keep_geom_type=False)

    if ward_identifier_column in ward_drainage.columns:
        ward_id_after_overlay = ward_identifier_column
    else:
        raise ValueError(f"Cannot find {ward_identifier_column} column in overlay result. Available columns: {ward_drainage.columns.tolist()}")

    print("  - Calculating drainage length for each ward individually...")

    # Sum the clipped segment lengths per ward in one vectorised pass.
    overlay_ward_ids = ward_drainage[ward_id_after_overlay]
    if isinstance(overlay_ward_ids, pd.DataFrame):
        # Defensive: duplicate column labels would yield a frame; use the first.
        overlay_ward_ids = overlay_ward_ids.iloc[:, 0]
    drainage_length = ward_drainage.geometry.length.groupby(overlay_ward_ids).sum()

    ward_areas = wards_proj.set_index(ward_identifier_column).geometry.area

    # Index-aligned division; wards without any drain segment become NaN here.
    drainage_density = (drainage_length / ward_areas).rename('drainage_density')

    # Drop a column left over from a previous run so the join cannot collide.
    if 'drainage_density' in wards.columns:
        wards = wards.drop(columns=['drainage_density'])

    wards = wards.join(drainage_density, on=ward_identifier_column)
    # Assign the result back instead of `fillna(..., inplace=True)` on a column
    # selection, which pandas warns about and which is a no-op under Copy-on-Write.
    wards['drainage_density'] = wards['drainage_density'].fillna(0)
    print("✅ Drainage Density calculated from GPKG.")
else:
    print("\n⚠️ Using alternative drainage extraction from Earth Engine...")
    wards['drainage_density'] = 0.01  # Placeholder
    print("✅ Placeholder drainage density set. Implement EE extraction as needed.")

In [None]:
print("\nExtracting historical traffic/mobility data from Earth Engine...")

mobility_bands = [
    'transit_stations_percent_change_from_baseline',
    'workplaces_percent_change_from_baseline',
]

# NOTE(review): mobility_image is only constructed here, never sampled, so
# nothing in this cell confirms that the collection id or band names resolve
# on the server - verify before relying on it.
mobility_collection = ee.ImageCollection('GOOGLE/GLOBAL_MOBILITY').filterDate('2020-01-01', '2024-12-31')
mobility_image = mobility_collection.select(mobility_bands).mean().clip(aoi)

# Both mobility features are constant placeholders for now.
for placeholder_column in ('historical_mobility_transit', 'historical_mobility_workplaces'):
    wards[placeholder_column] = 0

print("✅ Historical mobility data extracted (placeholder).")

In [None]:
print("\nProcessing traffic congestion factors...")

# Equal-weight blend of the two mobility columns; missing values become 0.
transit_component = wards['historical_mobility_transit'] * 0.5
workplace_component = wards['historical_mobility_workplaces'] * 0.5
wards['traffic_factor'] = (transit_component + workplace_component).fillna(0)

print("✅ Traffic factors integrated.")

In [None]:
print("\nExtracting real-time weather data...")

# Query the weather at the centre of the wards' bounding box.
bounds = wards.total_bounds  # [minx, miny, maxx, maxy]
lat = (bounds[1] + bounds[3]) / 2  # average of min and max y
lon = (bounds[0] + bounds[2]) / 2  # average of min and max x

# The API key is read from the environment instead of being committed with the
# notebook. NOTE(review): the key that used to be hardcoded here should be
# treated as leaked and rotated.
weather_api_key = os.environ.get('OPENWEATHER_API_KEY', '')
if not weather_api_key:
    print("⚠️ OPENWEATHER_API_KEY is not set; weather requests will be rejected and defaults used.")

weather_api_base = 'https://api.openweathermap.org/data/2.5'
weather_params = {
    'lat': float(lat),
    'lon': float(lon),
    'appid': weather_api_key,
    'units': 'metric',
}
weather_timeout = 30  # seconds; keeps a stalled request from hanging the cell
forecast_days = 5

try:
    current_response = requests.get(
        f'{weather_api_base}/weather', params=weather_params, timeout=weather_timeout
    )

    if current_response.status_code == 200:
        current_data = current_response.json()
        current_rain = current_data.get('rain', {}).get('1h', 0)  # rainfall in last hour (mm)
        current_temp = current_data['main']['temp']
        current_humidity = current_data['main']['humidity']
        print(f"Current weather - Temp: {current_temp}°C, Humidity: {current_humidity}%, Rain (1h): {current_rain}mm")
    else:
        current_rain = 0
        current_temp = 25  # default
        current_humidity = 60  # default
        print("⚠️ Could not fetch current weather data")

    forecast_response = requests.get(
        f'{weather_api_base}/forecast', params=weather_params, timeout=weather_timeout
    )

    if forecast_response.status_code == 200:
        forecast_data = forecast_response.json()
        # 5 days * 8 three-hourly entries; a missing 'rain' block means no rain.
        forecast_rain = [
            item.get('rain', {}).get('3h', 0)
            for item in forecast_data['list'][:forecast_days * 8]
        ]

        daily_forecast_rain = [
            sum(forecast_rain[i:i + 8]) for i in range(0, len(forecast_rain), 8)
        ][:forecast_days]
        # Pad short responses so all forecast_rain_day_N columns always exist.
        daily_forecast_rain += [0] * (forecast_days - len(daily_forecast_rain))
        print(f"5-day rainfall forecast: {daily_forecast_rain}")
    else:
        daily_forecast_rain = [0] * forecast_days
        print("⚠️ Could not fetch weather forecast")

    # The same city-wide values are broadcast to every ward.
    wards['current_rainfall'] = current_rain
    wards['current_temperature'] = current_temp
    wards['current_humidity'] = current_humidity
    for i, rain in enumerate(daily_forecast_rain):
        wards[f'forecast_rain_day_{i+1}'] = rain

    print("✅ Real-time weather data integrated")

except Exception as e:
    # Request errors can embed the full URL, including the key; redact it.
    error_text = str(e)
    if weather_api_key:
        error_text = error_text.replace(weather_api_key, '***')
    print(f"❌ Error fetching weather data: {error_text}")
    wards['current_rainfall'] = 0
    wards['current_temperature'] = 25
    wards['current_humidity'] = 60
    for i in range(forecast_days):
        wards[f'forecast_rain_day_{i+1}'] = 0

In [None]:
print("\nSaving extracted data...")

# Persist the enriched wards table twice: GeoJSON and a pickle.
geojson_output = 'wards_extracted.geojson'
pickle_output = 'wards_extracted.pkl'

wards.to_file(geojson_output, driver='GeoJSON')
wards.to_pickle(pickle_output)

print("✅ Data saved to wards_extracted.geojson and wards_extracted.pkl")