In [1]:
from datetime import datetime
import pandas as pd
import numpy as np
import tensorflow as tf

STAGE 1 MODEL DATA ENGINEERING

In [40]:



hist = pd.read_csv(r"D:\Backup folder\Code\GitHub\goesafe101\AI\data\blantyre_2018-2024.csv", parse_dates=['time'])

# Ensure sorted by area and time for rolling computations
hist = hist.sort_values(['area','time']).reset_index(drop=True)

# -----------------------------
# 1. Rolling precipitation
# -----------------------------
hist['prcp_1d'] = hist.groupby('area')['prcp'].shift(1)  # yesterday's prcp
hist['prcp_3d'] = hist.groupby('area')['prcp'].rolling(3, min_periods=1).mean().reset_index(0,drop=True)
hist['prcp_7d'] = hist.groupby('area')['prcp'].rolling(7, min_periods=1).mean().reset_index(0,drop=True)
hist['prcp_30d'] = hist.groupby('area')['prcp'].rolling(30, min_periods=1).mean().reset_index(0,drop=True)

# Precipitation intensity ratio
hist['prcp_ratio'] = hist['prcp_1d'] / hist['prcp_7d']

# Antecedent Wetness Index (AWI)
hist['AWI'] = 0.6*hist['prcp_1d'] + 0.3*hist['prcp_3d'] + 0.1*hist['prcp_7d']


if 'pres' in hist.columns:
    hist['pres_24h_drop'] = hist.groupby('area')['pres'].shift(1) - hist['pres']




nino_train = pd.read_csv(r"D:\Backup folder\Code\GitHub\goesafe101\AI\data\nino2016_2023.csv", parse_dates=['date'])
nino_test  = pd.read_csv(r"D:\Backup folder\Code\GitHub\goesafe101\AI\data\nino2025.csv", parse_dates=['date'])


hist = hist.merge(nino_train.rename(columns={'date':'time'}), on='time', how='left')


stage1_features = ['time','area','prcp_1d','prcp_3d','prcp_7d','prcp_30d','prcp_ratio','AWI',
                   'pres','pres_24h_drop','wspd','nino34_sst_anomaly']


STAGE 2 DATA ENG

In [17]:
# Load topographic / spatial data
topo = pd.read_csv(r"D:\Backup folder\Code\GitHub\goesafe101\AI\data\Blantyre_data.csv")  # columns: area, elevation, slope, soil_moisture_index, dist_mz_border_km, dist_major_river_km


topo['elevation_x_slope'] = topo['elevation_m'] * topo['avg_slope_deg']


stage2_features = ['soil_moisture_index','elevation','slope','elevation_x_slope',
                   'dist_mz_border_km','dist_major_river_km','month','lsi','ofi']


In [30]:


# Load forecast and parse Date
forecast = pd.read_csv(r"D:\Backup folder\Code\GitHub\goesafe101\AI\data\forecast_data.csv")
forecast['time'] = pd.to_datetime(forecast['Date'])  # create 'time' column
forecast = forecast.rename(columns={'Forecast':'prcp'})  # precipitation column

# Drop the original Date column if you want
forecast = forecast.drop(columns=['Date'])


In [33]:
nino_test = pd.read_csv(r"D:\Backup folder\Code\GitHub\goesafe101\AI\data\nino2025.csv")
nino_test['time'] = pd.to_datetime(nino_test['date'])
nino_test = nino_test[['time','nino34_sst_anomaly']]  # keep only relevant columns


In [34]:
forecast = forecast.merge(nino_test, on='time', how='left')


In [36]:
forecast['area'] = 'Blantyre_CBD'

forecast = forecast.sort_values(['area','time']).reset_index(drop=True)

forecast['prcp_1d'] = forecast.groupby('area')['prcp'].shift(1)
forecast['prcp_3d'] = forecast.groupby('area')['prcp'].rolling(3, min_periods=1).mean().reset_index(0, drop=True)
forecast['prcp_7d'] = forecast.groupby('area')['prcp'].rolling(7, min_periods=1).mean().reset_index(0, drop=True)
forecast['prcp_30d'] = forecast.groupby('area')['prcp'].rolling(30, min_periods=1).mean().reset_index(0, drop=True)

forecast['prcp_ratio'] = forecast['prcp_1d'] / forecast['prcp_7d']
forecast['AWI'] = 0.6*forecast['prcp_1d'] + 0.3*forecast['prcp_3d'] + 0.1*forecast['prcp_7d']

# -----------------------------
# 5. Save ready-to-predict dataset
# -----------------------------
forecast.to_csv(r"D:\Backup folder\Code\GitHub\goesafe101\AI\data\forecast_ready_2025_2026.csv",
                index=False)

print("Forecast dataset for 2025–2026 prepared with Stage 1 + Stage 2 features.")

Forecast dataset for 2025–2026 prepared with Stage 1 + Stage 2 features.


2 staged model architecture

In [41]:
hist.head(5)

Unnamed: 0.1,time,area,Unnamed: 0,tavg,tmin,tmax,prcp,wspd,pres,prcp_1d,prcp_3d,prcp_7d,prcp_30d,prcp_ratio,AWI,pres_24h_drop,nino34_sst_anomaly
0,2018-01-04,Angelogothere,0,23.8,20.1,,0.0,,,,0.0,0.0,0.0,,,,
1,2018-01-18,Angelogothere,1,24.4,19.6,,8.9,,,0.0,4.45,4.45,4.45,0.0,1.78,,
2,2018-01-31,Angelogothere,2,26.2,21.6,,13.7,,,8.9,7.533333,7.533333,7.533333,1.181416,8.353333,,
3,2018-02-01,Angelogothere,3,26.6,20.3,,0.0,,,13.7,7.533333,5.65,5.65,2.424779,11.045,,-0.73
4,2018-02-05,Angelogothere,4,25.6,20.8,,0.0,,,0.0,4.566667,4.52,4.52,0.0,1.822,,


Stage 1

In [42]:
X1 = hist[stage1_features]