<a href="https://colab.research.google.com/github/Kabindra02/Flood_Severity_index/blob/main/Untitled11.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd

In [7]:
import numpy as np

In [9]:
# Location of the raw flood dataset inside the Colab runtime. Kept in one
# place so the read call and the error message cannot drift apart.
DATA_PATH = '/content/new_flood.csv'

try:
    # Parse 'Date' as datetimes so it can later serve as a time-series index.
    df = pd.read_csv(DATA_PATH, parse_dates=['Date'])
except FileNotFoundError:
    print(f"Error: '{DATA_PATH}' not found. Please ensure the file is uploaded to Colab.")
    # Fall back to an empty DataFrame so `df` is always defined. Cells that
    # check `df.empty` will skip it, but cells that index columns directly
    # will still fail until the file is available.
    df = pd.DataFrame()

In [10]:
# Preview the first five rows to sanity-check the parsed columns.
df.head()

Unnamed: 0,Date,Daily flow Rate,Gauge height flow,24h accumulated,sm_top_0_10cm,sm_10_40cm
0,2009-01-01,470.0,3.36,0.0,20.222,63.26336
1,2009-01-02,460.0,3.34,0.0,18.118,54.073805
2,2009-01-03,456.0,3.33,0.0,22.784667,65.749364
3,2009-01-04,448.0,3.31,0.0,22.027667,68.656034
4,2009-01-05,445.0,3.3,0.0,18.637667,58.216001


In [11]:
# Report the (rows, columns) size of the loaded frame.
df.shape

(5478, 6)

In [12]:
# Use the Date column as the index for time-series operations.
# The extra `'Date' in df.columns` check makes this cell safe to re-run:
# once Date has been moved into the index it is no longer a column, and a
# second set_index('Date') call would raise KeyError.
if not df.empty and 'Date' in df.columns:
    df = df.set_index('Date')

In [13]:
# Row-over-row change in top-layer (0-10 cm) soil moisture. The first row has
# no predecessor, so its missing difference is replaced with 0.
df['sm_change_rate'] = (
    df['sm_top_0_10cm'].sub(df['sm_top_0_10cm'].shift(1)).fillna(0)
)

# Saturation index: 60% weight on the top layer, 40% on the 10-40 cm layer.
df['saturation_index'] = (
    df['sm_top_0_10cm'].mul(0.6).add(df['sm_10_40cm'].mul(0.4))
)


In [14]:
# Relative change in flow versus the previous row. The small constant added
# to the denominator avoids division by zero when the previous flow is 0.
df['flow_change_rate'] = (
    df['Daily flow Rate']
    .sub(df['Daily flow Rate'].shift(1))
    .div(df['Daily flow Rate'].shift(1).add(1e-6))
)

# Lagged copies of the flow, one and two rows back.
for lag in (1, 2):
    df[f'flow_lag_{lag}d'] = df['Daily flow Rate'].shift(lag)



In [19]:
# Shape check on the feature frame.
# NOTE(review): this cell's execution count (19) is later than the cells that
# follow it, and the displayed shape matches the frame after dropna — re-run
# the notebook top to bottom to confirm the intended order.
df.shape

(5476, 10)

In [16]:
# Remove every row that has a missing value in any column (this includes the
# leading rows whose lag features have no history yet).
df = df.dropna(axis=0, how='any')

In [17]:
# Inspect the engineered columns on the first remaining rows.
df.head()

Unnamed: 0_level_0,Daily flow Rate,Gauge height flow,24h accumulated,sm_top_0_10cm,sm_10_40cm,sm_change_rate,saturation_index,flow_change_rate,flow_lag_1d,flow_lag_2d
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2009-01-03,456.0,3.33,0.0,22.784667,65.749364,4.666667,39.970546,-0.008696,460.0,470.0
2009-01-04,448.0,3.31,0.0,22.027667,68.656034,-0.757,40.679014,-0.017544,456.0,460.0
2009-01-05,445.0,3.3,0.0,18.637667,58.216001,-3.39,34.469,-0.006696,448.0,456.0
2009-01-06,449.0,3.31,0.0,17.945667,56.251036,-0.692,33.267815,0.008989,445.0,448.0
2009-01-07,453.0,3.32,0.0,18.687,58.913774,0.741334,34.77771,0.008909,449.0,445.0


In [18]:
# Confirm the row and column counts after dropping incomplete rows.
df.shape

(5476, 10)

In [20]:
# Trailing rainfall totals over 3-, 5- and 7-row windows. rolling().sum()
# yields NaN until a full window is available; those entries are set to 0.
for window in (3, 5, 7):
    df[f'rain_sum_{window}d'] = (
        df['24h accumulated'].rolling(window).sum().fillna(0)
    )


In [21]:
    # Antecedent Precipitation Index (API) setup.
    # Recurrence: API(t) = k * API(t-1) + P(t-1), where k is the decay
    # constant (often between 0.8 and 0.95 for slow-decaying basins).
    # A common value of 0.9 is used here.
    k = 0.9

    # P(t-1): rainfall shifted down by one row, with missing values set to 0.
    # NOTE(review): this variable is not referenced by the cells visible here.
    rainfall_yesterday = df['24h accumulated'].shift(periods=1).fillna(value=0)

    # The recurrence is evaluated with an explicit loop because every value
    # depends on the one before it.
    def calculate_api(series, k):
        """Compute the Antecedent Precipitation Index for a rainfall series.

        Implements API(t) = k * API(t-1) + P(t-1) with API(0) = 0, so each
        value reflects only rainfall from earlier rows. Seeding the first
        value with 0 (rather than with the first rainfall reading) keeps the
        first reading from being counted twice in API(1).

        Args:
            series: pandas Series of rainfall per row, in time order. NaN
                values propagate into all later results, so fill them first.
            k: decay constant applied to the previous API value.

        Returns:
            A float Series with the same index and name as ``series``.
        """
        rain = series.to_numpy(dtype=float)
        # Work on a plain numpy array: per-element writes are much cheaper
        # than repeated Series.iloc assignment.
        api_values = np.zeros(len(rain), dtype=float)
        for i in range(1, len(rain)):
            # API(t) = API(t-1) * k + P(t-1)
            api_values[i] = api_values[i - 1] * k + rain[i - 1]
        return pd.Series(api_values, index=series.index, name=series.name)

    # Build the API column from the rainfall series, treating gaps as zero rain.
    df['API'] = calculate_api(series=df['24h accumulated'].fillna(value=0), k=k)


In [22]:
# Check the rolling rainfall sums and the API column on the first rows.
df.head()

Unnamed: 0_level_0,Daily flow Rate,Gauge height flow,24h accumulated,sm_top_0_10cm,sm_10_40cm,sm_change_rate,saturation_index,flow_change_rate,flow_lag_1d,flow_lag_2d,rain_sum_3d,rain_sum_5d,rain_sum_7d,API
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2009-01-03,456.0,3.33,0.0,22.784667,65.749364,4.666667,39.970546,-0.008696,460.0,470.0,0.0,0.0,0.0,0.0
2009-01-04,448.0,3.31,0.0,22.027667,68.656034,-0.757,40.679014,-0.017544,456.0,460.0,0.0,0.0,0.0,0.0
2009-01-05,445.0,3.3,0.0,18.637667,58.216001,-3.39,34.469,-0.006696,448.0,456.0,0.0,0.0,0.0,0.0
2009-01-06,449.0,3.31,0.0,17.945667,56.251036,-0.692,33.267815,0.008989,445.0,448.0,0.0,0.0,0.0,0.0
2009-01-07,453.0,3.32,0.0,18.687,58.913774,0.741334,34.77771,0.008909,449.0,445.0,0.0,0.0,0.0,0.0


In [23]:


# Take an independent (deep) copy of the engineered frame for export, so
# later changes to `df` do not affect what gets written out.
df_features = df.copy(deep=True)


In [24]:
# Preview `df` again; copying it into df_features does not modify it.
df.head()

Unnamed: 0_level_0,Daily flow Rate,Gauge height flow,24h accumulated,sm_top_0_10cm,sm_10_40cm,sm_change_rate,saturation_index,flow_change_rate,flow_lag_1d,flow_lag_2d,rain_sum_3d,rain_sum_5d,rain_sum_7d,API
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
2009-01-03,456.0,3.33,0.0,22.784667,65.749364,4.666667,39.970546,-0.008696,460.0,470.0,0.0,0.0,0.0,0.0
2009-01-04,448.0,3.31,0.0,22.027667,68.656034,-0.757,40.679014,-0.017544,456.0,460.0,0.0,0.0,0.0,0.0
2009-01-05,445.0,3.3,0.0,18.637667,58.216001,-3.39,34.469,-0.006696,448.0,456.0,0.0,0.0,0.0,0.0
2009-01-06,449.0,3.31,0.0,17.945667,56.251036,-0.692,33.267815,0.008989,445.0,448.0,0.0,0.0,0.0,0.0
2009-01-07,453.0,3.32,0.0,18.687,58.913774,0.741334,34.77771,0.008909,449.0,445.0,0.0,0.0,0.0,0.0


In [25]:
# Write the feature table to disk. The index is written too, because the
# Date column was moved into the index earlier in the notebook; exporting
# with index=False would drop the dates from the time series.
df_features.to_csv('flood_features.csv', index=True)


In [26]:
# Trigger a browser download of the exported feature file.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime — confirm before running this cell elsewhere.
from google.colab import files
files.download('flood_features.csv')


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>