In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

In [3]:
# Read the flood dataset from a CSV located in the current working directory.
df=pd.read_csv('flood.csv')
# Bare expression: renders the frame as this cell's output.
df

Unnamed: 0,date,rainfall_mm,soil_moisture_percent,humidity_percent,river_level_m,temperature_c,flood_risk
0,2026-05-28,61.51,21.94,88.36,0.78,24.77,1
1,2021-10-26,62.53,35.56,89.93,7.41,24.67,1
2,2025-08-26,18.49,44.70,63.43,9.04,20.08,0
3,2021-03-22,33.11,15.28,74.05,7.69,28.93,0
4,2021-04-30,14.58,27.62,70.35,3.36,31.83,0
...,...,...,...,...,...,...,...
4737,2023-10-12,86.84,46.71,68.73,4.65,36.10,1
4738,2021-05-01,55.82,30.77,63.23,6.76,25.82,0
4739,2020-04-06,81.33,33.56,88.42,3.17,30.46,1
4740,2023-10-26,136.63,36.44,69.02,10.12,21.46,1


In [4]:
# Count missing values per column. This cell only inspects the data; nothing is filled.
df.isnull().sum()

date                     0
rainfall_mm              0
soil_moisture_percent    0
humidity_percent         0
river_level_m            0
temperature_c            0
flood_risk               0
dtype: int64

In [14]:
from sklearn.model_selection import train_test_split

In [9]:
# Predictor columns used for the model; `flood_risk` is the target label.
# `date` and `soil_moisture_percent` are present in the frame but are not selected.
feature_columns = ['rainfall_mm', 'humidity_percent', 'river_level_m', 'temperature_c']
X = df[feature_columns]
y = df['flood_risk']

In [10]:
y.shape

(4742,)

In [11]:
X

Unnamed: 0,rainfall_mm,humidity_percent,river_level_m,temperature_c
0,61.51,88.36,0.78,24.77
1,62.53,89.93,7.41,24.67
2,18.49,63.43,9.04,20.08
3,33.11,74.05,7.69,28.93
4,14.58,70.35,3.36,31.83
...,...,...,...,...
4737,86.84,68.73,4.65,36.10
4738,55.82,63.23,6.76,25.82
4739,81.33,88.42,3.17,30.46
4740,136.63,69.02,10.12,21.46


In [12]:
y

0       1
1       1
2       0
3       0
4       0
       ..
4737    1
4738    0
4739    1
4740    1
4741    0
Name: flood_risk, Length: 4742, dtype: int64

In [15]:
# Hold out a test split. No test_size is passed, so scikit-learn's default
# proportion applies; random_state pins the shuffle so the split is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=24,
)

In [16]:
x_train.shape

(3556, 4)

In [17]:
x_test.shape

(1186, 4)

In [18]:
y_train.shape

(3556,)

In [19]:
y_test.shape

(1186,)

In [21]:
from sklearn.preprocessing import StandardScaler

In [23]:
# Fit the scaler on the training split only, then apply that same fitted scaler
# to both splits so no test-set statistics leak into the scaling.
# NOTE: x_train / x_test are rebound to the scaled arrays, so re-running this
# cell without first re-running the split cell would scale already-scaled data.
sc = StandardScaler()
x_train = sc.fit(x_train).transform(x_train)
x_test = sc.transform(x_test)

In [41]:
# joblib is imported here because, when the notebook is executed top to bottom,
# this cell runs before the later cells that import it; without this import a
# fresh-kernel run fails with NameError.
import joblib

# Persist the fitted scaler so the same scaling can be reapplied at inference time.
joblib.dump(sc, "scaler.pkl")

['scaler.pkl']

In [26]:
# Baseline: XGBoost with library-default hyperparameters (only the seed is set).

from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score

xgb = XGBClassifier(random_state=42)
xgb.fit(x_train, y_train)

# Score the baseline on the held-out split.
y_pred = xgb.predict(x_test)
baseline_accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", baseline_accuracy)
print(classification_report(y_test, y_pred))


Accuracy: 0.9949409780775716
              precision    recall  f1-score   support

           0       1.00      0.99      0.99       591
           1       0.99      1.00      0.99       595

    accuracy                           0.99      1186
   macro avg       1.00      0.99      0.99      1186
weighted avg       0.99      0.99      0.99      1186



In [27]:
from sklearn.model_selection import GridSearchCV

# Candidate values per hyperparameter; the search covers the full cross product
# (3 * 3 * 3 * 2 * 2 * 3 = 324 combinations).
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[3, 5, 7],
    learning_rate=[0.01, 0.1, 0.2],
    subsample=[0.8, 1.0],
    colsample_bytree=[0.8, 1.0],
    gamma=[0, 0.1, 0.2],
)

xgb_clf = XGBClassifier(random_state=42)

# Exhaustive search scored by 3-fold cross-validated accuracy on the training
# split; n_jobs=-1 runs the fits in parallel on all available cores.
grid_search = GridSearchCV(
    xgb_clf,
    param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1,
    verbose=2,
)

grid_search.fit(x_train, y_train)

print("✅ Best Parameters:", grid_search.best_params_)
print("🏆 Best Accuracy:", grid_search.best_score_)


Fitting 3 folds for each of 324 candidates, totalling 972 fits
✅ Best Parameters: {'colsample_bytree': 0.8, 'gamma': 0.1, 'learning_rate': 0.1, 'max_depth': 7, 'n_estimators': 200, 'subsample': 0.8}
🏆 Best Accuracy: 0.9907201457226004


In [29]:
# Rebuild a classifier from the winning grid-search settings and train it on
# the whole training split.
best_params = grid_search.best_params_

xgb_best = XGBClassifier(random_state=42, **best_params)
xgb_best.fit(x_train, y_train)

# Evaluate the tuned model on the held-out split.
y_pred_best = xgb_best.predict(x_test)
final_accuracy = accuracy_score(y_test, y_pred_best)
print("Final Accuracy:", final_accuracy)
print(classification_report(y_test, y_pred_best))


Final Accuracy: 0.9957841483979764
              precision    recall  f1-score   support

           0       1.00      0.99      1.00       591
           1       0.99      1.00      1.00       595

    accuracy                           1.00      1186
   macro avg       1.00      1.00      1.00      1186
weighted avg       1.00      1.00      1.00      1186



In [None]:
import joblib

# Save the model
joblib.dump(xgb_best, "xgb_flood_model.pkl")

# Later, load the model
loaded_model = joblib.load("xgb_flood_model.pkl")

# Make predictions on the scaled test split. The scaling cell stores that split
# as `x_test`; `X_test_scaled` is not defined anywhere in the visible notebook,
# so the previous name raised NameError.
y_pred = loaded_model.predict(x_test)


In [30]:
import joblib

# Serialize the tuned classifier to disk; the call's return value is shown as
# the cell output.
joblib.dump(value=xgb_best, filename="xgb_flood_model.pkl")

['xgb_flood_model.pkl']

In [31]:
# Reload the classifier from the file written by the save cell.
# NOTE(review): joblib.load unpickles the file, so only load files from a trusted source.
loaded_model = joblib.load("xgb_flood_model.pkl")

In [38]:
# One hand-written sample, labelled with the same column names (and order) as
# the feature frame the scaler was fitted on. Passing a named DataFrame instead
# of a bare nested list lets scikit-learn validate the feature names and avoids
# its "X does not have valid feature names" warning.
pre_raw = pd.DataFrame(
    [[61.0, 88.4, 7.6, 24]],
    columns=['rainfall_mm', 'humidity_percent', 'river_level_m', 'temperature_c'],
)

# `pre` holds the scaled sample that is passed to the model; the raw values stay
# available under `pre_raw`.
pre = sc.transform(pre_raw)



In [39]:
pre

array([[ 0.5363092 ,  0.75928802,  0.87140975, -0.65107785]])

In [40]:
# `pre` contains a single row, so index 0 pulls out its one predicted class label.
predicted_label = loaded_model.predict(pre)[0]
print(predicted_label)

1
