# Importing Useful Python libraries.

In [146]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore", category=UserWarning)

# Reading the CSV file (Dataset)

In [147]:
df = pd.read_csv("flood_data_with_missing.csv")
df

Unnamed: 0,Rainfall,River_Level,Soil_Moisture,Flood
0,152.0,5.28,46.0,0
1,229.0,7.80,45.0,0
2,142.0,7.28,66.0,0
3,64.0,14.92,75.0,0
4,156.0,3.46,82.0,0
...,...,...,...,...
395,148.0,8.48,84.0,0
396,202.0,5.98,70.0,0
397,142.0,13.52,72.0,0
398,195.0,2.80,55.0,1


# EDA of DATASET

In [148]:
print("\n--- Dataset Info ---")
print(df.info())


--- Dataset Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Rainfall       385 non-null    float64
 1   River_Level    385 non-null    float64
 2   Soil_Moisture  385 non-null    float64
 3   Flood          400 non-null    int64  
dtypes: float64(3), int64(1)
memory usage: 12.6 KB
None


In [149]:
print("\n--- Dataset Description ---")
print(df.describe())


--- Dataset Description ---
         Rainfall  River_Level  Soil_Moisture      Flood
count  385.000000   385.000000     385.000000  400.00000
mean   179.254545     8.011169      53.924675    0.23000
std     71.466722     3.994086      20.240019    0.42136
min     50.000000     1.070000      20.000000    0.00000
25%    120.000000     4.640000      37.000000    0.00000
50%    179.000000     7.940000      54.000000    0.00000
75%    240.000000    11.320000      71.000000    0.00000
max    299.000000    14.940000      89.000000    1.00000


In [150]:
print("\n--- Missing Values ---")
print(df.isnull().sum())


--- Missing Values ---
Rainfall         15
River_Level      15
Soil_Moisture    15
Flood             0
dtype: int64


In [151]:
data = df.dropna()
data

Unnamed: 0,Rainfall,River_Level,Soil_Moisture,Flood
0,152.0,5.28,46.0,0
1,229.0,7.80,45.0,0
2,142.0,7.28,66.0,0
3,64.0,14.92,75.0,0
4,156.0,3.46,82.0,0
...,...,...,...,...
395,148.0,8.48,84.0,0
396,202.0,5.98,70.0,0
397,142.0,13.52,72.0,0
398,195.0,2.80,55.0,1


# Feature Selection and spiliting the data into Train and Test

In [152]:

X = df[["Rainfall", "River_Level", "Soil_Moisture"]]
y = df["Flood"]

In [153]:

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.6, random_state=66)

# Looking for the test and train data shape

In [154]:
X_train.shape


(160, 3)

In [155]:
X_test.shape

(240, 3)

In [156]:
y_train.shape

(160,)

In [157]:
y_test.shape

(240,)

In [158]:
X_train


Unnamed: 0,Rainfall,River_Level,Soil_Moisture
75,160.0,10.41,70.0
355,56.0,9.51,63.0
83,130.0,5.25,63.0
72,179.0,1.39,85.0
282,162.0,,51.0
...,...,...,...
122,237.0,2.68,79.0
51,104.0,10.40,75.0
119,73.0,1.67,68.0
316,289.0,1.52,73.0


In [159]:
X_test

Unnamed: 0,Rainfall,River_Level,Soil_Moisture
189,173.0,11.36,34.0
292,196.0,4.92,20.0
140,292.0,14.07,45.0
97,267.0,11.00,30.0
207,85.0,9.43,57.0
...,...,...,...
92,53.0,6.76,20.0
208,62.0,8.75,25.0
181,191.0,11.24,29.0
127,64.0,14.71,20.0


In [160]:
y_train

75     0
355    0
83     0
72     0
282    0
      ..
122    1
51     0
119    0
316    0
20     1
Name: Flood, Length: 160, dtype: int64

In [161]:
y_test

189    0
292    0
140    1
97     1
207    0
      ..
92     0
208    0
181    0
127    0
171    0
Name: Flood, Length: 240, dtype: int64

## Training the model

In [162]:
model = DecisionTreeClassifier()
model.fit(X_train, y_train)

## Checking the Accuracy of the Trained Model

In [163]:
y_pred = model.predict(X_test)
print("Model Accuracy:", accuracy_score(y_test, y_pred))

Model Accuracy: 0.7291666666666666


## Taking User Input 

In [164]:
rainfall = float(input("Enter rainfall (mm): "))
river_level = float(input("Enter river level (m): "))
soil_moisture = float(input("Enter soil moisture (%): "))

## Predicting Outcome 

In [165]:
user_data = [[rainfall, river_level, soil_moisture]]
prediction = model.predict(user_data)

if prediction[0] == 1:
    print("⚠️ Flood predicted in this area!")
else:
    print("✅ No flood predicted.")

⚠️ Flood predicted in this area!
