# Project Title:
Flood Prediction & Early Warning System

# Problem Statement:
Floods cause severe loss of life, property, and infrastructure every year. Traditional systems are mostly reactive, giving little time for preparation. There is a need for a predictive, data-driven early warning system that can forecast flood risks in advance using rainfall, weather, and historical data.

# Description:
This project develops a Flood Prediction & Early Warning System using machine learning. Multiple datasets—flood features, rainfall patterns, weather data, and historical flood records—are cleaned, analyzed, and combined to predict flood risks.
Key steps include:
- Data cleaning & exploratory analysis
- Feature engineering (rainfall indices, risk scores)
- ML modeling for flood probability
- Generating risk alerts (Low, Medium, High)

### Loading the datasets and displaying the first 5 rows of each

In [52]:
import os, sys
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Project root is the parent of the current working directory
# (the notebook is expected to be run from notebooks/)
ROOT = Path.cwd().resolve().parent

# Make the modules under src/ importable, without adding a duplicate entry
src_path = ROOT / "src"
if str(src_path) not in sys.path:
    sys.path.append(str(src_path))

# Shared path constants come from paths.py (expected to live under src/)
from paths import DATA_RAW, DATA_PROCESSED, MODELS_DIR, REPORTS_DIR

# Echo the resolved locations so a wrong working directory is spotted early
print(f"Project root: {ROOT}")
print(f"Raw data: {DATA_RAW}")
print(f"Processed data: {DATA_PROCESSED}")




In [53]:
# Collect the raw CSV files in sorted path order and print one name per line
csvs = sorted(DATA_RAW.glob("*.csv"))
for p in csvs:
    print(f"- {p.name}")

- flood_features.csv
- flood_risk_dataset_india.csv
- rainfall_india_1901_2015.csv
- rainfall_normals_district.csv
- weatherHistory_A.csv
- weatherHistory_B.csv


In [54]:
from pathlib import Path
import pandas as pd

# Step 1: Define project root
# NOTE(review): machine-specific absolute path; it rebinds ROOT (and DATA_RAW
# below) for the rest of the notebook -- confirm it points at the same project
# checkout the other cells use before running this on another machine.
ROOT = Path(r"C:\Users\HP\Project_Flood\Flood Prediction & Early Warning System")

# Step 2: Define raw data folder & get CSVs
# glob() order depends on the filesystem, so sort to keep the previews and the
# summary rows in a stable, reproducible order.
DATA_RAW = ROOT / "data" / "raw"
csvs = sorted(DATA_RAW.glob("*.csv"))
print("Found CSVs:", [p.name for p in csvs])

# Step 3: Preview each CSV safely
# Column layout of the final summary; passing it explicitly keeps the schema
# intact even when no CSV could be read.
summary_columns = ["file", "rows", "cols", "columns_head", "na_pct_mean"]
summaries = []
for p in csvs:
    try:
        try:
            df = pd.read_csv(p)  # normal read
        except UnicodeDecodeError:
            df = pd.read_csv(p, encoding="latin1")  # fallback if encoding error
    except Exception as e:
        # Best-effort preview: report the unreadable file and move on
        print(f"Failed to read {p.name}: {e}")
        continue

    # Only the first 12 column names are shown, to keep wide tables readable
    column_head = list(df.columns[:12])

    # Show metadata
    display(pd.DataFrame({
        "file": [p.name],
        "rows": [len(df)],
        "cols": [df.shape[1]],
        "columns": [column_head]
    }))

    # Save summary for later
    summaries.append({
        "file": p.name,
        "rows": int(len(df)),
        "cols": int(df.shape[1]),
        "columns_head": column_head,
        # Mean over columns of the per-column missing fraction (0-1, not a percentage)
        "na_pct_mean": float(df.isna().mean().mean())
    })

    # Show first 5 rows of dataset
    display(df.head(5))

# Final summary of all datasets (one row per successfully read file)
summary_df = pd.DataFrame(summaries, columns=summary_columns)
summary_df


Found CSVs: ['flood_features.csv', 'flood_risk_dataset_india.csv', 'rainfall_india_1901_2015.csv', 'rainfall_normals_district.csv', 'weatherHistory_A.csv', 'weatherHistory_B.csv']


Unnamed: 0,file,rows,cols,columns
0,flood_features.csv,50000,21,"[MonsoonIntensity, TopographyDrainage, RiverMa..."


Unnamed: 0,MonsoonIntensity,TopographyDrainage,RiverManagement,Deforestation,Urbanization,ClimateChange,DamsQuality,Siltation,AgriculturalPractices,Encroachments,...,DrainageSystems,CoastalVulnerability,Landslides,Watersheds,DeterioratingInfrastructure,PopulationScore,WetlandLoss,InadequatePlanning,PoliticalFactors,FloodProbability
0,3,8,6,6,4,4,6,2,3,2,...,10,7,4,2,3,4,3,2,6,0.45
1,8,4,5,7,7,9,1,5,5,4,...,9,2,6,2,1,1,9,1,3,0.475
2,3,10,4,1,7,5,4,7,4,9,...,7,4,4,8,6,1,8,3,6,0.515
3,4,4,2,7,3,4,1,4,6,4,...,4,2,6,6,8,8,6,6,10,0.52
4,3,7,5,2,5,8,5,2,7,5,...,7,6,5,3,3,4,4,3,4,0.475


Unnamed: 0,file,rows,cols,columns
0,flood_risk_dataset_india.csv,10000,14,"[Latitude, Longitude, Rainfall (mm), Temperatu..."


Unnamed: 0,Latitude,Longitude,Rainfall (mm),Temperature (°C),Humidity (%),River Discharge (m³/s),Water Level (m),Elevation (m),Land Cover,Soil Type,Population Density,Infrastructure,Historical Floods,Flood Occurred
0,18.861663,78.835584,218.999493,34.144337,43.912963,4236.182888,7.415552,377.465433,Water Body,Clay,7276.742184,1,0,1
1,35.570715,77.654451,55.353599,28.778774,27.585422,2472.585219,8.811019,7330.608875,Forest,Peat,6897.736956,0,1,0
2,29.227824,73.108463,103.991908,43.934956,30.108738,977.328053,4.631799,2205.873488,Agricultural,Loam,4361.518494,1,1,1
3,25.361096,85.610733,198.984191,21.569354,34.45369,3683.208933,2.891787,2512.2778,Desert,Sandy,6163.069701,1,1,0
4,12.524541,81.822101,144.626803,32.635692,36.292267,2093.390678,3.188466,2001.818223,Agricultural,Loam,6167.964591,1,0,0


Unnamed: 0,file,rows,cols,columns
0,rainfall_india_1901_2015.csv,4116,19,"[SUBDIVISION, YEAR, JAN, FEB, MAR, APR, MAY, J..."


Unnamed: 0,SUBDIVISION,YEAR,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL,Jan-Feb,Mar-May,Jun-Sep,Oct-Dec
0,ANDAMAN & NICOBAR ISLANDS,1901,49.2,87.1,29.2,2.3,528.8,517.5,365.1,481.1,332.6,388.5,558.2,33.6,3373.2,136.3,560.3,1696.3,980.3
1,ANDAMAN & NICOBAR ISLANDS,1902,0.0,159.8,12.2,0.0,446.1,537.1,228.9,753.7,666.2,197.2,359.0,160.5,3520.7,159.8,458.3,2185.9,716.7
2,ANDAMAN & NICOBAR ISLANDS,1903,12.7,144.0,0.0,1.0,235.1,479.9,728.4,326.7,339.0,181.2,284.4,225.0,2957.4,156.7,236.1,1874.0,690.6
3,ANDAMAN & NICOBAR ISLANDS,1904,9.4,14.7,0.0,202.4,304.5,495.1,502.0,160.1,820.4,222.2,308.7,40.1,3079.6,24.1,506.9,1977.6,571.0
4,ANDAMAN & NICOBAR ISLANDS,1905,1.3,0.0,3.3,26.9,279.5,628.7,368.7,330.5,297.0,260.7,25.4,344.7,2566.7,1.3,309.7,1624.9,630.8


Unnamed: 0,file,rows,cols,columns
0,rainfall_normals_district.csv,641,19,"[STATE_UT_NAME, DISTRICT, JAN, FEB, MAR, APR, ..."


Unnamed: 0,STATE_UT_NAME,DISTRICT,JAN,FEB,MAR,APR,MAY,JUN,JUL,AUG,SEP,OCT,NOV,DEC,ANNUAL,Jan-Feb,Mar-May,Jun-Sep,Oct-Dec
0,ANDAMAN And NICOBAR ISLANDS,NICOBAR,107.3,57.9,65.2,117.0,358.5,295.5,285.0,271.9,354.8,326.0,315.2,250.9,2805.2,165.2,540.7,1207.2,892.1
1,ANDAMAN And NICOBAR ISLANDS,SOUTH ANDAMAN,43.7,26.0,18.6,90.5,374.4,457.2,421.3,423.1,455.6,301.2,275.8,128.3,3015.7,69.7,483.5,1757.2,705.3
2,ANDAMAN And NICOBAR ISLANDS,N & M ANDAMAN,32.7,15.9,8.6,53.4,343.6,503.3,465.4,460.9,454.8,276.1,198.6,100.0,2913.3,48.6,405.6,1884.4,574.7
3,ARUNACHAL PRADESH,LOHIT,42.2,80.8,176.4,358.5,306.4,447.0,660.1,427.8,313.6,167.1,34.1,29.8,3043.8,123.0,841.3,1848.5,231.0
4,ARUNACHAL PRADESH,EAST SIANG,33.3,79.5,105.9,216.5,323.0,738.3,990.9,711.2,568.0,206.9,29.5,31.7,4034.7,112.8,645.4,3008.4,268.1


Unnamed: 0,file,rows,cols,columns
0,weatherHistory_A.csv,96453,12,"[Formatted Date, Summary, Precip Type, Tempera..."


Unnamed: 0,Formatted Date,Summary,Precip Type,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Loud Cover,Pressure (millibars),Daily Summary
0,2006-04-01 00:00:00.000 +0200,Partly Cloudy,rain,9.472222,7.388889,0.89,14.1197,251.0,15.8263,0.0,1015.13,Partly cloudy throughout the day.
1,2006-04-01 01:00:00.000 +0200,Partly Cloudy,rain,9.355556,7.227778,0.86,14.2646,259.0,15.8263,0.0,1015.63,Partly cloudy throughout the day.
2,2006-04-01 02:00:00.000 +0200,Mostly Cloudy,rain,9.377778,9.377778,0.89,3.9284,204.0,14.9569,0.0,1015.94,Partly cloudy throughout the day.
3,2006-04-01 03:00:00.000 +0200,Partly Cloudy,rain,8.288889,5.944444,0.83,14.1036,269.0,15.8263,0.0,1016.41,Partly cloudy throughout the day.
4,2006-04-01 04:00:00.000 +0200,Mostly Cloudy,rain,8.755556,6.977778,0.83,11.0446,259.0,15.8263,0.0,1016.51,Partly cloudy throughout the day.


Unnamed: 0,file,rows,cols,columns
0,weatherHistory_B.csv,96453,12,"[Formatted Date, Summary, Precip Type, Tempera..."


Unnamed: 0,Formatted Date,Summary,Precip Type,Temperature (C),Apparent Temperature (C),Humidity,Wind Speed (km/h),Wind Bearing (degrees),Visibility (km),Loud Cover,Pressure (millibars),Daily Summary
0,2006-04-01 00:00:00.000 +0200,Partly Cloudy,rain,9.472222,7.388889,0.89,14.1197,251.0,15.8263,0.0,1015.13,Partly cloudy throughout the day.
1,2006-04-01 01:00:00.000 +0200,Partly Cloudy,rain,9.355556,7.227778,0.86,14.2646,259.0,15.8263,0.0,1015.63,Partly cloudy throughout the day.
2,2006-04-01 02:00:00.000 +0200,Mostly Cloudy,rain,9.377778,9.377778,0.89,3.9284,204.0,14.9569,0.0,1015.94,Partly cloudy throughout the day.
3,2006-04-01 03:00:00.000 +0200,Partly Cloudy,rain,8.288889,5.944444,0.83,14.1036,269.0,15.8263,0.0,1016.41,Partly cloudy throughout the day.
4,2006-04-01 04:00:00.000 +0200,Mostly Cloudy,rain,8.755556,6.977778,0.83,11.0446,259.0,15.8263,0.0,1016.51,Partly cloudy throughout the day.


Unnamed: 0,file,rows,cols,columns_head,na_pct_mean
0,flood_features.csv,50000,21,"[MonsoonIntensity, TopographyDrainage, RiverMa...",0.0
1,flood_risk_dataset_india.csv,10000,14,"[Latitude, Longitude, Rainfall (mm), Temperatu...",0.0
2,rainfall_india_1901_2015.csv,4116,19,"[SUBDIVISION, YEAR, JAN, FEB, MAR, APR, MAY, J...",0.001713
3,rainfall_normals_district.csv,641,19,"[STATE_UT_NAME, DISTRICT, JAN, FEB, MAR, APR, ...",0.0
4,weatherHistory_A.csv,96453,12,"[Formatted Date, Summary, Precip Type, Tempera...",0.000447
5,weatherHistory_B.csv,96453,12,"[Formatted Date, Summary, Precip Type, Tempera...",0.000447


In [15]:
# Upgrade pandas and pyarrow with pip, invoked through the interpreter that is
# running this kernel (sys.executable) so pip targets that same interpreter.
# The leading "!" is IPython shell-escape syntax: this cell runs only inside a
# notebook/IPython session, not as plain Python.
import sys
!{sys.executable} -m pip install -U pandas pyarrow

Defaulting to user installation because normal site-packages is not writeable


In [6]:
import pandas as pd

# Report which pandas release the running kernel has loaded
print(f"Pandas version: {pd.__version__}")


Pandas version: 2.3.2
