In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn.preprocessing import StandardScaler 
from sklearn.preprocessing import LabelEncoder

In [2]:
# Read the two dataset splits; the relative file names are resolved against
# the notebook's current working directory.
train = pd.read_csv(filepath_or_buffer="train.csv")
test = pd.read_csv(filepath_or_buffer="test.csv")

In [3]:
# Preview the first five rows of the training frame (row count spelled out).
train.head(n=5)

Unnamed: 0,id,MonsoonIntensity,TopographyDrainage,RiverManagement,Deforestation,Urbanization,ClimateChange,DamsQuality,Siltation,AgriculturalPractices,...,DrainageSystems,CoastalVulnerability,Landslides,Watersheds,DeterioratingInfrastructure,PopulationScore,WetlandLoss,InadequatePlanning,PoliticalFactors,FloodProbability
0,0,5,8,5,8,6,4,4,3,3,...,5,3,3,5,4,7,5,7,3,0.445
1,1,6,7,4,4,8,8,3,5,4,...,7,2,0,3,5,3,3,4,3,0.45
2,2,6,5,6,7,3,7,1,5,4,...,7,3,7,5,6,8,2,3,3,0.53
3,3,3,4,6,5,4,8,4,7,6,...,2,4,7,4,4,6,5,7,5,0.535
4,4,5,3,2,6,4,4,3,3,3,...,2,2,6,6,4,1,2,3,5,0.415


In [4]:
# Preview the first five rows of the test frame (row count spelled out).
test.head(n=5)

Unnamed: 0,id,MonsoonIntensity,TopographyDrainage,RiverManagement,Deforestation,Urbanization,ClimateChange,DamsQuality,Siltation,AgriculturalPractices,...,IneffectiveDisasterPreparedness,DrainageSystems,CoastalVulnerability,Landslides,Watersheds,DeterioratingInfrastructure,PopulationScore,WetlandLoss,InadequatePlanning,PoliticalFactors
0,1117957,4,6,3,5,6,7,8,7,8,...,8,5,7,5,6,3,6,4,4,5
1,1117958,4,4,2,9,5,5,4,7,5,...,2,4,7,4,5,1,7,4,4,3
2,1117959,1,3,6,5,7,2,4,6,4,...,7,9,2,5,5,2,3,6,8,3
3,1117960,2,4,4,6,4,5,4,3,4,...,7,8,4,6,7,6,4,2,4,4
4,1117961,6,3,2,4,6,4,5,5,3,...,4,3,2,6,4,6,8,4,5,5


# Explanation of Columns
1. id: Unique identifier for each record (region, scenario, etc.).

2. MonsoonIntensity: Represents the severity or intensity of the monsoon season, rated on a scale (e.g., 1–10). Higher values indicate stronger monsoons, which may increase flood risk.

3. TopographyDrainage: A measure of the land's slope and how well water is drained. Steeper or poorly drained regions may face higher flood risks.

4. RiverManagement: Refers to actions taken to manage rivers (e.g., construction of levees, embankments). Higher scores may reflect better management, which can reduce flood risk.

5. Deforestation: Rating of the level of deforestation in the region. Higher values indicate greater deforestation, which increases runoff and flood risk due to the loss of natural water absorption.

6. Urbanization: Measures the degree of urban development, which typically increases impermeable surfaces (like concrete) and leads to higher flood risk.

7. ClimateChange: Captures the effects of climate change, such as rising temperatures and changing weather patterns, which may increase the likelihood of extreme weather events like floods.

8. DamsQuality: Represents the structural integrity and quality of dams. Low-quality dams may fail during floods, exacerbating the situation.

9. Siltation: Measures the accumulation of silt in rivers or reservoirs, which reduces their capacity to hold water, increasing flood risk.

10. AgriculturalPractices: Refers to farming methods that may either mitigate or worsen flood risk. Poor practices such as over-irrigation or excessive land clearance could increase soil erosion and runoff.

11. Encroachments: Refers to unauthorized or illegal development (buildings, settlements) in flood-prone areas, increasing the vulnerability to floods.

12. IneffectiveDisasterPreparedness: Reflects the region's capacity for disaster preparedness, including early warning systems and emergency response plans. A high score indicates inadequate measures.

13. DrainageSystems: Evaluates the effectiveness of urban and rural drainage systems in preventing water accumulation. Poor drainage systems can lead to flooding even during moderate rainfall.

14. CoastalVulnerability: Rates the region's vulnerability to coastal flooding due to factors like rising sea levels and storm surges. Coastal regions may have higher flood risks depending on their preparedness.

15. Landslides: Measures the risk of landslides in the area, which can be triggered by heavy rainfall and further complicate flood management.

16. Watersheds: Indicates the health and management of the watersheds, which are crucial for controlling water flow. Poor watershed management can increase flood risks.

17. DeterioratingInfrastructure: Rates the condition of the region's infrastructure (roads, bridges, etc.). Poor infrastructure may not withstand heavy rains and flooding, exacerbating risks.

18. PopulationScore: An index reflecting the population density. High population density can lead to higher impacts of flooding on human life and property.

19. WetlandLoss: Measures the degree of wetland destruction or degradation. Wetlands act as natural flood buffers, so their loss increases flood vulnerability.

20. InadequatePlanning: Reflects the quality of urban and regional planning. Poor planning (e.g., building in floodplains) can significantly increase flood risk.

21. PoliticalFactors: Refers to the influence of political decisions on flood prevention and management, such as government policies, corruption, and resource allocation. Poor political governance can lead to inefficient flood management strategies.

22. FloodProbability: The target variable, representing the probability or likelihood of a flood occurring, based on the values of all the other factors. This is likely a predicted value based on a model. It is present only in the training set; the test set does not include this column.

In [12]:
# Count missing values in each column of the training frame.
train.isna().sum()

id                                 0
MonsoonIntensity                   0
TopographyDrainage                 0
RiverManagement                    0
Deforestation                      0
Urbanization                       0
ClimateChange                      0
DamsQuality                        0
Siltation                          0
AgriculturalPractices              0
Encroachments                      0
IneffectiveDisasterPreparedness    0
DrainageSystems                    0
CoastalVulnerability               0
Landslides                         0
Watersheds                         0
DeterioratingInfrastructure        0
PopulationScore                    0
WetlandLoss                        0
InadequatePlanning                 0
PoliticalFactors                   0
FloodProbability                   0
dtype: int64

In [13]:
# Count missing values in each column of the test frame.
test.isna().sum()

id                                 0
MonsoonIntensity                   0
TopographyDrainage                 0
RiverManagement                    0
Deforestation                      0
Urbanization                       0
ClimateChange                      0
DamsQuality                        0
Siltation                          0
AgriculturalPractices              0
Encroachments                      0
IneffectiveDisasterPreparedness    0
DrainageSystems                    0
CoastalVulnerability               0
Landslides                         0
Watersheds                         0
DeterioratingInfrastructure        0
PopulationScore                    0
WetlandLoss                        0
InadequatePlanning                 0
PoliticalFactors                   0
dtype: int64