# Cleaning Flood Water Extent for Kampala and Addis Ababa

This notebook cleans the file of monthly flood water extent by:
- Removing unnecessary columns.
- Summing the monthly values per year and sorting the rows by year and city, so that each row holds the flood water extent of one city for one year.
- Re-arranging the variables in the order: year, city, flood water extent.

In [23]:
import pandas as pd

# Load the monthly flood water extent CSV into a DataFrame.
raw_csv_path = "Monthly_Flood_Water_Extent_Kampala_Addis_2015_2025_km2.csv"
df = pd.read_csv(raw_csv_path)

In [24]:
# Preview the first five rows of the loaded table.
df.head(n=5)

Unnamed: 0,system:index,city,flood_water_extent (km^2),month,source,.geo
0,0_0,Kampala,195.380092,2015-01,Sentinel-1,"{""type"":""MultiPoint"",""coordinates"":[]}"
1,0_1,Kampala,207.921894,2015-02,Sentinel-1,"{""type"":""MultiPoint"",""coordinates"":[]}"
2,0_2,Kampala,140.035708,2015-03,Sentinel-1,"{""type"":""MultiPoint"",""coordinates"":[]}"
3,0_3,Kampala,112.024489,2015-04,Sentinel-1,"{""type"":""MultiPoint"",""coordinates"":[]}"
4,0_4,Kampala,114.628558,2015-05,Sentinel-1,"{""type"":""MultiPoint"",""coordinates"":[]}"


In [25]:
# List the column labels, to see which columns are available before cleaning.
df.columns

Index(['system:index', 'city', 'flood_water_extent (km^2)', 'month', 'source',
       '.geo'],
      dtype='object')

In [26]:
# Build the yearly table on a separate frame instead of overwriting `df`,
# so this cell can be re-run on its own (the original dropped 'month' from
# `df`, which made a second run fail with a KeyError).

# Removing unnecessary columns (drop returns a new frame; `df` is untouched)
flood_monthly = df.drop(columns=["system:index", "month", "source", ".geo"])

# Extracting the year from the 'month' column (values look like "2015-01").
# expand=False makes str.extract return a Series rather than a one-column frame.
flood_monthly["year"] = (
    df["month"].str.extract(r"(\d{4})", expand=False).astype(int)
)

# Grouping by year and city to sum the flood water extent.
# sort=True orders the groups by year, then city, and reset_index() turns the
# group keys back into the first two columns: year, city, flood water extent.
# NOTE(review): the yearly value is the sum of the monthly extents, not a
# mean or a peak -- confirm this is the intended yearly measure.
yearly_flood_water_extent = (
    flood_monthly.groupby(["year", "city"], sort=True)["flood_water_extent (km^2)"]
    .sum()
    .reset_index()
)

# show the first few rows
yearly_flood_water_extent.head()

Unnamed: 0,year,city,flood_water_extent (km^2)
0,2015,Addis Ababa,1161.517373
1,2015,Kampala,1789.580176
2,2016,Addis Ababa,1532.617001
3,2016,Kampala,2217.205481
4,2017,Addis Ababa,1377.904746


In [27]:
# Restrict the yearly table to 2015-2020; between() includes both end years.
in_study_period = yearly_flood_water_extent["year"].between(2015, 2020)
yearly_flood_water_extent = yearly_flood_water_extent.loc[in_study_period]

yearly_flood_water_extent.head()

Unnamed: 0,year,city,flood_water_extent (km^2)
0,2015,Addis Ababa,1161.517373
1,2015,Kampala,1789.580176
2,2016,Addis Ababa,1532.617001
3,2016,Kampala,2217.205481
4,2017,Addis Ababa,1377.904746


In [28]:
# Write the yearly table to disk; index=False leaves the row index out of the file.
output_csv_path = "yearly_flood_water_extent_kampala_addis_2015_2020.csv"
yearly_flood_water_extent.to_csv(output_csv_path, index=False)