# Cleaning Urban Extent Data for Kampala and Addis Ababa

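This notebook cleans the urban extent data for Kampala and Addis Ababa: it drops unused columns, sorts the rows, reorders the columns, keeps only the years 2015 to 2020, and saves the result to a new CSV file.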

In [1]:
import pandas as pd

# Load the raw urban-extent CSV into a DataFrame.
RAW_CSV_PATH = "UrbanExtent_Kampala_Addis_2001_2025.csv"
df = pd.read_csv(RAW_CSV_PATH)

In [2]:
# Preview the first five rows of the loaded data.
df.head()

Unnamed: 0,system:index,city,source,urban_area_sqkm,year,.geo
0,1_1_1_2001_01_01,Kampala,MODIS,186.064651,2001.0,"{""type"":""MultiPoint"",""coordinates"":[]}"
1,1_1_1_2002_01_01,Kampala,MODIS,189.051243,2002.0,"{""type"":""MultiPoint"",""coordinates"":[]}"
2,1_1_1_2003_01_01,Kampala,MODIS,197.264369,2003.0,"{""type"":""MultiPoint"",""coordinates"":[]}"
3,1_1_1_2004_01_01,Kampala,MODIS,204.979731,2004.0,"{""type"":""MultiPoint"",""coordinates"":[]}"
4,1_1_1_2005_01_01,Kampala,MODIS,212.943976,2005.0,"{""type"":""MultiPoint"",""coordinates"":[]}"


In [3]:
# List the column labels of the loaded data.
df.columns

Index(['system:index', 'city', 'source', 'urban_area_sqkm', 'year', '.geo'], dtype='object')

Removing the columns `system:index`, `source`, and `.geo`.

In [4]:
# Drop the columns that are not kept in the cleaned table.
# `columns=` already targets the column axis, so no separate `axis` argument
# is passed alongside it.
df = df.drop(columns=["system:index", "source", ".geo"])

df.head()

Unnamed: 0,city,urban_area_sqkm,year
0,Kampala,186.064651,2001.0
1,Kampala,189.051243,2002.0
2,Kampala,197.264369,2003.0
3,Kampala,204.979731,2004.0
4,Kampala,212.943976,2005.0


In [5]:
# Order the rows by year, and by city name within each year, so the two cities
# alternate year by year; the row index is renumbered from 0 afterwards.
df = df.sort_values(by=["year", "city"], ignore_index=True)
df.head()

Unnamed: 0,city,urban_area_sqkm,year
0,Addis Ababa,193.521207,2001.0
1,Kampala,186.064651,2001.0
2,Addis Ababa,193.521207,2002.0
3,Kampala,189.051243,2002.0
4,Addis Ababa,194.019134,2003.0


In [6]:
# Put the columns in the order year, city, urban_area_sqkm.
df = df.loc[:, ["year", "city", "urban_area_sqkm"]]

df.head()

Unnamed: 0,year,city,urban_area_sqkm
0,2001.0,Addis Ababa,193.521207
1,2001.0,Kampala,186.064651
2,2002.0,Addis Ababa,193.521207
3,2002.0,Kampala,189.051243
4,2003.0,Addis Ababa,194.019134


In [7]:
# Keep only the rows whose year lies between 2015 and 2020, both ends included.
# The surviving rows keep their previous index labels.
df = df[df["year"].between(2015, 2020)]
df.head()

Unnamed: 0,year,city,urban_area_sqkm
28,2015.0,Addis Ababa,205.969394
29,2015.0,Kampala,263.680897
30,2016.0,Addis Ababa,208.707991
31,2016.0,Kampala,269.156316
32,2017.0,Addis Ababa,211.197625


In [None]:
# Write the cleaned table to a new CSV file; index=False leaves the row index
# out of the file.
CLEANED_CSV_PATH = "urbanextent_kampala_addis_cleaned.csv"
df.to_csv(CLEANED_CSV_PATH, index=False)