In [1]:
import pandas as pd

In [2]:
# FAO Data
fao_fbs = pd.read_csv(r"..\data\FAOSTAT\processed\fao_fbs_relevant.csv")
fao_fs = pd.read_csv(r"..\data\FAOSTAT\processed\fao_fs_relevant.csv")
fao_emissions = pd.read_csv(r"..\data\FAOSTAT\processed\fao_emissions_relevant.csv")
fao_cpi = pd.read_csv(r"..\data\FAOSTAT\processed\fao_cpi_relevant.csv")
fao_population = pd.read_csv(r"..\data\FAOSTAT\processed\fao_population_relevant.csv")
fao_prices = pd.read_csv(r"..\data\FAOSTAT\processed\fao_prices_relevant.csv")
fao_production_indices = pd.read_csv(r"..\data\FAOSTAT\processed\fao_production_indices_relevant.csv")

In [3]:
# Other Data
f_waste = pd.read_excel(r"../data/Food Waste Estimation UNEP 2024.xlsx")
fsi_data = pd.read_csv(r"../data/processed/fsi_data_melted.csv")
asean_sdg = pd.read_excel(r"../data/SDG From ASEANstats.xlsx")
un_sdg = pd.read_excel(r"../data/SDG From UNSDG (2, 11, 12, 13).xlsx")
gfsi_data = pd.read_csv(r"../data/processed/gfsi_data_melted.csv")
fsi_score = pd.read_csv(r"../data/processed/fsi_score_melted.csv")
gfsi_score = pd.read_csv(r"../data/processed/gfsi_score_melted.csv")

In [10]:
fao_fs['Year']

0         2000-2002
1         2001-2003
2         2002-2004
3         2003-2005
4         2004-2006
            ...    
283875    2016-2018
283876    2017-2019
283877    2018-2020
283878    2019-2021
283879    2020-2022
Name: Year, Length: 283880, dtype: object

In [15]:
# Replace all Lao People's Dem. Rep. with Lao People's Democratic Republic
f_waste['Country'] = f_waste['Country'].replace('Lao People\'s Dem. Rep.', 'Lao People\'s Democratic Republic')

In [16]:
# Define the list of ASEAN countries for consistent filtering across all datasets.
asean_countries = [
    'Brunei Darussalam', 'Cambodia', 'Indonesia', 'Lao People\'s Democratic Republic',
    'Malaysia', 'Myanmar', 'Philippines', 'Singapore', 'Thailand', 'Viet Nam'
]

# --- 1. Process Urban Population Data ---
# Use the latest available year up to 2023 (not a future projection)
max_actual_year = fao_population[fao_population['Year'] <= 2023]['Year'].max()
population_urban = fao_population[
    (fao_population['Area'].isin(asean_countries)) &
    (fao_population['Element'] == 'Urban population') &
    (fao_population['Year'] == max_actual_year)
].copy()
population_urban['Value'] = population_urban['Value'] * 1000  # Convert from '1000 persons' to persons
population_urban = population_urban[['Area', 'Year', 'Value']].rename(columns={
    'Area': 'Country',
    'Year': 'Population Year',
    'Value': 'Urban Population'
})

# --- 2. Process Urban Food Insecurity Data ---
# Use the latest available year up to 2023 for each country
fs_urban_insecurity = fao_fs[
    (fao_fs['Area'].isin(asean_countries)) &
    (fao_fs['Item'] == 'Prevalence of moderate or severe food insecurity in the female adult population (percent) (3-year average)') &
    (fao_fs['Year'].astype(str).str.contains('2021|2022|2023|2024'))
].copy()
fs_urban_insecurity = fs_urban_insecurity.sort_values('Year').drop_duplicates('Area', keep='last')
fs_urban_insecurity = fs_urban_insecurity[['Area', 'Year', 'Value']].rename(columns={
    'Area': 'Country',
    'Year': 'FS Year',
    'Value': 'Urban Food Insecurity (%)'
})

# --- 3. Process Food Waste Data ---
# Load the UNEP food waste report data. We will focus on the 'Household' sector as it's
# the largest contributor and directly related to the urban population.
waste_household = f_waste[f_waste['Country'].isin(asean_countries)].copy()
waste_household = waste_household[['Country', 'Household estimate (kg/capita/year)']].rename(
    columns={'Household estimate (kg/capita/year)': 'Household Waste (kg/capita/yr)'}
)

# --- 4. Merge Datasets to Create the Urban Paradox DataFrame ---
# Sequentially merge the processed dataframes to create a single, comprehensive view.
# This unified dataset will be the foundation for our first storyboard visualization.
urban_paradox_df = pd.merge(population_urban, fs_urban_insecurity, on='Country', how='left')
urban_paradox_df = pd.merge(urban_paradox_df, waste_household, on='Country', how='left')

# Calculate the total household waste in tonnes for each country to show the scale.
urban_paradox_df['Total Household Waste (tonnes/yr)'] = (urban_paradox_df['Urban Population'] * urban_paradox_df['Household Waste (kg/capita/yr)']) / 1000

print("--- Urban Paradox Data Ready for Visualization ---")
urban_paradox_df

--- Urban Paradox Data Ready for Visualization ---


Unnamed: 0,Country,Population Year,Urban Population,FS Year,Urban Food Insecurity (%),Household Waste (kg/capita/yr),Total Household Waste (tonnes/yr)
0,Brunei Darussalam,2023,363647.0,2021-2023,,76,27637.172
1,Cambodia,2023,4445360.0,2021-2023,56.7,85,377855.6
2,Indonesia,2023,163963233.0,2021-2023,5.4,53,8690051.349
3,Lao People's Democratic Republic,2023,2849332.0,2021-2023,39.5,89,253590.548
4,Malaysia,2023,26866688.0,2021-2023,12.1,81,2176201.728
5,Myanmar,2023,18032802.0,2021-2023,37.0,78,1406558.556
6,Philippines,2023,55287662.0,2021-2023,42.7,26,1437479.212
7,Singapore,2023,6080859.0,2021-2023,7.4,68,413498.412
8,Thailand,2023,37322064.0,2021-2023,7.1,86,3209697.504
9,Viet Nam,2023,39908501.0,2021-2023,14.1,72,2873412.072
