In [1]:
# Importing packages
import pandas as pd
import numpy as np
import pycountry

In [2]:
# Load the raw water stress map data set directly from the project's GitHub repository
fao = pd.read_csv(
    filepath_or_buffer='https://raw.githubusercontent.com/SamBickelBarlow/SamBickelBarlow.github.io/refs/heads/main/Project/Write_up/water_stress_map/water_stress_map_raw_data.csv'
)

In [4]:
# Keep only the rows whose 'Variable' is the SDG 6.4.2 water stress indicator.
# reset_index() renumbers the rows and keeps the previous row labels in an 'index' column.
ws = fao.loc[fao['Variable'].eq('SDG 6.4.2. Water Stress')].reset_index()

In [6]:
# Function to get the ISO3 code for a specific country
def get_iso3(area):
    """Return the ISO 3166-1 alpha-3 code for a country name.

    Names listed in the override table are resolved directly; every other
    name is resolved with pycountry's fuzzy country search, taking the
    best-ranked match.

    Parameters
    ----------
    area : str
        Country name as it appears in the data set's 'Area' column.

    Returns
    -------
    str
        The three-letter ISO code, or the sentinel string 'Not Found' when
        ``area`` is not a string (e.g. a missing value) or pycountry cannot
        match it.
    """
    # Names pinned by hand instead of being left to the fuzzy search.
    # NOTE(review): presumably these are names the fuzzy search resolves
    # wrongly or not at all (e.g. 'Niger' vs. 'Nigeria') -- confirm.
    overrides = {
        'Bolivia (Plurinational State of)': 'BOL',
        'Democratic Republic of the Congo': 'COD',
        'Iran (Islamic Republic of)': 'IRN',
        'Netherlands (Kingdom of the)': 'NLD',
        'Venezuela (Bolivarian Republic of)': 'VEN',
        'Niger': 'NER',
        'Republic of Korea': 'KOR',
    }

    # Missing values (NaN/None) cannot be looked up; report them explicitly
    # instead of relying on an exception being swallowed further down.
    if not isinstance(area, str):
        return 'Not Found'

    if area in overrides:
        return overrides[area]

    try:
        # search_fuzzy returns candidates ordered best match first.
        return pycountry.countries.search_fuzzy(area)[0].alpha_3
    except LookupError:
        # Raised by pycountry when nothing matches; IndexError (an empty
        # result list) is a LookupError subclass and is covered as well.
        return 'Not Found'

# Resolve every 'Area' name to its ISO3 code and store the result in a new 'ISO3' column
ws['ISO3'] = ws['Area'].map(get_iso3)

In [12]:
# Remove the columns that are not needed for the rest of the clean-up
ws2 = ws.drop(
    columns=[
        'index',
        'VariableGroup',
        'Subgroup',
        'Variable',
        'Unit',
        'Symbol',
        'IsAggregate',
    ]
)

In [13]:
# Keep only the rows whose ISO3 code was resolved (i.e. is not the 'Not Found' sentinel)
ws2 = ws2.loc[ws2['ISO3'].ne('Not Found')]

In [14]:
# Put each country's 2002 row next to its 2022 row so the change can be computed.
# Left join on ISO3: every 2022 row is kept; 2022 columns get the '_x' suffix
# and 2002 columns the '_y' suffix.
ws2 = pd.merge(
    ws2.loc[ws2['Year'].eq(2022)],
    ws2.loc[ws2['Year'].eq(2002)],
    how='left',
    on='ISO3',
)

In [15]:
# Derive the output columns from the merged frame:
# '_x' columns hold the 2022 values, '_y' columns the 2002 values.
ws2 = ws2.assign(**{
    'Area': lambda df: df['Area_x'],
    # Absolute change between 2002 and 2022
    'Water Stress net change': lambda df: df['Value_x'] - df['Value_y'],
    # Change relative to the 2002 value (a fraction, not multiplied by 100)
    'Water Stress pct change': lambda df: (df['Value_x'] - df['Value_y']) / df['Value_y'],
    'Water Stress 2002': lambda df: df['Value_y'],
    'Water Stress 2022': lambda df: df['Value_x'],
})

In [16]:
# Remove the suffixed merge columns now that their values live in the named output columns
ws2 = ws2.drop(
    columns=[
        'Area_x',
        'Year_x',
        'Area_y',
        'Year_y',
        'Value_x',
        'Value_y',
    ]
)

In [17]:
# Output to csv.
# The cleaned data is written to a local file in the current working directory.
# An https://raw.githubusercontent.com/... address is a read-only download URL:
# passing it to to_csv cannot store the data in the repository, so the file has
# to be written locally and then committed/pushed to publish it.
ws2.to_csv('water_stress_map_clean_data.csv')