## Pre-processing

Extract all major flood dates for every FEWS NET country.
For each flood event, extract the food security data for the month before and the month after the event.

In [1]:
import pandas as pd
import os
# Display the current working directory; the relative 'Data' paths used by the
# read_csv calls in the following cells are resolved against it.
os.getcwd()

'C:\\Users\\offne\\Documents\\Dissertation'

In [2]:
# Read Food Security data
# A forward slash is used as the separator: it works on Windows and POSIX
# alike, and avoids the invalid '\p' escape sequence that a backslash inside a
# normal string literal produces.
FS = pd.read_csv('Data/predicting_food_crises_data.csv')
FS

Unnamed: 0,country,admin_code,admin_name,centx,centy,year_month,year,month,fews_ipc,fews_ha,...,et_mean,et_anom,acled_count,acled_fatalities,p_staple_food,area,cropland_pct,pop,ruggedness_mean,pasture_pct
0,Afghanistan,202,Kandahar,65.709343,31.043618,2007_01,2007,1,,,...,5.065768,0.173497,0,0,0.630214,54174.53381,1.417796,1161872.35,101047.1587,16.246279
1,Afghanistan,202,Kandahar,65.709343,31.043618,2007_02,2007,2,,,...,8.242891,2.817895,0,0,0.630214,54174.53381,1.417796,1161872.35,101047.1587,16.246279
2,Afghanistan,202,Kandahar,65.709343,31.043618,2007_03,2007,3,,,...,6.728089,2.613962,0,0,0.631055,54174.53381,1.417796,1161872.35,101047.1587,16.246279
3,Afghanistan,202,Kandahar,65.709343,31.043618,2007_04,2007,4,,,...,1.649312,-1.147484,0,0,0.687716,54174.53381,1.417796,1161872.35,101047.1587,16.246279
4,Afghanistan,202,Kandahar,65.709343,31.043618,2007_05,2007,5,,,...,1.178138,-0.137859,0,0,0.743923,54174.53381,1.417796,1161872.35,101047.1587,16.246279
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
183591,Zimbabwe,612,Zvishavane,30.077970,-20.284340,2019_10,2019,10,3.0,0.0,...,3.571050,-1.086821,0,0,1.514037,2640.70400,18.077180,156612.00,177041.2000,50.382350
183592,Zimbabwe,612,Zvishavane,30.077970,-20.284340,2019_11,2019,11,,,...,6.007722,-1.884687,0,0,1.555623,2640.70400,18.077180,156612.00,177041.2000,50.382350
183593,Zimbabwe,612,Zvishavane,30.077970,-20.284340,2019_12,2019,12,,,...,12.198760,-0.210147,0,0,1.592231,2640.70400,18.077180,156612.00,177041.2000,50.382350
183594,Zimbabwe,612,Zvishavane,30.077970,-20.284340,2020_01,2020,1,,,...,17.411580,-0.404875,0,0,1.697524,2640.70400,18.077180,159843.90,177041.2000,50.382350


In [3]:
# Read Flood Event data
# A forward slash is used as the separator: it works on Windows and POSIX
# alike, and avoids the invalid '\g' escape sequence that a backslash inside a
# normal string literal produces.
FE = pd.read_csv('Data/global_flooding_events.csv')
FE

Unnamed: 0,index,country,year,month,day,area,exposed
0,1586.0,Australia,2000,2,18,3.167333e+08,38.298830
1,1587.0,Madagascar,2000,2,17,2.285013e+08,4594.561996
2,1587.0,Comoros,2000,2,17,0.000000e+00,0.000000
3,1595.0,Turkey,2000,4,5,0.000000e+00,0.000000
4,1595.0,Italy,2000,4,5,0.000000e+00,0.000000
...,...,...,...,...,...,...,...
4822,4704.0,Thailand,2018,11,24,4.295975e+06,0.000000
4823,4704.0,Laos,2018,11,24,7.963812e+07,0.000000
4824,4704.0,Cambodia,2018,11,24,4.867521e+09,113716.602256
4825,4711.0,United States,2018,12,5,2.046451e+09,27482.241092


In [4]:
# Collect the distinct country names that occur in the food-security table.
# Despite its name, `country_list` is a set (unordered, no duplicates).
fs_country_values = FS['country'].tolist()
country_list = {country_name for country_name in fs_country_values}

In [5]:
# Extract the flood events that occurred in a FEWS NET country.
# One vectorised membership test replaces the former row-by-row
# DataFrame.append loop: append was removed in pandas 2.0, and it rebuilt the
# whole frame on every matching row.
FE2 = (
    FE[FE['country'].isin(country_list)]
    # Order events chronologically within each country.
    .sort_values(by=['country', 'year', 'month', 'day'])
    # Only the country and the calendar date are needed downstream.
    .drop(columns=['index', 'area', 'exposed'])
)

In [6]:
# Create Timestamps for each Dataset (MONTHLY)
# Food-security rows are monthly: assemble the timestamp straight from the
# year/month columns, pinned to the first day of the month.
FS['datetime'] = pd.to_datetime(FS[['year', 'month']].assign(day=1))

# Flood events carry a full calendar date (year, month, day).
FE2['datetime'] = pd.to_datetime(FE2[['year', 'month', 'day']])
# Round flood dates to be monthly, i.e. the first day of the event's own month.
# `datetime + pd.offsets.MonthBegin(-1)` is not a floor: a date that already
# falls on the 1st is shifted back a whole month, which would attribute that
# event to the wrong month. Converting to a monthly period and back is exact.
FE2['datetime_round'] = FE2['datetime'].dt.to_period('M').dt.to_timestamp()

# Remove dates before 2009
# A single threshold also removes 2000-2002, which the former per-year drops
# (2003 through 2008 only) left in the frame.
MIN_EVENT_YEAR = 2009
FE2 = FE2[FE2['year'] >= MIN_EVENT_YEAR].copy()


In [7]:
# NOTE(review): this whole cell is disabled (every statement is commented out),
# so FS2 is never created when the notebook runs. Before re-enabling, check:
#   - `before` is computed as FE_date + 1 month and `after` as FE_date - 1 month,
#     so the two names look swapped;
#   - the result of `FS2.append(...)` is never assigned back, and
#     DataFrame.append returns a new frame (and was removed in pandas 2.0),
#     so FS2 would stay empty;
#   - the nested loops compare every FS row against every flood event.
# # Filter FS data to only include data before/after/during each event
# FS2 = pd.DataFrame(columns=list(FS.columns))
# for i in FS.index:
#     for event in FE2.index:
#         if FS['country'][i] == FE2['country'][event]:
#             FS_date = FS['datetime'][i]
#             FE_date = FE2['datetime_round'][event]
#             before = FE_date + pd.DateOffset(months=1)
#             after = FE_date - pd.DateOffset(months=1)
#             if FS_date == before: # or FS_date == FE_date
#                 FS2.append(FS.loc[i])
#             elif FS_date == after:
#                 FS2.append(FS.loc[i])
#             else:
#                 continue

# dataset size approx 500


In [8]:
# Save data
# Disabled: FS2 is only assigned inside the commented-out filtering cell above,
# so there is currently nothing to write.
# FS2.to_csv('Data/FS_before_after.csv', index = False)
