In [1]:
import json
import geodaisy.converters as convert
import geopandas as gpd
import pandas as pd
import timeit

start = timeit.default_timer()

## Combine GEE data

In [2]:
# Load & Combine Burkina Faso events
bf1 = pd.read_csv('Data/GEE/Burkina Faso/FloodedDiff_2016-09-14_BF.csv')
bf2 = pd.read_csv('Data/GEE/Burkina Faso/FloodedDiff_2018-08-20_BF.csv')
bf3 = pd.read_csv('Data/GEE/Burkina Faso/FloodedDiff_2018-09-01_BF.csv')

bf1 = bf1.drop(['system:index', 'ADM0_CODE', 'ADM1_CODE', 'ADM1_NAME', 'ADM2_CODE', 'DISP_AREA', 'EXP2_YEAR', 'STATUS',
                'STR2_YEAR', 'Shape_Area', 'Shape_Leng'], axis=1)
bf2 = bf2.drop(['system:index', 'ADM0_CODE', 'ADM1_CODE', 'ADM1_NAME', 'ADM2_CODE', 'DISP_AREA', 'EXP2_YEAR', 'STATUS',
                'STR2_YEAR', 'Shape_Area', 'Shape_Leng'], axis=1)
bf3 = bf3.drop(['system:index', 'ADM0_CODE', 'ADM1_CODE', 'ADM1_NAME', 'ADM2_CODE', 'DISP_AREA', 'EXP2_YEAR', 'STATUS',
                'STR2_YEAR', 'Shape_Area', 'Shape_Leng'], axis=1)

bf1['flood_date'] = '2016-09-14'
bf2['flood_date'] = '2018-08-20'
bf3['flood_date'] = '2018-09-01'

bf = pd.DataFrame( pd.concat([bf1,bf2,bf3], ignore_index=True) )


In [3]:
# Load Mali events
ml1 = pd.read_csv('Data/GEE/Mali/FloodedDiff 2018-08-20 ML.csv')
ml2 = pd.read_csv('Data/GEE/Mali/FloodedDiff 2018-09-01 ML.csv')

ml1 = ml1.drop(['system:index', 'ADM0_CODE', 'ADM1_CODE', 'ADM1_NAME', 'ADM2_CODE', 'DISP_AREA', 'EXP2_YEAR', 'STATUS',
                'STR2_YEAR', 'Shape_Area', 'Shape_Leng'], axis=1)
ml2 = ml2.drop(['system:index', 'ADM0_CODE', 'ADM1_CODE', 'ADM1_NAME', 'ADM2_CODE', 'DISP_AREA', 'EXP2_YEAR', 'STATUS',
                'STR2_YEAR', 'Shape_Area', 'Shape_Leng'], axis=1)


ml1['flood_date'] = '2018-08-20'
ml2['flood_date'] = '2018-09-01'

ml = pd.DataFrame( pd.concat([ml1,ml2], ignore_index=True) )

In [4]:
# Load Niger events
ne1 = pd.read_csv('Data/GEE/Niger/FloodedDiff 2016-09-14 NE.csv')
ne2 = pd.read_csv('Data/GEE/Niger/FloodedDiff 2018-08-20 NE.csv')
ne3 = pd.read_csv('Data/GEE/Niger/FloodedDiff 2018-09-01 NE.csv')

ne1 = ne1.drop(['system:index', 'ADM0_CODE', 'ADM1_CODE', 'ADM1_NAME', 'ADM2_CODE', 'DISP_AREA', 'EXP2_YEAR', 'STATUS',
                'STR2_YEAR', 'Shape_Area', 'Shape_Leng'], axis=1)
ne2 = ne2.drop(['system:index', 'ADM0_CODE', 'ADM1_CODE', 'ADM1_NAME', 'ADM2_CODE', 'DISP_AREA', 'EXP2_YEAR', 'STATUS',
                'STR2_YEAR', 'Shape_Area', 'Shape_Leng'], axis=1)
ne3 = ne3.drop(['system:index', 'ADM0_CODE', 'ADM1_CODE', 'ADM1_NAME', 'ADM2_CODE', 'DISP_AREA', 'EXP2_YEAR', 'STATUS',
                'STR2_YEAR', 'Shape_Area', 'Shape_Leng'], axis=1)

ne1['flood_date'] = '2016-09-14'
ne2['flood_date'] = '2018-08-20'
ne3['flood_date'] = '2018-09-01'

ne = pd.DataFrame( pd.concat([ne1,ne2,ne3], ignore_index=True) )

In [5]:
# Combine all GEE data
gee = pd.DataFrame( pd.concat([bf,ml,ne], ignore_index=True) )
gee = gee[['ADM0_NAME', 'ADM2_NAME', 'flood_date', 'Diff', '.geo']]
gee.rename(columns = {'ADM0_NAME': 'country', 'ADM2_NAME':'admin_name'}, inplace = True)
gee['flood_date'] = pd.to_datetime(gee['flood_date'])

## Preprocess the FS dataset


In [6]:
# Load FS data for differencing
FS = pd.read_csv('Data/FS_before_after_data.csv')
FS = FS.drop(['geometry'], axis=1)
print(list(FS.columns))

['country', 'admin_name', 'centx', 'centy', 'state', 'datetime', 'flood_date', 'fews_ipc', 'ndvi_mean', 'rain_mean', 'et_mean', 'acled_count', 'acled_fatalities', 'p_staple_food', 'area', 'cropland_pct', 'pop', 'ruggedness_mean', 'pasture_pct', 'spacelag', 'timelag1', 'timelag2']


In [7]:
col = ['country', 'admin_name', 'centx', 'centy', 'flood_date', 'fews_ipc', 'ndvi_mean', 'rain_mean', 'et_mean', 'acled_count',
       'acled_fatalities', 'p_staple_food', 'area', 'cropland_pct', 'pop', 'ruggedness_mean', 'pasture_pct', 'spacelag',
       'timelag1', 'timelag2']

df = pd.DataFrame(columns=col)

x = int(len(FS)/2)
skip = [n*2 for n in range(0, x)]
counter = 0
for i in skip:
    before = i
    after = i+2
    counter = i+1
    diff = FS[FS.columns[7:]][before:after].diff()
    df = df.append(diff)
    df.loc[counter, ['country']] = FS.loc[counter, ['country']]
    df.loc[counter, ['admin_name']] = FS.loc[counter, ['admin_name']]
    df.loc[counter, ['flood_date']] = FS.loc[counter, ['flood_date']]
    df.loc[counter, ['centx']] = FS.loc[counter, ['centx']]
    df.loc[counter, ['centy']] = FS.loc[counter, ['centy']]

In [8]:
df = df.dropna(how='all')
df = df.reset_index()
df = df.drop(['index'], axis=1)

## Combine GEE & FS Data

If the DF admin_name centx/centy is within any of the GEE polygons
Assign that DF admin row the GEE admin name

In [9]:
print(len(df))
print(len(gee))

553
349


In [27]:
prep = gee.drop_duplicates(subset=['country', 'admin_name'])

for i in prep.index:
    x = prep.loc[i, ['.geo']]
    x = json.loads(x[0])
    try:
        prep.loc[i, ['.geo']] = convert.geojson_to_wkt(x)
    except: # Error handling for geometry collections
        y = list(x.values())
        y = y[1]
        prep.loc[i, ['.geo']] = convert.geojson_to_wkt(y[1])
        continue

prep['.geo'] = gpd.GeoSeries.from_wkt(prep['.geo'])
gee_gdf = gpd.GeoDataFrame(prep, geometry='.geo')
gee_gdf.plot()



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_column(loc, value, pi)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._set_item(key, value)


In [None]:
# Check strings and fix differences

l = set(df['admin_name'].tolist())
x = set(gee['admin_name'].tolist())
# set(list(l - x))

In [13]:
# set(list(x - l))


In [14]:
stop = timeit.default_timer()
print('Running Time: ', stop - start, 'seconds')

Running Time:  5.609913699999999 seconds
