Skip to content

Commit

Permalink
Merge pull request #111 from JannisHoch/poly_neighbours
Browse files Browse the repository at this point in the history
Poly neighbours
  • Loading branch information
JannisHoch committed Dec 9, 2020
2 parents 79d847b + f4a6a15 commit b5cc738
Show file tree
Hide file tree
Showing 9 changed files with 331 additions and 40 deletions.
71 changes: 59 additions & 12 deletions copro/conflict.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,20 +3,19 @@
import numpy as np
import os, sys

def conflict_in_year_bool(conflict_gdf, extent_gdf, config, sim_year):
def conflict_in_year_bool(conflict_gdf, extent_gdf, sim_year):
"""Creates a list for each timestep with boolean information whether a conflict took place in a polygon or not.
Args:
conflict_gdf (geodataframe): geo-dataframe containing georeferenced information of conflict (tested with PRIO/UCDP data)
extent_gdf (geodataframe): geo-dataframe containing one or more polygons with geometry information for which values are extracted
config (ConfigParser-object): object containing the parsed configuration-settings of the model.
sim_year (int): year for which data is extracted
conflict_gdf (geodataframe): geo-dataframe containing georeferenced information of conflict (tested with PRIO/UCDP data).
extent_gdf (geodataframe): geo-dataframe containing one or more polygons with geometry information for which values are extracted.
sim_year (int): year for which data is extracted.
Raises:
AssertionError: raised if the length of output list does not match length of input geo-dataframe
AssertionError: raised if the length of output list does not match length of input geo-dataframe.
Returns:
list: list containing 0/1 per polygon depending on conflict occurence
list: list containing 0/1 per polygon depending on conflict occurence.
"""

# select the entries which occured in this year
Expand All @@ -27,19 +26,67 @@ def conflict_in_year_bool(conflict_gdf, extent_gdf, config, sim_year):

# determine the aggregated amount of fatalities in one region (e.g. water province)
try:
fatalities_per_watProv = data_merged['best'].groupby(data_merged['watprovID']).sum().to_frame().rename(columns={"best": 'total_fatalities'})
fatalities_per_poly = data_merged['best'].groupby(data_merged['watprovID']).sum().to_frame().rename(columns={"best": 'total_fatalities'})
except:
fatalities_per_watProv = data_merged['best'].groupby(data_merged['name']).sum().to_frame().rename(columns={"best": 'total_fatalities'})
fatalities_per_poly = data_merged['best'].groupby(data_merged['name']).sum().to_frame().rename(columns={"best": 'total_fatalities'})

# loop through all regions and check if exists in sub-set
# if so, this means that there was conflict and thus assign value 1
list_out = []
for i in range(len(extent_gdf)):
try:
i_watProv = extent_gdf.iloc[i]['watprovID']
i_poly = extent_gdf.iloc[i]['watprovID']
except:
i_watProv = extent_gdf.iloc[i]['name']
if i_watProv in fatalities_per_watProv.index.values:
i_poly = extent_gdf.iloc[i]['name']
if i_poly in fatalities_per_poly.index.values:
list_out.append(1)
else:
list_out.append(0)

if not len(extent_gdf) == len(list_out):
raise AssertionError('the dataframe with polygons has a lenght {0} while the lenght of the resulting list is {1}'.format(len(extent_gdf), len(list_out)))

return list_out

def conflict_in_previous_year(conflict_gdf, extent_gdf, sim_year, t_0_flag=None):
"""Creates a list for each timestep with boolean information whether a conflict took place in a polygon at the previous timestep or not.
If the current time step is the first (t=0), then conflict data of this year is used instead due to the lack of earlier data.
Args:
conflict_gdf (geodataframe): geo-dataframe containing georeferenced information of conflict (tested with PRIO/UCDP data).
extent_gdf (geodataframe): geo-dataframe containing one or more polygons with geometry information for which values are extracted.
sim_year (int): year for which data is extracted.
t_0_flag (bool, optional): Flag whether first time step is run. If so, needs to be set to True. Defaults to None.
Raises:
ValueError: raised if t_0_flag is invalid.
AssertionError: raised if the length of output list does not match length of input geo-dataframe.
Returns:
list: list containing 0/1 per polygon depending on conflict occurence.
"""

# if it is the first time step (t_0), the data of this year will be used
if t_0_flag == True:
print('... it is the first year, so no conflict for previous year is known')
temp_sel_year = conflict_gdf.loc[conflict_gdf.year == sim_year]
# else, the data from the previous time step (t-1) is used
elif t_0_flag == None:
temp_sel_year = conflict_gdf.loc[conflict_gdf.year == sim_year-1]
else:
raise ValueError('ERROR: the t_0_flag should either be None or True.')

# merge the dataframes with polygons and conflict information, creating a sub-set of polygons/regions
data_merged = gpd.sjoin(temp_sel_year, extent_gdf)

fatalities_per_poly = data_merged['best'].groupby(data_merged['watprovID']).sum().to_frame().rename(columns={"best": 'total_fatalities'})

# loop through all regions and check if exists in sub-set
# if so, this means that there was conflict and thus assign value 1
list_out = []
for i in range(len(extent_gdf)):
i_poly = extent_gdf.iloc[i]['watprovID']
if i_poly in fatalities_per_poly.index.values:
list_out.append(1)
else:
list_out.append(0)
Expand Down
44 changes: 40 additions & 4 deletions copro/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,8 @@ def initiate_XY_data(config):
XY['poly_geometry'] = pd.Series()
for key in config.items('data'):
XY[str(key[0])] = pd.Series(dtype=float)
XY['conflict'] = pd.Series(dtype=int)
XY['conflict_t-1'] = pd.Series(dtype=bool)
XY['conflict'] = pd.Series(dtype=bool)

if config.getboolean('general', 'verbose'): print('{}'.format(XY) + os.linesep)

Expand Down Expand Up @@ -73,7 +74,8 @@ def fill_XY(XY, config, root_dir, conflict_gdf, polygon_gdf):

# go through all simulation years as specified in config-file
model_period = np.arange(config.getint('settings', 'y_start'), config.getint('settings', 'y_end') + 1, 1)
for sim_year in model_period:

for (sim_year, i) in zip(model_period, range(len(model_period))):

print('INFO: entering year {}'.format(sim_year))

Expand All @@ -83,7 +85,16 @@ def fill_XY(XY, config, root_dir, conflict_gdf, polygon_gdf):
if key == 'conflict':

data_series = value
data_list = conflict.conflict_in_year_bool(conflict_gdf, polygon_gdf, config, sim_year)
data_list = conflict.conflict_in_year_bool(conflict_gdf, polygon_gdf, sim_year)
data_series = data_series.append(pd.Series(data_list), ignore_index=True)
XY[key] = data_series

elif key == 'conflict_t-1':

data_series = value
if i==0: t_0_flag = True
else: t_0_flag = None
data_list = conflict.conflict_in_previous_year(conflict_gdf, polygon_gdf, sim_year, t_0_flag=t_0_flag)
data_series = data_series.append(pd.Series(data_list), ignore_index=True)
XY[key] = data_series

Expand Down Expand Up @@ -151,4 +162,29 @@ def split_XY_data(XY, config):
fraction_Y_1 = 100*len(np.where(Y != 0)[0])/len(Y)
print('DEBUG: a fraction of {} percent in the data corresponds to conflicts.'.format(round(fraction_Y_1, 2)))

return X, Y
return X, Y

def neighboring_polys(config, extent_gdf, identifier='watprovID'):
    """Determines for each polygon which polygons of the geo-dataframe touch it.

    Args:
        config (ConfigParser-object): object containing the parsed configuration-settings of the model.
        extent_gdf (geodataframe): geo-dataframe containing one or more polygons with geometry information.
        identifier (str, optional): name of the column in extent_gdf holding the polygon IDs. Defaults to 'watprovID'.

    Returns:
        dataframe: square dataframe of booleans with the polygon IDs as both index and columns. The value in row a and column b is the result of testing whether polygon b touches polygon a.
    """

    # read the verbosity setting once instead of once per polygon
    verbose = config.getboolean('general', 'verbose')

    geometries = extent_gdf.geometry
    poly_ids = extent_gdf[identifier].values

    # collect one boolean row per polygon and build the dataframe only once at the end;
    # DataFrame.append() was removed in pandas 2.0 and copied the whole frame at every iteration
    rows = []
    for poly_id, geom in zip(poly_ids, geometries):
        if verbose:
            print('DEBUG: finding touching neighbours for identifier {} {}'.format(identifier, poly_id))
        # check which polygons in geodataframe touch the current polygon
        rows.append(np.asarray(geometries.touches(geom), dtype=bool))

    # the explicit reshape keeps the result a (0 x 0) frame if there are no polygons at all
    n_polys = len(poly_ids)
    touching = np.array(rows, dtype=bool).reshape(n_polys, n_polys)

    # use the actual polygon IDs for both rows and columns;
    # the index is named after the identifier column
    df = pd.DataFrame(touching, index=pd.Index(poly_ids, name=identifier), columns=poly_ids)

    return df
1 change: 1 addition & 0 deletions copro/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -323,6 +323,7 @@ def get_feature_importance(clf, config, out_dir):
dict_out = dict()
for key, x in zip(config.items('data'), range(len(arr))):
dict_out[key[0]] = arr[x]
dict_out['conflict_t-1'] = arr[-1]

df = pd.DataFrame.from_dict(dict_out, orient='index', columns=['feature_importance'])

Expand Down
4 changes: 2 additions & 2 deletions copro/machine_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,14 +78,14 @@ def split_scale_train_test_split(X, Y, config, scaler):
arrays: arrays containing training-data and test-data as well as IDs and geometry for training-data and test-data.
"""

##- separate arrays for geomety and variable values
##- separate arrays for ID, geometry, and sample values per polygon
X_ID, X_geom, X_data = conflict.split_conflict_geom_data(X)

if config.getboolean('general', 'verbose'): print('DEBUG: fitting and transforming X')
##- scaling only the variable values
X_ft = scaler.fit_transform(X_data)

##- combining geometry and scaled variable values
##- combining ID, geometry and scaled sample values per polygon
X_cs = np.column_stack((X_ID, X_geom, X_ft))

if config.getboolean('general', 'verbose'): print('DEBUG: splitting both X and Y in train and test data')
Expand Down
6 changes: 0 additions & 6 deletions copro/selection.py
Original file line number Diff line number Diff line change
Expand Up @@ -99,14 +99,8 @@ def climate_zoning(gdf, extent_gdf, config, root_dir):
geo-dataframe: polygons of study area clipped to climate zones.
dataframe: global look-up dataframe linking polygon ID with geometry information.
"""

# Koeppen_Geiger_fo = os.path.join(os.path.abspath(config.get('general', 'input_dir')),
# config.get('climate', 'shp'))

Koeppen_Geiger_fo = os.path.join(root_dir, config.get('general', 'input_dir'), config.get('climate', 'shp'))

# code2class_fo = os.path.join(os.path.abspath(config.get('general', 'input_dir')),
# config.get('climate', 'code2class'))

code2class_fo = os.path.join(root_dir, config.get('general', 'input_dir'), config.get('climate', 'code2class'))

Expand Down
Binary file modified docs/_static/roc_curve.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
58 changes: 55 additions & 3 deletions example/nb01_model_init_and_selection.ipynb

Large diffs are not rendered by default.

154 changes: 154 additions & 0 deletions example/nb02_XY_data.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -202,23 +202,170 @@
"name": "stdout",
"output_type": "stream",
"text": [
"{'poly_ID': Series([], dtype: float64), 'poly_geometry': Series([], dtype: float64), 'total_evaporation': Series([], dtype: float64), 'precipitation': Series([], dtype: float64), 'temperature': Series([], dtype: float64), 'irr_water_demand': Series([], dtype: float64), 'conflict_t-1': Series([], dtype: bool), 'conflict': Series([], dtype: bool)}\n",
"\n",
"INFO: reading data for period from 2000 to 2015\n",
"INFO: entering year 2000\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"... it is the first year, so no conflict for previous year is known\n",
"DEBUG: key conflict\n",
"INFO: entering year 2001\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: entering year 2002\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: entering year 2003\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: entering year 2004\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: entering year 2005\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: entering year 2006\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: entering year 2007\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: entering year 2008\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: entering year 2009\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: entering year 2010\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: entering year 2011\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: entering year 2012\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: entering year 2013\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: entering year 2014\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: entering year 2015\n",
"DEBUG: key poly_ID\n",
"DEBUG: key poly_geometry\n",
"DEBUG: key total_evaporation\n",
"DEBUG: key precipitation\n",
"DEBUG: key temperature\n",
"DEBUG: key irr_water_demand\n",
"DEBUG: key conflict_t-1\n",
"YOOOOO: now computing conflict for t-1\n",
"DEBUG: key conflict\n",
"INFO: all data read\n",
"INFO: saving XY data by default to file C:\\Users\\hoch0001\\Documents\\_code\\copro\\example\\./OUT\\XY.npy\n"
]
Expand Down Expand Up @@ -254,6 +401,13 @@
"source": [
"os.path.isfile(os.path.join(os.path.abspath(config.get('general', 'output_dir')), 'XY.npy'))"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
Expand Down
33 changes: 20 additions & 13 deletions example/nb03_model_execution_and_evaluation.ipynb

Large diffs are not rendered by default.

0 comments on commit b5cc738

Please sign in to comment.