Skip to content

Commit

Permalink
Merge pull request #139 from JannisHoch/update_docs
Browse files Browse the repository at this point in the history
Update docs
  • Loading branch information
JannisHoch committed Jun 4, 2021
2 parents 55df87f + ccfd370 commit 4e2d3fe
Show file tree
Hide file tree
Showing 11 changed files with 317 additions and 502 deletions.
2 changes: 1 addition & 1 deletion copro/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@

__author__ = """Jannis M. Hoch, Sophie de Bruin, Niko Wanders"""
__email__ = 'j.m.hoch@uu.nl'
__version__ = '0.0.8b'
__version__ = '0.0.8'
14 changes: 8 additions & 6 deletions copro/plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import geopandas as gpd
import seaborn as sbs
import seaborn as sns
sns.set_palette('colorblind')
import numpy as np
import os, sys
from sklearn import metrics
Expand Down Expand Up @@ -42,7 +43,7 @@ def selected_conflicts(conflict_gdf, **kwargs):

return ax

def metrics_distribution(out_dict, **kwargs):
def metrics_distribution(out_dict, metrics, **kwargs):
"""Plots the value distribution of a range of evaluation metrics based on all model simulations.
Args:
Expand All @@ -57,9 +58,10 @@ def metrics_distribution(out_dict, **kwargs):

fig, ax = plt.subplots(1, 1, **kwargs)

sbs.histplot(out_dict['Accuracy'], ax=ax, color="k", label='Accuracy')
sbs.histplot(out_dict['Precision'], ax=ax, color="r", label='Precision')
sbs.histplot(out_dict['Recall'], ax=ax, color="b", label='Recall')
for metric, color in zip(metrics, sns.color_palette('colorblind')):

sns.histplot(out_dict[str(metric)], ax=ax, kde=True, stat='density', color=color, label=str(metric))

plt.legend()

return ax
Expand All @@ -79,7 +81,7 @@ def correlation_matrix(df, **kwargs):

df_corr = evaluation.calc_correlation_matrix(df)

ax = sbs.heatmap(df_corr, **kwargs)
ax = sns.heatmap(df_corr, **kwargs)

return ax

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import os

@click.command()
@click.option('-id', '--polygon-id', multiple=True, type=int)
@click.option('-id', '--polygon-id', multiple=True)
@click.option('-c', '--column', help='column name', default='chance_of_conflict', type=str)
@click.option('-t', '--title', help='title for plot and file_object name', type=str)
@click.option('--verbose/--no-verbose', help='verbose on/off', default=False)
Expand All @@ -18,50 +18,87 @@ def main(input_dir=None, polygon_id=None, column=None, title=None, output_dir=No
"""Quick and dirty function to plot the development of a column in the outputted geojson-files over time.
"""

assert(len(polygon_id) > 0), AssertionError('please specify one polygon ID to be sampled')
assert(len(polygon_id) > 0), AssertionError('please specify at least one polygon ID to be sampled or select ''all'' for sampling the entire study area')

# absolute path to input_dir
input_dir = os.path.abspath(input_dir)
click.echo('\ngetting geojson-files from {}'.format(input_dir))

# collect all files in input_dir
all_files = glob.glob(os.path.join(input_dir, '*.geojson'))

if verbose:
if polygon_id != 'all':
click.echo('sampling from IDs'.format(polygon_id))
else:
click.echo('sampling over entire study area')

# create dictionary with list for areas (either IDs or entire study area) to be sampled from
out_dict = dict()
for idx in polygon_id:
out_dict[idx] = list()
if polygon_id != 'all':
out_dict[int(idx)] = list()
else:
out_dict[idx] = list()

# create a list to keep track of year-values in files
years = list()

print('retrieving values from column {}'.format(column))
# go through all files
click.echo('retrieving values from column {}'.format(column))
for geojson in all_files:
if verbose: print('reading file {}'.format(geojson))

if verbose: click.echo('reading file {}'.format(geojson))
# read file and convert to geo-dataframe
gdf = gpd.read_file(geojson, driver='GeoJSON')
# convert geo-dataframe to dataframe
df = pd.DataFrame(gdf.drop(columns='geometry'))

# get year-value
year = int(str(str(os.path.basename(geojson)).rsplit('.')[0]).rsplit('_')[-1])
years.append(year)

for idx in polygon_id:
if verbose: print('sampling ID {}'.format(idx))

if idx not in df.ID.values:
print('WARNING: ID {} is not in {} - NaN set'.format(idx, geojson))
vals = np.nan
else:
vals = df[column].loc[df.ID==idx].values[0]

if polygon_id != 'all':
# go through all IDs
for idx in polygon_id:
if verbose:
if polygon_id != 'all':
print('sampling ID {}'.format(idx))

# if ID not in file, assign NaN
if idx not in df.ID.values:
print('WARNING: ID {} is not in {} - NaN set'.format(idx, geojson))
vals = np.nan
# otherwise, get value of column at this ID
else:
vals = df[column].loc[df.ID==idx].values[0]

# append this value to list in dict
idx_list = out_dict[idx]
idx_list.append(vals)

else:
# compute mean value over column
vals = df[column].mean()
# append this value to list in dict
idx_list = out_dict[idx]
idx_list.append(vals)


# create a dataframe from dict and assign year-values as index
df = pd.DataFrame().from_dict(out_dict)
years = pd.to_datetime(years, format='%Y')
df.index = years

# create an output folder, if not yet there
if not os.path.isdir(os.path.abspath(output_dir)):
click.echo('creating output folder {}'.format(os.path.abspath(output_dir)))
os.makedirs(os.path.abspath(output_dir))

# save dataframe as csv-file
df.to_csv(os.path.abspath(os.path.join(output_dir, '{}_dev.csv'.format(column))))

# create a simple plot and save to file
fig, axes = plt.subplots(nrows=len(polygon_id), ncols=1, sharex=True)
df.plot(subplots=True, ax=axes)
for ax in axes:
Expand Down
Binary file modified docs/_static/roc_curve.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
6 changes: 0 additions & 6 deletions example/_scripts/run_notebooks.sh

This file was deleted.

86 changes: 40 additions & 46 deletions example/nb01_model_init_and_selection.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"source": [
"# Model initialization and selection procedure\n",
"\n",
"In this notebook, we will show how CoPro is initialized and how the polygons and conflicts are selected."
"In this notebook, we will show how CoPro is initialized and how the selection procedure of spatial aggregation units and conflicts works."
]
},
{
Expand Down Expand Up @@ -40,7 +40,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"For better reproducibility, the version numbers of all key packages are provided."
"For better reproducibility, the version numbers of all key packages used to run this notebook are provided."
]
},
{
Expand All @@ -53,7 +53,7 @@
"output_type": "stream",
"text": [
"Python version: 3.7.8 | packaged by conda-forge | (default, Jul 31 2020, 01:53:57) [MSC v.1916 64 bit (AMD64)]\n",
"copro version: 0.0.8b\n",
"copro version: 0.0.8\n",
"geopandas version: 0.9.0\n",
"xarray version: 0.15.1\n",
"rasterio version: 1.1.0\n",
Expand All @@ -74,14 +74,14 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### The configurations-file (cfg-file)"
"## The configurations-file (cfg-file)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In the cfg-file, all the settings for the analysis are defined. Note that the cfg-file can be stored anywhere, not per se in the same directory where the model data is stored (as in this example case). Make sure that the paths in the cfg-file are updated if you use relative paths and change the folder location of th cfg-file."
"In the configurations-file (cfg-file), all the settings for the analysis are defined. The cfg-file contains, amongst others, all paths to input files, settings for the machine-learning model, and the various selection criteria for spatial aggregation units and conflicts. Note that the cfg-file can be stored anywhere, not necessarily in the same directory where the model data is stored (as in this example case). Make sure that the paths in the cfg-file are updated if you use relative paths and change the folder location of the cfg-file!"
]
},
{
Expand All @@ -97,7 +97,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Based on this cfg-file, the set-up of the run can be initialized. Here, the cfg-file is parsed (i.e. read) and all settings and paths become known to the model. Also, the output folder is created (if it does not exist yet) and the cfg-file is copied to the output folder for improved reusability."
"Based on this cfg-file, the set-up of the run can be initialized. Here, the cfg-file is parsed (i.e. read) and all settings and paths become 'known' to the model. Also, the output folder is created (if it does not exist yet) and the cfg-file is copied to the output folder for improved reusability.\n",
"\n",
"If you set `verbose=True`, then additional statements are printed during model execution. This can help to track the behaviour of the model."
]
},
{
Expand All @@ -110,7 +112,7 @@
"output_type": "stream",
"text": [
"\n",
"#### CoPro version 0.0.8b ####\n",
"#### CoPro version 0.0.8 ####\n",
"#### For information about the model, please visit https://copro.readthedocs.io/ ####\n",
"#### Copyright (2020-2021): Jannis M. Hoch, Sophie de Bruin, Niko Wanders ####\n",
"#### Contact via: j.m.hoch@uu.nl ####\n",
Expand All @@ -130,7 +132,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"One of the outputs is a dictionary containing the parsed configurations as well as output directories of both the reference run and the various projection runs:"
"One of the outputs is a dictionary (here `main_dict`) containing the parsed configurations (they are stored in computer memory, therefore the slighly odd specification) as well as output directories of both the reference run and the various projection runs specified in the cfg-file. \n",
"\n",
"For the reference run, only the respective entries are required."
]
},
{
Expand All @@ -139,65 +143,46 @@
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'_REF': [<configparser.RawConfigParser at 0x1b251bac548>,\n",
" 'C:\\\\Users\\\\hoch0001\\\\Documents\\\\_code\\\\copro\\\\example\\\\./OUT\\\\_REF'],\n",
" 'proj_nr_1': [[<configparser.RawConfigParser at 0x1b251f8ce88>],\n",
" 'C:\\\\Users\\\\hoch0001\\\\Documents\\\\_code\\\\copro\\\\example\\\\./OUT\\\\_PROJ\\\\proj_nr_1']}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
"the configuration of the reference run is <configparser.RawConfigParser object at 0x000002AE32A09E88>\n",
"the output directory of the reference run is C:\\Users\\hoch0001\\Documents\\_code\\copro\\example\\./OUT\\_REF\n"
]
}
],
"source": [
"main_dict"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"For the reference run, only the respective entries are required."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"config_REF = main_dict['_REF'][0]\n",
"out_dir_REF = main_dict['_REF'][1] "
"print('the configuration of the reference run is {}'.format(config_REF))\n",
"out_dir_REF = main_dict['_REF'][1] \n",
"print('the output directory of the reference run is {}'.format(out_dir_REF))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Filter conflicts and polygons\n",
"## Filter conflicts and spatial aggregation units\n",
"\n",
"### Background\n",
"\n",
"As conflict database, we use the [UCDP Georeferenced Event Dataset](https://ucdp.uu.se/downloads/index.html#ged_global) v201. Not all conflicts of the database may always need to be used for a simulation. This can be, for example, because they belong to a non-relevant type of conflict we are not interested in, or because it is simply not in our area-of-interest. Therefore, it is possible to filter the conflicts on various properties:\n",
"As conflict database, we use the [UCDP Georeferenced Event Dataset](https://ucdp.uu.se/downloads/index.html#ged_global). Not all conflicts of the database may need to be used for a simulation. This can be, for example, because they belong to a non-relevant type of conflict we are not interested in, or because it is simply not in our area-of-interest. Therefore, it is possible to filter the conflicts on various properties:\n",
"\n",
"1. min_nr_casualties: minimum number of casualties of a reported conflict; \n",
"1. type_of_violence: 1=state-based armed conflict; 2=non-state conflict; 3=one-sided violence.\n",
"1. *min_nr_casualties: minimum number of casualties of a reported conflict;* \n",
"1. *type_of_violence: 1=state-based armed conflict; 2=non-state conflict; 3=one-sided violence.*\n",
"\n",
"To unravel the interplay between climate and conflict, it may be beneficial to run the model only for conflicts in particular climate zones. It is hence also possible to select only those conflicts that fall within a climate zone following the [Koeppen-Geiger classification](http://koeppen-geiger.vu-wien.ac.at/).\n",
"\n",
"### Selection procedure\n",
"\n",
"In the selection procedure, we first load the conflict database and convert it to a georeferenced dataframe (geo-dataframe). To define the study area, a shape-file containing polygons (in this case water provinces) is loaded and converted to geo-dataframe as well.\n",
"\n",
"We then apply the selection criteria (see above) as specified in the cfg-file, and keep the remaining data points and polygons. "
"We then apply the selection criteria (see above) as specified in the cfg-file, and keep the remaining data points and associated polygons. "
]
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 6,
"metadata": {},
"outputs": [
{
Expand All @@ -222,7 +207,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 7,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -251,12 +236,21 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"It's nicely visible that for this example-run, not all provinces are considered but we focus on specified climate zones only."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Temporary files\n",
"\n",
"To be able to also run the following notebooks, some of the data has to be written to file temporarily. This is **not** part of the CoPro workflow but merely needed to split up the workflow in different notebooks outlining the main steps to go through when using CoPro."
]
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -266,7 +260,7 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -276,7 +270,7 @@
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
Expand Down

0 comments on commit 4e2d3fe

Please sign in to comment.