Skip to content

Commit

Permalink
Merge pull request #106 from JannisHoch/setuptool_console_script
Browse files Browse the repository at this point in the history
copro executable
  • Loading branch information
JannisHoch committed Dec 2, 2020
2 parents a31963e + 3ae7fcd commit 8ffe305
Show file tree
Hide file tree
Showing 20 changed files with 198 additions and 150 deletions.
18 changes: 12 additions & 6 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,11 @@ You can then install the model package into this environment.
$ conda env create -f environment.yml
$ conda activate copro
To install CoPro in editable mode in this environment, run this command next:
To install CoPro in editable mode in this environment, run this command next in the CoPro-folder:

.. code-block:: console
$ pip install -e <path/to/copro>/copro
$ pip install -e .
Command-line script
--------------------
Expand All @@ -73,7 +73,7 @@ To run the model from command line, a command line script is provided. The usage

.. code-block:: console
Usage: copro_runner.py [OPTIONS] CFG
Usage: copro_runner [OPTIONS] CFG
Main command line script to execute the model. All settings are read from
cfg-file. One cfg-file is required argument to train, test, and evaluate
Expand All @@ -92,10 +92,16 @@ This help information can be also accessed with

.. code-block:: console
$ python copro_runner.py --help
$ copro_runner --help
All data and settings are retrieved from the settings-file (cfg-file), which needs to be provided as an inline argument.

In case issues occur, updating ``setuptools`` may be required.

.. code-block:: console
$ pip3 install --upgrade pip setuptools
Example data
----------------

Expand Down Expand Up @@ -132,8 +138,8 @@ If more projections are ought to be made, multiple cfg-files can be provided wit
.. code-block:: console
$ cd path/to/copro/example
$ python ../scripts/copro_runner.py example_settings.cfg
$ python ../scripts/copro_runner.py example_settings.cfg -proj example_settings_proj.cfg
$ copro_runner example_settings.cfg
$ copro_runner example_settings.cfg -proj example_settings_proj.cfg
Alternatively, the same commands can be executed using a bash-file.

Expand Down
9 changes: 5 additions & 4 deletions copro/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,15 @@ def initiate_X_data(config):

return X

def fill_XY(XY, config, conflict_gdf, polygon_gdf):
def fill_XY(XY, config, root_dir, conflict_gdf, polygon_gdf):
"""Fills the XY-dictionary with data for each variable and conflict for each polygon for each simulation year.
The number of rows should therefore equal the number of simulation years times the number of polygons.
At the end of the last simulation year, the dictionary is converted to a numpy-array.
Args:
XY (dict): initiated, i.e. empty, XY-dictionary
config (ConfigParser-object): object containing the parsed configuration-settings of the model.
root_dir (str): path to location of cfg-file.
conflict_gdf (geo-dataframe): geo-dataframe containing the selected conflicts.
polygon_gdf (geo-dataframe): geo-dataframe containing the selected polygons.
Expand Down Expand Up @@ -102,17 +103,17 @@ def fill_XY(XY, config, conflict_gdf, polygon_gdf):

else:

nc_ds = xr.open_dataset(os.path.join(config.get('general', 'input_dir'), config.get('data', key)))
nc_ds = xr.open_dataset(os.path.join(root_dir, config.get('general', 'input_dir'), config.get('data', key)))

if (np.dtype(nc_ds.time) == np.float32) or (np.dtype(nc_ds.time) == np.float64):
data_series = value
data_list = variables.nc_with_float_timestamp(polygon_gdf, config, key, sim_year)
data_list = variables.nc_with_float_timestamp(polygon_gdf, config, root_dir, key, sim_year)
data_series = data_series.append(pd.Series(data_list), ignore_index=True)
XY[key] = data_series

elif np.dtype(nc_ds.time) == 'datetime64[ns]':
data_series = value
data_list = variables.nc_with_continous_datetime_timestamp(polygon_gdf, config, key, sim_year)
data_list = variables.nc_with_continous_datetime_timestamp(polygon_gdf, config, root_dir, key, sim_year)
data_series = data_series.append(pd.Series(data_list), ignore_index=True)
XY[key] = data_series

Expand Down
15 changes: 8 additions & 7 deletions copro/machine_learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,14 +127,15 @@ def fit_predict(X_train, y_train, X_test, clf, config, pickle_dump=True):

return y_pred, y_prob

def pickle_clf(scaler, clf, config):
def pickle_clf(scaler, clf, config, root_dir):
"""(Re)fits a classifier with all available data and pickles it.
Can then be used to make projections in conjunction with projected values.
Args:
scaler (scaler): the specified scaling method instance.
clf (classifier): the specified model instance.
config (ConfigParser-object): object containing the parsed configuration-settings of the model.
root_dir (str): path to location of cfg-file.
Returns:
classifier: classifier fitted with all available data.
Expand All @@ -143,20 +144,20 @@ def pickle_clf(scaler, clf, config):
print('INFO: fitting the classifier with all data from reference period')

if config.get('pre_calc', 'XY') is '':
if config.getboolean('general', 'verbose'): print('DEBUG: loading XY data from {}'.format(os.path.abspath(os.path.join(config.get('general', 'output_dir'), 'XY.npy'))))
XY_fit = np.load(os.path.abspath(os.path.join(config.get('general', 'output_dir'), 'XY.npy')), allow_pickle=True)
if config.getboolean('general', 'verbose'): print('DEBUG: loading XY data from {}'.format(os.path.join(root_dir, config.get('general', 'output_dir'), 'XY.npy')))
XY_fit = np.load(os.path.join(root_dir, config.get('general', 'output_dir'), 'XY.npy'), allow_pickle=True)
else:
if config.getboolean('general', 'verbose'): print('DEBUG: loading XY data from {}'.format(os.path.abspath(config.get('pre_calc', 'XY'))))
XY_fit = np.load(os.path.abspath(os.path.join(config.get('general', 'output_dir'), config.get('pre_calc', 'XY'))), allow_pickle=True)
if config.getboolean('general', 'verbose'): print('DEBUG: loading XY data from {}'.format(os.path.join(root_dir, config.get('pre_calc', 'XY'))))
XY_fit = np.load(os.path.join(root_dir, config.get('pre_calc', 'XY')), allow_pickle=True)

X_fit, Y_fit = data.split_XY_data(XY_fit, config)
X_ID_fit, X_geom_fit, X_data_fit = conflict.split_conflict_geom_data(X_fit)
X_ft_fit = scaler.fit_transform(X_data_fit)

clf.fit(X_ft_fit, Y_fit)

print('INFO: dumping classifier to {}'.format(os.path.abspath(os.path.join(config.get('general', 'output_dir'), 'clf.pkl'))))
with open(os.path.abspath(os.path.join(config.get('general', 'output_dir'), 'clf.pkl')), 'wb') as f:
print('INFO: dumping classifier to {}'.format(os.path.join(root_dir, config.get('general', 'output_dir'), 'clf.pkl')))
with open(os.path.join(root_dir, config.get('general', 'output_dir'), 'clf.pkl'), 'wb') as f:
pickle.dump(clf, f)

return clf
10 changes: 5 additions & 5 deletions copro/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def dubbelsteen(X, Y, config, scaler, clf, out_dir):

return X_df, y_df, eval_dict

def predictive(X, scaler, config):
def predictive(X, scaler, config, root_dir):
"""Predictive model to use the already fitted classifier to make projections.
Like the other models, it reads data which are then scaled and used in conjunction with the classifier to project conflict risk.
Expand All @@ -176,12 +176,12 @@ def predictive(X, scaler, config):
##- scaling only the variable values
X_ft = scaler.fit_transform(X_data)

if os.path.isfile(os.path.abspath(config.get('pre_calc', 'clf'))):
with open(os.path.abspath(config.get('pre_calc', 'clf')), 'rb') as f:
print('INFO: loading classifier from {}'.format(os.path.abspath(config.get('pre_calc', 'clf'))))
if os.path.isfile(os.path.join(root_dir, config.get('pre_calc', 'clf'))):
with open(os.path.join(root_dir, config.get('pre_calc', 'clf')), 'rb') as f:
print('INFO: loading classifier from {}'.format(os.path.join(root_dir, config.get('pre_calc', 'clf'))))
clf = pickle.load(f)
else:
raise ValueError('ERROR: no pre-computed classifier specified in cfg-file, currently specified file {} does not exist'.format(os.path.abspath(config.get('pre_calc', 'clf'))))
raise ValueError('ERROR: no pre-computed classifier specified in cfg-file, currently specified file {} does not exist'.format(os.path.join(root_dir, config.get('pre_calc', 'clf'))))

print('INFO: making the projection')
y_pred = clf.predict(X_ft)
Expand Down
48 changes: 33 additions & 15 deletions copro/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,15 +4,17 @@
import os, sys


def create_XY(config, polygon_gdf, conflict_gdf):
def create_XY(config, out_dir, root_dir, polygon_gdf, conflict_gdf):
"""Top-level function to create the X-array and Y-array.
If the XY-data was pre-computed and specified in cfg-file, the data is loaded.
If not, variable values and conflict data are read from file and stored in array. The resulting array is by default saved as npy-format to file.
Args:
config (ConfigParser-object): object containing the parsed configuration-settings of the model.
conflict_gdf (geo-dataframe): geo-dataframe containing the selected conflicts.
out_dir (str): path to output folder.
root_dir (str): path to location of cfg-file.
polygon_gdf (geo-dataframe): geo-dataframe containing the selected polygons.
conflict_gdf (geo-dataframe): geo-dataframe containing the selected conflicts.
Returns:
array: X-array containing variable values.
Expand All @@ -23,35 +25,50 @@ def create_XY(config, polygon_gdf, conflict_gdf):

XY = data.initiate_XY_data(config)

XY = data.fill_XY(XY, config, conflict_gdf, polygon_gdf)
XY = data.fill_XY(XY, config, root_dir, conflict_gdf, polygon_gdf)

print('INFO: saving XY data by default to file {}'.format(os.path.abspath(os.path.join(config.get('general', 'output_dir'), 'XY.npy'))))
np.save(os.path.join(config.get('general', 'output_dir'),'XY'), XY)
print('INFO: saving XY data by default to file {}'.format(os.path.join(out_dir, 'XY.npy')))
np.save(os.path.join(out_dir,'XY'), XY)

else:

print('INFO: loading XY data from file {}'.format(os.path.abspath(os.path.join(config.get('general', 'output_dir'), config.get('pre_calc', 'XY')))))
XY = np.load(os.path.join(config.get('general', 'output_dir'), config.get('pre_calc', 'XY')), allow_pickle=True)
print('INFO: loading XY data from file {}'.format(os.path.join(root_dir, config.get('pre_calc', 'XY'))))
XY = np.load(os.path.join(root_dir, config.get('pre_calc', 'XY')), allow_pickle=True)

X, Y = data.split_XY_data(XY, config)

return X, Y

def create_X(config, polygon_gdf, conflict_gdf=None):
def create_X(config, out_dir, root_dir, polygon_gdf, conflict_gdf=None):
"""Top-level function to create the X-array.
If the X-data was pre-computed and specified in cfg-file, the data is loaded.
If not, variable values are read from file and stored in array.
The resulting array is by default saved as npy-format to file.
Args:
config (ConfigParser-object): object containing the parsed configuration-settings of the model.
out_dir (str): path to output folder.
root_dir (str): path to location of cfg-file.
polygon_gdf (geo-dataframe): geo-dataframe containing the selected polygons.
conflict_gdf (geo-dataframe): geo-dataframe containing the selected conflicts.
Returns:
array: X-array containing variable values.
"""

if config.get('pre_calc', 'XY') is '':

X = data.initiate_X_data(config)

X = data.fill_XY(X, config, conflict_gdf, polygon_gdf)
X = data.fill_XY(X, config, root_dir, conflict_gdf, polygon_gdf)

print('INFO: saving X data by default to file {}'.format(os.path.abspath(os.path.join(config.get('general', 'output_dir'), 'X.npy'))))
np.save(os.path.join(config.get('general', 'output_dir'),'X'), X)
print('INFO: saving X data by default to file {}'.format(os.path.join(out_dir, 'X.npy')))
np.save(os.path.join(out_dir,'X'), X)

else:

print('INFO: loading XY data from file {}'.format(os.path.abspath(config.get('pre_calc', 'X'))))
X = np.load(os.path.abspath(config.get('pre_calc', 'X')), allow_pickle=True)
print('INFO: loading XY data from file {}'.format(os.path.join(root_dir, config.get('pre_calc', 'X'))))
X = np.load(os.path.join(root_dir, config.get('pre_calc', 'X')), allow_pickle=True)

return X

Expand Down Expand Up @@ -105,13 +122,14 @@ def run_reference(X, Y, config, scaler, clf, out_dir):

return X_df, y_df, eval_dict

def run_prediction(X, scaler, config):
def run_prediction(X, scaler, config, root_dir):
"""Top-level function to run a predictive model with an already fitted classifier and new data.
Args:
X (array): X-array containing variable values.
scaler (scaler): the specified scaler instance.
config (ConfigParser-object): object containing the parsed configuration-settings of the model.
root_dir (str): path to location of cfg-file.
Raises:
ValueError: raised if another model type than the one using all data is specified in cfg-file.
Expand All @@ -123,6 +141,6 @@ def run_prediction(X, scaler, config):
if config.getint('general', 'model') != 1:
raise ValueError('ERROR: making a prediction is only possible with model type 1, i.e. using all data')

y_df = models.predictive(X, scaler, config)
y_df = models.predictive(X, scaler, config, root_dir)

return y_df
Empty file added copro/scripts/__init__.py
Empty file.
25 changes: 9 additions & 16 deletions scripts/copro_runner.py → copro/scripts/copro_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,12 @@
import warnings
warnings.filterwarnings("ignore")

@click.group()
def cli():
pass

@click.command()
@click.argument('cfg', type=click.Path())
@click.option('--projection-settings', '-proj', help='path to cfg-file with settings for a projection run', multiple=True, type=click.Path())
@click.option('--verbose', '-v', help='command line switch to turn on verbose mode', is_flag=True)

def main(cfg, projection_settings=[], verbose=False):
def cli(cfg, projection_settings=[], verbose=False):
"""Main command line script to execute the model.
All settings are read from cfg-file.
One cfg-file is required argument to train, test, and evaluate the model.
Expand All @@ -29,15 +25,15 @@ def main(cfg, projection_settings=[], verbose=False):
"""

#- parsing settings-file and getting path to output folder
config, out_dir = copro.utils.initiate_setup(cfg)
config, out_dir, root_dir = copro.utils.initiate_setup(cfg)

if verbose:
config.set('general', 'verbose', str(verbose))

click.echo(click.style('\nINFO: reference run started\n', fg='cyan'))

#- selecting conflicts and getting area-of-interest and aggregation level
conflict_gdf, extent_gdf, extent_active_polys_gdf, global_df = copro.selection.select(config, out_dir)
conflict_gdf, extent_gdf, extent_active_polys_gdf, global_df = copro.selection.select(config, out_dir, root_dir)
#- plot selected polygons and conflicts
fig, ax = plt.subplots(1, 1)
copro.plots.selected_polygons(extent_active_polys_gdf, figsize=(20, 10), ax=ax)
Expand All @@ -46,7 +42,7 @@ def main(cfg, projection_settings=[], verbose=False):

#- create X and Y arrays by reading conflict and variable files;
#- or by loading a pre-computed array (npy-file)
X, Y = copro.pipeline.create_XY(config, extent_active_polys_gdf, conflict_gdf)
X, Y = copro.pipeline.create_XY(config, out_dir, root_dir, extent_active_polys_gdf, conflict_gdf)

#- defining scaling and model algorithms
scaler, clf = copro.pipeline.prepare_ML(config)
Expand Down Expand Up @@ -104,7 +100,7 @@ def main(cfg, projection_settings=[], verbose=False):
copro.plots.metrics_distribution(out_dict, figsize=(20, 10))
plt.savefig(os.path.join(out_dir, 'metrics_distribution.png'), dpi=300, bbox_inches='tight')

clf = copro.machine_learning.pickle_clf(scaler, clf, config)
clf = copro.machine_learning.pickle_clf(scaler, clf, config, root_dir)
#- plot relative importance of each feature based on ALL data points
fig, ax = plt.subplots(1, 1)
copro.plots.factor_importance(clf, config, out_dir=out_dir, ax=ax, figsize=(20, 10))
Expand All @@ -118,13 +114,10 @@ def main(cfg, projection_settings=[], verbose=False):

click.echo(click.style('\nINFO: projection run started, based on {}'.format(os.path.abspath(proj)), fg='cyan'))

config, out_dir = copro.utils.initiate_setup(proj)

X = copro.pipeline.create_X(config, extent_active_polys_gdf)
config, out_dir, root_dir = copro.utils.initiate_setup(proj)

y_df = copro.pipeline.run_prediction(X, scaler, config)
X = copro.pipeline.create_X(config, out_dir, root_dir, extent_active_polys_gdf)

df_hit, gdf_hit = copro.evaluation.polygon_model_accuracy(y_df, global_df, out_dir=out_dir, make_proj=True)
y_df = copro.pipeline.run_prediction(X, scaler, config, root_dir)

if __name__ == '__main__':
main()
df_hit, gdf_hit = copro.evaluation.polygon_model_accuracy(y_df, global_df, out_dir=out_dir, make_proj=True)

0 comments on commit 8ffe305

Please sign in to comment.