Merge pull request #106 from JannisHoch/setuptool_console_script

copro executable
JannisHoch · Dec 2, 2020 · 8ffe305 · 8ffe305
2 parents a31963e + 3ae7fcd
commit 8ffe305
Show file tree

Hide file tree

Showing 20 changed files with 198 additions and 150 deletions.
diff --git a/README.rst b/README.rst
@@ -54,11 +54,11 @@ You can then install the model package into this environment.
     $ conda env create -f environment.yml
     $ conda activate copro
 
-To install CoPro in editable mode in this environment, run this command next:
+To install CoPro in editable mode in this environment, run this command next in the CoPro-folder:
 
 .. code-block:: console
 
-    $ pip install -e <path/to/copro>/copro
+    $ pip install -e .
 
 Command-line script
 --------------------
@@ -73,7 +73,7 @@ To run the model from command line, a command line script is provided. The usage
 
 .. code-block:: console
 
-    Usage: copro_runner.py [OPTIONS] CFG
+    Usage: copro_runner [OPTIONS] CFG
 
     Main command line script to execute the model.  All settings are read from
     cfg-file. One cfg-file is required argument to train, test, and evaluate
@@ -92,10 +92,16 @@ This help information can be also accessed with
 
 .. code-block:: console
 
-    $ python copro_runner.py --help
+    $ copro_runner --help
 
 All data and settings are retrieved from the settings-file (cfg-file), which needs to be provided as an inline argument.
 
+In case issues occur, updating ``setuptools`` may be required.
+
+.. code-block:: console
+
+    $ pip3 install --upgrade pip setuptools
+
 Example data
 ----------------
 
@@ -132,8 +138,8 @@ If more projections are ought to be made, multiple cfg-files can be provided wit
 .. code-block:: console
 
     $ cd path/to/copro/example
-    $ python ../scripts/copro_runner.py example_settings.cfg
-    $ python ../scripts/copro_runner.py example_settings.cfg -proj example_settings_proj.cfg
+    $ copro_runner example_settings.cfg
+    $ copro_runner example_settings.cfg -proj example_settings_proj.cfg
 
 Alternatively, the same commands can be executed using a bash-file.
 

diff --git a/copro/data.py b/copro/data.py
@@ -50,14 +50,15 @@ def initiate_X_data(config):
 
     return X
 
-def fill_XY(XY, config, conflict_gdf, polygon_gdf):
+def fill_XY(XY, config, root_dir, conflict_gdf, polygon_gdf):
     """Fills the XY-dictionary with data for each variable and conflict for each polygon for each simulation year. 
     The number of rows should therefore equal to number simulation years times number of polygons.
     At end of last simulation year, the dictionary is converted to a numpy-array.
 
     Args:
         XY (dict): initiated, i.e. empty, XY-dictionary
         config (ConfigParser-object): object containing the parsed configuration-settings of the model.
+        root_dir (str): path to location of cfg-file.
         conflict_gdf (geo-dataframe): geo-dataframe containing the selected conflicts.
         polygon_gdf (geo-dataframe): geo-dataframe containing the selected polygons.
 
@@ -102,17 +103,17 @@ def fill_XY(XY, config, conflict_gdf, polygon_gdf):
 
             else:
 
-                nc_ds = xr.open_dataset(os.path.join(config.get('general', 'input_dir'), config.get('data', key)))
+                nc_ds = xr.open_dataset(os.path.join(root_dir, config.get('general', 'input_dir'), config.get('data', key)))
 
                 if (np.dtype(nc_ds.time) == np.float32) or (np.dtype(nc_ds.time) == np.float64):
                     data_series = value
-                    data_list = variables.nc_with_float_timestamp(polygon_gdf, config, key, sim_year)
+                    data_list = variables.nc_with_float_timestamp(polygon_gdf, config, root_dir, key, sim_year)
                     data_series = data_series.append(pd.Series(data_list), ignore_index=True)
                     XY[key] = data_series
 
                 elif np.dtype(nc_ds.time) == 'datetime64[ns]':
                     data_series = value
-                    data_list = variables.nc_with_continous_datetime_timestamp(polygon_gdf, config, key, sim_year)
+                    data_list = variables.nc_with_continous_datetime_timestamp(polygon_gdf, config, root_dir, key, sim_year)
                     data_series = data_series.append(pd.Series(data_list), ignore_index=True)
                     XY[key] = data_series
 

diff --git a/copro/machine_learning.py b/copro/machine_learning.py
@@ -127,14 +127,15 @@ def fit_predict(X_train, y_train, X_test, clf, config, pickle_dump=True):
 
     return y_pred, y_prob
 
-def pickle_clf(scaler, clf, config):
+def pickle_clf(scaler, clf, config, root_dir):
     """(Re)fits a classifier with all available data and pickles it.
     Can then be used to make projections in conjunction with projected values.
 
     Args:
         scaler (scaler): the specified scaling method instance.
         clf (classifier): the specified model instance.
         config (ConfigParser-object): object containing the parsed configuration-settings of the model.
+        root_dir (str): path to location of cfg-file.
 
     Returns:
         classifier: classifier fitted with all available data.
@@ -143,20 +144,20 @@ def pickle_clf(scaler, clf, config):
     print('INFO: fitting the classifier with all data from reference period')
 
     if config.get('pre_calc', 'XY') is '':
-        if config.getboolean('general', 'verbose'): print('DEBUG: loading XY data from {}'.format(os.path.abspath(os.path.join(config.get('general', 'output_dir'), 'XY.npy'))))
-        XY_fit = np.load(os.path.abspath(os.path.join(config.get('general', 'output_dir'), 'XY.npy')), allow_pickle=True)
+        if config.getboolean('general', 'verbose'): print('DEBUG: loading XY data from {}'.format(os.path.join(root_dir, config.get('general', 'output_dir'), 'XY.npy')))
+        XY_fit = np.load(os.path.join(root_dir, config.get('general', 'output_dir'), 'XY.npy'), allow_pickle=True)
     else:
-        if config.getboolean('general', 'verbose'): print('DEBUG: loading XY data from {}'.format(os.path.abspath(config.get('pre_calc', 'XY'))))
-        XY_fit = np.load(os.path.abspath(os.path.join(config.get('general', 'output_dir'), config.get('pre_calc', 'XY'))), allow_pickle=True)
+        if config.getboolean('general', 'verbose'): print('DEBUG: loading XY data from {}'.format(os.path.join(root_dir, config.get('pre_calc', 'XY'))))
+        XY_fit = np.load(os.path.join(root_dir, config.get('pre_calc', 'XY')), allow_pickle=True)
 
     X_fit, Y_fit = data.split_XY_data(XY_fit, config)
     X_ID_fit, X_geom_fit, X_data_fit = conflict.split_conflict_geom_data(X_fit)
     X_ft_fit = scaler.fit_transform(X_data_fit)
 
     clf.fit(X_ft_fit, Y_fit)
 
-    print('INFO: dumping classifier to {}'.format(os.path.abspath(os.path.join(config.get('general', 'output_dir'), 'clf.pkl'))))
-    with open(os.path.abspath(os.path.join(config.get('general', 'output_dir'), 'clf.pkl')), 'wb') as f:
+    print('INFO: dumping classifier to {}'.format(os.path.join(root_dir, config.get('general', 'output_dir'), 'clf.pkl')))
+    with open(os.path.join(root_dir, config.get('general', 'output_dir'), 'clf.pkl'), 'wb') as f:
         pickle.dump(clf, f)
 
     return clf
diff --git a/copro/models.py b/copro/models.py
@@ -151,7 +151,7 @@ def dubbelsteen(X, Y, config, scaler, clf, out_dir):
 
     return X_df, y_df, eval_dict
 
-def predictive(X, scaler, config):
+def predictive(X, scaler, config, root_dir):
     """Predictive model to use the already fitted classifier to make projections.
     Like the other models, it reads data which are then scaled and used in conjunction with the classifier to project conflict risk.
 
@@ -176,12 +176,12 @@ def predictive(X, scaler, config):
     ##- scaling only the variable values
     X_ft = scaler.fit_transform(X_data)
 
-    if os.path.isfile(os.path.abspath(config.get('pre_calc', 'clf'))):
-        with open(os.path.abspath(config.get('pre_calc', 'clf')), 'rb') as f:
-            print('INFO: loading classifier from {}'.format(os.path.abspath(config.get('pre_calc', 'clf'))))
+    if os.path.isfile(os.path.join(root_dir, config.get('pre_calc', 'clf'))):
+        with open(os.path.join(root_dir, config.get('pre_calc', 'clf')), 'rb') as f:
+            print('INFO: loading classifier from {}'.format(os.path.join(root_dir, config.get('pre_calc', 'clf'))))
             clf = pickle.load(f)
     else:
-        raise ValueError('ERROR: no pre-computed classifier specified in cfg-file, currently specified file {} does not exist'.format(os.path.abspath(config.get('pre_calc', 'clf'))))
+        raise ValueError('ERROR: no pre-computed classifier specified in cfg-file, currently specified file {} does not exist'.format(os.path.join(root_dir, config.get('pre_calc', 'clf'))))
 
     print('INFO: making the projection')
     y_pred = clf.predict(X_ft)

diff --git a/copro/pipeline.py b/copro/pipeline.py
@@ -4,15 +4,17 @@
 import os, sys
 
 
-def create_XY(config, polygon_gdf, conflict_gdf):
+def create_XY(config, out_dir, root_dir, polygon_gdf, conflict_gdf):
     """Top-level function to create the X-array and Y-array.
     If the XY-data was pre-computed and specified in cfg-file, the data is loaded.
     If not, variable values and conflict data are read from file and stored in array. The resulting array is by default saved as npy-format to file.
 
     Args:
         config (ConfigParser-object): object containing the parsed configuration-settings of the model.
-        conflict_gdf (geo-dataframe): geo-dataframe containing the selected conflicts.
+        out_dir (str): path to output folder.
+        root_dir (str): path to location of cfg-file.
         polygon_gdf (geo-dataframe): geo-dataframe containing the selected polygons.
+        conflict_gdf (geo-dataframe): geo-dataframe containing the selected conflicts.
 
     Returns:
         array: X-array containing variable values.
@@ -23,35 +25,50 @@ def create_XY(config, polygon_gdf, conflict_gdf):
 
         XY = data.initiate_XY_data(config)
 
-        XY = data.fill_XY(XY, config, conflict_gdf, polygon_gdf)
+        XY = data.fill_XY(XY, config, root_dir, conflict_gdf, polygon_gdf)
 
-        print('INFO: saving XY data by default to file {}'.format(os.path.abspath(os.path.join(config.get('general', 'output_dir'), 'XY.npy'))))
-        np.save(os.path.join(config.get('general', 'output_dir'),'XY'), XY)
+        print('INFO: saving XY data by default to file {}'.format(os.path.join(out_dir, 'XY.npy')))
+        np.save(os.path.join(out_dir,'XY'), XY)
 
     else:
 
-        print('INFO: loading XY data from file {}'.format(os.path.abspath(os.path.join(config.get('general', 'output_dir'), config.get('pre_calc', 'XY')))))
-        XY = np.load(os.path.join(config.get('general', 'output_dir'), config.get('pre_calc', 'XY')), allow_pickle=True)
+        print('INFO: loading XY data from file {}'.format(os.path.join(root_dir, config.get('pre_calc', 'XY'))))
+        XY = np.load(os.path.join(root_dir, config.get('pre_calc', 'XY')), allow_pickle=True)
 
     X, Y = data.split_XY_data(XY, config)    
 
     return X, Y
 
-def create_X(config, polygon_gdf, conflict_gdf=None):
+def create_X(config, out_dir, root_dir, polygon_gdf, conflict_gdf=None):
+    """Top-level function to create the X-array.
+    If the X-data was pre-computed and specified in cfg-file, the data is loaded.
+    If not, variable values are read from file and stored in array. 
+    The resulting array is by default saved as npy-format to file.
+
+    Args:
+        config (ConfigParser-object): object containing the parsed configuration-settings of the model.
+        out_dir (str): path to output folder.
+        root_dir (str): path to location of cfg-file.
+        polygon_gdf (geo-dataframe): geo-dataframe containing the selected polygons.
+        conflict_gdf (geo-dataframe, optional): geo-dataframe containing the selected conflicts. Defaults to None.
+
+    Returns:
+        array: X-array containing variable values.
+    """    
 
     if config.get('pre_calc', 'XY') is '':
 
         X = data.initiate_X_data(config)
 
-        X = data.fill_XY(X, config, conflict_gdf, polygon_gdf)
+        X = data.fill_XY(X, config, root_dir, conflict_gdf, polygon_gdf)
 
-        print('INFO: saving X data by default to file {}'.format(os.path.abspath(os.path.join(config.get('general', 'output_dir'), 'X.npy'))))
-        np.save(os.path.join(config.get('general', 'output_dir'),'X'), X)
+        print('INFO: saving X data by default to file {}'.format(os.path.join(out_dir, 'X.npy')))
+        np.save(os.path.join(out_dir,'X'), X)
 
     else:
 
-        print('INFO: loading XY data from file {}'.format(os.path.abspath(config.get('pre_calc', 'X'))))
-        X = np.load(os.path.abspath(config.get('pre_calc', 'X')), allow_pickle=True)
+        print('INFO: loading XY data from file {}'.format(os.path.join(root_dir, config.get('pre_calc', 'X'))))
+        X = np.load(os.path.join(root_dir, config.get('pre_calc', 'X')), allow_pickle=True)
 
     return X
 
@@ -105,13 +122,14 @@ def run_reference(X, Y, config, scaler, clf, out_dir):
 
     return X_df, y_df, eval_dict
 
-def run_prediction(X, scaler, config):
+def run_prediction(X, scaler, config, root_dir):
     """Top-level function to run a predictive model with a already fitted classifier and new data.
 
     Args:
         X (array): X-array containing variable values.
         scaler (scaler): the specified scaler instance.
         config (ConfigParser-object): object containing the parsed configuration-settings of the model.
+        root_dir (str): path to location of cfg-file.
 
     Raises:
         ValueError: raised if another model type than the one using all data is specified in cfg-file.
@@ -123,6 +141,6 @@ def run_prediction(X, scaler, config):
     if config.getint('general', 'model') != 1:
         raise ValueError('ERROR: making a prediction is only possible with model type 1, i.e. using all data')
 
-    y_df = models.predictive(X, scaler, config)
+    y_df = models.predictive(X, scaler, config, root_dir)
 
     return y_df
diff --git a/copro/scripts/__init__.py b/copro/scripts/__init__.py
diff --git a/scripts/copro_runner.py → copro/scripts/copro_runner.py b/scripts/copro_runner.py → copro/scripts/copro_runner.py
@@ -9,16 +9,12 @@
 import warnings
 warnings.filterwarnings("ignore")
 
-@click.group()
-def cli():
-    pass
-
 @click.command()
 @click.argument('cfg', type=click.Path())
 @click.option('--projection-settings', '-proj', help='path to cfg-file with settings for a projection run', multiple=True, type=click.Path())
 @click.option('--verbose', '-v', help='command line switch to turn on verbose mode', is_flag=True)
 
-def main(cfg, projection_settings=[], verbose=False):   
+def cli(cfg, projection_settings=[], verbose=False):   
     """Main command line script to execute the model. 
     All settings are read from cfg-file.
     One cfg-file is required argument to train, test, and evaluate the model.
@@ -29,15 +25,15 @@ def main(cfg, projection_settings=[], verbose=False):
     """ 
 
     #- parsing settings-file and getting path to output folder
-    config, out_dir = copro.utils.initiate_setup(cfg)
+    config, out_dir, root_dir = copro.utils.initiate_setup(cfg)
 
     if verbose:
         config.set('general', 'verbose', str(verbose))
 
     click.echo(click.style('\nINFO: reference run started\n', fg='cyan'))
 
     #- selecting conflicts and getting area-of-interest and aggregation level
-    conflict_gdf, extent_gdf, extent_active_polys_gdf, global_df = copro.selection.select(config, out_dir)
+    conflict_gdf, extent_gdf, extent_active_polys_gdf, global_df = copro.selection.select(config, out_dir, root_dir)
     #- plot selected polygons and conflicts
     fig, ax = plt.subplots(1, 1)
     copro.plots.selected_polygons(extent_active_polys_gdf, figsize=(20, 10), ax=ax)
@@ -46,7 +42,7 @@ def main(cfg, projection_settings=[], verbose=False):
 
     #- create X and Y arrays by reading conflict and variable files;
     #- or by loading a pre-computed array (npy-file)
-    X, Y = copro.pipeline.create_XY(config, extent_active_polys_gdf, conflict_gdf)
+    X, Y = copro.pipeline.create_XY(config, out_dir, root_dir, extent_active_polys_gdf, conflict_gdf)
 
     #- defining scaling and model algorithms
     scaler, clf = copro.pipeline.prepare_ML(config)
@@ -104,7 +100,7 @@ def main(cfg, projection_settings=[], verbose=False):
     copro.plots.metrics_distribution(out_dict, figsize=(20, 10))
     plt.savefig(os.path.join(out_dir, 'metrics_distribution.png'), dpi=300, bbox_inches='tight')
 
-    clf = copro.machine_learning.pickle_clf(scaler, clf, config)
+    clf = copro.machine_learning.pickle_clf(scaler, clf, config, root_dir)
     #- plot relative importance of each feature based on ALL data points
     fig, ax = plt.subplots(1, 1)
     copro.plots.factor_importance(clf, config, out_dir=out_dir, ax=ax, figsize=(20, 10))
@@ -118,13 +114,10 @@ def main(cfg, projection_settings=[], verbose=False):
 
             click.echo(click.style('\nINFO: projection run started, based on {}'.format(os.path.abspath(proj)), fg='cyan'))
 
-            config, out_dir = copro.utils.initiate_setup(proj)
-
-            X = copro.pipeline.create_X(config, extent_active_polys_gdf)
+            config, out_dir, root_dir = copro.utils.initiate_setup(proj)
 
-            y_df = copro.pipeline.run_prediction(X, scaler, config)
+            X = copro.pipeline.create_X(config, out_dir, root_dir, extent_active_polys_gdf)
 
-            df_hit, gdf_hit = copro.evaluation.polygon_model_accuracy(y_df, global_df, out_dir=out_dir, make_proj=True)
+            y_df = copro.pipeline.run_prediction(X, scaler, config, root_dir)
 
-if __name__ == '__main__':
-    main()
+            df_hit, gdf_hit = copro.evaluation.polygon_model_accuracy(y_df, global_df, out_dir=out_dir, make_proj=True)