Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

replace clusters_bins with capacity_bins #48

Merged
merged 7 commits into from
Mar 9, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions examples/ReEDS/offshore_wind/offshore_wind_config.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"classify": {
"cluster_on": "trans_cap_cost",
"cap_bins": 3,
"regions": "model_region",
"resource_classes": "/shared-projects/rev/projects/reeds_jan2020/wind/offshore_wind/offshore_wind_resource_classes.csv",
"rev_table": "/shared-projects/rev/projects/reeds_jan2020/wind/reeds_wind_sc.csv",
"sc_bins": 5
"sort_bins_by": "trans_cap_cost"
},
"directories": {
"log_directory": "/shared-projects/rev/projects/reeds_jan2020/wind/offshore_wind/logs/",
Expand Down
4 changes: 2 additions & 2 deletions examples/ReEDS/onshore_wind/onshore_wind_config.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"classify": {
"cluster_on": "trans_cap_cost",
"cap_bins": 3,
"regions": "model_region",
"resource_classes": "/shared-projects/rev/projects/reeds_jan2020/wind/onshore_wind/onshore_wind_resource_classes.csv",
"rev_table": "/shared-projects/rev/projects/reeds_jan2020/wind/reeds_wind_sc.csv",
"sc_bins": 5
"sort_bins_by": "trans_cap_cost"
},
"directories": {
"log_directory": "/shared-projects/rev/projects/reeds_jan2020/wind/onshore_wind/logs/",
Expand Down
4 changes: 2 additions & 2 deletions examples/ReEDS/pv/pv_rural_config.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"classify": {
"cluster_on": "trans_cap_cost",
"cap_bins": 3,
"regions": "model_region",
"resource_classes": "/shared-projects/rev/projects/reeds_jan2020/pv_rural/pv_resource_classes.csv",
"rev_table": "/shared-projects/rev/projects/reeds_jan2020/pv_rural/outputs_sc.csv",
"sc_bins": 5
"sort_bins_by": "trans_cap_cost"
},
"directories": {
"log_directory": "/shared-projects/rev/projects/reeds_jan2020/pv_rural/logs/",
Expand Down
4 changes: 2 additions & 2 deletions examples/ReEDS/pv/pv_urban_config.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
{
"classify": {
"cluster_on": "trans_cap_cost",
"cap_bins": 3,
"regions": "model_region",
"resource_classes": "/shared-projects/rev/projects/reeds_jan2020/pv_urban/pv_resource_classes.csv",
"rev_table": "/shared-projects/rev/projects/reeds_jan2020/pv_urban/outputs_sc.csv",
"sc_bins": 5
"sort_bins_by": "trans_cap_cost"
},
"directories": {
"log_directory": "/shared-projects/rev/projects/reeds_jan2020/pv_urban/logs/",
Expand Down
16 changes: 8 additions & 8 deletions reVX/config/reeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,8 +68,8 @@ def __init__(self, config):
"""
super().__init__(config)
self._default_regions = 'reeds_region'
self._default_sc_bins = 3
self._default_cluster_on = 'trans_cap_cost'
self._default_cap_bins = 3
self._default_sort_bins_by = 'trans_cap_cost'

@property
def rev_table(self):
Expand All @@ -88,15 +88,15 @@ def regions(self):
return self.get('regions', self._default_regions)

@property
def sc_bins(self):
"""Get the number of supply curve bins (clusters) to make per
def cap_bins(self):
"""Get the number of capacity bins to make per
region/resource class combination."""
return self.get('sc_bins', self._default_sc_bins)
return self.get('cap_bins', self._default_cap_bins)

@property
def cluster_on(self):
"""Get default string column label to cluster on."""
return self.get('cluster_on', self._default_cluster_on)
def sort_bins_by(self):
"""Get the column label(s) to sort on before capacity binning."""
return self.get('sort_bins_by', self._default_sort_bins_by)

@property
def filter(self):
Expand Down
99 changes: 53 additions & 46 deletions reVX/reeds/reeds_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
import pandas as pd
from warnings import warn

from reVX.utilities.cluster_methods import ClusteringMethods
from reVX.utilities.exceptions import ReedsValueError, ReedsKeyError

logger = logging.getLogger(__name__)
Expand All @@ -26,9 +25,8 @@ class ReedsClassifier:
'trans_cap_cost', 'dist_mi')

def __init__(self, rev_table, resource_classes, region_map='reeds_region',
sc_bins=5, cluster_kwargs={'cluster_on': 'trans_cap_cost',
'method': 'kmeans', 'norm': None},
filter=None, trg_by_region=False):
cap_bins=3, sort_bins_by='trans_cap_cost', filter=None,
trg_by_region=False):
"""
Parameters
----------
Expand All @@ -46,11 +44,11 @@ def __init__(self, rev_table, resource_classes, region_map='reeds_region',
NOTE: 'TRG_cap' can only be combined with categorical bins
region_map : str | pandas.DataFrame
Mapping of supply curve points to region to create classes for
sc_bins : int
Number of supply curve bins (clusters) to create for each
cap_bins : int
Number of equal capacity bins to create for each
region-class
cluster_kwargs : dict
kwargs for _cluster_sc_bins and underlying clustering method
sort_bins_by : str | list
Column(s) to sort by before capacity binning
filter : dict | NoneType
Column value pair(s) to filter on. If None don't filter
trg_by_region : bool
Expand All @@ -59,14 +57,16 @@ def __init__(self, rev_table, resource_classes, region_map='reeds_region',
rev_table = self._parse_table(rev_table)
if filter is not None:
for col, v in filter.items():
logger.debug('Subsetting reV table to {} in {}'
.format(v, col))
mask = rev_table[col] == v
rev_table = rev_table.loc[mask]

rev_table = self._map_region(rev_table, region_map)
rev_table = self._resource_classes(rev_table, resource_classes,
trg_by_region=trg_by_region)
self._rev_table = self._cluster_sc_bins(rev_table, sc_bins,
**cluster_kwargs)
self._rev_table = self._capacity_bins(rev_table, cap_bins,
sort_bins_by=sort_bins_by)
self._groups = self._rev_table.groupby(['region', 'class', 'bin'])
self._i = 0

Expand Down Expand Up @@ -377,16 +377,16 @@ def _TRG_bins(rev_table, trg_bins, by_region=False):
trg_classes['class'] = 1
for _, df in trg_classes.groupby('region'):
df = df.sort_values('mean_lcoe')
cum_sum = df['capacity'].cumsum()
df.loc[:, 'class'] = pd.cut(x=cum_sum, bins=cap_breaks,
cum_cap = df['capacity'].cumsum()
df.loc[:, 'class'] = pd.cut(x=cum_cap, bins=cap_breaks,
labels=labels)
classes.append(df)

trg_classes = pd.concat(classes)
else:
trg_classes = trg_classes.sort_values('mean_lcoe')
cum_sum = trg_classes['capacity'].cumsum()
trg_classes.loc[:, 'class'] = pd.cut(x=cum_sum, bins=cap_breaks,
cum_cap = trg_classes['capacity'].cumsum()
trg_classes.loc[:, 'class'] = pd.cut(x=cum_cap, bins=cap_breaks,
labels=labels)

rev_table = rev_table.merge(trg_classes[['sc_gid', 'class']],
Expand Down Expand Up @@ -546,53 +546,59 @@ def _resource_classes(rev_table, resource_classes, trg_by_region=False):
return rev_table

@staticmethod
def _cluster_sc_bins(rev_table, sc_bins, cluster_on='trans_cap_cost',
method='kmeans', norm=None, **kwargs):
def _capacity_bins(rev_table, cap_bins, sort_bins_by='trans_cap_cost'):
"""
Create classes in each region-class group using given clustering method
Create equal capacity bins in each region-class sorted by given
column(s)

Parameters
----------
rev_table : pandas.DataFrame
reV supply curve or aggregation table
sc_bins : int
Number of supply curve bins (clusters) to create for each
cap_bins : int
Number of equal capacity bins to create for each
region-class
cluster_on : str | list
Columns in rev_table to cluster on
method : str
Clustering method to use for creating classes
norm : str
Normalization method to use (see sklearn.preprocessing.normalize)
if None range normalize
kwargs : dict
kwargs for clustering method
sort_bins_by : str | list, optional
Column(s) to sort by before capacity binning,
by default 'trans_cap_cost'

Returns
-------
rev_table : pandas.DataFrame
Updated table with a capacity 'bin' column added
"""
c_func = getattr(ClusteringMethods, method)

if isinstance(cluster_on, str):
cluster_on = [cluster_on, ]
if not isinstance(sort_bins_by, list):
sort_bins_by = [sort_bins_by]

cols = ['sc_gid', 'capacity', 'region', 'class'] + sort_bins_by
capacity_bins = rev_table[cols].copy()

bins = []
capacity_bins['bin'] = 1
labels = list(range(1, cap_bins + 1))
for _, df in capacity_bins.groupby(['region', 'class']):
df = df.sort_values(sort_bins_by)
cum_cap = df['capacity'].cumsum()
bin_labels = pd.cut(x=cum_cap, bins=cap_bins, labels=labels)
unique_l = np.unique(bin_labels)
if len(unique_l) < (cap_bins / 2):
msg = ("Only {} bins were filled: {}"
.format(len(unique_l), unique_l))
warn(msg)
logger.warning(msg)

func = ClusteringMethods._normalize_values
data = func(rev_table[cluster_on].values, norm=norm)
labels = c_func(data, n_clusters=sc_bins,
**kwargs)
if np.min(labels) == 0:
labels = np.array(labels) + 1
df.loc[:, 'bin'] = bin_labels
bins.append(df)

rev_table['bin'] = labels
capacity_bins = pd.concat(bins)
rev_table = rev_table.merge(capacity_bins[['sc_gid', 'bin']],
on='sc_gid', how='left')

return rev_table

@classmethod
def create(cls, rev_table, resource_classes, region_map='reeds_region',
sc_bins=5, cluster_kwargs={'cluster_on': 'trans_cap_cost',
'method': 'kmeans', 'norm': None},
cap_bins=3, sort_bins_by='trans_cap_cost',
filter=None, trg_by_region=False):
"""
Identify ReEDS regions and classes and dump an updated table
Expand All @@ -613,11 +619,12 @@ def create(cls, rev_table, resource_classes, region_map='reeds_region',
NOTE: 'TRG_cap' can only be combined with categorical bins
region_map : str | pandas.DataFrame
Mapping of supply curve points to region to create classes for
sc_bins : int
Number of supply curve bins (clusters) to create for each
cap_bins : int
Number of equal capacity bins to create for each
region-class
cluster_kwargs : dict
kwargs for _cluster_classes
sort_bins_by : str | list, optional
Column(s) to sort by before capacity binning,
by default 'trans_cap_cost'
filter : dict | NoneType
Column value pair(s) to filter on. If None don't filter
trg_by_region : bool
Expand All @@ -638,7 +645,7 @@ def create(cls, rev_table, resource_classes, region_map='reeds_region',
AGG_TABLE_OUT_COLS.
"""
classes = cls(rev_table, resource_classes, region_map=region_map,
sc_bins=sc_bins, cluster_kwargs=cluster_kwargs,
cap_bins=cap_bins, sort_bins_by=sort_bins_by,
filter=filter, trg_by_region=trg_by_region)
out = (classes.table, classes.table_slim, classes.aggregate_table,
classes.aggregate_table_slim)
Expand Down
27 changes: 13 additions & 14 deletions reVX/reeds/reeds_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,9 +55,9 @@ def run_local(ctx, config):
rev_table=config.classify.rev_table,
resource_classes=config.classify.resource_classes,
regions=config.classify.regions,
sc_bins=config.classify.sc_bins,
cluster_on=config.classify.cluster_on,
filter=config. classify.filter)
cap_bins=config.classify.cap_bins,
sort_bins_by=config.classify.sort_bins_by,
filter=config.classify.filter)

if config.profiles is not None:
ctx.invoke(profiles,
Expand Down Expand Up @@ -158,15 +158,15 @@ def local(ctx, out_dir, log_dir, verbose):
"bins"))
@click.option('--regions', '-r', type=str, default='reeds_region',
help='Mapping of supply curve points to geographic region')
@click.option('--sc_bins', '-scb', type=int, default=3,
help=('Number of bins (clusters) to create for each '
@click.option('--cap_bins', '-cb', type=int, default=3,
help=('Number of capacity bins to create for each '
'region/resource bin combination'))
@click.option('--cluster_on', '-cl', type=str, default='trans_cap_cost',
help='Column(s) in rev_table to cluster on')
@click.option('--sort_bins_by', '-sb', type=str, default='trans_cap_cost',
help='Column(s) in rev_table to sort before binning')
@click.option('--filter', '-f', type=STR, default=None,
help='Column value pair(s) to filter on. If None do not filter')
@click.pass_context
def classify(ctx, rev_table, resource_classes, regions, sc_bins, cluster_on,
def classify(ctx, rev_table, resource_classes, regions, cap_bins, sort_bins_by,
filter):
"""
Extract ReEDS (region, bin, class) groups
Expand All @@ -176,13 +176,12 @@ def classify(ctx, rev_table, resource_classes, regions, sc_bins, cluster_on,

logger.info('Extracting ReEDS (region, bin, class) groups using '
'reV sc table {}'.format(rev_table))
kwargs = {'cluster_on': cluster_on, 'method': 'kmeans'}
if isinstance(filter, str):
filter = dict_str_load(filter)

out = ReedsClassifier.create(rev_table, resource_classes,
region_map=regions, sc_bins=sc_bins,
cluster_kwargs=kwargs,
region_map=regions, cap_bins=cap_bins,
sort_bins_by=sort_bins_by,
filter=filter)
table_full, table, agg_table_full, agg_table = out

Expand Down Expand Up @@ -355,12 +354,12 @@ def get_node_cmd(config):

if config.classify is not None:
args += ('classify -rt {rev_table} -rc {resource_classes} '
'-r {regions} -scb {sc_bins} -cl {cluster_on} -f {filter} '
'-r {regions} -cb {cap_bins} -sb {sort_bins_by} -f {filter} '
.format(rev_table=s(config.classify.rev_table),
resource_classes=s(config.classify.resource_classes),
regions=s(config.classify.regions),
sc_bins=s(config.classify.sc_bins),
cluster_on=s(config.classify.cluster_on),
cap_bins=s(config.classify.cap_bins),
sort_bins_by=s(config.classify.sort_bins_by),
filter=s(config.classify.filter)))

if config.profiles is not None:
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ NREL-PySAM==1.2.1
geopandas>=0.4
pyproj>=1.9
pywavelets>=1.0
rasterio>=1.0
rasterio>=1.1
scikit-learn==0.21.3
xarray>=0.12
dask>=2.8
Expand Down
Loading