diff --git a/powersimdata/design/investment/create_mapping_files.py b/powersimdata/design/investment/create_mapping_files.py index 08d8b2167..9c0857ea4 100644 --- a/powersimdata/design/investment/create_mapping_files.py +++ b/powersimdata/design/investment/create_mapping_files.py @@ -8,25 +8,23 @@ def sjoin_nearest(left_df, right_df, search_dist=0.06): - """ - Perform a spatial join between two input layers. - If a geometry in left_df falls outside (all) geometries in right_df, the data from - nearest Polygon will be used as a result. - To make queries faster, change "search_dist." + """Perform a spatial join between two input layers. + :param geopandas.GeoDataFrame left_df: A dataframe of Points. - :param geopandas.GeoDataFrame right_df: A dataframe of Polygons/Multipolygons - :param float/int search_dist: parameter (specified in map units) is used to limit - the search area for geometries around source points. Smaller -> faster runtime. - :return: (*geopandas.GeoDataFrame*) -- A dataframe of Points mapped to each polygon - in right_df. + :param geopandas.GeoDataFrame right_df: A dataframe of Polygons/Multipolygons. + :param float/int search_dist: radius (in map units) around point to detect polygons. + :return: (*geopandas.GeoDataFrame*) -- data frame of Points mapped to each Polygon. + + .. note:: data from nearest Polygon/Multipolygon will be used as a result if a + Point falls outside all available Polygon/Multipolygons. """ def _find_nearest(series, polygons, search_dist): - """Given a row with a bus id and a Point, find the closest polygon. + """Find the closest polygon. :param pandas.Series series: point to map. :param geopandas.geodataframe.GeoDataFrame polygons: polygons to select from. - :param float search_dist: radius around point to detect polygons in. + :param float search_dist: radius around point to detect polygons. 
""" geom = series[left_df.geometry.name] # Get geometries within search distance @@ -83,17 +81,16 @@ def _find_nearest(series, polygons, search_dist): def points_to_polys(df, name, shpfile, search_dist=0.04): - """Given a dataframe which includes 'lat' and 'lon' columns, and a shapefile of - Polygons/Multipolygon regions, map df.index to closest regions. - - :param pandas.DataFrame df: includes an index, and 'lat' and 'lon' columns. - :param str name: what to name the id (bus, plant, substation, etc) - :param str shpfile: name of shapefile containing a collection Polygon/Multipolygon - shapes with region IDs. - :param float/int search_dist: distance to search from point for nearest polygon. - :raises ValueError: if some points are dropped because too far away from polys. - :return: (*geopandas.GeoDataFrame*) -- - columns: index id, (point) geometry, [region, other properties of region] + """Map node to closest region. + + :param pandas.DataFrame df: data frame with node id as index and *'lat'* and + *'lon'* as columns. + :param str name: name of node, e.g., bus, plant, substation, etc. + :param str shpfile: shapefile enclosing Polygon/Multipolygon with region id. + :param float/int search_dist: radius around point to detect polygons. + :raises ValueError: if some points are dropped because too far away from polygons. + :return: (*geopandas.GeoDataFrame*) -- columns: id name, (point) geometry, + region and properties of region. """ gpd = _check_import("geopandas") polys = gpd.read_file(shpfile) @@ -123,7 +120,7 @@ def points_to_polys(df, name, shpfile, search_dist=0.04): err_msg = ( "Some points dropped because could not be mapped to regions. " "Check your lat/lon values to be sure it's in the US. " - f"Or increase search_dist if close. Problem ids: {dropped}" + f"Or increase search_dist. 
ids dropped: {dropped}" ) raise ValueError(err_msg) @@ -131,14 +128,11 @@ def points_to_polys(df, name, shpfile, search_dist=0.04): def bus_to_reeds_reg(df): - """Given a dataframe of buses, return a dataframe of bus_id's with associated - ReEDS regions (wind resource regions (rs) and BA regions (rb)). - Used to map regional generation investment cost multipliers. - region_map.csv is from: "/bokehpivot/in/reeds2/region_map.csv". - rs/rs.shp is created with :py:func:`write_poly_shapefile`. - - :param pandas.DataFrame df: grid bus dataframe. - :return: (*pandas.DataFrame*) -- bus_id map. columns: bus_id, rs, rb + """Map bus to ReEDS regions. + + :param pandas.DataFrame df: bus data frame. + :return: (*pandas.DataFrame*) -- index: bus id, columns: rs (wind resource region) + and rb (BA region). """ pts_poly = points_to_polys( df, "bus", const.reeds_wind_shapefile_path, search_dist=2 @@ -156,18 +150,15 @@ def bus_to_reeds_reg(df): def bus_to_neem_reg(df): - """Given a dataframe of buses, return a dataframe of bus_id's with associated - NEEM region, lat, and lon of bus. - Used to map regional transmission investment cost multipliers. - Shapefile used to map is 'data/NEEM/NEEMregions.shp' which is pulled from Energy - Zones `Mapping tool `_. This map is overly detailed, so I - simplified the shapes using 1 km distance (Douglas-Peucker) method in QGIS. - - :param pandas.DataFrame df: grid.bus instance. - :return: (*pandas.DataFrame*) -- bus_id map. - columns: bus_id, lat, lon, name_abbr (NEEM region) - - Note: mapping may take a while, especially for many points. + """Map bus to NEEM regions. + + :param pandas.DataFrame df: bus data frame. + :return: (*pandas.DataFrame*) -- index: bus id, columns: lat, lon, name_abbr + (NEEM region). + + .. note:: the shapefile used for mapping is pulled from the Energy Zones `Mapping + tool `_. This map is overly detailed, so the shapes are + simplified using 1 km distance (Douglas-Peucker) method in QGIS. 
""" pts_poly = points_to_polys(df, "bus", const.neem_shapefile_path, search_dist=1) @@ -184,11 +175,7 @@ def bus_to_neem_reg(df): def write_bus_neem_map(): - """ - Maps the bus locations from the base USA grid to NEEM regions. - Writes out csv with bus numbers, associated NEEM region, and lat/lon of bus - (to check if consistent with bus location in _calculate_ac_inv_costs). - """ + """Write bus location to NEEM region mapping to file""" base_grid = Grid(["USA"]) df_pts_bus = bus_to_neem_reg(base_grid.bus) df_pts_bus.sort_index(inplace=True) @@ -197,10 +184,7 @@ def write_bus_neem_map(): def write_bus_reeds_map(): - """ - Maps the bus locations from the base USA grid to ReEDS regions. - Writes out csv with bus numbers, associated ReEDS regions, and distances. - """ + """Write bus location to ReEDS region mapping to file.""" base_grid = Grid(["USA"]) df_pts_bus = bus_to_reeds_reg(base_grid.bus) df_pts_bus.sort_index(inplace=True) @@ -209,17 +193,10 @@ def write_bus_reeds_map(): def write_poly_shapefile(): - """ - Converts a ReEDS csv-format file to a shapefile. Shouldn't need to run again - unless new source data. - Right now, hard-coded read ReEDS wind resource regions (labelled rs). - gis_rs.csv is from ReEDS open-source: "/bokehpivot/in/gis_rs.csv" - hierarchy.csv is from: "/bokehpivot/in/reeds2/hierarchy.csv" - writes out the shapefile in "rs/rs.shp" - - Note: These ReEDS wind resource region shapes are approximate. Thus, there are - probably some mistakes, but this is currently only used for mapping plant - regional multipliers, which are approximate anyway, so it should be fine. + """Convert ReEDS wind resource csv-format file to a shapefile. + + .. note:: *gis_rs.csv* is from ReEDS open-source: */bokehpivot/in/gis_rs.csv*, + *hierarchy.csv* is from: */bokehpivot/in/reeds2/hierarchy.csv*. 
""" fiona = _check_import("fiona") shapely_geometry = _check_import("shapely.geometry") diff --git a/powersimdata/design/investment/investment_costs.py b/powersimdata/design/investment/investment_costs.py index e1cab962b..04a8044d0 100644 --- a/powersimdata/design/investment/investment_costs.py +++ b/powersimdata/design/investment/investment_costs.py @@ -14,16 +14,12 @@ def calculate_ac_inv_costs(scenario, sum_results=True, exclude_branches=None): - """Given a Scenario object, calculate the total cost of building that scenario's - upgrades of lines and transformers. - Currently uses NEEM regions to find regional multipliers. - Currently ignores financials, but all values are in 2010 $-year. - Need to test that there aren't any na values in regional multipliers - (some empty parts of table) + """Calculate cost of upgrading AC lines and/or transformers in a scenario. + NEEM regions are used to find regional multipliers. :param powersimdata.scenario.scenario.Scenario scenario: scenario instance. - :param boolean sum_results: if True, sum dataframe for each category. - :return: (*dict*) -- Total costs (line costs, transformer costs) (in $2010). + :param bool sum_results: sum data frame for each branch type. + :return: (*dict*) -- cost of upgrading branches in $2010. """ base_grid = Grid(scenario.info["interconnect"].split("_")) @@ -44,25 +40,24 @@ def calculate_ac_inv_costs(scenario, sum_results=True, exclude_branches=None): def _calculate_ac_inv_costs(grid_new, sum_results=True): - """Given a grid, calculate the total cost of building that grid's - lines and transformers. - This function is separate from calculate_ac_inv_costs() for testing purposes. - Currently counts Transformer and TransformerWinding as transformers. - Currently uses NEEM regions to find regional multipliers. + """Calculate cost of upgrading AC lines and/or transformers. NEEM regions are + used to find regional multipliers. Note that a transformer winding is considered + as a transformer. 
:param powersimdata.input.grid.Grid grid_new: grid instance. - :param boolean sum_results: if True, sum dataframe for each category. - :return: (*dict*) -- Total costs (line costs, transformer costs). + :param bool sum_results: sum data frame for each branch type. + :return: (*dict*) -- cost of upgrading branches in $2010. """ def select_mw(x, cost_df): - """Given a single branch, determine the closest kV/MW combination and return - the corresponding cost $/MW-mi. - - :param pandas.core.series.Series x: data for a single branch - :param pandas.core.frame.DataFrame cost_df: DataFrame with kV, MW, cost columns - :return: (*pandas.core.series.Series*) -- series of ['MW', 'costMWmi'] to be - assigned to given branch + """Determine the closest kV/MW combination for a single branch and return + the corresponding cost (in $/MW-mi). + + :param pandas.Series x: data for a single branch + :param pandas.DataFrame cost_df: data frame with *'kV'*, *'MW'*, *'costMWmi'* + as columns + :return: (*pandas.Series*) -- series of [*'MW'*, *'costMWmi'*] to be assigned + to branch. """ # select corresponding cost table of selected kV @@ -75,9 +70,9 @@ def select_mw(x, cost_df): def get_transformer_mult(x, bus_reg, ac_reg_mult, xfmr_lookup_alerted=set()): """Determine the regional multiplier based on kV and power (closest). - :param pandas.core.series.Series x: data for a single transformer. - :param pandas.core.frame.DataFrame bus_reg: data frame with bus regions - :param pandas.core.frame.DataFrame ac_reg_mult: data frame with regional mults. + :param pandas.Series x: data for a single transformer. + :param pandas.DataFrame bus_reg: data frame with bus regions. + :param pandas.DataFrame ac_reg_mult: data frame with regional multipliers. :param set xfmr_lookup_alerted: set of (voltage, region) tuples for which a message has already been printed that this lookup was not found. :return: (*float*) -- regional multiplier. 
@@ -140,8 +135,8 @@ def get_transformer_mult(x, bus_reg, ac_reg_mult, xfmr_lookup_alerted=set()): lines[["MW", "costMWmi"]] = lines.apply(lambda x: select_mw(x, ac_cost), axis=1) # check that all buses included in this file and lat/long values match, - # otherwise re-run mapping script on mis-matching buses. - # these buses are missing in region file + # otherwise re-run mapping script on mis-matching buses. These buses are missing + # in region file bus_fix_index = bus[~bus.index.isin(bus_reg.index)].index bus_mask = bus[~bus.index.isin(bus_fix_index)] bus_mask = bus_mask.merge(bus_reg, how="left", on="bus_id") @@ -213,12 +208,11 @@ def get_transformer_mult(x, bus_reg, ac_reg_mult, xfmr_lookup_alerted=set()): def calculate_dc_inv_costs(scenario, sum_results=True): - """Given a Scenario object, calculate the total cost of that grid's dc line - investment. Currently ignores financials, but all values are in 2015 $-year. + """Calculate cost of upgrading HVDC lines in a scenario. :param powersimdata.scenario.scenario.Scenario scenario: scenario instance. - :param boolean sum_results: if True, sum Series to return float. - :return: (*pandas.Series/float*) -- [Summed] dc line costs. + :param bool sum_results: sum series to return total cost. + :return: (*pandas.Series/float*) -- cost of upgrading HVDC lines in $2015. """ base_grid = Grid(scenario.info["interconnect"].split("_")) grid = scenario.state.get_grid() @@ -235,22 +229,20 @@ def calculate_dc_inv_costs(scenario, sum_results=True): def _calculate_dc_inv_costs(grid_new, sum_results=True): - """Given a grid, calculate the total cost of that grid's dc line investment. - This function is separate from calculate_dc_inv_costs() for testing purposes. + """Calculate cost of upgrading HVDC lines. :param powersimdata.input.grid.Grid grid_new: grid instance. - :param boolean sum_results: if True, sum Series to return float. - :return: (*pandas.Series/float*) -- [Summed] dc line costs. 
+ :param bool sum_results: sum series to return total cost. + :return: (*pandas.Series/float*) -- cost of upgrading HVDC lines in $2015. """ def _calculate_single_line_cost(line, bus): - """Given a series representing a DC line upgrade/addition, and a dataframe of - bus locations, calculate this line's upgrade cost. + """Calculate cost of upgrading a single HVDC line. - :param pandas.Series line: DC line series featuring: - {"from_bus_id", "to_bus_id", "Pmax"}. - :param pandas.Dataframe bus: Bus data frame featuring {"lat", "lon"}. - :return: (*float*) -- DC line upgrade cost (in $2015). + :param pandas.Series line: HVDC line series featuring *'from_bus_id'*, + *'to_bus_id'* and *'Pmax'*. + :param pandas.DataFrame bus: bus data frame featuring *'lat'*, *'lon'*. + :return: (*float*) -- HVDC line upgrade cost in $2015. """ # Calculate distance from_lat = bus.loc[line.from_bus_id, "lat"] @@ -280,20 +272,19 @@ def _calculate_single_line_cost(line, bus): def calculate_gen_inv_costs(scenario, year, cost_case, sum_results=True): - """Given a Scenario object, calculate the total cost of building that scenario's - upgrades of generation. - Currently only uses one (arbutrary) sub-technology. Drops the rest of the costs. - Will want to fix for wind/solar (based on resource supply curves). - Currently uses ReEDS regions to find regional multipliers. + """Calculate cost of upgrading generators in a scenario. ReEDS regions are used to + find regional multipliers. :param powersimdata.scenario.scenario.Scenario scenario: scenario instance. - :param int/str year: year of builds. - :param str cost_case: the ATB cost case of data: - 'Moderate': mid cost case, - 'Conservative': generally higher costs, - 'Advanced': generally lower costs - :return: (*pandas.DataFrame*) -- Total generation investment cost summed by + :param int/str year: building year. + :param str cost_case: ATB cost case of data. 
*'Moderate'*: mid cost case, + *'Conservative'*: generally higher costs, *'Advanced'*: generally lower costs. + :return: (*pandas.DataFrame*) -- total generation investment cost summed by technology. + + .. todo:: it currently uses one (arbitrary) sub-technology. The rest of the costs + are dropped. Wind and solar will need to be fixed based on the resource supply + curves. """ base_grid = Grid(scenario.info["interconnect"].split("_")) @@ -322,38 +313,33 @@ def calculate_gen_inv_costs(scenario, year, cost_case, sum_results=True): def _calculate_gen_inv_costs(grid_new, year, cost_case, sum_results=True): - """Given a grid, calculate the total cost of building that generation investment. - Computes total capital cost as CAPEX_total = - CAPEX ($/MW) * Pmax (MW) * reg_cap_cost_mult (regional cost multiplier) - This function is separate from calculate_gen_inv_costs() for testing purposes. - Currently only uses one (arbutrary) sub-technology. Drops the rest of the costs. - Will want to fix for wind/solar (based on resource supply curves). - Currently uses ReEDS regions to find regional multipliers. + """Calculate cost of upgrading generators. ReEDS regions are used to find + regional multipliers. :param powersimdata.input.grid.Grid grid_new: grid instance. - :param int/str year: year of builds (used in financials). - :param str cost_case: the ATB cost case of data: - 'Moderate': mid cost case - 'Conservative': generally higher costs - 'Advanced': generally lower costs + :param int/str year: year of builds. + :param str cost_case: ATB cost case of data. *'Moderate'*: mid cost case, + *'Conservative'*: generally higher costs, *'Advanced'*: generally lower costs. :raises ValueError: if year not 2020 - 2050, or cost case not an allowed option. - :raises TypeError: if year gets the wrong type, or if cost_case is not str. + :raises TypeError: if year not int/str or cost_case not str. 
+ :return: (*pandas.Series*) -- total generation investment cost, summed by + technology. + + .. note:: the function computes the total capital cost as: + CAPEX_total = CAPEX ($/MW) * Pmax (MW) * regional multiplier """ def load_cost(year, cost_case): - """ - Load in base costs from NREL's 2020 ATB for generation technologies (CAPEX). - Can be adapted in the future for FOM, VOM, & CAPEX. - This data is pulled from the ATB xlsx file Summary pages (saved as csv's). - Therefore, currently uses default financials, but will want to create custom - financial functions in the future. + """Load in base costs from NREL's 2020 ATB for generation technologies (CAPEX). :param int/str year: year of cost projections. - :param str cost_case: the ATB cost case of data - (see :py:func:`write_poly_shapefile` for details). - :return: (*pandas.DataFrame*) -- Cost by technology/subtype (in $2018). + :param str cost_case: ATB cost case, see :py:func:`calculate_gen_inv_costs`. + :return: (*pandas.DataFrame*) -- cost by technology/subtype in $2018. + + .. todo:: it can be adapted in the future for FOM, VOM, & CAPEX. This data is + pulled from the ATB xlsx file summary pages. Therefore, it currently uses + default financials, but will want to create custom financial functions in + the future. """ cost = pd.read_csv(const.gen_inv_cost_path) cost = cost.dropna(axis=0, how="all") diff --git a/powersimdata/scenario/analyze.py b/powersimdata/scenario/analyze.py index aa88c21e7..950896bf8 100644 --- a/powersimdata/scenario/analyze.py +++ b/powersimdata/scenario/analyze.py @@ -88,10 +88,10 @@ def print_scenario_info(self): print("%s: %s" % (key, val)) def _parse_infeasibilities(self): - """Parses infeasibilities. When the optimizer cannot find a solution in - a time interval, the remedy is to decrease demand by some amount - until a solution is found. The purpose of this function is to get - the interval number(s) and the associated decrease(s). + """Parses infeasibilities. 
When the optimizer cannot find a solution in a time + interval, the remedy is to decrease demand by some amount until a solution is + found. The purpose of this function is to get the interval number(s) and the + associated decrease(s). :return: (*dict*) -- keys are the interval number and the values are the decrease in percent (%) applied to the original demand profile. diff --git a/powersimdata/scenario/create.py b/powersimdata/scenario/create.py index 924aa267f..19d8573fb 100644 --- a/powersimdata/scenario/create.py +++ b/powersimdata/scenario/create.py @@ -175,7 +175,7 @@ def print_scenario_info(self): print("%s: %s" % (key, val)) def set_builder(self, *args, **kwargs): - """Alias to set_grid.""" + """Alias to :func:`~powersimdata.scenario.create.Create.set_grid`.""" warnings.warn( "set_builder is deprecated, use set_grid instead", DeprecationWarning ) @@ -327,7 +327,7 @@ def set_name(self, plan_name, scenario_name): def set_time(self, start_date, end_date, interval): """Sets scenario start and end dates as well as the interval that will - be used to split the date range. + be used to split the date range. :param str start_date: start date. :param str end_date: start date.