From a88c4d76ea894b5b12467a0e4fad2fbd23290b86 Mon Sep 17 00:00:00 2001
From: Lane Smith <lane.smith@breakthroughenergy.org>
Date: Thu, 25 Mar 2021 19:10:53 -0700
Subject: [PATCH 1/3] refactor: make cost curve visualization variable names
 more descriptive

---
 powersimdata/design/generation/cost_curves.py | 277 ++++++++++--------
 .../generation/tests/test_cost_curves.py      |  38 +--
 2 files changed, 174 insertions(+), 141 deletions(-)

diff --git a/powersimdata/design/generation/cost_curves.py b/powersimdata/design/generation/cost_curves.py
index 69207dcef..f1511f269 100644
--- a/powersimdata/design/generation/cost_curves.py
+++ b/powersimdata/design/generation/cost_curves.py
@@ -33,9 +33,9 @@ def linearize_gencost(input_grid, num_segments=1):
         raise ValueError("gencost currently limited to quadratic")
 
     # Access the quadratic cost curve information
-    old_a = gencost_before.c2
-    old_b = gencost_before.c1
-    old_c = gencost_before.c0
+    quad_term = gencost_before.c2
+    lin_term = gencost_before.c1
+    const_term = gencost_before.c0
 
     # Convert dispatchable generators to piecewise segments
     dispatchable_gens = plant.Pmin != plant.Pmax
@@ -51,12 +51,18 @@ def linearize_gencost(input_grid, num_segments=1):
         gencost_after.loc[dispatchable_gens, "n"] = num_segments + 1
         power_step = (plant.Pmax - plant.Pmin) / num_segments
         for i in range(num_segments + 1):
-            x_label = "p" + str(i + 1)
-            y_label = "f" + str(i + 1)
-            x_data = plant.Pmin + power_step * i
-            y_data = old_a * x_data ** 2 + old_b * x_data + old_c
-            gencost_after.loc[dispatchable_gens, x_label] = x_data[dispatchable_gens]
-            gencost_after.loc[dispatchable_gens, y_label] = y_data[dispatchable_gens]
+            capacity_label = "p" + str(i + 1)
+            price_label = "f" + str(i + 1)
+            capacity_data = plant.Pmin + power_step * i
+            price_data = (
+                quad_term * capacity_data ** 2 + lin_term * capacity_data + const_term
+            )
+            gencost_after.loc[dispatchable_gens, capacity_label] = capacity_data[
+                dispatchable_gens
+            ]
+            gencost_after.loc[dispatchable_gens, price_label] = price_data[
+                dispatchable_gens
+            ]
     else:
         grid.gencost["after"] = gencost_before.copy()
 
@@ -70,9 +76,9 @@ def linearize_gencost(input_grid, num_segments=1):
             nondispatchable_gens, "n"
         ]
         power = plant.Pmax
-        y_data = old_a * power ** 2 + old_b * power + old_c
+        price_data = quad_term * power ** 2 + lin_term * power + const_term
         gencost_after.loc[nondispatchable_gens, ["c2", "c1"]] = 0
-        gencost_after.loc[nondispatchable_gens, "c0"] = y_data[nondispatchable_gens]
+        gencost_after.loc[nondispatchable_gens, "c0"] = price_data[nondispatchable_gens]
 
     gencost_after["interconnect"] = gencost_before["interconnect"]
 
@@ -139,12 +145,13 @@ def get_supply_data(grid, num_segments=1, save=None):
     return supply_df
 
 
-def check_supply_data(data, num_segments=1):
+def check_supply_data(supply_data, num_segments=1):
     """Checks to make sure that the input supply data is a DataFrame and has the
     correct columns. This is especially needed for checking instances where the input
     supply data is not the DataFrame returned from get_supply_data().
 
-    :param pandas.DataFrame data: DataFrame containing the supply curve information.
+    :param pandas.DataFrame supply_data: DataFrame containing the supply curve
+        information.
     :param int num_segments: The number of segments into which the piecewise linear
         cost curve will be split.
     :raises TypeError: if the input supply data is not a pandas.DataFrame.
@@ -153,8 +160,8 @@ def check_supply_data(data, num_segments=1):
     """
 
     # Check that the data is input as a DataFrame
-    if not isinstance(data, pd.DataFrame):
-        raise TypeError("Supply data must be input as a DataFrame.")
+    if not isinstance(supply_data, pd.DataFrame):
+        raise TypeError("supply_data must be input as a DataFrame.")
 
     # Mandatory columns to be contained in the DataFrame
     mand_cols = {
@@ -174,7 +181,7 @@ def check_supply_data(data, num_segments=1):
             mand_cols.update(["p_diff" + str(i), "slope" + str(i)])
 
     # Make sure all of the mandatory columns are contained in the input DataFrame
-    miss_cols = mand_cols - set(data.columns)
+    miss_cols = mand_cols - set(supply_data.columns)
     if len(miss_cols) > 0:
         raise ValueError(f'Missing columns: {", ".join(miss_cols)}')
 
@@ -211,60 +218,62 @@ def build_supply_curve(grid, num_segments, area, gen_type, area_type=None, plot=
         )
 
     # Obtain the desired generator cost and plant information data
-    data = get_supply_data(grid, num_segments)
+    supply_data = get_supply_data(grid, num_segments)
 
     # Check the input supply data
-    check_supply_data(data, num_segments)
+    check_supply_data(supply_data, num_segments)
 
     # Check to make sure the generator type is valid
-    if gen_type not in data["type"].unique():
+    if gen_type not in supply_data["type"].unique():
         raise ValueError(f"{gen_type} is not a valid generation type.")
 
     # Identify the load zones that correspond to the specified area and area_type
     returned_zones = area_to_loadzone(grid.get_grid_model(), area, area_type)
 
     # Trim the DataFrame to only be of the desired area and generation type
-    data = data.loc[data.zone_name.isin(returned_zones)]
-    data = data.loc[data["type"] == gen_type]
+    supply_data = supply_data.loc[supply_data.zone_name.isin(returned_zones)]
+    supply_data = supply_data.loc[supply_data["type"] == gen_type]
 
     # Remove generators that have no capacity (e.g., Maine coal generators)
-    if data["slope1"].isnull().values.any():
-        data.dropna(subset=["slope1"], inplace=True)
+    if supply_data["slope1"].isnull().values.any():
+        supply_data.dropna(subset=["slope1"], inplace=True)
 
     # Check if the area contains generators of the specified type
-    if data.empty:
+    if supply_data.empty:
         return [], []
 
     # Combine the p_diff and slope information for each cost segment
-    df_cols = []
+    supply_df_cols = []
     for i in range(num_segments):
-        df_cols.append(data.loc[:, ("p_diff" + str(i + 1), "slope" + str(i + 1))])
-        df_cols[i].rename(
+        supply_df_cols.append(
+            supply_data.loc[:, ("p_diff" + str(i + 1), "slope" + str(i + 1))]
+        )
+        supply_df_cols[i].rename(
             columns={"p_diff" + str(i + 1): "p_diff", "slope" + str(i + 1): "slope"},
             inplace=True,
         )
-    df = pd.concat(df_cols, axis=0)
+    supply_df = pd.concat(supply_df_cols, axis=0)
 
     # Sort the trimmed DataFrame by slope
-    df = df.sort_values(by="slope")
-    df = df.reset_index(drop=True)
+    supply_df = supply_df.sort_values(by="slope")
+    supply_df = supply_df.reset_index(drop=True)
 
     # Determine the points that comprise the supply curve
-    P = []  # noqa: N806
-    F = []  # noqa: N806
-    p_diff_sum = 0
-    for i in df.index:
-        P.append(p_diff_sum)
-        F.append(df["slope"][i])
-        P.append(df["p_diff"][i] + p_diff_sum)
-        F.append(df["slope"][i])
-        p_diff_sum += df["p_diff"][i]
+    capacity_data = []
+    price_data = []
+    capacity_diff_sum = 0
+    for i in supply_df.index:
+        capacity_data.append(capacity_diff_sum)
+        price_data.append(supply_df["slope"][i])
+        capacity_data.append(supply_df["p_diff"][i] + capacity_diff_sum)
+        price_data.append(supply_df["slope"][i])
+        capacity_diff_sum += supply_df["p_diff"][i]
 
     # Plot the curve
     if plot:
         plt = _check_import("matplotlib.pyplot")
         plt.figure(figsize=[20, 10])
-        plt.plot(P, F)
+        plt.plot(capacity_data, price_data)
         plt.title(f"Supply curve for {gen_type} generators in {area}", fontsize=20)
         plt.xlabel("Capacity (MW)", fontsize=20)
         plt.ylabel("Price ($/MW)", fontsize=20)
@@ -273,41 +282,49 @@ def build_supply_curve(grid, num_segments, area, gen_type, area_type=None, plot=
         plt.show()
 
     # Return the capacity and bid amounts
-    return P, F
+    return capacity_data, price_data
 
 
-def lower_bound_index(x, l):
+def lower_bound_index(desired_capacity, capacity_data):
     """Determines the index of the lower capacity value that defines a price segment.
     Useful for accessing the prices associated with capacity values that aren't
     explicitly stated in the capacity lists that are generated by the
     build_supply_curve() function. Needed for ks_test().
 
-    :param float/int x: Capacity value for which you want to determine the index of the
-        lowest capacity value in a price segment.
-    :param list l: List of capacity values used to generate a supply curve.
+    :param float/int desired_capacity: Capacity value for which you want to determine
+        the index of the lowest capacity value in a price segment.
+    :param list capacity_data: List of capacity values used to generate a supply curve.
     :return: (*int*) -- Index of a price segment's capacity lower bound.
     """
 
     # Check that the list is not empty and that the capacity falls within the list range
-    if not l or l[0] > x:
+    if not capacity_data or capacity_data[0] > desired_capacity:
         return None
 
-    # Get the index of the value that is immediately less than the provided capacity
-    for i, j in enumerate(l):
-        if j > x:
+    # Get the index of the last capacity that does not exceed the desired capacity
+    for i, j in enumerate(capacity_data):
+        if j > desired_capacity:
             return i - 1
 
 
-def ks_test(P1, F1, P2, F2, area=None, gen_type=None, plot=True):  # noqa: N803
+def ks_test(
+    capacity_data1,
+    price_data1,
+    capacity_data2,
+    price_data2,
+    area=None,
+    gen_type=None,
+    plot=True,
+):
     """Runs a test that is similar to the Kolmogorov-Smirnov test. This function takes
     two supply curves as inputs and returns the greatest difference in price between
     the two supply curves. This function requires that the supply curves offer the same
     amount of capacity.
 
-    :param list P1: List of capacity values for the first supply curve.
-    :param list F1: List of price values for the first supply curve.
-    :param list P2: List of capacity values for the second supply curve.
-    :param list F2: List of price values for the second supply curve.
+    :param list capacity_data1: List of capacity values for the first supply curve.
+    :param list price_data1: List of price values for the first supply curve.
+    :param list capacity_data2: List of capacity values for the second supply curve.
+    :param list price_data2: List of price values for the second supply curve.
     :param str area: Either the load zone, state name, state abbreviation, or
         interconnect. Defaults to None because it's not essential.
     :param str gen_type: Generation type. Defaults to None because it's not essential.
@@ -319,49 +336,55 @@ def ks_test(P1, F1, P2, F2, area=None, gen_type=None, plot=True):  # noqa: N803
     """
 
     # Check that input capacities and prices are provided as lists
-    if not all(isinstance(i, list) for i in [P1, F1, P2, F2]):
-        raise TypeError("P1, F1, P2, and F2 must be input as lists.")
+    if not all(
+        isinstance(i, list)
+        for i in [capacity_data1, price_data1, capacity_data2, price_data2]
+    ):
+        raise TypeError("Supply curve data must be input as lists.")
 
     # Check that the supply curves offer the same amount of capacity
-    if max(P1) != max(P2):
+    if max(capacity_data1) != max(capacity_data2):
         raise ValueError(
             "The two supply curves do not offer the same amount of capacity (MW)."
         )
 
     # Create a list that has every capacity value in which either supply curve steps up
-    P_all = list(set(P1) | set(P2))  # noqa: N806
-    P_all.sort()
+    capacity_data_all = list(set(capacity_data1) | set(capacity_data2))
+    capacity_data_all.sort()
 
     # For each capacity value, associate the two corresponding price values
-    F_all = []  # noqa: N806
-    for i in range(len(P_all)):
+    price_data_all = []
+    for i in range(len(capacity_data_all)):
         # Determine the correpsonding price from the first supply curve
-        if P_all[i] == P1[-1]:
-            f1 = F1[-1]
+        if capacity_data_all[i] == capacity_data1[-1]:
+            f1 = price_data1[-1]
         else:
-            f1 = F1[lower_bound_index(P_all[i], P1)]
+            f1 = price_data1[lower_bound_index(capacity_data_all[i], capacity_data1)]
 
         # Determine the correpsonding price from the second supply curve
-        if P_all[i] == P2[-1]:
-            f2 = F2[-1]
+        if capacity_data_all[i] == capacity_data2[-1]:
+            f2 = price_data2[-1]
         else:
-            f2 = F2[lower_bound_index(P_all[i], P2)]
+            f2 = price_data2[lower_bound_index(capacity_data_all[i], capacity_data2)]
 
         # Pair the two price values
-        F_all.append([f1, f2])
+        price_data_all.append([f1, f2])
 
     # Determine the price differences for each capacity value
-    F_diff = [abs(F_all[i][0] - F_all[i][1]) for i in range(len(F_all))]  # noqa: N806
+    price_data_diff = [
+        abs(price_data_all[i][0] - price_data_all[i][1])
+        for i in range(len(price_data_all))
+    ]
 
     # Determine the maximum price difference
-    max_diff = max(F_diff)
+    max_diff = max(price_data_diff)
 
     # Plot the two supply curves overlaid
     if plot:
         plt = _check_import("matplotlib.pyplot")
         plt.figure(figsize=[20, 10])
-        plt.plot(P1, F1)
-        plt.plot(P2, F2)
+        plt.plot(capacity_data1, price_data1)
+        plt.plot(capacity_data2, price_data2)
         if None in {area, gen_type}:
             plt.title("Supply Curve Comparison", fontsize=20)
         else:
@@ -379,7 +402,7 @@ def ks_test(P1, F1, P2, F2, area=None, gen_type=None, plot=True):  # noqa: N803
     return max_diff
 
 
-def plot_c1_vs_c2(
+def plot_linear_vs_quadratic_terms(
     grid,
     area,
     gen_type,
@@ -389,7 +412,8 @@ def plot_c1_vs_c2(
     num_sd=3,
     alpha=0.1,
 ):
-    """Compares the c1 and c2 parameters from the quadratic generator cost curves.
+    """Compares the linear (c1) and quadratic (c2) parameters from the quadratic
+    generator cost curves.
 
     :param powersimdata.input.grid.Grid grid: Grid object.
     :param str area: Either the load zone, state name, state abbreviation, or
@@ -398,15 +422,16 @@ def plot_c1_vs_c2(
     :param str area_type: one of: *'loadzone'*, *'state'*, *'state_abbr'*,
         *'interconnect'*. Defaults to None, which allows
         :func:`powersimdata.network.model.area_to_loadzone` to infer the type.
-    :param bool plot: If True, the c1 vs. c2 plot is shown. If False, the plot is not
-        shown.
-    :param bool zoom: If True, filters out c2 outliers to enable better visualization.
-        If False, there is no filtering.
-    :param float/int num_sd: The number of standard deviations used to filter out c2
-        outliers.
+    :param bool plot: If True, the linear term vs. quadratic term plot is shown. If
+        False, the plot is not shown.
+    :param bool zoom: If True, filters out quadratic term outliers to enable better
+        visualization. If False, there is no filtering.
+    :param float/int num_sd: The number of standard deviations used to filter out
+        quadratic term outliers.
     :param float alpha: The alpha blending value for the scatter plot; takes values
         between 0 (transparent) and 1 (opaque).
-    :return: (*None*) -- The c1 vs. c2 plot is displayed according to the user.
+    :return: (*None*) -- The linear term vs. quadratic term plot is displayed only
+        if plot is True.
     :raises TypeError: if a powersimdata.input.grid.Grid object is not input.
     :raises ValueError: if the specified area or generator type is not applicable.
     """
@@ -425,7 +450,7 @@ def plot_c1_vs_c2(
     plant_df = grid.plant
 
     # Create a new DataFrame with the desired columns
-    data = pd.concat(
+    supply_data = pd.concat(
         [
             plant_df[["type", "interconnect", "zone_name", "Pmin", "Pmax"]],
             gencost_df[
@@ -438,37 +463,37 @@ def plot_c1_vs_c2(
     )
 
     # Check to make sure the generator type is valid
-    if gen_type not in data["type"].unique():
+    if gen_type not in supply_data["type"].unique():
         raise ValueError(f"{gen_type} is not a valid generation type.")
 
     # Identify the load zones that correspond to the specified area and area_type
     returned_zones = area_to_loadzone(grid.get_grid_model(), area, area_type)
 
     # Trim the DataFrame to only be of the desired area and generation type
-    data = data.loc[data.zone_name.isin(returned_zones)]
-    data = data.loc[data["type"] == gen_type]
+    supply_data = supply_data.loc[supply_data.zone_name.isin(returned_zones)]
+    supply_data = supply_data.loc[supply_data["type"] == gen_type]
 
     # Remove generators that have no capacity (e.g., Maine coal generators)
-    data = data[data["Pmin"] != data["Pmax"]]
+    supply_data = supply_data[supply_data["Pmin"] != supply_data["Pmax"]]
 
     # Check if the area contains generators of the specified type
-    if data.empty:
+    if supply_data.empty:
         return
 
     # Filters out large c2 outlier values so the overall trend can be better visualized
     zoom_name = ""
     if zoom:
         # Drop values outside a specified number of standard deviations of c2
-        sd_c2 = np.std(data["c2"])
-        mean_c2 = np.mean(data["c2"])
-        cutoff = mean_c2 + num_sd * sd_c2
-        if len(data[data["c2"] > cutoff]) > 0:
+        quad_term_sd = np.std(supply_data["c2"])
+        quad_term_mean = np.mean(supply_data["c2"])
+        cutoff = quad_term_mean + num_sd * quad_term_sd
+        if len(supply_data[supply_data["c2"] > cutoff]) > 0:
             zoom = True
-            data = data[data["c2"] <= cutoff]
-            max_ylim = np.max(data["c2"] + 0.01)
-            min_ylim = np.min(data["c2"] - 0.01)
-            max_xlim = np.max(data["c1"] + 1)
-            min_xlim = np.min(data["c1"] - 1)
+            supply_data = supply_data[supply_data["c2"] <= cutoff]
+            max_ylim = np.max(supply_data["c2"] + 0.01)
+            min_ylim = np.min(supply_data["c2"] - 0.01)
+            max_xlim = np.max(supply_data["c1"] + 1)
+            min_xlim = np.min(supply_data["c1"] - 1)
             zoom_name = "(zoomed)"
         else:
             zoom = False
@@ -478,22 +503,24 @@ def plot_c1_vs_c2(
         fig, ax = plt.subplots()
         fig.set_size_inches(20, 10)
         plt.scatter(
-            data["c1"],
-            data["c2"],
-            s=np.sqrt(data["Pmax"]) * 10,
+            supply_data["c1"],
+            supply_data["c2"],
+            s=np.sqrt(supply_data["Pmax"]) * 10,
             alpha=alpha,
-            c=data["Pmax"],
+            c=supply_data["Pmax"],
             cmap="plasma",
         )
         plt.grid()
         plt.title(
-            f"c1 vs. c2 for {gen_type} generators in {area} {zoom_name}", fontsize=20
+            f"Linear term vs. Quadratic term for {gen_type} generator cost curves in "
+            + f"{area} {zoom_name}",
+            fontsize=20,
         )
         if zoom:
             plt.ylim([min_ylim, max_ylim])
             plt.xlim([min_xlim, max_xlim])
-        plt.xlabel("c1", fontsize=20)
-        plt.ylabel("c2", fontsize=20)
+        plt.xlabel("Linear Term", fontsize=20)
+        plt.ylabel("Quadratic Term", fontsize=20)
         plt.xticks(fontsize=20)
         plt.yticks(fontsize=20)
         cbar = plt.colorbar()
@@ -537,51 +564,55 @@ def plot_capacity_vs_price(
         )
 
     # Obtain the desired generator cost and plant information data
-    data = get_supply_data(grid, num_segments)
+    supply_data = get_supply_data(grid, num_segments)
 
     # Check the input supply data
-    check_supply_data(data, num_segments)
+    check_supply_data(supply_data, num_segments)
 
     # Check to make sure the generator type is valid
-    if gen_type not in data["type"].unique():
+    if gen_type not in supply_data["type"].unique():
         raise ValueError(f"{gen_type} is not a valid generation type.")
 
     # Identify the load zones that correspond to the specified area and area_type
     returned_zones = area_to_loadzone(grid.get_grid_model(), area, area_type)
 
     # Trim the DataFrame to only be of the desired area and generation type
-    data = data.loc[data.zone_name.isin(returned_zones)]
-    data = data.loc[data["type"] == gen_type]
+    supply_data = supply_data.loc[supply_data.zone_name.isin(returned_zones)]
+    supply_data = supply_data.loc[supply_data["type"] == gen_type]
 
     # Remove generators that have no capacity (e.g., Maine coal generators)
-    if data["slope1"].isnull().values.any():
-        data.dropna(subset=["slope1"], inplace=True)
+    if supply_data["slope1"].isnull().values.any():
+        supply_data.dropna(subset=["slope1"], inplace=True)
 
     # Check if the area contains generators of the specified type
-    if data.empty:
+    if supply_data.empty:
         return
 
     # Combine the p_diff and slope information for each cost segment
-    df_cols = []
+    supply_df_cols = []
     for i in range(num_segments):
-        df_cols.append(data.loc[:, ("p_diff" + str(i + 1), "slope" + str(i + 1))])
-        df_cols[i].rename(
+        supply_df_cols.append(
+            supply_data.loc[:, ("p_diff" + str(i + 1), "slope" + str(i + 1))]
+        )
+        supply_df_cols[i].rename(
             columns={"p_diff" + str(i + 1): "p_diff", "slope" + str(i + 1): "slope"},
             inplace=True,
         )
-    df = pd.concat(df_cols, axis=0)
-    df = df.reset_index(drop=True)
+    supply_df = pd.concat(supply_df_cols, axis=0)
+    supply_df = supply_df.reset_index(drop=True)
 
-    # Determine the average
-    total_cap = df["p_diff"].sum()
-    if total_cap == 0:
-        data_avg = 0
+    # Determine the average price
+    total_capacity = supply_df["p_diff"].sum()
+    if total_capacity == 0:
+        average_price = 0
     else:
-        data_avg = (df["slope"] * df["p_diff"]).sum() / total_cap
+        average_price = (
+            supply_df["slope"] * supply_df["p_diff"]
+        ).sum() / total_capacity
 
     # Plot the comparison
     if plot:
-        ax = df.plot.scatter(
+        ax = supply_df.plot.scatter(
             x="p_diff", y="slope", s=50, figsize=[20, 10], grid=True, fontsize=20
         )
         plt.title(
@@ -589,5 +620,5 @@ def plot_capacity_vs_price(
         )
         plt.xlabel("Segment Capacity (MW)", fontsize=20)
         plt.ylabel("Segment Price ($/MW)", fontsize=20)
-        ax.plot(df["p_diff"], [data_avg] * len(df.index), c="red")
+        ax.plot(supply_df["p_diff"], [average_price] * len(supply_df.index), c="red")
         plt.show()
diff --git a/powersimdata/design/generation/tests/test_cost_curves.py b/powersimdata/design/generation/tests/test_cost_curves.py
index ceea40c59..38dfec72f 100644
--- a/powersimdata/design/generation/tests/test_cost_curves.py
+++ b/powersimdata/design/generation/tests/test_cost_curves.py
@@ -176,11 +176,11 @@ def test_get_supply_data():
 
 
 def test_build_supply_curve_1seg():
-    Ptest, Ftest = build_supply_curve(  # noqa: N806
+    capacity_test, price_test = build_supply_curve(
         grid, 1, "Colorado", "ng", "loadzone", plot=False
     )
-    Pexp = [0, 10, 10, 30, 30, 50, 50, 100, 100, 200]  # noqa: N806
-    Fexp = [  # noqa: N806
+    capacity_exp = [0, 10, 10, 30, 30, 50, 50, 100, 100, 200]
+    price_exp = [
         25.10,
         25.10,
         30.40,
@@ -192,16 +192,16 @@ def test_build_supply_curve_1seg():
         40.00,
         40.00,
     ]
-    assert all([Ptest[i] == Pexp[i] for i in range(len(Ptest))])
-    assert all([Ftest[i] == Fexp[i] for i in range(len(Ptest))])
+    assert all([capacity_test[i] == capacity_exp[i] for i in range(len(capacity_test))])
+    assert all([price_test[i] == price_exp[i] for i in range(len(capacity_test))])
 
 
 def test_build_supply_curve_2seg():
-    Ptest, Ftest = build_supply_curve(  # noqa: N806
+    capacity_test, price_test = build_supply_curve(
         grid, 2, "Utah", "coal", "loadzone", plot=False
     )
-    Pexp = [0, 10, 10, 20, 20, 45, 45, 70, 70, 120, 120, 170]  # noqa: N806
-    Fexp = [  # noqa: N806
+    capacity_exp = [0, 10, 10, 20, 20, 45, 45, 70, 70, 120, 120, 170]
+    price_exp = [
         30.100,
         30.100,
         30.300,
@@ -215,15 +215,15 @@ def test_build_supply_curve_2seg():
         42.500,
         42.500,
     ]
-    assert all([Ptest[i] == Pexp[i] for i in range(len(Ptest))])
-    assert all([Ftest[i] == Fexp[i] for i in range(len(Ptest))])
+    assert all([capacity_test[i] == capacity_exp[i] for i in range(len(capacity_test))])
+    assert all([price_test[i] == price_exp[i] for i in range(len(capacity_test))])
 
 
 def test_ks_test():
-    P1, F1 = build_supply_curve(  # noqa: N806
+    capacity_data1, price_data1 = build_supply_curve(
         grid, 1, "Washington", "coal", "loadzone", plot=False
     )
-    P2 = [  # noqa: N806
+    capacity_data2 = [
         0,
         15,
         15,
@@ -237,9 +237,9 @@ def test_ks_test():
         190,
         225,
         225,
-        max(P1),
+        max(capacity_data1),
     ]
-    F2 = [  # noqa: N806
+    price_data2 = [
         23.00,
         23.00,
         27.00,
@@ -255,14 +255,16 @@ def test_ks_test():
         38.00,
         38.00,
     ]
-    test_diff = ks_test(P1, F1, P2, F2, plot=False)
+    test_diff = ks_test(
+        capacity_data1, price_data1, capacity_data2, price_data2, plot=False
+    )
     exp_diff = 4.5
     assert test_diff == exp_diff
 
 
 def test_lower_bound_index():
-    x = 10
-    l = [0, 5, 5, 9, 9, 12, 12, 18]
-    ind_test = lower_bound_index(x, l)
+    desired_capacity = 10
+    capacity_data = [0, 5, 5, 9, 9, 12, 12, 18]
+    ind_test = lower_bound_index(desired_capacity, capacity_data)
     ind_exp = 4
     assert ind_test == ind_exp

From 79dd8c14e461f65757e3ba12fbaba8c85ae0d10a Mon Sep 17 00:00:00 2001
From: Lane Smith <lane.smith@breakthroughenergy.org>
Date: Thu, 25 Mar 2021 20:02:39 -0700
Subject: [PATCH 2/3] docs: update README to reflect cost curve visualization
 refactor

---
 README.md | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/README.md b/README.md
index afb608e34..ec7bf9abc 100644
--- a/README.md
+++ b/README.md
@@ -418,51 +418,51 @@ where `scenario` is a `Scenario` instance.
 #### I. Accessing and Saving Relevant Supply Information
 Analyzing generator supply and cost curves requires the proper generator cost and plant information to be accessed from a Grid object. This data can be accessed using the following:
 ```python
-supply_df = powersimdata.design.generation.cost_curves.get_supply_data(grid, num_segments)
-```
-where `grid` is a `Grid` object and `num_segments` is the number of linearized cost curve segments into which the provided quadratic cost curve should be split.
+from powersimdata.design.generation.cost_curves import get_supply_data
 
-The above returns a data frame that contains information about each generator's fuel type, quadratic cost curve, and linearized cost curve, as well as the interconnect and load zone to which the generator belongs. The above function can store the data frame as a CSV file if `save` is passed a valid file path and file name string in `get_supply_data`; by default, `save=None`. The `get_supply_data` function is used within many of the following supply and cost curve visualization and analysis functions.
+supply_df = get_supply_data(grid, num_segments, save)
+```
+where `grid` is a `Grid` object, `num_segments` is the number of linearized cost curve segments into which the provided quadratic cost curve should be split, and `save` is a string representing the desired file path and file name to which the resulting data will be saved. `save` defaults to `None`. `get_supply_data` returns a DataFrame that contains information about each generator's fuel type, quadratic cost curve, and linearized cost curve, as well as the interconnect and load zone to which the generator belongs. `get_supply_data` is used within many of the following supply and cost curve visualization and analysis functions.
 
 
 #### II. Visualizing Generator Supply Curves
 To obtain the supply curve for a particular fuel type and area, the following is used:
 ```python
-P, F = powersimdata.design.generation.cost_curves.build_supply_curve(grid, num_segments, area, type)
-```
-where `grid` is a `Grid` object; `num_segments` is the number of linearized cost curve segments to create; `area` is a string describing an appropriate load zone, interconnect, or state; and `type` is a string describing an appropriate fuel type.
+from powersimdata.design.generation.cost_curves import build_supply_curve
 
-By default, the above function plots the created supply curve (plotting can be suppressed by including `plot=False` in `build_supply_curve`). `P` and `F`, the supply curve capacity and bid quantities, respectively, are also returned. This function also allows for the area type (e.g., load zone, state, and interconnect are different area types) to be specified. By default, the area type is inferred, though there are instances where specifying the area type can be useful (e.g., Texas can refer to both a state and an interconnect, though they are not the same thing). To specify the area type, `area_type` must be passed a valid area type string in `build_supply_curve`.
+P, F = build_supply_curve(grid, num_segments, area, gen_type, area_type, plot)
+```
+where `grid` is a `Grid` object; `num_segments` is the number of linearized cost curve segments to create; `area` is a string describing an appropriate load zone, interconnect, or state; `gen_type` is a string describing an appropriate fuel type; `area_type` is a string describing the type of region that is being considered; and `plot` is a boolean that indicates whether or not the plot is shown. `area_type` defaults to `None`, which allows the area type to be inferred; there are instances where specifying the area type can be useful (e.g., Texas can refer to both a state and an interconnect, though they are not the same thing). `plot` defaults to `True`. `build_supply_curve` returns `P` and `F`, the supply curve capacity and price quantities, respectively.
 
 
 #### III. Comparing Supply Curves
 When updating generator cost curve information, it can be useful to see the corresponding effect on the supply curve for a particular area and fuel type pair. Instead of only performing a visual inspection between the original and new supply curves, the maximum price difference between the two supply curves can be calculated. This metric, which is similar to the Kolmogorov-Smirnov test, serves as a goodness-of-fit test between the two supply curves, where a lower score is desired. This metric can be calculated as follows:
 ```python
-max_diff = powersimdata.design.generation.cost_curves.ks_test(P1, F1, P2, F2)
-```
-where `P1` and `P2` are lists containing supply curve capacity data and `F1` and `F2` are lists containing corresponding supply curve price data. These lists can be created using `build_supply_curve` or can be created manually.
+from powersimdata.design.generation.cost_curves import ks_test
 
-It should be noted that the two supply curves must offer the same amount of capacity (i.e., `max(P1) = max(P2)`). By default, the above function plots the two supply curves overlaid on a single plot (plotting can be suppressed by including `plot=False` in `ks_test()`).
+max_diff = ks_test(P1, F1, P2, F2, area, gen_type, plot)
+```
+where `P1` and `P2` are lists containing supply curve capacity data; `F1` and `F2` are lists containing corresponding supply curve price data; `area` is a string describing an appropriate load zone, interconnect, or state; `gen_type` is a string describing an appropriate fuel type; and `plot` is a boolean that indicates whether or not the plot is shown. The pairs of supply curve data, (`P1`, `F1`) and (`P2`, `F2`), can be created using `build_supply_curve` or can be created manually. It should be noted that the two supply curves must offer the same amount of capacity (i.e., `max(P1) = max(P2)`). `area` and `gen_type` both default to `None`. `plot` defaults to `True`. `ks_test` returns `max_diff`, which is the maximum price difference between the two supply curves.
 
 
 #### IV. Comparing Cost Curve Parameters
-When designing generator cost curves, it can be instructive to visually compare the quadratic cost curve parameters for generators in a particular area and fuel type pair. The `c1` and `c2` parameters for a given area and fuel type can be compared in a plot using the following:
+When designing generator cost curves, it can be instructive to visually compare the quadratic cost curve parameters for generators in a particular area and fuel type pair. The linear terms (`c1`) and quadratic terms (`c2`) for a given area and fuel type can be compared in a plot using the following:
 ```python
-powersimdata.design.generation.cost_curves.plot_c1_vs_c2(grid, area, type)
-```
-where `grid` is a `Grid` object; `area` is a string describing an appropriate load zone, interconnect, or state; and `type` is a string describing an appropriate fuel type.
+from powersimdata.design.generation.cost_curves import plot_linear_vs_quadratic_terms
 
-This function features a zoom capability (enabled by including `zoom=True` in `plot_c1_vs_c2`) that filters out `c2` outliers to enable better visualization. `c2` outliers outside of a specified number of standard deviations (the default is `num_sd=3`) are filtered out. The desired number of standard deviations can be changed by defining `num_sd` in `plot_c1_vs_c2`. Similar to `build_supply_curve`, this function also provides users with the ability to specify a particular area type.
+plot_linear_vs_quadratic_terms(grid, area, gen_type, area_type, plot, zoom, num_sd, alpha)
+```
+where `grid` is a `Grid` object; `area` is a string describing an appropriate load zone, interconnect, or state; `gen_type` is a string describing an appropriate fuel type; `area_type` is a string describing the type of region that is being considered; `plot` is a boolean that indicates whether or not the plot is shown; `zoom` is a boolean that indicates whether or not the zoom capability that filters out quadratic term outliers for better visualization is enabled; `num_sd` is the number of standard deviations outside of which quadratic terms are filtered; and `alpha` is the alpha blending parameter for the scatter plot. `area_type` defaults to `None`, which allows the area type to be inferred. `plot` defaults to `True`. `zoom` defaults to `False`. `num_sd` defaults to `3`. `alpha`, which can take values between `0` and `1`, defaults to `0.1`.
 
 
 #### V. Comparing Generators by Capacity and Price
 When designing generator cost curves, it can be useful to visually compare the capacity and price parameters for each generator in a specified area and fuel type pair. The generator capacity and price parameters for a given area and fuel type can be compared in a plot using the following:
 ```python
-powersimdata.design.generation.cost_curves.plot_capacity_vs_price(grid, num_segments, area, type)
-```
-where `grid` is a `Grid` object; `num_segments` is the number of linearized cost curve segments to create; `area` is a string describing an appropriate load zone, interconnect, or state; and `type` is a string describing an appropriate fuel type.
+from powersimdata.design.generation.cost_curves import plot_capacity_vs_price
 
-Similar to `build_supply_curve` and `plot_c1_vs_c2`, this function also provides users with the ability to specify a particular area type.
+plot_capacity_vs_price(grid, num_segments, area, gen_type, area_type, plot)
+```
+where `grid` is a `Grid` object; `num_segments` is the number of linearized cost curve segments to create; `area` is a string describing an appropriate load zone, interconnect, or state; `gen_type` is a string describing an appropriate fuel type; `area_type` is a string describing the type of region that is being considered; and `plot` is a boolean that indicates whether or not the plot is shown. `area_type` defaults to `None`, which allows the area type to be inferred. `plot` defaults to `True`.
 
 
 [PreREISE]: https://github.com/Breakthrough-Energy/PreREISE

From 2ef0758f1f2de38e97bb4419c0ffb21603361d6f Mon Sep 17 00:00:00 2001
From: Lane Smith <lane.smith@breakthroughenergy.org>
Date: Fri, 26 Mar 2021 11:08:09 -0700
Subject: [PATCH 3/3] chore: remove noqa comments following cost curve refactor

---
 powersimdata/design/generation/cost_curves.py  | 12 ++++++------
 .../generation/tests/test_cost_curves.py       | 18 +++++++++---------
 2 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/powersimdata/design/generation/cost_curves.py b/powersimdata/design/generation/cost_curves.py
index f1511f269..9ff257017 100644
--- a/powersimdata/design/generation/cost_curves.py
+++ b/powersimdata/design/generation/cost_curves.py
@@ -259,8 +259,8 @@ def build_supply_curve(grid, num_segments, area, gen_type, area_type=None, plot=
     supply_df = supply_df.reset_index(drop=True)
 
     # Determine the points that comprise the supply curve
-    capacity_data = []  # noqa: N806
-    price_data = []  # noqa: N806
+    capacity_data = []
+    price_data = []
     capacity_diff_sum = 0
     for i in supply_df.index:
         capacity_data.append(capacity_diff_sum)
@@ -315,7 +315,7 @@ def ks_test(
     area=None,
     gen_type=None,
     plot=True,
-):  # noqa: N803
+):
     """Runs a test that is similar to the Kolmogorov-Smirnov test. This function takes
     two supply curves as inputs and returns the greatest difference in price between
     the two supply curves. This function requires that the supply curves offer the same
@@ -349,11 +349,11 @@ def ks_test(
         )
 
     # Create a list that has every capacity value in which either supply curve steps up
-    capacity_data_all = list(set(capacity_data1) | set(capacity_data2))  # noqa: N806
+    capacity_data_all = list(set(capacity_data1) | set(capacity_data2))
     capacity_data_all.sort()
 
     # For each capacity value, associate the two corresponding price values
-    price_data_all = []  # noqa: N806
+    price_data_all = []
     for i in range(len(capacity_data_all)):
         # Determine the correpsonding price from the first supply curve
         if capacity_data_all[i] == capacity_data1[-1]:
@@ -374,7 +374,7 @@ def ks_test(
     price_data_diff = [
         abs(price_data_all[i][0] - price_data_all[i][1])
         for i in range(len(price_data_all))
-    ]  # noqa: N806
+    ]
 
     # Determine the maximum price difference
     max_diff = max(price_data_diff)
diff --git a/powersimdata/design/generation/tests/test_cost_curves.py b/powersimdata/design/generation/tests/test_cost_curves.py
index 38dfec72f..5ae2347bd 100644
--- a/powersimdata/design/generation/tests/test_cost_curves.py
+++ b/powersimdata/design/generation/tests/test_cost_curves.py
@@ -176,11 +176,11 @@ def test_get_supply_data():
 
 
 def test_build_supply_curve_1seg():
-    capacity_test, price_test = build_supply_curve(  # noqa: N806
+    capacity_test, price_test = build_supply_curve(
         grid, 1, "Colorado", "ng", "loadzone", plot=False
     )
-    capacity_exp = [0, 10, 10, 30, 30, 50, 50, 100, 100, 200]  # noqa: N806
-    price_exp = [  # noqa: N806
+    capacity_exp = [0, 10, 10, 30, 30, 50, 50, 100, 100, 200]
+    price_exp = [
         25.10,
         25.10,
         30.40,
@@ -197,11 +197,11 @@ def test_build_supply_curve_1seg():
 
 
 def test_build_supply_curve_2seg():
-    capacity_test, price_test = build_supply_curve(  # noqa: N806
+    capacity_test, price_test = build_supply_curve(
         grid, 2, "Utah", "coal", "loadzone", plot=False
     )
-    capacity_exp = [0, 10, 10, 20, 20, 45, 45, 70, 70, 120, 120, 170]  # noqa: N806
-    price_exp = [  # noqa: N806
+    capacity_exp = [0, 10, 10, 20, 20, 45, 45, 70, 70, 120, 120, 170]
+    price_exp = [
         30.100,
         30.100,
         30.300,
@@ -220,10 +220,10 @@ def test_build_supply_curve_2seg():
 
 
 def test_ks_test():
-    capacity_data1, price_data1 = build_supply_curve(  # noqa: N806
+    capacity_data1, price_data1 = build_supply_curve(
         grid, 1, "Washington", "coal", "loadzone", plot=False
     )
-    capacity_data2 = [  # noqa: N806
+    capacity_data2 = [
         0,
         15,
         15,
@@ -239,7 +239,7 @@ def test_ks_test():
         225,
         max(capacity_data1),
     ]
-    price_data2 = [  # noqa: N806
+    price_data2 = [
         23.00,
         23.00,
         27.00,