# Benchmarking Fitness Functions for Speed

In [47]:
import pandas as pd
import polars as pl
import numpy as np
import plotnine as pn
import plotly.graph_objs as go
import plotly.express as px
from tqdm.notebook import tqdm
from IPython.display import clear_output, display
import os
from itertools import product

# Import own implementations
from milp import MILP
import genetic
from genetic import GA_Actions_Elite, GA_Actions_Tournament

# Importing tuning libraries
import ray
from ray import train, tune
from ray.tune.search.optuna import OptunaSearch
from ray.tune.schedulers import ASHAScheduler

background_colour = "#F2F2F2"
pn.theme_set(
    pn.theme_classic()
    + pn.theme(
        text=pn.element_text(family="monospace"),
        plot_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
        panel_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
        legend_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
    )
)

%load_ext blackcellmagic

The blackcellmagic extension is already loaded. To reload it, use:
  %reload_ext blackcellmagic


## Reading the Price data

In [48]:
df = pd.read_csv("../01 - Data/example_week.csv").assign(utc_time=lambda x: pd.to_datetime(x.utc_time))
df.head(2)

Unnamed: 0,spot,utc_time
0,101.54,2022-01-01 00:00:00+00:00
1,52.13,2022-01-01 01:00:00+00:00


In [49]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 168 entries, 0 to 167
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype              
---  ------    --------------  -----              
 0   spot      168 non-null    float64            
 1   utc_time  168 non-null    datetime64[ns, UTC]
dtypes: datetime64[ns, UTC](1), float64(1)
memory usage: 2.8 KB


## The Power Plant

In [50]:
plant_params = {
    "EFFICIENCY": 0.75,
    "MAX_STORAGE_M3": 5000,
    "MIN_STORAGE_M3": 0,
    "TURBINE_POWER_MW": 100,
    "PUMP_POWER_MW": 100,
    "TURBINE_RATE_M3H": 500,
    "MIN_STORAGE_MWH": 0,
    "INITIAL_WATER_LEVEL_PCT": 0,
}
plant_params["INITIAL_WATER_LEVEL"] = (
    plant_params["INITIAL_WATER_LEVEL_PCT"] * plant_params["MAX_STORAGE_M3"]
)
plant_params["PUMP_RATE_M3H"] = (
    plant_params["TURBINE_RATE_M3H"] * plant_params["EFFICIENCY"]
)
plant_params["MAX_STORAGE_MWH"] = (
    plant_params["MAX_STORAGE_M3"] / plant_params["TURBINE_RATE_M3H"]
) * plant_params["TURBINE_POWER_MW"]

## The Profit Functions

First: For loop

In [51]:
def fitness_loop(population, ps_params, prices):

    fitness_scores = np.array([])

    for parameter_combination in population:

        water_level = ps_params["INITIAL_WATER_LEVEL"]
        fitness_score = 0

        for action, price in zip(parameter_combination, prices["spot"]):
            # Pump (-1)
            if action == -1:
                if (
                    water_level + ps_params["PUMP_RATE_M3H"]
                    < ps_params["MAX_STORAGE_M3"]
                ):
                    fitness_score -= ps_params["PUMP_POWER_MW"] * price
                    water_level += ps_params["PUMP_RATE_M3H"]
                else:
                    fitness_score -= 100_000
            # Turbine (1)
            if action == 1:
                if (
                    water_level - ps_params["TURBINE_RATE_M3H"]
                    > ps_params["MIN_STORAGE_M3"]
                ):
                    fitness_score += ps_params["TURBINE_POWER_MW"] * price
                    water_level -= ps_params["TURBINE_RATE_M3H"]
                else:
                    fitness_score -= 100_000
            # Do nothing (0)
            # Nothing happens to the fitness score and the water level

        fitness_scores = np.append(fitness_scores, fitness_score)

    return fitness_scores

Second: numpy arrays

In [52]:
def fitness_array(population, plant_params, prices):

    fitness_scores = np.array([])

    for individual in population:
        # Calculate revenues from actions
        revenues = np.select(
            condlist=[
                np.array(individual) == -1,
                np.array(individual) == 1,
            ],
            choicelist=[
                -plant_params["PUMP_POWER_MW"] * prices["spot"],
                plant_params["TURBINE_POWER_MW"] * prices["spot"],
            ],
            default=0,
        )

        # Calculate water level exceedances
        water_levels = np.select(
            condlist=[
                np.array(individual) == -1,
                np.array(individual) == 1,
            ],
            choicelist=[
                plant_params["PUMP_RATE_M3H"],
                -plant_params["TURBINE_RATE_M3H"],
            ],
            default=0,
        ).cumsum()

        exceedances = (
            (water_levels >= plant_params["MAX_STORAGE_M3"])
            | (water_levels <= plant_params["MIN_STORAGE_M3"])
        ).sum()

        if exceedances > 0:
            fitness_scores = np.append(fitness_scores, revenues.sum() - 1e7)
        else:
            fitness_scores = np.append(fitness_scores, revenues.sum())
        
    return fitness_scores

## Creating a Random Population

In [53]:
population = np.random.choice([-1, 0, 1], size=(100, 168))
population

array([[ 0, -1,  0, ..., -1,  0,  0],
       [-1, -1,  0, ...,  1,  0,  0],
       [ 1, -1, -1, ..., -1,  1,  0],
       ...,
       [-1,  0,  0, ..., -1, -1,  1],
       [ 1,  0,  1, ...,  0, -1, -1],
       [ 0,  0,  0, ...,  0,  0,  1]])

## Benchmarking Timem

In [56]:
%%timeit

fitness_loop(population, plant_params, df)

4.66 ms ± 227 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [57]:
%%timeit

fitness_array(population, plant_params, df)

18.4 ms ± 1.15 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


The for loop is much faster. Let's keep that one.