# Pumped-Storage Optimisation with Genetic Algorithm and MILP

In [1]:
import pandas as pd
import numpy as np
import plotnine as pn
import plotly.graph_objs as go
import plotly.express as px
from tqdm.notebook import tqdm
from IPython.display import clear_output, display
import os
from itertools import product

# Import own implementations
from milp import MILP
import genetic
from genetic import GA_Actions

# Importing tuning libraries
import ray
from ray import train, tune
from ray.tune.search.optuna import OptunaSearch
from ray.tune.schedulers import ASHAScheduler

background_colour = "#F2F2F2"
pn.theme_set(
    pn.theme_classic()
    + pn.theme(
        text=pn.element_text(family="monospace"),
        plot_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
        panel_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
        legend_background=pn.element_rect(
            fill=background_colour, colour=background_colour
        ),
    )
)

%load_ext blackcellmagic

## Reading the Price data

In [2]:
df = pd.read_csv("../01 - Data/example_week.csv")
df.head(2)

Unnamed: 0,spot,utc_time
0,101.54,2022-01-01 00:00:00+00:00
1,52.13,2022-01-01 01:00:00+00:00


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 168 entries, 0 to 167
Data columns (total 2 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   spot      168 non-null    float64
 1   utc_time  168 non-null    object 
dtypes: float64(1), object(1)
memory usage: 2.8+ KB


## The Power Plant

In [4]:
plant_params = {
    "EFFICIENCY": 0.75,
    "MAX_STORAGE_M3": 5000,
    "MIN_STORAGE_M3": 0,
    "TURBINE_POWER_MW": 100,
    "PUMP_POWER_MW": 100,
    "TURBINE_RATE_M3H": 500,
    "MIN_STORAGE_MWH": 0,
    "INITIAL_WATER_LEVEL_PCT": 0,
}
plant_params["INITIAL_WATER_LEVEL"] = (
    plant_params["INITIAL_WATER_LEVEL_PCT"] * plant_params["MAX_STORAGE_M3"]
)
plant_params["PUMP_RATE_M3H"] = (
    plant_params["TURBINE_RATE_M3H"] * plant_params["EFFICIENCY"]
)
plant_params["MAX_STORAGE_MWH"] = (
    plant_params["MAX_STORAGE_M3"] / plant_params["TURBINE_RATE_M3H"]
) * plant_params["TURBINE_POWER_MW"]

## MILP

In [5]:
milp_solver = MILP(plant_params=plant_params, spot=df["spot"], utc_time=df["utc_time"])

In [6]:
milp_model, milp_status, milp_profile = milp_solver.solve()

In [7]:
milp_status

'Optimal'

In [8]:
milp_profile.head()

Unnamed: 0,water_level,action,colour_id,utc_time,spot
0,0.0,0,nothing,2022-01-01 00:00:00+00:00,101.54
1,375.0,-1,pump,2022-01-01 01:00:00+00:00,52.13
2,750.0,-1,pump,2022-01-01 02:00:00+00:00,20.78
3,1125.0,-1,pump,2022-01-01 03:00:00+00:00,15.66
4,1500.0,-1,pump,2022-01-01 04:00:00+00:00,21.47


## GA Actions

### Tuning

In [9]:
ga_solver = GA_Actions(plant_params=plant_params, spot=df["spot"], utc_time=df["utc_time"])

In [10]:
# total_generations = 10
# timeout_s = 60

# # Need this line for locally defined modules to work with ray
# ray.init(runtime_env={"working_dir": "."}, ignore_reinit_error=True)

# analysis = tune.run(
#     tune.with_parameters(
#         tune.with_parameters(
#             ga_solver.train,
#             total_generations=total_generations,
#             tune_mode=True,
#         ),
#         timeout_s=timeout_s,
#         total_generations=total_generations,
#         individual_size=df.shape[0],
#         plant_params=plant_params,
#         spot=df["spot"],
#         utc_time=df["utc_time"],
#         tune_mode=True,
#     ),
#     config={
#         "CXPB": tune.uniform(0.2, 0.8),
#         "MUTPB": tune.uniform(0.05, 0.95),
#         "MUT_IND_PB": tune.uniform(0.05, 0.95),
#         "TOURNAMENT_SIZE": tune.randint(1, 10),
#         "POP_SIZE": tune.choice([50, 250, 500, 1000, 5000]),
#     },
#     metric="avg_fitness",
#     mode="max",
#     local_dir="tune_results",
#     name="GA",
#     search_alg=OptunaSearch(),
#     scheduler=ASHAScheduler(
#         time_attr="training_iteration",
#         grace_period=total_generations / 2,
#         reduction_factor=1.5,
#     ),
#     time_budget_s=timeout_s,
#     num_samples=10_000,
#     trial_dirname_creator=lambda trial: f"{trial.trainable_name}_{trial.trial_id}",
# )

In [11]:
analysis = ga_solver.tune(
    tune_config={
        "CXPB": tune.uniform(0.2, 0.8),
        "MUTPB": tune.uniform(0.05, 0.95),
        "MUT_IND_PB": tune.uniform(0.05, 0.95),
        "TOURNAMENT_SIZE": tune.randint(1, 10),
        "POP_SIZE": tune.choice([50, 250, 500, 1000, 5000]),
    },
    total_generations=10,
    timeout_s=60,
)

2024-04-01 14:52:21,318	INFO worker.py:1752 -- Started a local Ray instance.
2024-04-01 14:52:21,374	INFO packaging.py:530 -- Creating a file package for local directory '.'.
2024-04-01 14:52:21,403	INFO packaging.py:358 -- Pushing file package 'gcs://_ray_pkg_1ea999654ab266f6.zip' (0.46MiB) to Ray cluster...
2024-04-01 14:52:21,408	INFO packaging.py:371 -- Successfully pushed file package 'gcs://_ray_pkg_1ea999654ab266f6.zip'.
2024-04-01 14:52:23,302	INFO tune.py:613 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949
[I 2024-04-01 14:52:23,309] A new study created in memory with name: optuna


0,1
Current time:,2024-04-01 14:53:25
Running for:,00:01:00.24
Memory:,14.3/15.8 GiB

Trial name,status,loc,CXPB,MUTPB,MUT_IND_PB,POP_SIZE,TOURNAMENT_SIZE,iter,total time (s),avg_fitness
train_0a0f3094,TERMINATED,127.0.0.1:8200,0.416768,0.263378,0.345088,5000,8,10.0,36.3026,-3758490.0
train_947b630d,TERMINATED,127.0.0.1:30000,0.457407,0.132279,0.480294,250,2,10.0,1.53245,-9378780.0
train_7bd647e6,TERMINATED,127.0.0.1:30980,0.201649,0.802831,0.100926,5000,6,10.0,47.53,-9094690.0
train_86dd4bb2,TERMINATED,127.0.0.1:2144,0.205031,0.0647007,0.688973,50,9,10.0,0.330756,-9435400.0
train_8eda0444,TERMINATED,127.0.0.1:18356,0.722137,0.204519,0.838778,5000,8,7.0,39.5167,-7513240.0
train_dd37a02c,TERMINATED,127.0.0.1:21300,0.781134,0.73808,0.60194,250,6,5.0,1.48822,-9727590.0
train_8b340115,TERMINATED,127.0.0.1:4820,0.554023,0.310386,0.30294,250,1,5.0,1.46452,-9943450.0
train_afc6266b,TERMINATED,127.0.0.1:15220,0.384457,0.356048,0.384957,1000,5,10.0,9.4079,-9141770.0
train_0728cc27,TERMINATED,127.0.0.1:29860,0.690636,0.507902,0.768971,5000,7,2.0,23.1839,-9835500.0
train_2794efc7,TERMINATED,127.0.0.1:19820,0.280836,0.555493,0.1782,250,3,10.0,2.53227,-9353110.0


[36m(train pid=30000)[0m   0%|          | 0/10 [00:00<?, ?it/s]


Trial name,avg_fitness
train_0728cc27,-9993340.0
train_0a0f3094,-3758490.0
train_20c5a875,-9997690.0
train_2794efc7,-9353110.0
train_7bd647e6,-9094690.0
train_86dd4bb2,-9435400.0
train_8b340115,-9943450.0
train_8eda0444,-7513240.0
train_947b630d,-9378780.0
train_9ae24bfb,-9954400.0


[36m(train pid=2144)[0m   0%|          | 0/10 [00:00<?, ?it/s][32m [repeated 2x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/ray-logging.html#log-deduplication for more options.)[0m


[36m(train pid=2144)[0m C:\arrow\cpp\src\arrow\filesystem\s3fs.cc:2829:  arrow::fs::FinalizeS3 was not called even though S3 was initialized.  This could lead to a segmentation fault at exit


[36m(train pid=30980)[0m   0%|          | 0/10 [00:00<?, ?it/s]
[36m(train pid=21300)[0m   0%|          | 0/10 [00:00<?, ?it/s]


[36m(train pid=21300)[0m C:\arrow\cpp\src\arrow\filesystem\s3fs.cc:2829:  arrow::fs::FinalizeS3 was not called even though S3 was initialized.  This could lead to a segmentation fault at exit
[36m(train pid=4820)[0m C:\arrow\cpp\src\arrow\filesystem\s3fs.cc:2829:  arrow::fs::FinalizeS3 was not called even though S3 was initialized.  This could lead to a segmentation fault at exit


[36m(train pid=18356)[0m   0%|          | 0/10 [00:00<?, ?it/s][32m [repeated 2x across cluster][0m
[36m(train pid=19820)[0m   0%|          | 0/10 [00:00<?, ?it/s][32m [repeated 2x across cluster][0m


[36m(train pid=8200)[0m C:\arrow\cpp\src\arrow\filesystem\s3fs.cc:2829:  arrow::fs::FinalizeS3 was not called even though S3 was initialized.  This could lead to a segmentation fault at exit
[36m(train pid=19820)[0m C:\arrow\cpp\src\arrow\filesystem\s3fs.cc:2829:  arrow::fs::FinalizeS3 was not called even though S3 was initialized.  This could lead to a segmentation fault at exit


[36m(train pid=29860)[0m   0%|          | 0/10 [00:00<?, ?it/s][32m [repeated 2x across cluster][0m
[36m(train pid=22484)[0m   0%|          | 0/10 [00:00<?, ?it/s]
[36m(train pid=15364)[0m   0%|          | 0/10 [00:00<?, ?it/s]


2024-04-01 14:53:25,659	INFO timeout.py:54 -- Reached timeout of 60 seconds. Stopping all trials.
2024-04-01 14:53:25,701	INFO tune.py:1016 -- Wrote the latest version of all result files and experiment state to 'C:/Users/mathi/ray_results/GA' in 0.0390s.
[36m(train pid=29860)[0m C:\arrow\cpp\src\arrow\filesystem\s3fs.cc:2829:  arrow::fs::FinalizeS3 was not called even though S3 was initialized.  This could lead to a segmentation fault at exit[32m [repeated 2x across cluster][0m
[36m(train pid=18356)[0m 
2024-04-01 14:53:31,194	INFO tune.py:1048 -- Total run time: 67.89 seconds (60.20 seconds for the tuning loop).
- train_74787fe8: FileNotFoundError('Could not fetch metrics for train_74787fe8: both result.json and progress.csv were not found at C:/Users/mathi/ray_results/GA/train_74787fe8')


In [12]:
analysis.best_config

{'CXPB': 0.4167682055642961,
 'MUTPB': 0.26337821121816474,
 'MUT_IND_PB': 0.3450879798979169,
 'TOURNAMENT_SIZE': 8,
 'POP_SIZE': 5000}

In [13]:
fig = px.line(
    data_frame=pd.concat(analysis.trial_dataframes.values()),
    x="training_iteration",
    y="avg_fitness",
    color="trial_id",
)
fig.show()

In [17]:
top_runs = analysis.dataframe().sort_values("avg_fitness", ascending=False).head(10)["trial_id"].to_list()

fig = px.line(
    data_frame=pd.concat(analysis.trial_dataframes.values()).query(
        "trial_id in @top_runs"
    ),
    x="training_iteration",
    y="avg_fitness",
    color="trial_id",
)
fig.show()

In [None]:
(
    analysis.dataframe()
    .query("trial_id == '47670c6e'")
    .filter(regex="config")
    .melt()
    .assign(variable=lambda x: x["variable"].str.replace("config/", ""))
    .set_index("variable")
    .to_dict()["value"]
)

{'CXPB': 0.22808547604749183,
 'MUTPB': 0.05313153710963216,
 'MUT_SD': 2.5861177862246074,
 'MUT_IND_PB': 0.1359577654069344,
 'TOURNAMENT_SIZE': 3.0,
 'POP_SIZE': 5000.0}

### Training

In [None]:
ga_solver.train()