In [19]:
from enum import Enum
from scipy.optimize import differential_evolution
import numpy as np
import matplotlib.pyplot as plt
from typing import List

In [20]:
PRICES = np.array([1, 10, 10, 1, 10, 10, 1, 1, 1, 1, 10, 10, 100, 1, 1])
COOLING_PER_HOUR = 0.5
HEATING_PER_HOUR = 2.0

START_TEMP = 20.0
MIN_TEMP = 18.0
MAX_TEMP = 22.0
PENALTY_SCALE = 1_000.0
BINARY_REG_SCALE = 1e-2  # small, encourages 0/1 while keeping smoothness

def objective(actions: np.ndarray) -> float:
    temp = START_TEMP
    cost = 0
    for i, a in enumerate(actions): 
        action = "ON" if a > 0.5 else "OFF"
        temp -= COOLING_PER_HOUR
        if action == "ON":
            temp += HEATING_PER_HOUR
        cost += PRICES[i] 

        if temp < MIN_TEMP:
            penalty = PENALTY_SCALE * (MIN_TEMP - temp) ** 3
            cost += penalty
        if temp > MAX_TEMP:
            penalty = PENALTY_SCALE * (temp - MAX_TEMP) ** 3
            cost += penalty

    return cost


x0 = np.full(len(PRICES), 0.5)  # nontrivial start to avoid flat region
bounds = [(0, 1)] * len(PRICES)
result = differential_evolution(objective, bounds, maxiter=1000, polish=False)

temp = START_TEMP
print("Actions | Temps | Price")
for i, x in enumerate(result.x):
    action = "ON" if x > 0.5 else "OFF"
    temp -= COOLING_PER_HOUR
    if action == "ON":
        temp += HEATING_PER_HOUR
    print(f"{action:7s} | {temp:.2f} | {PRICES[i]:.2f}")

print("Total cost: ", result.fun)
print("Best action: ", result.x[0])



Actions | Temps | Price
OFF     | 19.50 | 1.00
OFF     | 19.00 | 10.00
OFF     | 18.50 | 10.00
ON      | 20.00 | 1.00
OFF     | 19.50 | 10.00
OFF     | 19.00 | 10.00
ON      | 20.50 | 1.00
OFF     | 20.00 | 1.00
ON      | 21.50 | 1.00
OFF     | 21.00 | 1.00
OFF     | 20.50 | 10.00
ON      | 22.00 | 10.00
OFF     | 21.50 | 100.00
OFF     | 21.00 | 1.00
OFF     | 20.50 | 1.00
Total cost:  168.0
Best action:  0.15826783150752155
