# In [1]:
import random

# State space: every (solar_level, demand_level) pair, solar level varying slowest
states = [
    (solar_level, demand_level)
    for solar_level in ("High", "Low")
    for demand_level in ("High", "Low")
]

# Available actions
actions = ["Use_Solar", "Use_Generator"]

# Solar availability dynamics:
# solar_transition[solar][next_solar] = P(next_solar | solar)
solar_transition = {
    "High": dict(High=0.7, Low=0.3),
    "Low": dict(High=0.4, Low=0.6),
}

# Demand dynamics, same layout:
# demand_transition[demand][next_demand] = P(next_demand | demand)
demand_transition = {
    "High": dict(High=0.6, Low=0.4),
    "Low": dict(High=0.3, Low=0.7),
}

# Reward function
def get_reward(state, action):
    """Return the immediate reward for taking ``action`` in ``state``.

    ``state`` is a ``(solar_level, demand_level)`` pair.

    * ``"Use_Solar"`` yields 10 when solar is ``"High"`` and demand is
      ``"High"`` or ``"Low"`` (solar is assumed to be enough), otherwise -5
      (insufficient solar).
    * ``"Use_Generator"`` always yields 5 (the generator meets demand).
    * Any other action yields 0.
    """
    solar_level, demand_level = state

    if action == "Use_Solar":
        solar_suffices = solar_level == "High" and demand_level in ("Low", "High")
        return 10 if solar_suffices else -5

    return 5 if action == "Use_Generator" else 0

# Transition function (simplified)
def get_next_state(current_state, action):
    """Sample a successor ``(solar, demand)`` state from ``current_state``.

    Solar and demand levels are drawn independently from the rows of
    ``solar_transition`` and ``demand_transition`` selected by the current
    levels. ``action`` is accepted but does not influence the draw.
    """
    solar_now, demand_now = current_state
    levels = ["High", "Low"]

    def _draw(row):
        # Pick one level, weighted by the row's transition probabilities.
        return random.choices(levels, weights=[row[level] for level in levels])[0]

    # Solar is sampled before demand, so the random stream is consumed in a
    # fixed order.
    next_solar = _draw(solar_transition[solar_now])
    next_demand = _draw(demand_transition[demand_now])
    return (next_solar, next_demand)

# Sample expected value calculation
def expected_q_value(state, action, V, gamma=0.9):
    """
    Compute expected Q-value of taking action in state,
    given estimated value function V.

    Args:
        state: ``(solar_level, demand_level)`` pair.
        action: Action name, passed through to ``get_reward``.
        V: Mapping from ``(solar_level, demand_level)`` to an estimated
            state value. Must contain every successor state reachable
            through the transition tables.
        gamma: Discount factor applied to the successor value.

    Returns:
        Sum over successor states of ``P(s' | s) * (reward + gamma * V[s'])``,
        where ``P(s' | s)`` is the product of the solar and demand
        transition probabilities.

    Raises:
        KeyError: If a level in ``state`` has no row in the transition
            tables, or a successor state is missing from ``V``.
    """
    solar, demand = state

    # The reward depends only on (state, action), so look it up once instead
    # of once per successor state.
    reward = get_reward(state, action)

    # Iterate the transition rows themselves rather than a hard-coded level
    # list, so the successor set always matches the tables.
    solar_row = solar_transition[solar]
    demand_row = demand_transition[demand]

    q_value = 0
    for next_solar, prob_solar in solar_row.items():
        for next_demand, prob_demand in demand_row.items():
            prob = prob_solar * prob_demand
            q_value += prob * (reward + gamma * V[(next_solar, next_demand)])

    return q_value


# Example usage
if __name__ == "__main__":
    # Placeholder value estimates, one entry per (solar, demand) state
    V = {
        ("High", "High"): 20,
        ("High", "Low"): 15,
        ("Low", "High"): 10,
        ("Low", "Low"): 12,
    }

    # The (state, action) pair to evaluate
    state, action = ("Low", "High"), "Use_Generator"

    # One-step lookahead value, followed by a single sampled transition
    q = expected_q_value(state, action, V)
    next_state = get_next_state(state, action)

    print(f"Expected Q-value for state {state} and action '{action}': {q:.2f}")
    print(f"Next state (simulated): {next_state}")
    print(f"Reward received: {get_reward(state, action)}")


# Output:
# Expected Q-value for state ('Low', 'High') and action 'Use_Generator': 17.31
# Next state (simulated): ('Low', 'Low')
# Reward received: 5
