In [6]:
import numpy as np
import pandas as pd

In [7]:
# Load the three training tables in one pass. Later cells read the 'Energy'
# column of `consumption`, the 'Output' column of `solar` and the 'plan'
# column of `site`.
consumption, solar, site = (
    pd.read_csv(csv_path)
    for csv_path in (
        'trainData/energy consumption.csv',
        'trainData/solar.csv',
        'trainData/site.csv',
    )
)

In [8]:
# Step 1: Define State Space
def _enumerate_states():
    """Build the discretised state grid as a list of 4-tuples.

    Each tuple is (needed_energy, battery_SOC, solar_output, grid_availability).
    needed_energy varies slowest and grid_availability varies fastest.
    """
    # np.linspace includes both endpoints, so 120 samples over [0, 12] are
    # spaced 12/119 apart (likewise 86/85 and 27/269 for the other two axes).
    # NOTE(review): if round steps were intended the sample counts would be
    # 121 / 87 / 271 -- confirm.
    energy_levels = np.linspace(0, 12, 120)
    soc_levels = np.linspace(14, 100, 86)
    solar_levels = np.linspace(0, 27, 270)
    grid_flags = [0, 1]

    grid = []
    for energy in energy_levels:
        for soc in soc_levels:
            for pv in solar_levels:
                for flag in grid_flags:
                    grid.append((energy, soc, pv, flag))
    return grid


states = _enumerate_states()

In [9]:
# Step 2: Define Action Space
# Each action is a (use_grid, use_diesel) pair of 0/1 switches; the four
# combinations are enumerated as (0, 0), (0, 1), (1, 0), (1, 1).
actions = [(use_grid, use_diesel) for use_grid in (0, 1) for use_diesel in (0, 1)]

In [None]:
def transition_function(current_state, action, time_step):
    """Compute the state reached from ``current_state`` when ``action`` is taken.

    Parameters
    ----------
    current_state : tuple
        (needed_energy, battery_SOC, solar_output, grid_availability).
        Only needed_energy and battery_SOC are used in the computation.
    action : tuple
        (use_grid, use_diesel) pair: (0, 0) do nothing, (0, 1) diesel,
        (1, 0) grid, (1, 1) grid and diesel.
    time_step : int
        Row label looked up with ``.loc`` in the module-level ``consumption``,
        ``solar`` and ``site`` frames to obtain the exogenous part of the
        returned state.
        NOTE(review): the callers in this notebook pass the same ``t`` they
        used to build ``current_state``, so the "next" exogenous values equal
        the current row's -- confirm whether ``t + 1`` was intended.

    Returns
    -------
    tuple
        (needed_energy, battery_SOC, solar_output, grid_availability) where
        battery_SOC is clamped to the range [0, 100].

    Raises
    ------
    ValueError
        If ``action`` is not one of the four known pairs. (Previously this
        surfaced as an UnboundLocalError further down.)
    """
    # Unpack the current state; the last two components are not needed here.
    needed_energy, battery_SOC, _solar_output, _grid_availability = current_state

    # Decide how much is drawn from the battery for this action.
    if action in ((0, 0), (0, 1)):
        # Without the grid the battery covers the load, limited by its SOC.
        energy_from_battery = min(needed_energy, battery_SOC)
    elif action in ((1, 0), (1, 1)):
        # With the grid on, the battery is not used at all. A plain 0 avoids
        # the negative draw that min(0, battery_SOC) gave for a negative SOC.
        energy_from_battery = 0
    else:
        raise ValueError(
            f"Unknown action {action!r}; expected one of "
            "(0, 0), (0, 1), (1, 0), (1, 1)"
        )

    # Exogenous values for the returned state come straight from the data.
    next_needed_energy = consumption.loc[time_step, 'Energy']
    next_grid_availability = site.loc[time_step, 'plan']
    next_solar_output = solar.loc[time_step, 'Output']

    # Solar output charges the battery, the battery draw discharges it.
    # NOTE(review): energy values are added directly to SOC here -- confirm
    # that both are expressed in the same unit.
    next_battery_SOC = battery_SOC + (next_solar_output - energy_from_battery)

    # Keep SOC within [0, 100].
    next_battery_SOC = min(max(next_battery_SOC, 0), 100)

    return (next_needed_energy, next_battery_SOC, next_solar_output, next_grid_availability)

In [None]:
def reward_function(previous_state, current_state, action, soc_min, generator_info):
    """Score one step and update the running generator counters.

    Parameters
    ----------
    previous_state : tuple
        Accepted for interface compatibility; not used in the computation.
    current_state : tuple
        (needed_energy, battery_SOC, solar_output, grid_availability); only
        battery_SOC is read.
    action : tuple
        (use_grid, use_diesel) pair, e.g. (0, 1) for diesel only.
    soc_min : number
        SOC threshold; a battery_SOC strictly below it costs an extra 1000.
    generator_info : tuple
        (diesel_starts, diesel_total_time, diesel_max_continuous,
        grid_total_time) counters before this step.

    Returns
    -------
    tuple
        (reward, updated_generator_info). The reward is the negative of
        ``300*starts + 1*diesel_time + 0.95*continuous + 0.25*grid_time``
        evaluated on the *updated* counters, minus 1000 if the SOC constraint
        is violated.

    Notes
    -----
    ``diesel_max_continuous`` is maintained as the length of the current
    uninterrupted diesel run (it is reset to 0 whenever diesel is off), not
    as a running maximum.
    """
    _, current_battery_SOC, _, _ = current_state
    diesel_starts, diesel_total_time, diesel_max_continuous, grid_total_time = generator_info

    diesel_on = action in ((0, 1), (1, 1))
    grid_on = action in ((1, 0), (1, 1))

    if diesel_on:
        # A start is an off -> on transition: the continuous-run counter is 0
        # exactly when diesel was not running on the previous step. Checking
        # diesel_total_time == 0 instead would only ever count the very first
        # start and leave later restarts unpenalised.
        if diesel_max_continuous == 0:
            diesel_starts += 1
        diesel_total_time += 1
        diesel_max_continuous += 1
    else:
        # Diesel is off: the uninterrupted run ends.
        diesel_max_continuous = 0

    if grid_on:
        grid_total_time += 1

    # Penalty from the scoring function, applied as a negative reward.
    score = (
        300 * diesel_starts +
        1 * diesel_total_time +
        0.95 * diesel_max_continuous +
        0.25 * grid_total_time
    )
    reward = -score

    # Heavy penalty for violating the SOC constraint.
    if current_battery_SOC < soc_min:
        reward -= 1000

    updated_generator_info = (diesel_starts, diesel_total_time, diesel_max_continuous, grid_total_time)
    return reward, updated_generator_info

In [None]:
# --- Solver configuration ---
soc_min = 15                   # SOC threshold checked by reward_function
time_steps = 7 * 24 * 4        # 672 = 7 days * 24 hours * 4 (15-minute intervals)
gamma, threshold = 0.99, 0.01  # discount factor, convergence threshold

# Generator counters, all starting at zero:
# (diesel_starts, diesel_total_time, diesel_max_continuous, grid_total_time)
generator_info = (0,) * 4

# Policy and value function start empty; both are keyed by state tuples.
policy, value_function = {}, {}

# State at the first time step, with battery SOC fixed at 20.
initial_state = (
    consumption.loc[0, 'Energy'],
    20,
    solar.loc[0, 'Output'],
    site.loc[0, 'plan'],
)

# Step 1: Policy Evaluation
def policy_evaluation(policy, value_function):
    """Sweep all time steps, updating ``value_function`` in place until stable.

    For every time step a state is built from the data tables (battery SOC
    fixed at 20), the policy's action is applied (default ``(0, 0)``), and the
    state's value is replaced by ``reward + gamma * V(next_state)``. Sweeps
    repeat until the largest change in a sweep drops below ``threshold``.

    NOTE(review): every call to reward_function receives the module-level
    ``generator_info`` and the updated counters are discarded -- confirm this
    is intended.

    Returns the same ``value_function`` dict that was passed in.
    """
    converged = False
    while not converged:
        largest_change = 0
        for step in range(time_steps):
            current = (
                consumption.loc[step, 'Energy'],
                20,
                solar.loc[step, 'Output'],
                site.loc[step, 'plan'],
            )

            # Fall back to "do nothing" for states the policy has not seen.
            chosen = policy.get(current, (0, 0))
            successor = transition_function(current, chosen, step)
            step_reward = reward_function(current, successor, chosen, soc_min, generator_info)[0]

            # Bellman backup: V(s) = R(s, a) + gamma * V(s')
            previous_estimate = value_function.get(current, 0)
            updated_estimate = step_reward + gamma * value_function.get(successor, 0)
            largest_change = max(largest_change, np.abs(previous_estimate - updated_estimate))
            value_function[current] = updated_estimate

        # Stop once a whole sweep changed nothing by more than the threshold.
        converged = largest_change < threshold
    return value_function

# Step 2: Policy Improvement
def policy_improvement(policy, value_function):
    """Make ``policy`` greedy with respect to ``value_function``.

    For each time step the state is rebuilt from the data tables (battery SOC
    fixed at 20) and all four actions are scored with
    ``reward + gamma * V(next_state)``; the first action with the strictly
    highest score is stored in ``policy`` (mutated in place).

    Returns ``(policy, policy_stable)`` where ``policy_stable`` is True only
    if no state's action changed during this pass.
    """
    changed = False
    for step in range(time_steps):
        current = (
            consumption.loc[step, 'Energy'],
            20,
            solar.loc[step, 'Output'],
            site.loc[step, 'plan'],
        )
        previous_choice = policy.get(current, (0, 0))

        top_value, top_action = -float('inf'), previous_choice
        for candidate in ((0, 0), (0, 1), (1, 0), (1, 1)):
            successor = transition_function(current, candidate, step)
            candidate_reward, _ = reward_function(current, successor, candidate, soc_min, generator_info)
            candidate_value = candidate_reward + gamma * value_function.get(successor, 0)
            # Strict comparison: on ties the earlier candidate is kept.
            if candidate_value > top_value:
                top_value, top_action = candidate_value, candidate

        # Record the greedy action and remember whether anything moved.
        policy[current] = top_action
        changed = changed or (previous_choice != top_action)

    return policy, not changed

# Combine Policy Evaluation and Improvement
policy_stable = False
while not policy_stable:
    # Evaluate the current policy, then make it greedy; stop when a full
    # improvement pass leaves every action unchanged.
    value_function = policy_evaluation(policy, value_function)
    policy, policy_stable = policy_improvement(policy, value_function)