In [None]:
pip install gym numpy stable-baselines3 matplotlib tensorflow keras keras-rl2

In [1]:
import json

import gym
from gym import spaces
import numpy as np

In [3]:
class CompostingEnv(gym.Env):
    def __init__(self, conditions_path='optimal_conditions.json'):
        """Load the optimal-condition table and declare the Gym spaces.

        Args:
            conditions_path: Path of the JSON file holding the target ranges.
                Defaults to ``'optimal_conditions.json'`` in the current
                working directory. The rest of the class reads the loaded
                object as ``optimal_conditions[category][variable]['min']``
                and ``['max']``.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        super().__init__()

        with open(conditions_path, encoding='utf-8') as f:
            self.optimal_conditions = json.load(f)

        def reading_box(size):
            # Every sensor reading is declared on the same 0..100 scale.
            return spaces.Box(low=0, high=100, shape=(size,), dtype=np.float32)

        def duration_box():
            # Durations are bounded to 0..10000 (key name says milliseconds).
            return spaces.Box(low=0, high=10000, shape=(1,), dtype=np.float32)

        def air_pump_space():
            # A fresh space object per pump so the two entries share nothing.
            return spaces.Dict({
                "type": spaces.Discrete(1),  # Only 1 type here for air_pump
                "id": spaces.Discrete(2),  # Air pump ID: 0, 1
                "duration_ms": duration_box(),
            })

        # Define structured observation space
        self.observation_space = spaces.Dict({
            "temperature_active": reading_box(4),
            "temperature_curing": reading_box(2),
            "moisture_active": reading_box(2),
            "moisture_curing": reading_box(2),
            "gases": spaces.Dict({
                "co2": reading_box(1),
                "oxygen": reading_box(1),
                "methane": reading_box(1),
            }),
        })

        # Define the action space with duration for motor and air pump actions
        self.action_space = spaces.Dict({
            "motor": spaces.Dict({
                "id": spaces.Discrete(2),  # Motor ID: 0, 1
                "duration_ms": duration_box(),
            }),
            "actions": spaces.Tuple([air_pump_space(), air_pump_space()]),
        })

        # reset() assigns self.state itself, so its return value is not needed.
        self.reset()

    def reset(self):
        self.state = {
            "temperature_active": np.array([50.0, 50.0, 50.0, 50.0], dtype=np.float32),
            "temperature_curing": np.array([60.0, 60.0], dtype=np.float32),
            "moisture_active": np.array([60.0, 60.0], dtype=np.float32),
            "moisture_curing": np.array([70.0, 70.0], dtype=np.float32),
            "gases": {
                "co2": np.array([5.0], dtype=np.float32),
                "oxygen": np.array([15.0], dtype=np.float32),
                "methane": np.array([1.0], dtype=np.float32)
            }
        }
        return self.state


    def calculate_reward_points(current, minvalue, maxvalue):
    # If the current value is less than the minimum, calculate reward as current / active_min
    if (current / minvalue) < 1:
        return (current / minvalue)
    # If the current value is within the optimal range, return full reward (1)
    elif (current >= minvalue and current <= maxvalue):
        return 1
    # If the current value is greater than the maximum, scale the reward as active_max / current
    elif (current / maxvalue) > 1:
        return (maxvalue / current)

    def _calculate_reward(self):
    # Define the stages of the composting process
    stages = ['initial', 'mid', 'final']
    current_stage = stages[0]  # Example: This would change dynamically based on progress

    # Retrieve the current environmental state values
    temperature = np.mean(self.state['temperature_active'])
    moisture = np.mean(self.state['moisture_active'])
    co2 = self.state['gases']['co2']
    methane = self.state['gases']['methane']
    o2 = self.state['gasses']['oxygen']

    # Use the updated calculate_reward() function to compute the reward for each variable
    temp_reward = calculate_reward_points(temperature, optimal.active_min, optimal.active_max)
    ch4 = calculate_reward_points(methane, optimal.moisture_min, optimal.moisture_max)
    co2_reward = calculate_reward_points(co2, optimal.co2_min, optimal.co2_max)
    o2_reward = calculate_reward_points(o2, optimal.o2_min, optimal.o2_max)
    
    temp_weight = 0.4
    ch4_weight = 0.3
    o2_weight = 0.2
    co2_weight = 0.1
    

    # The total reward is the average of the rewards for all variables
    total_reward = (
        temp_reward * temp_weight + 
        ch4_reward * ch4_weight + 
        o2_reward * o2_weight + 
        co2_reward * co2_weight)

    return total_reward

    def step(self, action):
        """
        Execute an action, update the state, calculate reward, and return the results
        """
        # 1. Apply action: this can define how motors and air pumps affect temperature and humidity
        self._apply_action(action)

        # 2. Update state: update temperature, humidity, and gas concentrations
        self._update_state()

        # 3. Calculate the reward
        reward = self.calculate_reward()

        # 4. Check if the episode is finished
        done = self._check_done()

        # 5. Return the new state, reward, completion status, and any debug information
        return self.state, reward, done, {}

    def _apply_action(self, action):
        """
        Apply the input action to update the motor or air pump control parameters
        """
        motor_action = action['motor']
        pump_action = action['actions']
        
        # Dynamically adjust action timing based on optimal conditions
        motor_duration = self._get_dynamic_duration(np.mean(self.state['temperature_active']), 'temperature', 'active')
        motor_action['duration_ms'][0] = motor_duration

        # Adjust air pump action duration based on optimal gas levels
        for pump in pump_action:
            pump_duration = self._get_dynamic_duration(self.state['gases']['co2'][0], 'gases', 'co2')
            pump['duration_ms'][0] = pump_duration

        # Assume longer air pump run time reduces CO2 and methane, increases oxygen
        co2_reduction = pump_duration * 0.01
        methane_reduction = pump_duration * 0.005
        oxygen_increase = pump_duration * 0.015

        self.state['gases']['co2'][0] = max(0, self.state['gases']['co2'][0] - co2_reduction)
        self.state['gases']['methane'][0] = max(0, self.state['gases']['methane'][0] - methane_reduction)
        self.state['gases']['oxygen'][0] = min(100, self.state['gases']['oxygen'][0] + oxygen_increase)

     def _get_dynamic_duration(self, current_value, category, variable):
        """
        Get dynamic action duration based on the current state and optimal conditions
        """
        optimal_min = self.optimal_conditions[category][variable]['min']
        optimal_max = self.optimal_conditions[category][variable]['max']

        # Calculate duration as a function of deviation from optimal conditions
        if current_value < optimal_min:
            duration = (optimal_min - current_value) * 100  # Example scaling factor
        elif current_value > optimal_max:
            duration = (current_value - optimal_max) * 100
        else:
            duration = 500  # Default duration if within optimal range

        return np.clip(duration, 0, 10000)  # Ensure duration stays within valid range