In [None]:
# RL Fan Controller: Q-learning agent for fan-speed control

import numpy as np

class RLFanController:
    """Tabular Q-learning controller that steps a fan speed up or down.

    The state is a tuple of binned sensor readings (CO2, temperature,
    current fan speed, gas flow).  Each action is an index into
    ``self.actions``, whose entries are fan-speed deltas.  Q-values are kept
    in a dict keyed by state and created lazily (all zeros) the first time a
    state is seen.
    """

    def __init__(self):
        # Q-learning hyperparameters.
        self.lr = 0.1  # step size of each Q-value update
        self.gamma = 0.95  # discount applied to the next state's best Q-value
        self.epsilon = 0.2  # current probability of picking a random action
        self.epsilon_decay = 0.995  # multiplicative decay applied per episode
        self.epsilon_min = 0.01  # lower bound for epsilon

        # Fan-speed deltas selectable by action index.
        self.actions = [-100, 0, 100]
        # Maps a discretized state tuple to an array of per-action Q-values.
        self.q_table = {}

        # Commanded fan speed is clamped to this inclusive range.
        self.min_fan_speed = 1000
        self.max_fan_speed = 2000

    @staticmethod
    def _bin(value, width):
        """Return the index of the ``width``-wide bin containing ``value``.

        Uses floor rather than truncation so that bins stay uniformly wide
        across zero: with ``int(value / width)`` every value in
        ``(-width, width)`` collapses into bin 0.  For non-negative values the
        result is identical to truncation.
        """
        return int(np.floor(value / width))

    def _ensure_state(self, state):
        """Return the Q-value row for ``state``, creating a zero row if new."""
        if state not in self.q_table:
            self.q_table[state] = np.zeros(len(self.actions))
        return self.q_table[state]

    def _discretize_state(self, co2, temp, fan_speed, gas_flow):
        """Bin the raw readings into a hashable state tuple.

        Bin widths: CO2 50, temperature 5, fan speed 100, gas flow 50.

        Returns:
            ``(co2_bin, temp_bin, fan_bin, flow_bin)`` as Python ints.
        """
        return (
            self._bin(co2, 50),
            self._bin(temp, 5),
            self._bin(fan_speed, 100),
            self._bin(gas_flow, 50),  # flow sensor reading is part of the state
        )

    def get_action(self, state, explore=True):
        """Pick an action index for ``state`` using an epsilon-greedy policy.

        Args:
            state: Discretized state tuple.
            explore: When true, a uniformly random action is taken with
                probability ``self.epsilon``; otherwise the choice is greedy.

        Returns:
            Index into ``self.actions``.  A state that has never been seen is
            registered with all-zero Q-values, for which the greedy choice
            resolves to index 0 (``np.argmax`` returns the first maximum).
        """
        q_values = self._ensure_state(state)

        if explore and np.random.random() < self.epsilon:
            return np.random.randint(0, len(self.actions))
        return np.argmax(q_values)

    def calculate_reward(self, weight_captured, fan_speed, gas_flow):
        """Combine capture, energy use and flow rate into a scalar reward.

        Args:
            weight_captured: Amount captured; rewarded at 20 per unit.
            fan_speed: Fan speed; penalized in proportion to
                ``fan_speed / self.max_fan_speed`` (up to -3 at max speed).
            gas_flow: Flow reading; +2 inside the optimal 400-600 L/min band
                (inclusive), -1 outside it.

        Returns:
            The sum of the three terms.
        """
        # Maximize capture.
        capture_reward = weight_captured * 20

        # Minimize energy.
        energy_penalty = -(fan_speed / self.max_fan_speed) * 3

        # Bonus for staying in the optimal flow band, penalty otherwise.
        flow_bonus = 2 if 400 <= gas_flow <= 600 else -1

        return capture_reward + energy_penalty + flow_bonus

    def update_q_value(self, state, action_idx, reward, next_state):
        """Apply one Q-learning update for ``(state, action_idx)``.

        Implements
        ``Q(s, a) += lr * (reward + gamma * max_a' Q(s', a') - Q(s, a))``.
        Both ``state`` and ``next_state`` are added to the table with zero
        Q-values if they are not present yet.
        """
        q_row = self._ensure_state(state)
        next_row = self._ensure_state(next_state)

        current_q = q_row[action_idx]
        td_target = reward + self.gamma * np.max(next_row)

        q_row[action_idx] = current_q + self.lr * (td_target - current_q)

    def control_fan_speed(self, co2, temp, current_fan_speed, gas_flow, explore=False):
        """Choose an action for the current readings and apply it.

        Args:
            co2, temp, gas_flow: Current sensor readings.
            current_fan_speed: Fan speed before the action is applied.
            explore: Forwarded to ``get_action``; greedy by default.

        Returns:
            ``(new_fan_speed, action_idx, state)`` where ``new_fan_speed`` is
            clamped to ``[self.min_fan_speed, self.max_fan_speed]`` and
            ``state`` is the discretized state the action was chosen in.
        """
        state = self._discretize_state(co2, temp, current_fan_speed, gas_flow)
        action_idx = self.get_action(state, explore=explore)

        requested_speed = current_fan_speed + self.actions[action_idx]
        # Clamp: at a limit, an action pushing past it leaves the speed as is.
        new_fan_speed = max(self.min_fan_speed, min(self.max_fan_speed, requested_speed))

        return new_fan_speed, action_idx, state

    def train_episode(self, co2_data, temp_data, flow_data, weight_data):
        """Run one training pass over recorded sensor series.

        For each step ``i`` an exploratory action is chosen from the readings
        at ``i``; the reward and next state are taken from the readings at
        ``i + 1`` together with the resulting fan speed.  The fan speed starts
        at 1500 and ``self.epsilon`` is decayed once at the end of the episode.

        Args:
            co2_data, temp_data, flow_data, weight_data: Index-aligned
                sequences.  The number of steps is ``len(co2_data) - 1``, so
                the other sequences must be at least as long as ``co2_data``.

        Returns:
            Sum of the rewards collected over the episode (0 when there are
            fewer than two samples).
        """
        total_reward = 0
        current_fan_speed = 1500

        for i in range(len(co2_data) - 1):
            nxt = i + 1

            new_fan_speed, action_idx, state = self.control_fan_speed(
                co2_data[i], temp_data[i], current_fan_speed,
                flow_data[i], explore=True,
            )

            reward = self.calculate_reward(
                weight_data[nxt], new_fan_speed, flow_data[nxt]
            )

            next_state = self._discretize_state(
                co2_data[nxt], temp_data[nxt], new_fan_speed, flow_data[nxt]
            )

            self.update_q_value(state, action_idx, reward, next_state)

            current_fan_speed = new_fan_speed
            total_reward += reward

        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)

        return total_reward

# Confirmation message emitted once this cell/module has finished executing.
print("RL Controller loaded (USES FLOW SENSOR)")