# 💧 RL-Driven Nutrient Feedback for Optimized Hydroponics

This notebook provides a "Golden Path" for using Reinforcement Learning to adaptively control nutrient dosing (EC/pH) in a hydroponic vertical farm.

Why RL? Traditional PID or rule-based controllers are static and can struggle with non-linear disturbances (evaporation, CO₂ spikes, crop stage changes). An RL agent learns by trial and error to balance plant health and resource efficiency, reducing chemical overuse and improving yield stability — directly translating into sustainability and cost savings.

**Scenario:** Adaptive Nutrient Delivery System — control discrete dosing actions (increase/hold/decrease) to keep pH and EC in optimal ranges while minimizing resource use and adapting to disturbances.

**Contents:** Data simulation (10k steps), environment definition (Gymnasium), baseline PID controller, RL agent training with Stable Baselines3 (PPO), evaluation (rewards, stability, efficiency), policy interpretation, and deployment-ready export.

In [None]:
# Install required packages (uncomment and run if needed)
# !pip install -q stable-baselines3[extra] gymnasium numpy pandas matplotlib seaborn joblib


In [None]:
# Imports and reproducible settings
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import gymnasium as gym
from gymnasium import spaces

# Stable Baselines3
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.callbacks import EvalCallback

import math
import random
import joblib

# Shared plot appearance for every figure produced below
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

# Reproducibility: one seed drives both NumPy's and Python's global RNGs
SEED = 42
for _seed_fn in (np.random.seed, random.seed):
    _seed_fn(SEED)


In [None]:
# ------------------------------
# Environment: Hydroponic nutrient control
# ------------------------------

class HydroponicEnv(gym.Env):
    """Custom Gymnasium environment simulating nutrient (EC/pH) dynamics in hydroponics.

    State: [pH, EC, Temp, Humidity, Plant_Age], clipped to ``observation_space``
    Action: Discrete(3) -> 0: decrease dose, 1: hold, 2: increase dose
    Reward: Encourages pH and EC within target ranges and penalizes excessive dosing

    An episode finishes after ``episode_length`` steps. The end of the episode
    is reported through the ``terminated`` flag returned by ``step``;
    ``truncated`` is always False.
    """

    # Gymnasium reads the "render_modes" key (the dotted "render.modes" key is
    # the legacy Gym spelling).
    metadata = {"render_modes": ["human"]}

    def __init__(self, episode_length=336, seed: int = None):
        """Create the environment.

        Args:
            episode_length: Number of steps per episode (e.g., hours).
            seed: Optional seed for the environment's random generator.
        """
        super().__init__()
        self.episode_length = episode_length  # steps per episode (e.g., hours)
        self.seed(seed)

        # Action space: decrease / hold / increase
        self.action_space = spaces.Discrete(3)

        # Observation space: pH [4.5,7.5], EC [0.5,3.5], Temp [12, 32], Humidity [20, 95], Plant_Age [0, 90]
        low = np.array([4.5, 0.5, 12.0, 20.0, 0.0], dtype=np.float32)
        high = np.array([7.5, 3.5, 32.0, 95.0, 90.0], dtype=np.float32)
        self.observation_space = spaces.Box(low=low, high=high, dtype=np.float32)

        # Optimal ranges
        self.pH_opt = (5.8, 6.2)
        self.EC_opt = (1.6, 2.0)

        # Internal state
        self.state = None
        self.step_count = 0

    def seed(self, seed=None):
        """Re-create the environment's random generator and return ``[seed]``."""
        self.np_random, seed = gym.utils.seeding.np_random(seed)
        return [seed]

    def reset(self, *, seed: int = None, options: dict = None):
        """Start a new episode near the optimal operating point.

        Args:
            seed: When given, the random generator is re-seeded first.
            options: Accepted for API compatibility; not used.

        Returns:
            ``(observation, info)`` where ``info`` is an empty dict.
        """
        if seed is not None:
            self.seed(seed)

        # Initialize near-optimal with small random offsets
        pH = float(6.0 + self.np_random.normal(0, 0.05))
        EC = float(1.8 + self.np_random.normal(0, 0.05))
        Temp = float(22.0 + self.np_random.normal(0, 1.0))
        Humidity = float(60.0 + self.np_random.normal(0, 3.0))
        Plant_Age = float(self.np_random.integers(0, 56))  # cycle up to 8 weeks

        self.state = np.array([pH, EC, Temp, Humidity, Plant_Age], dtype=np.float32)
        self.step_count = 0
        return self.state.copy(), {}

    def step(self, action):
        """Apply one dosing action and advance the simulation by one step.

        Args:
            action: 0 (decrease dose), 1 (hold) or 2 (increase dose).

        Returns:
            ``(observation, reward, terminated, truncated, info)``. ``info``
            holds the post-step pH, EC, Temp, Humidity and Plant_Age as floats.

        Raises:
            KeyError: If ``action`` is not 0, 1 or 2.
        """
        action = int(action)
        pH, EC, Temp, Humidity, Plant_Age = self.state

        # Map discrete action to dosing change
        dose_change = {0: -0.03, 1: 0.0, 2: 0.03}[action]

        # Nutrient dosing affects EC directly and can nudge pH slightly
        EC = float(EC + dose_change + self.np_random.normal(0, 0.005))
        pH = float(pH - 0.01 * dose_change + self.np_random.normal(0, 0.02))

        # Natural drift: evaporation increases EC slowly; occasional spikes disrupt pH
        EC += 0.0005  # slow accumulation
        # np_random is a numpy Generator: uniform draws come from .random(), not .rand()
        if self.np_random.random() < 0.01:  # spike event (CO2 injection / contamination)
            pH += self.np_random.normal(0.25, 0.1)

        # Clip only after drift and spikes so the state never leaves observation_space
        EC = float(np.clip(EC, 0.5, 3.5))
        pH = float(np.clip(pH, 4.5, 7.5))

        # Temperature and humidity drift slowly
        Temp = float(np.clip(Temp + self.np_random.normal(0, 0.05), 12, 32))
        Humidity = float(np.clip(Humidity + self.np_random.normal(0, 0.2), 20, 95))

        # Plant age increments (hourly), capped at the observation-space maximum
        Plant_Age = float(np.clip(Plant_Age + 1 / 24, 0.0, 90.0))

        self.state = np.array([pH, EC, Temp, Humidity, Plant_Age], dtype=np.float32)
        self.step_count += 1

        # Reward: keep pH and EC within optimal ranges, penalize large dosing
        pH_in = float(self.pH_opt[0] <= pH <= self.pH_opt[1])
        EC_in = float(self.EC_opt[0] <= EC <= self.EC_opt[1])

        # Stability reward component (closeness to center)
        pH_center = 1.0 - abs((pH - np.mean(self.pH_opt)) / 1.0)
        EC_center = 1.0 - abs((EC - np.mean(self.EC_opt)) / 1.0)

        # Action cost (resource efficiency): any non-hold action costs a little
        action_cost = 0.02 if action != 1 else 0.0

        reward = 0.5 * (pH_in + EC_in) + 0.3 * (pH_center + EC_center) - action_cost

        # Penalty for large deviations
        if pH < 5.0 or pH > 7.0:
            reward -= 0.5
        if EC < 0.8 or EC > 3.0:
            reward -= 0.5

        done = self.step_count >= self.episode_length

        info = {
            'pH': pH,
            'EC': EC,
            'Temp': Temp,
            'Humidity': Humidity,
            'Plant_Age': Plant_Age
        }

        return self.state.copy(), float(reward), bool(done), False, info

    def render(self, mode='human'):
        """Print the current step count, pH and EC."""
        print(f"Step {self.step_count} | pH={self.state[0]:.2f} | EC={self.state[1]:.2f}")


# Quick sanity check: build a 168-step environment and print its first observation.
# The same SEED is passed to the constructor and to reset().
env = HydroponicEnv(episode_length=168, seed=SEED)
obs, _ = env.reset(seed=SEED)
print('Initial observation:', obs)


In [None]:
# ------------------------------
# Data simulation: generate 10,000-step dataset using a simple PID-like controller
# ------------------------------

def pid_controller(state, pH_target=6.0, EC_target=1.8):
    """Map a weighted pH/EC error to a discrete dosing action.

    Args:
        state: Five-element observation (pH, EC, Temp, Humidity, Plant_Age);
            only pH and EC are used.
        pH_target: Desired pH.
        EC_target: Desired EC.

    Returns:
        2 (increase) when the weighted error exceeds 0.05, 0 (decrease) when
        it is below -0.05, otherwise 1 (hold).
    """
    ph_now, ec_now, _temp, _humidity, _age = state

    # Weighted sum of both errors: pH counts 60 %, EC 40 %
    weighted_error = 0.6 * (pH_target - ph_now) + 0.4 * ((EC_target - ec_now) / 1.0)

    # Dead band of +/-0.05 around zero keeps the controller on "hold"
    if weighted_error > 0.05:
        return 2  # increase
    if weighted_error < -0.05:
        return 0  # decrease
    return 1  # hold


def generate_dataset(env, controller_fn, steps=10000, seed=SEED):
    """Roll out ``controller_fn`` in ``env`` and log every transition.

    Args:
        env: Environment exposing ``reset`` and ``step``.
        controller_fn: Callable mapping an observation to an action.
        steps: Total number of environment steps to record.
        seed: Seed passed to the first ``env.reset``; later resets are unseeded.

    Returns:
        DataFrame with one row per step: the observation seen before acting
        (pH, EC, Temp, Humidity, Plant_Age), the action taken and its reward.
    """
    feature_names = ('pH', 'EC', 'Temp', 'Humidity', 'Plant_Age')
    rows = []
    current, _ = env.reset(seed=seed)
    for step_idx in range(steps):
        chosen = controller_fn(current)
        upcoming, step_reward, finished, _, _info = env.step(chosen)
        rows.append({
            'timestamp': step_idx,
            **{name: current[pos] for pos, name in enumerate(feature_names)},
            'action': chosen,
            'reward': step_reward,
        })
        # Continue from the new observation, or start a fresh episode when this one ended
        current = env.reset()[0] if finished else upcoming
    return pd.DataFrame(rows)

# Use a fresh env for dataset generation to avoid interfering with training env state.
# With 1000-step episodes, the 10,000-step rollout spans ten episodes.
sim_env = HydroponicEnv(episode_length=1000, seed=SEED)
df_sim = generate_dataset(sim_env, pid_controller, steps=10000)

print('Dataset shape:', df_sim.shape)
# Preview of the first rows (shown as the cell output when run in a notebook)
df_sim.head()

In [None]:
# Plot pH and EC for the first 1000 simulated steps, with their optimal bands shaded
first_steps = df_sim.iloc[:1000]
plt.figure(figsize=(14, 5))
for signal in ('pH', 'EC'):
    plt.plot(first_steps['timestamp'], first_steps[signal], label=signal)
plt.axhspan(5.8, 6.2, color='green', alpha=0.1, label='pH optimal')
plt.axhspan(1.6, 2.0, color='blue', alpha=0.05, label='EC optimal')
plt.legend()
plt.title('Simulated nutrient time-series (first 1000 steps)')
plt.xlabel('Step')
plt.show()

# Summary statistics, one row per column
df_sim[['pH', 'EC', 'action', 'reward']].describe().T

In [None]:
# ------------------------------
# Baseline: PID controller evaluation over multiple episodes
# ------------------------------

def evaluate_controller(env, controller_fn, episodes=30):
    """Run ``controller_fn`` for several episodes and summarize each one.

    Args:
        env: Environment exposing ``reset``, ``step`` and ``episode_length``.
        controller_fn: Callable mapping an observation to an action.
        episodes: Number of episodes to run.

    Returns:
        DataFrame with one row per episode: total reward, variance of pH and
        of EC over the episode, and the mean of the action codes.
    """

    def _summarize_episode(ep_idx):
        # One full rollout, stopping early if the environment reports it is done
        state, _ = env.reset()
        episode_return = 0.0
        ph_trace, ec_trace, action_trace = [], [], []
        for _ in range(env.episode_length):
            chosen = controller_fn(state)
            state, step_reward, finished, _, details = env.step(chosen)
            episode_return += step_reward
            ph_trace.append(details['pH'])
            ec_trace.append(details['EC'])
            action_trace.append(chosen)
            if finished:
                break
        return {
            'episode': ep_idx,
            'total_reward': episode_return,
            'pH_var': np.var(ph_trace),
            'EC_var': np.var(ec_trace),
            'mean_actions': np.mean(action_trace),
        }

    return pd.DataFrame([_summarize_episode(ep_idx) for ep_idx in range(episodes)])

# Score the PID baseline over 40 episodes of 168 steps each
baseline_env = HydroponicEnv(episode_length=168, seed=SEED)
baseline_metrics = evaluate_controller(baseline_env, pid_controller, episodes=40)
print('Baseline mean reward:', baseline_metrics['total_reward'].mean())
# Summary statistics, one row per metric (shown as the cell output when run in a notebook)
baseline_metrics.describe().T

In [None]:
# ------------------------------
# Train RL agent (PPO) with Monitor and evaluation callback
# ------------------------------

# Training environment: Monitor records episode statistics, DummyVecEnv gives
# PPO the vectorized interface it expects.
train_env = Monitor(HydroponicEnv(episode_length=168, seed=SEED))
vec_env = DummyVecEnv([lambda: train_env])

# Separate 64x64 networks for the policy (pi) and the value function (vf).
# Pass the dict directly: the older list-wrapped form net_arch=[dict(...)] is
# deprecated in the Stable-Baselines3 releases that support Gymnasium.
policy_kwargs = dict(net_arch=dict(pi=[64, 64], vf=[64, 64]))
model = PPO('MlpPolicy', vec_env, verbose=1, seed=SEED, policy_kwargs=policy_kwargs)

# Evaluate every 5k timesteps on a separate eval env; the best model and the
# evaluation logs are written under ./logs/
eval_env = Monitor(HydroponicEnv(episode_length=168, seed=SEED + 1))
eval_callback = EvalCallback(eval_env, best_model_save_path='./logs/', log_path='./logs/', eval_freq=5000, n_eval_episodes=5, deterministic=True)

# Train (short demo training; increase timesteps for production)
TIMESTEPS = 50000
model.learn(total_timesteps=TIMESTEPS, callback=eval_callback)

# Save final policy
model.save('nutrient_agent')
print('Training complete; model saved as nutrient_agent.zip')

In [None]:
# ------------------------------
# Evaluate trained agent vs baseline PID: reward curves and stability
# ------------------------------

def run_episode_collect(env, agent=None, controller_fn=None):
    """Run one episode and return its per-step traces.

    Args:
        env: Environment exposing ``reset``, ``step`` and ``episode_length``.
        agent: Optional object with ``predict(obs, deterministic=True)``; used
            whenever it is not None.
        controller_fn: Callable mapping an observation to an action; used only
            when ``agent`` is None.

    Returns:
        Dict of NumPy arrays keyed 'rewards', 'pH', 'EC' and 'actions', one
        entry per executed step.
    """
    use_agent = agent is not None
    traces = {'rewards': [], 'pH': [], 'EC': [], 'actions': []}

    state, _ = env.reset()
    for _step in range(env.episode_length):
        if use_agent:
            predicted, _ = agent.predict(state, deterministic=True)
            chosen = int(predicted)
        else:
            chosen = controller_fn(state)

        state, step_reward, finished, _, details = env.step(chosen)
        traces['rewards'].append(step_reward)
        traces['pH'].append(details['pH'])
        traces['EC'].append(details['EC'])
        traces['actions'].append(chosen)

        if finished:
            break

    return {key: np.array(values) for key, values in traces.items()}

# Evaluate the trained agent on a single 336-step episode
eval_env = HydroponicEnv(episode_length=336, seed=SEED + 2)
agent_results = run_episode_collect(eval_env, agent=model)

# Run the PID baseline on a second env of the same type (note: different seed)
eval_env2 = HydroponicEnv(episode_length=336, seed=SEED + 3)
pid_results = run_episode_collect(eval_env2, controller_fn=pid_controller)

# Plot the running (cumulative) reward of each controller over its episode
plt.plot(np.cumsum(agent_results['rewards']), label='PPO (agent)')
plt.plot(np.cumsum(pid_results['rewards']), label='PID baseline')
plt.title('Cumulative Episode Reward (single episode comparison)')
plt.xlabel('Step')
plt.ylabel('Cumulative Reward')
plt.legend()
plt.show()

# Stability metrics
def stability_metrics(results):
    """Summarize one episode's traces into scalar metrics.

    Args:
        results: Mapping with array-like entries 'rewards', 'pH', 'EC' and
            'actions' (lists or NumPy arrays), one value per step.

    Returns:
        Dict with:
          - 'avg_reward': mean reward per step (0.0 for an empty episode),
          - 'pH_var' / 'EC_var': variance of pH and EC over the episode,
          - 'resource_use': number of non-hold actions (action != 1).
    """
    rewards = np.asarray(results['rewards'], dtype=float)
    actions = np.asarray(results['actions'])
    return {
        # The key says "avg", so report the per-step mean rather than the sum
        'avg_reward': float(rewards.mean()) if rewards.size else 0.0,
        'pH_var': float(np.var(results['pH'])),
        'EC_var': float(np.var(results['EC'])),
        'resource_use': float(np.sum(actions != 1))  # count non-hold actions
    }

# Compute the same metrics for both single-episode runs
agent_metrics = stability_metrics(agent_results)
pid_metrics = stability_metrics(pid_results)

# Side-by-side table, one row per controller (shown as the cell output when run in a notebook)
pd.DataFrame([agent_metrics, pid_metrics], index=['PPO_agent', 'PID_baseline'])

In [None]:
# ------------------------------
# State-Action heatmap and policy interpretation
# ------------------------------

# Collect a longer set of (pH, action) pairs from the trained agent
def collect_state_action(agent, env, steps=2000):
    """Record the pH, EC and chosen action for ``steps`` agent-driven steps.

    Args:
        agent: Object with ``predict(obs, deterministic=True)``.
        env: Environment exposing ``reset`` and ``step``; it is reset whenever
            an episode ends so the rollout continues.
        steps: Number of environment steps to record.

    Returns:
        Tuple of three NumPy arrays ``(pH values, EC values, actions)``.
    """
    samples = []
    state, _ = env.reset()
    for _ in range(steps):
        predicted, _ = agent.predict(state, deterministic=True)
        chosen = int(predicted)
        state, _, finished, _, details = env.step(chosen)
        samples.append((details['pH'], details['EC'], chosen))
        if finished:
            state, _ = env.reset()

    ph_trace = [sample[0] for sample in samples]
    ec_trace = [sample[1] for sample in samples]
    action_trace = [sample[2] for sample in samples]
    return np.array(ph_trace), np.array(ec_trace), np.array(action_trace)

phs, ecs, acts = collect_state_action(model, HydroponicEnv(episode_length=1000, seed=SEED + 4), steps=3000)

# Heatmap by pH bins: 31 edges define 30 bins of width 0.1 between 4.5 and 7.5
bins = np.linspace(4.5, 7.5, 31)
n_bins = len(bins) - 1
# digitize() is 1-based; clip so values on or beyond the outer edges land in the
# first/last bin instead of indexing a non-existent row
ph_bin_idx = np.clip(np.digitize(phs, bins) - 1, 0, n_bins - 1)
# One row per bin (not per edge) so the image rows line up with the pH extent below
heat = np.zeros((n_bins, 3))
for b, a in zip(ph_bin_idx, acts):
    heat[b, a] += 1

# Normalize per bin (the epsilon keeps empty bins at zero instead of dividing by zero)
heat_norm = (heat.T / (heat.sum(axis=1) + 1e-8)).T

plt.figure(figsize=(8, 5))
plt.imshow(heat_norm.T, aspect='auto', origin='lower', extent=[bins[0], bins[-1], -0.5, 2.5], cmap='viridis')
plt.yticks([0, 1, 2], ['dec', 'hold', 'inc'])
plt.colorbar(label='Fraction of actions')
plt.xlabel('pH')
plt.title('State-Action heatmap (agent): action fraction by pH')
plt.show()

# Simple operator rule extraction (conditional averages)
rule_df = pd.DataFrame({'pH': phs, 'EC': ecs, 'action': acts})
# Compute mean action at coarse pH buckets (six buckets of width 0.5)
rule_summary = rule_df.groupby(pd.cut(rule_df['pH'], bins=np.linspace(4.5, 7.5, 7)))['action'].mean()
print('Rule summary (mean discrete action by pH bucket):')
print(rule_summary)


In [None]:
# ------------------------------
# Simulate agent vs PID on same initial seed and overlay pH/EC profiles
# ------------------------------

# Two environments built with the same seed, one per controller
seeded_env_agent = HydroponicEnv(episode_length=336, seed=1234)
seeded_env_pid = HydroponicEnv(episode_length=336, seed=1234)

agent_run = run_episode_collect(seeded_env_agent, agent=model)
pid_run = run_episode_collect(seeded_env_pid, controller_fn=pid_controller)

# pH overlay with the optimal band shaded
plt.figure(figsize=(14, 5))
plt.plot(agent_run['pH'], label='PPO pH')
plt.plot(pid_run['pH'], label='PID pH', alpha=0.8)
plt.axhspan(5.8, 6.2, color='green', alpha=0.1)
plt.legend()
# Title uses a real em dash (the original text was mis-encoded)
plt.title('pH — Agent vs PID (same seed)')
plt.show()

# EC overlay with the optimal band shaded
plt.figure(figsize=(14, 5))
plt.plot(agent_run['EC'], label='PPO EC')
plt.plot(pid_run['EC'], label='PID EC', alpha=0.8)
plt.axhspan(1.6, 2.0, color='blue', alpha=0.05)
plt.legend()
plt.title('EC — Agent vs PID (same seed)')
plt.show()


In [None]:
# ------------------------------
# Business-friendly evaluation metrics and interpretations
# ------------------------------

def _rollout(seed_offset, **controller):
    """Run one 336-step episode in a fresh env seeded with SEED + seed_offset."""
    return run_episode_collect(HydroponicEnv(episode_length=336, seed=SEED + seed_offset), **controller)


def _spread(run):
    """Stability index of one run: variance of pH plus variance of EC."""
    return np.var(run['pH']) + np.var(run['EC'])


def _doses_per_reward(run):
    """Non-hold actions divided by total reward (epsilon guards against zero)."""
    return np.sum(run['actions'] != 1) / (np.sum(run['rewards']) + 1e-6)


# Average Reward per Episode (higher is better)
agent_eval = [_rollout(10 + i, agent=model)['rewards'].sum() for i in range(10)]
baseline_eval = [_rollout(100 + i, controller_fn=pid_controller)['rewards'].sum() for i in range(10)]

print('Avg reward (agent):', np.mean(agent_eval))
print('Avg reward (PID):', np.mean(baseline_eval))

# Stability index: lower variance in pH and EC is better
agent_stability = [_spread(_rollout(20 + i, agent=model)) for i in range(10)]
baseline_stability = [_spread(_rollout(120 + i, controller_fn=pid_controller)) for i in range(10)]

print('Stability index (agent mean):', np.mean(agent_stability))
print('Stability index (PID mean):', np.mean(baseline_stability))

# Efficiency score: resource use per unit reward (lower is better)
agent_eff = [_doses_per_reward(_rollout(30 + i, agent=model)) for i in range(10)]
baseline_eff = [_doses_per_reward(_rollout(130 + i, controller_fn=pid_controller)) for i in range(10)]

print('Resource per reward (agent mean):', np.mean(agent_eff))
print('Resource per reward (PID mean):', np.mean(baseline_eff))

for line in (
    '\nBusiness interpretation:',
    '- Higher average reward indicates better overall nutrient health control (less crop stress).',
    '- Lower stability index means pH/EC remain steadier, reducing crop failure risk and manual interventions.',
    '- Efficiency score balances dosing frequency against control quality; lower is more economical.',
):
    print(line)


In [None]:
# ------------------------------
# Export & Deployment Ready
# ------------------------------

# Save agent
# Re-save the trained agent under the same name used after training
model.save('nutrient_agent')
print('Agent saved: nutrient_agent.zip')

# Sample inference snippet for edge device / real-time

# Draw one observation from a default (unseeded) environment to show the input format
sample_obs, _ = HydroponicEnv().reset()
print('Sample obs:', sample_obs)

# In production you would load and call:
# from stable_baselines3 import PPO
# agent = PPO.load('nutrient_agent')
# action, _ = agent.predict(observation, deterministic=True)
# then send action to actuator (pump control) with safeguards/limits

# Deployment checklist (brief):
# - Wrap the model in a lightweight service (FastAPI) that receives sensor JSON and returns action + confidence
# - Add safety checks & action rate limits to avoid hardware stress (e.g., max 1 dose change / hour)
# - Implement logging, anomaly detection, and model drift monitoring

print('\nDeployment ready: model saved and sample inference snippet included.')