In [None]:
import random
import math
import statistics
import numpy as np
import matplotlib.pyplot as plt

# Reproducibility: seed both the stdlib and NumPy generators with the same value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Simulation size and horizons (one simulation step is one hour).
# N_PLOTS plots are created by make_initial_plots(); run_policy() simulates
# HOURS steps; train_q_learning() runs EPISODES episodes of TRAIN_HOURS steps.
N_PLOTS = 50
HOURS = 24 * 7
TRAIN_HOURS = 24 * 3
EPISODES = 300
# NOTE(review): DT is not referenced anywhere else in the visible source.
DT = 1

# Service fleet. A vehicle can serve BATTERY_MIN_PER_TRIP // SERVICE_MIN_PER_PLOT
# plots per trip (at least 1); step_env() executes at most
# TOTAL_SERVICE_CAPACITY_PER_HOUR non-zero actions per step
# (4 plots * 3 vehicles = 12 with the values below).
NUM_DRONES = 2
NUM_AGV = 1
BATTERY_MIN_PER_TRIP = 20
SERVICE_MIN_PER_PLOT = 5
PLOTS_PER_TRIP = max(1, BATTERY_MIN_PER_TRIP // SERVICE_MIN_PER_PLOT)
TOTAL_SERVICE_CAPACITY_PER_HOUR = PLOTS_PER_TRIP * (NUM_DRONES + NUM_AGV)

# Water budget: the quota is 70% of the base weekly volume and is the starting
# water balance for every training episode and evaluation run.
BASE_WEEKLY_WATER_LITERS = 1_000_000
WEEKLY_WATER_QUOTA_LITERS = BASE_WEEKLY_WATER_LITERS * 0.7

# Heatwave settings: start hour, length in hours, and the multiplier that
# step_env() applies to the hourly moisture loss while the heatwave is active.
HEATWAVE_START = 0
HEATWAVE_DURATION_HOURS = 72
HEATWAVE_MULT = 1.5

# An executed spray multiplies a plot's pest level by (1 - PESTICIDE_EFFECTIVENESS).
PESTICIDE_EFFECTIVENESS = 0.6

# Action ids and their human-readable labels. Id 0 means "do nothing" and never
# consumes service capacity.
ACTIONS = {
    0: "no_action",
    1: "irrigate_low",
    2: "irrigate_med",
    3: "irrigate_high",
    4: "pesticide_spray"
}
N_ACTIONS = len(ACTIONS)
# Water volume requested by each irrigation action (presumably liters, to match
# the quota constants above -- confirm).
WATER_PER_ACTION = {1: 50, 2: 120, 3: 250}

# Two plot ids drawn once at import time; make_initial_plots() adds 0.4 to
# their starting pest level.
HIGH_RISK_PLOTS = random.sample(range(N_PLOTS), 2)

def make_initial_plots():
    """Build a fresh list of N_PLOTS randomly initialised plot dicts.

    Each plot carries ``id``, ``soil_capacity``, ``crop_stage``,
    ``pest_vulnerability``, ``moisture`` and ``pest``. Plots whose id is in
    HIGH_RISK_PLOTS start with an extra 0.4 pest level.

    Values are drawn from the global ``random`` generator, so the result
    depends on its current state.
    """

    def _spawn(plot_id):
        # Draw order matters for reproducibility under a fixed seed.
        soil = random.uniform(0.2, 0.6)
        stage = random.choice([0, 1, 2])
        vulnerability = random.uniform(0.0, 1.0)
        moisture = random.uniform(0.25, 0.8)
        base_pest = random.uniform(0.0, 0.2)
        pest_bonus = 0.4 if plot_id in HIGH_RISK_PLOTS else 0.0
        return {
            "id": plot_id,
            "soil_capacity": soil,
            "crop_stage": stage,
            "pest_vulnerability": vulnerability,
            "moisture": moisture,
            "pest": base_pest + pest_bonus,
        }

    return [_spawn(plot_id) for plot_id in range(N_PLOTS)]

def discretize_moisture(m):
    """Map a moisture value to an integer bin in 0..4 (bin width 0.2, clamped)."""
    bucket = int(m * 5)
    if bucket < 0:
        return 0
    if bucket > 4:
        return 4
    return bucket
def discretize_pest(p):
    """Map a pest level to an integer bin in 0..4 (bin width 0.2, clamped)."""
    bucket = int(p * 5)
    if bucket < 0:
        return 0
    if bucket > 4:
        return 4
    return bucket

def discretize_time_of_day(hour):
    """Return the six-hour block (0..3) that ``hour`` falls into, modulo 24.

    Block 0 covers hours 0-5, block 1 hours 6-11, block 2 hours 12-17 and
    block 3 everything else.
    """
    hour_of_day = hour % 24
    for block, upper_bound in enumerate((6, 12, 18)):
        if hour_of_day < upper_bound:
            return block
    return 3

def discretize_water_ratio(r):
    """Map a remaining-water ratio to an integer bin in 0..4 (bin width 0.2, clamped)."""
    bucket = int(r * 5)
    if bucket < 0:
        return 0
    if bucket > 4:
        return 4
    return bucket

def state_to_index(m, p, t, w):
    """Flatten the (moisture, pest, time, water) bins into one table index.

    The bins are combined as mixed-radix digits with radices 5, 5, 4 and 5,
    most significant first.
    """
    index = 0
    for digit, radix in ((m, 5), (p, 5), (t, 4), (w, 5)):
        index = index * radix + digit
    return index

def step_env(state_plots, actions, hour, water_remaining):
    """Advance every plot by one hour and apply the requested actions.

    Only a random subset of the non-zero actions is carried out when more are
    requested than TOTAL_SERVICE_CAPACITY_PER_HOUR allows. Irrigation is
    further limited by the water still available.

    Args:
        state_plots: list of plot dicts; the dicts are copied, not mutated.
        actions: one action id per plot, aligned with ``state_plots``.
        hour: current simulation hour, used to decide whether the heatwave
            is active.
        water_remaining: water left in the budget before this step.

    Returns:
        A tuple ``(new_plots, water_used, service_used, rewards,
        water_remaining)`` where ``rewards`` holds one
        ``(moisture_score, pest_score)`` pair per plot and
        ``water_remaining`` is the budget after this step.
    """
    new_plots = [p.copy() for p in state_plots]
    water_used = 0
    service_used = 0
    rewards = []

    # Capacity limit: pick at random which requested actions get served.
    actionable = [i for i, a in enumerate(actions) if a != 0]
    executed = set(
        random.sample(actionable, min(len(actionable), TOTAL_SERVICE_CAPACITY_PER_HOUR))
    )

    # Honour HEATWAVE_START instead of assuming the heatwave begins at hour 0,
    # so that moving the start hour actually shifts the window.
    in_heatwave = HEATWAVE_START <= hour < HEATWAVE_START + HEATWAVE_DURATION_HOURS
    et_mult = HEATWAVE_MULT if in_heatwave else 1

    for i, act in enumerate(actions):
        p = new_plots[i]

        # Hourly moisture loss grows with crop stage and during the heatwave.
        base_et = 0.02 + 0.01 * p["crop_stage"]
        p["moisture"] -= base_et * et_mult

        # Pest growth scales with vulnerability and with current moisture.
        pest_growth = (0.005 + 0.01 * p["pest_vulnerability"]) * (0.5 + p["moisture"])
        if in_heatwave:
            pest_growth *= 1.5
        p["pest"] = min(1, p["pest"] + pest_growth)

        if act != 0 and i in executed:
            service_used += 1
            if act in WATER_PER_ACTION:
                vol = WATER_PER_ACTION[act]
                # Never deliver more water than the budget still holds.
                actual = min(vol, water_remaining)
                water_remaining -= actual
                water_used += actual
                p["moisture"] = min(1, p["moisture"] + (actual / 300) * p["soil_capacity"])
            elif act == 4:
                p["pest"] *= (1 - PESTICIDE_EFFECTIVENESS)

        if p["moisture"] < 0:
            p["moisture"] = 0

        # Score closeness to the target moisture (70% of soil capacity) and
        # the absence of pests; both scores lie in [0, 1].
        target = p["soil_capacity"] * 0.7
        moisture_score = 1 - min(1, abs(p["moisture"] - target))
        pest_score = 1 - p["pest"]
        rewards.append((moisture_score, pest_score))

    return new_plots, water_used, service_used, rewards, water_remaining

def compute_reward_plot(mscore, pscore, water_used_plot, water_ratio):
    """Combine one plot's scores into a scalar reward.

    The reward is 0.6 * moisture score + 0.4 * pest score, minus 0.0005 per
    unit of water used, minus a flat penalty of 5 when the remaining water
    ratio is below 0.05.
    """
    depletion_penalty = 5 if water_ratio < 0.05 else 0
    blended_score = 0.6 * mscore + 0.4 * pscore
    return blended_score - 0.0005 * water_used_plot - depletion_penalty

def baseline_policy(plot, hour):
    """Rule-based controller: spray first, otherwise irrigate by deficit.

    Returns action 4 when the pest level exceeds 0.4. Otherwise, if moisture
    is below 40% of soil capacity, returns irrigation level 3, 2 or 1
    depending on how large the shortfall is. Returns 0 when no action is
    needed. ``hour`` is accepted but not used.
    """
    dry_threshold = 0.4 * plot["soil_capacity"]
    if plot["pest"] > 0.4:
        return 4

    moisture = plot["moisture"]
    if not moisture < dry_threshold:
        return 0

    shortfall = dry_threshold - moisture
    # Largest shortfall first: (minimum shortfall, irrigation action).
    for floor, action in ((0.2, 3), (0.08, 2)):
        if shortfall > floor:
            return action
    return 1

# Size of the discrete state space: 5 moisture bins * 5 pest bins *
# 4 time-of-day blocks * 5 water-ratio bins, matching state_to_index().
N_STATES=5*5*4*5
# Tabular action-value estimates, one row per state and one column per action.
# A single table is shared by all plots.
Q=np.zeros((N_STATES, N_ACTIONS))
# Learning rate used in the Q-learning update.
alpha=0.1
# Discount factor applied to the best next-state value.
gamma=0.95
# Exploration probability used while training.
epsilon=0.2

def get_state_idx(p, hour, water):
    """Return the Q-table row index for plot ``p`` at ``hour``.

    The state combines the plot's moisture and pest bins, the time-of-day
    block and the bin of ``water`` relative to WEEKLY_WATER_QUOTA_LITERS.
    """
    moisture_bin = discretize_moisture(p["moisture"])
    pest_bin = discretize_pest(p["pest"])
    time_bin = discretize_time_of_day(hour)
    water_fraction = water / WEEKLY_WATER_QUOTA_LITERS
    water_bin = discretize_water_ratio(water_fraction)
    return state_to_index(moisture_bin, pest_bin, time_bin, water_bin)

def choose_action(sidx, eps=0):
    """Pick an action for state ``sidx`` with epsilon-greedy selection.

    With probability ``eps`` a uniformly random action id is returned.
    Otherwise the action with the highest Q-value for this state is returned.
    One random number is always drawn, even when ``eps`` is 0.
    """
    if random.random() < eps:
        return random.randrange(N_ACTIONS)
    greedy_action = np.argmax(Q[sidx])
    return int(greedy_action)

def train_q_learning():
    """Fill the shared Q-table by running EPISODES simulated episodes.

    Each episode starts from freshly generated plots and a full water quota
    and lasts TRAIN_HOURS steps. Every plot picks an epsilon-greedy action,
    the environment advances one hour, and each plot then contributes one
    Q-learning update to the shared table. Prints a message when done.
    """
    for _episode in range(EPISODES):
        plots = make_initial_plots()
        water = WEEKLY_WATER_QUOTA_LITERS

        for hour in range(TRAIN_HOURS):
            # Observe all states first, then select actions in plot order.
            states = [get_state_idx(plot, hour, water) for plot in plots]
            acts = [choose_action(state, eps=epsilon) for state in states]

            next_plots, _, _, score_pairs, water = step_env(plots, acts, hour, water)
            water_ratio = water / WEEKLY_WATER_QUOTA_LITERS

            for state, action, scores, next_plot in zip(states, acts, score_pairs, next_plots):
                moisture_score, pest_score = scores
                # NOTE(review): this charges the nominal volume of the chosen
                # action, even if step_env did not execute it or delivered
                # less water -- confirm this is intended.
                nominal_water = WATER_PER_ACTION.get(action, 0)
                reward = compute_reward_plot(
                    moisture_score, pest_score, nominal_water, water_ratio
                )
                next_state = get_state_idx(next_plot, hour + 1, water)
                td_target = reward + gamma * np.max(Q[next_state])
                Q[state, action] += alpha * (td_target - Q[state, action])

            plots = next_plots

    print("Training finished.")

def run_policy(tag):
    """Simulate HOURS steps under one policy and collect summary traces.

    Args:
        tag: ``"qlearn"`` selects the greedy Q-table policy; any other value
            selects ``baseline_policy``.

    Returns:
        A tuple ``(total_water, m_hist, p_hist)``: the total water used, and
        the per-hour mean moisture and mean pest level across all plots.
    """
    plots = make_initial_plots()
    water = WEEKLY_WATER_QUOTA_LITERS
    use_q_table = tag == "qlearn"

    total_water = 0
    moisture_trace = []
    pest_trace = []

    for hour in range(HOURS):
        if use_q_table:
            acts = [choose_action(get_state_idx(plot, hour, water)) for plot in plots]
        else:
            acts = [baseline_policy(plot, hour) for plot in plots]

        plots, used_this_hour, _, _, water = step_env(plots, acts, hour, water)
        total_water += used_this_hour

        moisture_trace.append(np.mean([plot["moisture"] for plot in plots]))
        pest_trace.append(np.mean([plot["pest"] for plot in plots]))

    return total_water, moisture_trace, pest_trace

# Train the Q-table once, then evaluate both policies over the full week.
train_q_learning()

# Reseed before each evaluation run so both policies start from the same
# randomly generated plots. Without this, the two runs draw their initial
# plots from different points in the random stream and are compared on
# different farms. A seed distinct from SEED keeps evaluation separate from
# the stream used at the start of training.
EVAL_SEED = SEED + 1
random.seed(EVAL_SEED)
wq, mq, pq = run_policy("qlearn")
random.seed(EVAL_SEED)
wb, mb, pb = run_policy("baseline")

print("Q-learning water:", wq)
print("Baseline water:", wb)

# Mean moisture across plots, hour by hour.
plt.plot(mq, label="Q Moisture")
plt.plot(mb, label="Baseline Moisture")
plt.legend()
plt.show()

# Mean pest level across plots, hour by hour.
plt.plot(pq, label="Q Pest")
plt.plot(pb, label="Baseline Pest")
plt.legend()
plt.show()
