In [6]:
%reload_ext autoreload
%autoreload 2
import wandb
import pandas as pd 
import torch
import numpy as np
import random 
import src.const as const
import src.env.env_basic as env_basic
import src.agents.agent_greedy as agent_greedy
import src.agents.agent_nerdy as agent_nerdy
import src.simulation.water_demands as wd 

## agent nerdy 

In [7]:
# Scenario shared by the baseline-agent cells below: the same two values are
# passed to wd.load() here and recorded in each run's wandb config.
seed, num_leaks = 41, 12
df = wd.load(seed, num_leaks)

In [None]:
# Baseline run: roll AgentAlways through the train=False environment and log
# every step to wandb.
args = {
    'seed': seed,
    'num_leaks': num_leaks,
}

# Subtracted from info['step'] before it is logged as 'tindex'.
# NOTE(review): presumably the step at which the test split begins, so that
# tindex starts near 0 -- confirm against WaterLeakEnv.
test_step_offset = 17520

# Use the run as a context manager so it is always closed, even when the
# environment or the agent raises part-way through the episode; otherwise the
# dangling run stays active in the kernel.
with wandb.init(name='agent_always', project='cege_test', config=args, monitor_gym=True) as run:
    # test on last 30 days
    env = env_basic.WaterLeakEnv(df, train=False)
    agent = agent_nerdy.AgentAlways()

    _obs = env.reset()
    done = False

    sum_rewards = 0  # running total of the rewards seen so far in this episode
    while not done:
        action = agent.get_action()
        _obs, reward, done, info = env.step(action)

        # log to wandb
        sum_rewards += reward
        run.log({
            'tindex': info['step'] - test_step_offset,
            'taction': action,
            "treward": reward,
            'tsum_rewards': sum_rewards,
        })

In [None]:
# Baseline run: roll AgentNever through the train=False environment and log
# every step to wandb.
args = {
    'seed': seed,
    'num_leaks': num_leaks,
}

# Subtracted from info['step'] before it is logged as 'tindex'.
# NOTE(review): presumably the step at which the test split begins, so that
# tindex starts near 0 -- confirm against WaterLeakEnv.
test_step_offset = 17520

# Use the run as a context manager so it is always closed, even when the
# environment or the agent raises part-way through the episode; otherwise the
# dangling run stays active in the kernel.
with wandb.init(name='agent_never', project='cege_test', config=args, monitor_gym=True) as run:
    # test on last 30 days
    env = env_basic.WaterLeakEnv(df, train=False)
    agent = agent_nerdy.AgentNever()

    _obs = env.reset()
    done = False

    sum_rewards = 0  # running total of the rewards seen so far in this episode
    while not done:
        action = agent.get_action()
        _obs, reward, done, info = env.step(action)

        # log to wandb
        sum_rewards += reward
        run.log({
            'tindex': info['step'] - test_step_offset,
            'taction': action,
            "treward": reward,
            'tsum_rewards': sum_rewards,
        })

## agent greedy

In [18]:
# Settings for the epsilon-greedy experiment.
# `epochs` is how many passes over the training environment each run makes.
epochs = 10
seed, num_leaks = 41, 12
df = wd.load(seed, num_leaks)

In [19]:
# Epsilon-greedy sweep: for each exploration rate, train AgentGreedy for
# `epochs` passes over the train=True environment, persist its record, then
# evaluate a freshly loaded agent on the train=False environment. Each eps
# value gets its own wandb run.
args = {
    'seed': seed,
    'num_leaks': num_leaks,
    'eps': 0,
    'epochs': epochs
}

# Subtracted from info['step'] before it is logged as 'tindex'.
# NOTE(review): presumably the step at which the test split begins, so that
# tindex starts near 0 -- confirm against WaterLeakEnv.
test_step_offset = 17520

for eps in [0.01, 0.05, 0.1, 0.2, 0.4]:
    args['eps'] = eps

    # Re-seed the global RNGs so every run of the sweep is repeatable and the
    # 'seed' stored in the wandb config actually governs the run.
    # NOTE(review): assumes AgentGreedy / WaterLeakEnv draw from `random` or
    # `numpy.random`; if they own a private generator, seed that instead.
    random.seed(seed)
    np.random.seed(seed)

    # Use the run as a context manager so it is always closed, even if
    # training or evaluation raises; a dangling run would otherwise interfere
    # with the wandb.init() of the next eps value.
    with wandb.init(name=f'agent_greedy_eps{eps}_epoch{epochs}', project='cege_test', config=args, monitor_gym=True) as run:

        env = env_basic.WaterLeakEnv(df, train=True)
        agent = agent_greedy.AgentGreedy()

        # train
        # sum_rewards is deliberately NOT reset per epoch: the logged value is
        # the cumulative reward over all training epochs of this run.
        sum_rewards = 0
        for _ in range(epochs):
            _obs = env.reset()
            done = False
            while not done:
                action = agent.get_action(eps=eps)
                _obs, reward, done, info = env.step(action)
                agent.update_record(action, reward)

                # log to wandb
                sum_rewards += reward
                run.log({'index': info['step'], 'action': action, "reward": reward, 'sum_rewards': sum_rewards})
            # dump record each epoch
            run.log({'record': agent.record})
        # memory dump
        agent.dump_record()

        # test on last 30 days
        env = env_basic.WaterLeakEnv(df, train=False)
        agent = agent_greedy.AgentGreedy()
        agent.load_record()  # load memory

        _obs = env.reset()
        done = False

        sum_rewards = 0
        while not done:
            # NOTE(review): evaluation still explores with the training eps;
            # pass eps=0 here if a purely greedy evaluation is intended.
            action = agent.get_action(eps=eps)
            _obs, reward, done, info = env.step(action)

            # log to wandb
            sum_rewards += reward
            run.log({
                'tindex': info['step'] - test_step_offset,
                'taction': action,
                "treward": reward,
                'tsum_rewards': sum_rewards,
            })

record dumped to data/train/eps_greedy_record.npy
record loaded from data/train/eps_greedy_record.npy


VBox(children=(Label(value='0.001 MB of 0.001 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
action,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁█▁▁▁█▁▁█▁██▁▁▁▁▁▁█▁▁▁█▁▁█
index,▂▄▅▇▁▃▅▇▁▃▆█▂▃▅▇▁▃▅▇▂▄▆█▁▃▅▇▁▃▆█▂▄▆▇▁▃▅█
reward,███████████████▁███▁██▁█▁▁██████▁███▁██▁
sum_rewards,███▇▇▇▇▇▇▆▆▆▆▆▅▅▅▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▁▁▁
taction,█▁▁▁▁▁▁▁▁▁█▁▁▁█▁▁▁▁▁▁▁█▁▁▁▁▁▁▁▁▁█▁█▁▁▁▁▁
tindex,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
treward,▁█████████▁███▁███████▁█████████▁█▁█████
tsum_rewards,██▇▇▇▇▇▇▆▇▇▇▆▆▆▇▆▆▅▅▅▅▄▄▄▄▄▄▃▃▃▃▂▂▂▂▂▂▁▁

0,1
action,0
index,17520
reward,1
sum_rewards,-38940
taction,0
tindex,2879
treward,1
tsum_rewards,-612


## agent dqn