In [None]:
%reload_ext autoreload
%autoreload 2
import wandb
import pandas as pd 
import torch
import numpy as np
import random 

import src.const as const
import src.env.env_basic as env_basic
import src.agents.agent_greedy as agent_greedy
import src.agents.agent_nerdy as agent_nerdy
import src.agents.agent_ppo as agent_ppo
import src.simulation.water_demands as wd 

## Agent nerdy (AgentAlways / AgentNever)

In [None]:
# Experiment configuration used by the agent-nerdy evaluation cells.
seed = 242
num_leaks = 12

# Seed every global RNG this notebook imports so that a fresh
# "Restart Kernel -> Run All" reproduces the same results.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Load the water-demand data for this seed / number of leaks.
df = wd.load(seed, num_leaks)

In [None]:
# Run configuration recorded alongside the W&B run.
args = dict(seed=seed, num_leaks=num_leaks, env='EnvComplexR')

# Evaluation environment (train=False); per the original note this
# tests on the last 30 days.
env = env_basic.EnvComplexR(df, train=False, obs_len=10)

agent = agent_nerdy.AgentAlways()
run = wandb.init(project= 'water_demand_rl', config=args, monitor_gym=True)
run.name = f"agent_always_{run.id}"

_obs, info = env.reset()
sum_rewards = 0

# Roll out a single evaluation episode, logging every step to W&B.
while True:
    action = agent.take_action(_obs)
    # NOTE(review): unpack order is kept exactly as in the original;
    # confirm it against EnvComplexR.step.
    _obs, reward, done, info, _ = env.step(action)

    sum_rewards = sum_rewards + reward
    step_metrics = {
        'test_action': action,
        "test_step_reward": reward,
        'test_sum_reward': sum_rewards,
    }
    run.log(step_metrics)

    if done:
        break

run.finish()

In [None]:
# Run configuration recorded alongside the W&B run.
args = {
    'seed': seed,
    'num_leaks': num_leaks,
    'env': 'EnvComplexR',
}
# test on last 30 days
env = env_basic.EnvComplexR(df, train=False, obs_len=10)
agent = agent_nerdy.AgentNever()
run = wandb.init(project= 'water_demand_rl', config=args, monitor_gym=True)
run.name = f"agent_never_{run.id}"

# env.reset() is unpacked as (observation, info) everywhere else in this
# notebook; unpack it here too so the first take_action() call receives the
# observation rather than the whole tuple.
_obs, info = env.reset()
done = False

# Roll out a single evaluation episode, logging every step to W&B.
sum_rewards = 0
while not done:
    action = agent.take_action(_obs)
    _obs, reward, done, info, _ = env.step(action)

    # log to wandb
    sum_rewards += reward
    run.log({'test_action': action, "test_step_reward": reward, 'test_sum_reward': sum_rewards})
run.finish()

## Agent greedy (epsilon-greedy)

In [None]:
# Experiment configuration for the epsilon-greedy section.
seed = 242
num_leaks = 12
epochs = 10  # number of training episodes per run

# Re-seed every global RNG this notebook imports so this section gives the
# same results whether or not the earlier cells were executed.
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Load the water-demand data for this seed / number of leaks.
df = wd.load(seed, num_leaks)

In [None]:
# Base W&B config; 'eps' is a placeholder that is overwritten for each run.
args = {
    'seed': seed,
    'num_leaks': num_leaks,
    'eps': 0,
    'epochs': epochs,
    # NOTE(review): the agent-nerdy cells log this under 'env' (lower-case);
    # confirm which key is intended before unifying them.
    'Env': 'EnvComplexR',
}


# One W&B run per exploration rate: train a fresh agent, then evaluate it.
for eps in [0.01, 0.1, 0.2]:
    args['eps'] = eps
    run = wandb.init(project= 'water_demand_rl', config=args, monitor_gym=True)
    run.name = f"agent_greedy_{run.id}"

    env = env_basic.EnvComplexR(df, train=True, obs_len=10)
    agent = agent_greedy.AgentGreedy()

    # train
    for episode in range(epochs):
        # Reset the accumulator for every episode so the logged value is the
        # return of this episode only, not a running total over all episodes.
        episode_return = 0
        _obs, info = env.reset()
        done = False
        while not done:
            action = agent.take_action(eps=eps)
            _obs, reward, done, info, _ = env.step(action)
            agent.update(action, reward)
            episode_return += reward

        # log to wandb (key spelling is kept: the plotting cell reads
        # 'epiosed_sum_reward' back from the run history)
        run.log({'epiosed': episode, 'epiosed_sum_reward': episode_return})

    # memory dump
    data_path = f'data/train/eps_greedy_record_{run.id}.npy'
    agent.dump_record(data_path)

    # test on last 30 days
    env.train = False
    agent.load_record(data_path) # load memory

    _obs, info = env.reset()
    done = False

    sum_rewards = 0
    while not done:
        # NOTE(review): evaluation still samples with the training eps;
        # confirm that exploration during testing is intended.
        action = agent.take_action(eps=eps)
        _obs, reward, done, info, _ = env.step(action)

        # log to wandb
        sum_rewards += reward
        run.log({'test_action': action, "test_step_reward": reward, 'test_sum_reward': sum_rewards})
    run.finish()

## Plot results

In [None]:
import wandb 
import pandas as pd
import matplotlib.pyplot as plt


In [None]:
# Query the W&B public API for the runs stored under this project path.
wandb_project_path = "adamzh0u/water_demand_rl"
api = wandb.Api()
runs = api.runs(path=wandb_project_path)

In [None]:
## get records from wandb
def _scan_metric(wandb_run, key):
    """Return every value logged under `key` for `wandb_run`, in scan order."""
    return [row[key] for row in wandb_run.scan_history(keys=[key])]


# First pass: print each run's name/config and collect its test curve.
dic_test_sum_rewards = {}
for run in runs:
    run_name = run.name
    print(run_name, run.config)
    dic_test_sum_rewards[run_name] = _scan_metric(run, "test_sum_reward")

# Second pass: collect the per-episode training curve of each run.
dic_train_sum_rewards = {}
for run in runs:
    dic_train_sum_rewards[run.name] = _scan_metric(run, "epiosed_sum_reward")

In [None]:
# Legend labels keyed by W&B run name; each dictionary selects the runs drawn
# in one panel of the results figure.
# Raw strings keep the LaTeX backslashes literal: '\e' and '\g' are invalid
# escape sequences in a normal string literal and trigger a warning on
# recent Python versions.
dic_labels1 = {
                'agent_greedy_w6vkzwta': r'Greedy $\epsilon = 0.01$',
                'agent_greedy_3trkrt17': r'Greedy $\epsilon = 0.1$',
                'agent_greedy_fv9ax2yk': r'Greedy $\epsilon = 0.2$',
                'agent_always_wt7zj6fu':'Always',
                'agent_never_rch83t0r':'Never',}
# NOTE(review): the commented-out 'agent_always_fn67te1w' entries below reuse
# the id of a DQN run; dic_labels1 uses 'agent_always_wt7zj6fu'. Check the id
# before re-enabling them.
dic_labels2 = {
                'agent_dqn_jtsyim1g': r'DQN $\gamma = 0.5$',
                'agent_dqn_is8m298o': r'DQN $\gamma = 0.2$',
                'agent_dqn_fn67te1w': r'DQN $\gamma = 0.7$',
                # 'agent_greedy_w6vkzwta': 'Greedy $\epsilon = 0.01$',
                # 'agent_always_fn67te1w':'Always',
                # 'agent_never_rch83t0r':'Never',
                }
dic_labels3= { 'agent_dqn_sxuseuwu':'DQN $epoch=500$',
                'agent_greedy_w6vkzwta': r'Greedy $\epsilon = 0.01$',
                # 'agent_greedy_3trkrt17': 'Greedy $\epsilon = 0.1$',
                # 'agent_greedy_fv9ax2yk': 'Greedy $\epsilon = 0.2$',
                # 'agent_always_fn67te1w':'Always',
                'agent_never_rch83t0r':'Never',}

In [None]:
import src.utils.plot_utils as pu
import matplotlib.pyplot as plt
pu.setup_mpl(as_default=0)

# 2x2 results figure. Panel A shows a smoothed training curve; panels B-D
# show cumulative test reward for the run groups in dic_labels1..3.
fig,ax = plt.subplots(2,2, figsize=(3.60236*2, 7),dpi=300)
ax = ax.flatten()

# Panel A: 10-point moving average of the per-episode training reward.
train_sum_rewards = dic_train_sum_rewards['agent_dqn_sxuseuwu']
smooth_record = np.convolve(train_sum_rewards, np.ones(10)/10, mode='valid')
ax[0].plot(smooth_record)
ax[0].set_xlabel('Epochs')
ax[0].set_ylabel('Train sum reward for each epoch')

# Panels B-D: (axis, run-name -> legend label, y-label or None).
# Panel C (ax[2]) now receives its own y-label; previously the label was
# applied to ax[1] a second time and ax[2] was left unlabelled.
test_panels = [
    (ax[1], dic_labels1, 'Test sum reward'),
    (ax[2], dic_labels2, 'Test sum reward'),
    (ax[3], dic_labels3, None),
]
for panel_ax, labels, ylabel in test_panels:
    for name, label in labels.items():
        # Prepend 0 so every cumulative curve starts at the origin.
        record = [0] + dic_test_sum_rewards[name]
        panel_ax.plot(record, label=label)
    panel_ax.set_xlabel('Steps')
    if ylabel is not None:
        panel_ax.set_ylabel(ylabel)
    panel_ax.legend(frameon=False)

for i in range(4):
    # label A B C D just above the top-left corner of each panel
    ax[i].text(-0.1, 1.01, chr(65+i), transform=ax[i].transAxes,
            size=12, weight='bold')
fig.savefig('fig/fig_results.png', bbox_inches='tight')