In [None]:
import os
import pickle 
import random
import warnings

import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np
import pandas as pd

import torch

import sys
sys.path.insert(0, '..')
from src.data import TimeSeries
from src.methods import rl
from src.environment import TimeSeriesEnv

In [None]:
# GLOBALS
# Single seed shared by every random number generator used in this notebook.
SEED = 3141
for _seed_rng in (random.seed, np.random.seed, torch.manual_seed):
    _seed_rng(SEED)

# Silence FutureWarning messages for the rest of the session.
warnings.filterwarnings(action='ignore', category=FutureWarning)

In [None]:
# Train, select and test the neural-network-guided MCTS agent on every
# dataset folder under ../data, then pickle the resulting actions/rewards
# into ../results/rl/<dataset>.
#
# NOTE: pickle.load can execute arbitrary code contained in the file; only
# run this cell against locally generated, trusted artifacts.

# Sorted so the processing order (and therefore the seeded RNG stream each
# dataset sees) does not depend on the filesystem's listing order.
for dataset in sorted(os.listdir('../data')):
    data_dir_path = os.path.join('../data', dataset)

    # Skip the excluded datasets and any stray non-directory entry
    # (e.g. .DS_Store) that is not a dataset folder.
    if dataset in ['nitrogen-generator','wrapper-machine'] or not os.path.isdir(data_dir_path):
        continue

    print(dataset.upper())

    # The checkpoints are read back from this directory after training, so
    # make sure it exists before anything tries to write into it.
    results_dir = os.path.join('../results/rl', dataset)
    os.makedirs(results_dir, exist_ok=True)

    # Preprocessed splits.
    # NOTE(review): none of these six objects is used further down in this
    # cell; they are still loaded because later cells may rely on them.
    preprocessed = {}
    for split_name in ('X_train', 'X_valid', 'X_test', 'y_train', 'y_valid', 'y_test'):
        split_path = os.path.join(data_dir_path, 'preprocessed', split_name + '.pkl')
        with open(split_path, 'rb') as fh:
            preprocessed[split_name] = pickle.load(fh)
    X_train, X_valid, X_test = (preprocessed[k] for k in ('X_train', 'X_valid', 'X_test'))
    y_train, y_valid, y_test = (preprocessed[k] for k in ('y_train', 'y_valid', 'y_test'))

    # Timestamp format. The 'nitrogen-generator' branch is currently
    # unreachable because that dataset is skipped above; it is kept so the
    # format is right if the skip list changes.
    if dataset == 'nitrogen-generator':
        tf = '%Y-%m-%d %H:%M:%S'
    else:
        tf = '%Y-%m-%d %H:%M:%S.%f'

    # Raw time series
    train_ts = TimeSeries.from_csv(
        'pandas',
        os.path.join(data_dir_path, 'train.csv')
    )
    valid_ts = TimeSeries.from_csv(
        'pandas',
        os.path.join(data_dir_path, 'val.csv')
    )
    test_ts = TimeSeries.from_csv(
        'pandas',
        os.path.join(data_dir_path, 'test.csv')
    )

    # Data prep
    for ts in (train_ts, valid_ts, test_ts):
        ts.parse_datetime('timestamp', tf)
    for ts in (train_ts, valid_ts, test_ts):
        ts.split_by_day()

    # Features are the training columns (minus bookkeeping and label columns)
    # whose standard deviation over all training series is non-zero.
    LABEL_COL = 'PW_0.5h'
    train_features = pd.concat(
        [
            frame.drop(columns=['timestamp', LABEL_COL, 'date', 'time'])
            for frame in train_ts.time_series.values()
        ]
    )
    FEATURE_COLS = [
        c for c in train_features.columns if np.std(train_features[c])!=0
    ]
    del train_features  # release the concatenated copy

    # Make environments
    train_env = TimeSeriesEnv(train_ts, FEATURE_COLS, LABEL_COL, True)
    valid_env = TimeSeriesEnv(valid_ts, FEATURE_COLS, LABEL_COL, False)
    test_env = TimeSeriesEnv(test_ts, FEATURE_COLS, LABEL_COL, False)

    # Model prep
    # Hidden width = 2 * (smallest power of two strictly greater than the
    # feature count). Computed with bit_length so it also works for 512 or
    # more features, where the former bounded search left hid_dim at 0.
    hid_dim = 2 ** (len(FEATURE_COLS).bit_length() + 1)
    print('Features: ', len(FEATURE_COLS))
    print('Hidden Dim: ',hid_dim)
    alpha_stop = rl.NeuralNetGuidedMCTS(
        in_dim=len(FEATURE_COLS),
        hid_dim=hid_dim,
        save_dir=results_dir,
        n_actions=2,
        n_sim=100,
        lr=1e-4,
        weight_decay=0.01,
        gamma=0.999,
        bsz=64,
        device='cpu'
    )

    # Train
    epochs = 100
    train_actions, train_rewards = alpha_stop.train(epochs, train_env, valid_env)

    # Load and test best model (checkpoint files are read from results_dir)
    alpha_stop.net.load_state_dict(
        torch.load(
            os.path.join(results_dir, 'network.pt'),
            map_location='cpu'
        )
    )
    with open(os.path.join(results_dir, 'mcts.pkl'), 'rb') as fh:
        alpha_stop.mcts = pickle.load(fh)
    test_actions, test_rewards = alpha_stop.run(test_env)

    # Save output
    outputs = {
        'train_actions.pkl': train_actions,
        'train_rewards.pkl': train_rewards,
        'test_actions.pkl': test_actions,
        'test_rewards.pkl': test_rewards,
    }
    for out_name, out_obj in outputs.items():
        with open(os.path.join(results_dir, out_name), 'wb') as fh:
            pickle.dump(out_obj, fh)

print("<< PROCESS COMPELTE >>")

In [None]:
# Train, select and test the PPO agent on every dataset folder under
# ../data, then pickle the resulting actions/rewards into
# ../results/rl/<dataset> with a "ppo_" prefix.
#
# NOTE: pickle.load can execute arbitrary code contained in the file; only
# run this cell against locally generated, trusted artifacts.

# Use the GPU when one is present, otherwise fall back to the CPU instead of
# failing on machines without CUDA.
ppo_device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Sorted so the processing order (and therefore the seeded RNG stream each
# dataset sees) does not depend on the filesystem's listing order.
for dataset in sorted(os.listdir('../data')):
    data_dir_path = os.path.join('../data', dataset)

    # Skip the excluded datasets and any stray non-directory entry
    # (e.g. .DS_Store) that is not a dataset folder.
    if dataset in ['nitrogen-generator','wrapper-machine'] or not os.path.isdir(data_dir_path):
        continue

    print(dataset.upper())

    # The policy checkpoint is read back from this directory after training,
    # so make sure it exists before anything tries to write into it.
    results_dir = os.path.join('../results/rl', dataset)
    os.makedirs(results_dir, exist_ok=True)

    # Preprocessed splits.
    # NOTE(review): none of these six objects is used further down in this
    # cell; they are still loaded because later cells may rely on them.
    preprocessed = {}
    for split_name in ('X_train', 'X_valid', 'X_test', 'y_train', 'y_valid', 'y_test'):
        split_path = os.path.join(data_dir_path, 'preprocessed', split_name + '.pkl')
        with open(split_path, 'rb') as fh:
            preprocessed[split_name] = pickle.load(fh)
    X_train, X_valid, X_test = (preprocessed[k] for k in ('X_train', 'X_valid', 'X_test'))
    y_train, y_valid, y_test = (preprocessed[k] for k in ('y_train', 'y_valid', 'y_test'))

    # Timestamp format. The 'nitrogen-generator' branch is currently
    # unreachable because that dataset is skipped above; it is kept so the
    # format is right if the skip list changes.
    if dataset == 'nitrogen-generator':
        tf = '%Y-%m-%d %H:%M:%S'
    else:
        tf = '%Y-%m-%d %H:%M:%S.%f'

    # Raw time series
    train_ts = TimeSeries.from_csv(
        'pandas',
        os.path.join(data_dir_path, 'train.csv')
    )
    valid_ts = TimeSeries.from_csv(
        'pandas',
        os.path.join(data_dir_path, 'val.csv')
    )
    test_ts = TimeSeries.from_csv(
        'pandas',
        os.path.join(data_dir_path, 'test.csv')
    )

    # Data prep
    for ts in (train_ts, valid_ts, test_ts):
        ts.parse_datetime('timestamp', tf)
    for ts in (train_ts, valid_ts, test_ts):
        ts.split_by_day()

    # Features are the training columns (minus bookkeeping and label columns)
    # whose standard deviation over all training series is non-zero.
    LABEL_COL = 'PW_0.5h'
    train_features = pd.concat(
        [
            frame.drop(columns=['timestamp', LABEL_COL, 'date', 'time'])
            for frame in train_ts.time_series.values()
        ]
    )
    FEATURE_COLS = [
        c for c in train_features.columns if np.std(train_features[c])!=0
    ]
    del train_features  # release the concatenated copy

    # Make environments
    train_env = TimeSeriesEnv(train_ts, FEATURE_COLS, LABEL_COL, True)
    valid_env = TimeSeriesEnv(valid_ts, FEATURE_COLS, LABEL_COL, False)
    test_env = TimeSeriesEnv(test_ts, FEATURE_COLS, LABEL_COL, False)

    # Model prep
    # Hidden width = 2 * (smallest power of two strictly greater than the
    # feature count). Computed with bit_length so it also works for 512 or
    # more features, where the former bounded search left hid_dim at 0.
    hid_dim = 2 ** (len(FEATURE_COLS).bit_length() + 1)
    print('Features: ', len(FEATURE_COLS))
    print('Hidden Dim: ',hid_dim)
    ppo = rl.PPOAgent(
        in_dim=len(FEATURE_COLS),
        hid_dim=hid_dim,
        clip=0.2,
        update_freq=10,
        save_dir=results_dir,
        lr=3e-4,
        gamma=0.99,
        bsz=64,
        device=ppo_device
    )

    # Train
    epochs = 1000
    train_actions, train_rewards = ppo.train(epochs, train_env, valid_env)

    # Load and test best model; map_location keeps the checkpoint loadable
    # even if it was written on a different device than the current one.
    ppo.policy_net.load_state_dict(
        torch.load(
            os.path.join(results_dir, 'ppo-policy-network.pt'),
            map_location=ppo_device
        )
    )
    test_actions, test_rewards = ppo.run(test_env)

    # Save output
    outputs = {
        'ppo_train_actions.pkl': train_actions,
        'ppo_train_rewards.pkl': train_rewards,
        'ppo_test_actions.pkl': test_actions,
        'ppo_test_rewards.pkl': test_rewards,
    }
    for out_name, out_obj in outputs.items():
        with open(os.path.join(results_dir, out_name), 'wb') as fh:
            pickle.dump(out_obj, fh)

print("<< PROCESS COMPELTE >>")

In [None]:
# Line plot of the training rewards.
# `train_rewards` is reassigned on every iteration of the training loops, so
# this shows only the last dataset processed by the most recently run
# training cell.
sns.lineplot(train_rewards)

In [None]:
# Line plot of the training actions.
# `train_actions` is reassigned on every iteration of the training loops, so
# this shows only the last dataset processed by the most recently run
# training cell.
sns.lineplot(train_actions)

In [None]:
# Display the test-time actions returned by the last `run(test_env)` call;
# like the training outputs, this holds only the last dataset processed.
test_actions