In [1]:
#!/usr/bin/env python3
import os
import gym
import ptan
import argparse
import numpy as np

import torch
import torch.optim as optim

from lib import environ, data, models, common, validation

from tensorboardX import SummaryWriter

In [2]:
# ---------------------------------------------------------------------------
# Hyper-parameters and defaults for the DQN stock-trading training run.
# ---------------------------------------------------------------------------

# Data / environment
DEFAULT_STOCKS = "data/YNDX_160101_161231.csv"      # training quotes
DEFAULT_VAL_STOCKS = "data/YNDX_150101_151231.csv"  # validation quotes
DEFAULT_RUN_NAME = "result"                         # intended default name of a run
BARS_COUNT = 10                                     # passed as bars_count to the stocks environment

# Optimisation
BATCH_SIZE = 32                # transitions sampled from the replay buffer per update
LEARNING_RATE = 1.25e-5        # Adam learning rate
GAMMA = 0.99                   # discount factor
REWARD_STEPS = 2               # steps_count of the experience source (loss uses GAMMA ** REWARD_STEPS)
TARGET_NET_SYNC = 1_000        # sync the target network every this many steps

# Replay buffer
REPLAY_SIZE = 100_000          # buffer capacity
REPLAY_INITIAL = 10_000        # training starts once the buffer holds this many entries

# Epsilon-greedy exploration: epsilon = max(STOP, START - step / STEPS)
EPSILON_START = 1.0
EPSILON_STOP = 0.1
EPSILON_STEPS = 1_000_000

# Periodic evaluation / persistence (all measured in training steps)
STATES_TO_EVALUATE = 1_000     # number of buffer samples kept as fixed evaluation states
EVAL_EVERY_STEP = 1_000        # how often the mean value of those states is computed
VALIDATION_EVERY_STEP = 100_000
CHECKPOINT_EVERY_STEP = 1_000_000

In [None]:
if __name__ == "__main__":
    # Train a feed-forward DQN on stock data: build train/test/validation
    # environments, fill a replay buffer with epsilon-greedy experience, and
    # optimise the network while periodically evaluating, validating and
    # checkpointing it. The loop has no exit condition; it runs until interrupted.

    # ---- command line -----------------------------------------------------
    parser = argparse.ArgumentParser()
    # default=True means the GPU is requested even without the flag; availability
    # is checked below so the script still runs on CPU-only machines.
    parser.add_argument("--cuda", default=True, action="store_true", help="Enable cuda")
    parser.add_argument("--data", default=DEFAULT_STOCKS, help="Stocks file or dir to train on, default=" + DEFAULT_STOCKS)
    parser.add_argument("--year", type=int, help="Year to be used for training, if specified, overrides --data option")
    parser.add_argument("--valdata", default=DEFAULT_VAL_STOCKS, help="Stocks data for validation, default=" + DEFAULT_VAL_STOCKS)
    # Fix: the run name used to default to DEFAULT_STOCKS (a CSV path), which
    # produced a "saves/data/<file>.csv" directory and a TensorBoard comment
    # containing path separators.
    parser.add_argument("-r", "--run", default=DEFAULT_RUN_NAME, required=False, help="Run name")
    # parse_known_args tolerates unrecognised arguments (e.g. the ones a
    # notebook kernel is launched with) instead of exiting.
    args, unknown = parser.parse_known_args()

    # Fix: fall back to CPU when CUDA was requested but is not available.
    use_cuda = bool(args.cuda) and torch.cuda.is_available()
    if args.cuda and not use_cuda:
        print("CUDA requested but not available, falling back to CPU")
    device = torch.device("cuda" if use_cuda else "cpu")

    saves_path = os.path.join("saves", args.run)
    os.makedirs(saves_path, exist_ok=True)

    # ---- environments -----------------------------------------------------
    # --year takes precedence over --data; --data may be a single file or a directory.
    if args.year is not None or os.path.isfile(args.data):
        if args.year is not None:
            stock_data = data.load_year_data(args.year)
        else:
            stock_data = {"YNDX": data.load_relative(args.data)}
        env = environ.StocksEnv(stock_data, bars_count=BARS_COUNT, reset_on_close=True, state_1d=False, volumes=False)
        env_tst = environ.StocksEnv(stock_data, bars_count=BARS_COUNT, reset_on_close=True, state_1d=False)
    elif os.path.isdir(args.data):
        env = environ.StocksEnv.from_dir(args.data, bars_count=BARS_COUNT, reset_on_close=True, state_1d=False)
        env_tst = environ.StocksEnv.from_dir(args.data, bars_count=BARS_COUNT, reset_on_close=True, state_1d=False)
    else:
        raise RuntimeError("No data to train on")
    # Only the training environment is capped at 1000 steps per episode.
    env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)

    val_data = {"YNDX": data.load_relative(args.valdata)}
    env_val = environ.StocksEnv(val_data, bars_count=BARS_COUNT, reset_on_close=True, state_1d=False)

    # ---- model, agent, experience source ----------------------------------
    writer = SummaryWriter(comment="-simple-" + args.run)
    net = models.SimpleFFDQN(env.observation_space.shape[0], env.action_space.n).to(device)
    tgt_net = ptan.agent.TargetNet(net)
    selector = ptan.actions.EpsilonGreedyActionSelector(EPSILON_START)
    agent = ptan.agent.DQNAgent(net, selector, device=device)
    exp_source = ptan.experience.ExperienceSourceFirstLast(env, agent, GAMMA, steps_count=REWARD_STEPS)
    buffer = ptan.experience.ExperienceReplayBuffer(exp_source, REPLAY_SIZE)
    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

    # ---- main training loop -----------------------------------------------
    step_idx = 0
    eval_states = None      # fixed batch of states, sampled once, used to track mean value
    best_mean_val = None

    # NOTE(review): np.inf is presumably the tracker's stop-reward threshold
    # (never reached) - confirm in lib.common.RewardTracker.
    with common.RewardTracker(writer, np.inf, group_rewards=100) as reward_tracker:
        while True:
            step_idx += 1
            buffer.populate(1)
            # Linear epsilon decay, clamped at EPSILON_STOP.
            selector.epsilon = max(EPSILON_STOP, EPSILON_START - step_idx / EPSILON_STEPS)

            new_rewards = exp_source.pop_rewards_steps()
            if new_rewards:
                reward_tracker.reward(new_rewards[0], step_idx, selector.epsilon)

            # Keep collecting experience until the buffer is warm.
            if len(buffer) < REPLAY_INITIAL:
                continue

            if eval_states is None:
                print("Initial buffer populated, start training")
                eval_states = buffer.sample(STATES_TO_EVALUATE)
                # Fix: np.asarray instead of np.array(..., copy=False); under
                # NumPy >= 2.0 copy=False raises when a copy is unavoidable,
                # which is always the case when stacking a list of arrays.
                eval_states = [np.asarray(transition.state) for transition in eval_states]
                eval_states = np.asarray(eval_states)

            if step_idx % EVAL_EVERY_STEP == 0:
                mean_val = common.calc_values_of_states(eval_states, net, device=device)
                writer.add_scalar("values_mean", mean_val, step_idx)
                # Save the weights each time the mean value reaches a new maximum.
                if best_mean_val is None or best_mean_val < mean_val:
                    if best_mean_val is not None:
                        print("%d: Best mean value updated %.3f -> %.3f" % (step_idx, best_mean_val, mean_val))
                    best_mean_val = mean_val
                    torch.save(net.state_dict(), os.path.join(saves_path, "mean_val-%.3f.data" % mean_val))

            # One optimisation step on a sampled batch; the discount passed to
            # the loss is GAMMA ** REWARD_STEPS to match the n-step transitions.
            optimizer.zero_grad()
            batch = buffer.sample(BATCH_SIZE)
            loss_v = common.calc_loss(batch, net, tgt_net.target_model, GAMMA ** REWARD_STEPS, device=device)
            loss_v.backward()
            optimizer.step()

            if step_idx % TARGET_NET_SYNC == 0:
                tgt_net.sync()

            if step_idx % CHECKPOINT_EVERY_STEP == 0:
                idx = step_idx // CHECKPOINT_EVERY_STEP
                # Fix: zero-pad the index ("%03d"); "%3d" pads with spaces and
                # produced file names such as "checkpoint-  1.data".
                torch.save(net.state_dict(), os.path.join(saves_path, "checkpoint-%03d.data" % idx))

            if step_idx % VALIDATION_EVERY_STEP == 0:
                # Log validation metrics for the training-data env ("_test")
                # and for the held-out validation data ("_val").
                res = validation.validation_run(env_tst, net, device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_test", val, step_idx)
                res = validation.validation_run(env_val, net, device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_val", val, step_idx)

Reading data/YNDX_160101_161231.csv
Read done, got 131542 rows, 99752 filtered, 0 open prices adjusted
Reading data/YNDX_150101_151231.csv
Read done, got 130566 rows, 104412 filtered, 0 open prices adjusted
720: done 100 games, mean reward -0.212, mean steps 6.29, speed 942.46 f/s, eps 1.00
1340: done 200 games, mean reward -0.204, mean steps 5.83, speed 824.45 f/s, eps 1.00
2028: done 300 games, mean reward -0.202, mean steps 5.87, speed 1265.84 f/s, eps 1.00
2714: done 400 games, mean reward -0.205, mean steps 5.90, speed 1378.43 f/s, eps 1.00
3361: done 500 games, mean reward -0.201, mean steps 5.84, speed 1362.87 f/s, eps 1.00
4001: done 600 games, mean reward -0.201, mean steps 5.78, speed 1139.81 f/s, eps 1.00
4635: done 700 games, mean reward -0.203, mean steps 5.74, speed 1372.99 f/s, eps 1.00
5353: done 800 games, mean reward -0.204, mean steps 5.80, speed 1360.91 f/s, eps 0.99
6025: done 900 games, mean reward -0.203, mean steps 5.80, speed 1054.40 f/s, eps 0.99
6759: done 10

65707: done 9200 games, mean reward -0.194, mean steps 6.25, speed 151.48 f/s, eps 0.93
66398: done 9300 games, mean reward -0.194, mean steps 6.24, speed 149.49 f/s, eps 0.93
67132: done 9400 games, mean reward -0.194, mean steps 6.25, speed 149.07 f/s, eps 0.93
67865: done 9500 games, mean reward -0.194, mean steps 6.25, speed 150.83 f/s, eps 0.93
68654: done 9600 games, mean reward -0.194, mean steps 6.26, speed 149.81 f/s, eps 0.93
69433: done 9700 games, mean reward -0.194, mean steps 6.26, speed 151.02 f/s, eps 0.93
70152: done 9800 games, mean reward -0.194, mean steps 6.26, speed 150.45 f/s, eps 0.93
70892: done 9900 games, mean reward -0.195, mean steps 6.26, speed 151.67 f/s, eps 0.93
71688: done 10000 games, mean reward -0.195, mean steps 6.27, speed 149.07 f/s, eps 0.93
72429: done 10100 games, mean reward -0.195, mean steps 6.27, speed 148.03 f/s, eps 0.93
73233: done 10200 games, mean reward -0.195, mean steps 6.29, speed 149.95 f/s, eps 0.93
74008: done 10300 games, mean

135399: done 18400 games, mean reward -0.196, mean steps 6.64, speed 149.52 f/s, eps 0.86
136207: done 18500 games, mean reward -0.197, mean steps 6.64, speed 149.97 f/s, eps 0.86
136994: done 18600 games, mean reward -0.198, mean steps 6.65, speed 151.14 f/s, eps 0.86
137759: done 18700 games, mean reward -0.196, mean steps 6.65, speed 150.41 f/s, eps 0.86
138578: done 18800 games, mean reward -0.196, mean steps 6.66, speed 150.73 f/s, eps 0.86
139394: done 18900 games, mean reward -0.196, mean steps 6.68, speed 150.08 f/s, eps 0.86
140203: done 19000 games, mean reward -0.197, mean steps 6.68, speed 150.30 f/s, eps 0.86
141042: done 19100 games, mean reward -0.197, mean steps 6.69, speed 149.60 f/s, eps 0.86
141883: done 19200 games, mean reward -0.196, mean steps 6.71, speed 150.75 f/s, eps 0.86
142677: done 19300 games, mean reward -0.197, mean steps 6.72, speed 148.62 f/s, eps 0.86
143503: done 19400 games, mean reward -0.196, mean steps 6.73, speed 149.89 f/s, eps 0.86
144266: do

210286: done 27600 games, mean reward -0.206, mean steps 7.18, speed 149.41 f/s, eps 0.79
211193: done 27700 games, mean reward -0.206, mean steps 7.19, speed 148.56 f/s, eps 0.79
212035: done 27800 games, mean reward -0.206, mean steps 7.20, speed 149.27 f/s, eps 0.79
212916: done 27900 games, mean reward -0.206, mean steps 7.21, speed 150.76 f/s, eps 0.79
213740: done 28000 games, mean reward -0.206, mean steps 7.22, speed 148.69 f/s, eps 0.79
214587: done 28100 games, mean reward -0.205, mean steps 7.23, speed 148.50 f/s, eps 0.79
215470: done 28200 games, mean reward -0.206, mean steps 7.23, speed 142.08 f/s, eps 0.78
216348: done 28300 games, mean reward -0.205, mean steps 7.25, speed 136.21 f/s, eps 0.78
217185: done 28400 games, mean reward -0.205, mean steps 7.26, speed 149.77 f/s, eps 0.78
218104: done 28500 games, mean reward -0.205, mean steps 7.27, speed 149.70 f/s, eps 0.78
218993: done 28600 games, mean reward -0.205, mean steps 7.28, speed 150.87 f/s, eps 0.78
219821: do

292989: done 36800 games, mean reward -0.193, mean steps 8.00, speed 150.70 f/s, eps 0.71
293806: done 36900 games, mean reward -0.193, mean steps 8.00, speed 150.44 f/s, eps 0.71
294738: done 37000 games, mean reward -0.194, mean steps 8.01, speed 149.46 f/s, eps 0.71
295700: done 37100 games, mean reward -0.194, mean steps 8.02, speed 149.56 f/s, eps 0.70
296707: done 37200 games, mean reward -0.195, mean steps 8.04, speed 148.41 f/s, eps 0.70
297629: done 37300 games, mean reward -0.195, mean steps 8.06, speed 148.08 f/s, eps 0.70
298637: done 37400 games, mean reward -0.196, mean steps 8.07, speed 149.96 f/s, eps 0.70
299639: done 37500 games, mean reward -0.196, mean steps 8.08, speed 148.91 f/s, eps 0.70
300668: done 37600 games, mean reward -0.196, mean steps 8.10, speed 19.89 f/s, eps 0.70
301644: done 37700 games, mean reward -0.195, mean steps 8.10, speed 142.78 f/s, eps 0.70
302671: done 37800 games, mean reward -0.195, mean steps 8.12, speed 148.54 f/s, eps 0.70
303707: don

385444: done 46000 games, mean reward -0.201, mean steps 9.04, speed 149.03 f/s, eps 0.61
386546: done 46100 games, mean reward -0.200, mean steps 9.05, speed 144.30 f/s, eps 0.61
387584: done 46200 games, mean reward -0.200, mean steps 9.06, speed 148.66 f/s, eps 0.61
388651: done 46300 games, mean reward -0.199, mean steps 9.08, speed 148.86 f/s, eps 0.61
389663: done 46400 games, mean reward -0.199, mean steps 9.09, speed 149.44 f/s, eps 0.61
390641: done 46500 games, mean reward -0.198, mean steps 9.10, speed 148.73 f/s, eps 0.61
391738: done 46600 games, mean reward -0.198, mean steps 9.10, speed 148.99 f/s, eps 0.61
392785: done 46700 games, mean reward -0.197, mean steps 9.12, speed 148.36 f/s, eps 0.61
393955: done 46800 games, mean reward -0.197, mean steps 9.15, speed 148.45 f/s, eps 0.61
395016: done 46900 games, mean reward -0.197, mean steps 9.17, speed 148.10 f/s, eps 0.60
396085: done 47000 games, mean reward -0.196, mean steps 9.18, speed 149.00 f/s, eps 0.60
397155: do

492746: done 55100 games, mean reward -0.193, mean steps 10.72, speed 148.67 f/s, eps 0.51
494080: done 55200 games, mean reward -0.194, mean steps 10.75, speed 148.53 f/s, eps 0.51
495470: done 55300 games, mean reward -0.194, mean steps 10.78, speed 148.59 f/s, eps 0.50
496663: done 55400 games, mean reward -0.195, mean steps 10.80, speed 148.82 f/s, eps 0.50
497951: done 55500 games, mean reward -0.194, mean steps 10.82, speed 148.50 f/s, eps 0.50
499170: done 55600 games, mean reward -0.193, mean steps 10.83, speed 148.50 f/s, eps 0.50
500488: done 55700 games, mean reward -0.193, mean steps 10.86, speed 23.94 f/s, eps 0.50
501831: done 55800 games, mean reward -0.193, mean steps 10.88, speed 147.71 f/s, eps 0.50
503165: done 55900 games, mean reward -0.193, mean steps 10.91, speed 147.89 f/s, eps 0.50
504353: done 56000 games, mean reward -0.193, mean steps 10.92, speed 147.93 f/s, eps 0.50
505599: done 56100 games, mean reward -0.191, mean steps 10.94, speed 149.13 f/s, eps 0.49


626309: done 64200 games, mean reward -0.193, mean steps 13.52, speed 147.05 f/s, eps 0.37
628085: done 64300 games, mean reward -0.193, mean steps 13.57, speed 148.23 f/s, eps 0.37
629821: done 64400 games, mean reward -0.193, mean steps 13.61, speed 149.75 f/s, eps 0.37
631693: done 64500 games, mean reward -0.194, mean steps 13.67, speed 148.34 f/s, eps 0.37
633475: done 64600 games, mean reward -0.195, mean steps 13.71, speed 148.19 f/s, eps 0.37
635197: done 64700 games, mean reward -0.195, mean steps 13.76, speed 148.66 f/s, eps 0.36
636880: done 64800 games, mean reward -0.195, mean steps 13.79, speed 149.14 f/s, eps 0.36
638620: done 64900 games, mean reward -0.195, mean steps 13.86, speed 148.01 f/s, eps 0.36
640435: done 65000 games, mean reward -0.194, mean steps 13.91, speed 148.59 f/s, eps 0.36
642340: done 65100 games, mean reward -0.195, mean steps 13.98, speed 148.51 f/s, eps 0.36
644040: done 65200 games, mean reward -0.194, mean steps 14.02, speed 149.14 f/s, eps 0.36