In [1]:
#!/usr/bin/env python3
import os
import gym
import ptan
import argparse
import numpy as np

import torch
import torch.optim as optim

from lib import environ, data, models, common, validation

from tensorboardX import SummaryWriter

In [2]:
BATCH_SIZE = 32
BARS_COUNT = 10
TARGET_NET_SYNC = 1000
DEFAULT_STOCKS = "data/YNDX_160101_161231.csv"
DEFAULT_VAL_STOCKS = "data/YNDX_150101_151231.csv"

DEFAULT_RUN_NAME = "result"

GAMMA = 0.99

REPLAY_SIZE = 100000
REPLAY_INITIAL = 10000

REWARD_STEPS = 2

LEARNING_RATE = 0.0001

STATES_TO_EVALUATE = 1000
EVAL_EVERY_STEP = 1000

EPSILON_START = 1.0
EPSILON_STOP = 0.1
EPSILON_STEPS = 1000000

CHECKPOINT_EVERY_STEP = 1000000
VALIDATION_EVERY_STEP = 100000

In [3]:
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--cuda", default=True, action="store_true", help="Enable cuda")
    parser.add_argument("--data", default=DEFAULT_STOCKS, help="Stocks file or dir to train on, default=" + DEFAULT_STOCKS)
    parser.add_argument("--year", type=int, help="Year to be used for training, if specified, overrides --data option")
    parser.add_argument("--valdata", default=DEFAULT_VAL_STOCKS, help="Stocks data for validation, default=" + DEFAULT_VAL_STOCKS)
    parser.add_argument("-r", "--run", default=DEFAULT_STOCKS, required=False, help="Run name")
    args, unknown = parser.parse_known_args()
    device = torch.device("cuda" if args.cuda else "cpu")

    saves_path = os.path.join("saves", args.run)
    os.makedirs(saves_path, exist_ok=True)

    if args.year is not None or os.path.isfile(args.data):
        if args.year is not None:
            stock_data = data.load_year_data(args.year)
        else:
            stock_data = {"YNDX": data.load_relative(args.data)}
        env = environ.StocksEnv(stock_data, bars_count=BARS_COUNT, reset_on_close=True, state_1d=False, volumes=False)
        env_tst = environ.StocksEnv(stock_data, bars_count=BARS_COUNT, reset_on_close=True, state_1d=False)
    elif os.path.isdir(args.data):
        env = environ.StocksEnv.from_dir(args.data, bars_count=BARS_COUNT, reset_on_close=True, state_1d=False)
        env_tst = environ.StocksEnv.from_dir(args.data, bars_count=BARS_COUNT, reset_on_close=True, state_1d=False)
    else:
        raise RuntimeError("No data to train on")
    env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)

    val_data = {"YNDX": data.load_relative(args.valdata)}
    env_val = environ.StocksEnv(val_data, bars_count=BARS_COUNT, reset_on_close=True, state_1d=False)

    writer = SummaryWriter(comment="-simple-" + args.run)
    net = models.SimpleFFDQN(env.observation_space.shape[0], env.action_space.n).to(device)
    tgt_net = ptan.agent.TargetNet(net)
    selector = ptan.actions.EpsilonGreedyActionSelector(EPSILON_START)
    agent = ptan.agent.DQNAgent(net, selector, device=device)
    exp_source = ptan.experience.ExperienceSourceFirstLast(env, agent, GAMMA, steps_count=REWARD_STEPS)
    buffer = ptan.experience.ExperienceReplayBuffer(exp_source, REPLAY_SIZE)
    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

    # main training loop
    step_idx = 0
    eval_states = None
    best_mean_val = None

    with common.RewardTracker(writer, np.inf, group_rewards=100) as reward_tracker:
        while True:
            step_idx += 1
            buffer.populate(1)
            selector.epsilon = max(EPSILON_STOP, EPSILON_START - step_idx / EPSILON_STEPS)

            new_rewards = exp_source.pop_rewards_steps()
            if new_rewards:
                reward_tracker.reward(new_rewards[0], step_idx, selector.epsilon)

            if len(buffer) < REPLAY_INITIAL:
                continue

            if eval_states is None:
                print("Initial buffer populated, start training")
                eval_states = buffer.sample(STATES_TO_EVALUATE)
                eval_states = [np.array(transition.state, copy=False) for transition in eval_states]
                eval_states = np.array(eval_states, copy=False)

            if step_idx % EVAL_EVERY_STEP == 0:
                mean_val = common.calc_values_of_states(eval_states, net, device=device)
                writer.add_scalar("values_mean", mean_val, step_idx)
                if best_mean_val is None or best_mean_val < mean_val:
                    if best_mean_val is not None:
                        print("%d: Best mean value updated %.3f -> %.3f" % (step_idx, best_mean_val, mean_val))
                    best_mean_val = mean_val
                    torch.save(net.state_dict(), os.path.join(saves_path, "mean_val-%.3f.data" % mean_val))

            optimizer.zero_grad()
            batch = buffer.sample(BATCH_SIZE)
            loss_v = common.calc_loss(batch, net, tgt_net.target_model, GAMMA ** REWARD_STEPS, device=device)
            loss_v.backward()
            optimizer.step()

            if step_idx % TARGET_NET_SYNC == 0:
                tgt_net.sync()

            if step_idx % CHECKPOINT_EVERY_STEP == 0:
                idx = step_idx // CHECKPOINT_EVERY_STEP
                torch.save(net.state_dict(), os.path.join(saves_path, "checkpoint-%3d.data" % idx))

            if step_idx % VALIDATION_EVERY_STEP == 0:
                res = validation.validation_run(env_tst, net, device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_test", val, step_idx)
                res = validation.validation_run(env_val, net, device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_val", val, step_idx)

Reading data/YNDX_160101_161231.csv
Read done, got 131542 rows, 99752 filtered, 0 open prices adjusted
Reading data/YNDX_150101_151231.csv
Read done, got 130566 rows, 104412 filtered, 0 open prices adjusted
682: done 100 games, mean reward -0.231, mean steps 5.94, speed 567.49 f/s, eps 1.00
1424: done 200 games, mean reward -0.251, mean steps 6.22, speed 709.91 f/s, eps 1.00
2127: done 300 games, mean reward -0.223, mean steps 6.20, speed 683.69 f/s, eps 1.00
2839: done 400 games, mean reward -0.230, mean steps 6.19, speed 622.95 f/s, eps 1.00
3618: done 500 games, mean reward -0.238, mean steps 6.33, speed 598.07 f/s, eps 1.00
4309: done 600 games, mean reward -0.225, mean steps 6.28, speed 673.98 f/s, eps 1.00
4965: done 700 games, mean reward -0.221, mean steps 6.19, speed 735.74 f/s, eps 1.00
5654: done 800 games, mean reward -0.225, mean steps 6.16, speed 666.84 f/s, eps 0.99
6350: done 900 games, mean reward -0.224, mean steps 6.15, speed 624.76 f/s, eps 0.99
7026: done 1000 game

66266: done 9300 games, mean reward -0.191, mean steps 6.23, speed 73.30 f/s, eps 0.93
66956: done 9400 games, mean reward -0.191, mean steps 6.23, speed 73.76 f/s, eps 0.93
67723: done 9500 games, mean reward -0.191, mean steps 6.23, speed 0.18 f/s, eps 0.93
68461: done 9600 games, mean reward -0.191, mean steps 6.23, speed 79.11 f/s, eps 0.93
69140: done 9700 games, mean reward -0.191, mean steps 6.23, speed 82.60 f/s, eps 0.93
69848: done 9800 games, mean reward -0.191, mean steps 6.23, speed 83.24 f/s, eps 0.93
70555: done 9900 games, mean reward -0.192, mean steps 6.23, speed 82.46 f/s, eps 0.93
71250: done 10000 games, mean reward -0.192, mean steps 6.23, speed 80.63 f/s, eps 0.93
72020: done 10100 games, mean reward -0.191, mean steps 6.24, speed 78.30 f/s, eps 0.93
72791: done 10200 games, mean reward -0.190, mean steps 6.24, speed 78.74 f/s, eps 0.93
73449: done 10300 games, mean reward -0.190, mean steps 6.23, speed 78.99 f/s, eps 0.93
74171: done 10400 games, mean reward -0.

136811: done 18600 games, mean reward -0.194, mean steps 6.66, speed 76.19 f/s, eps 0.86
137674: done 18700 games, mean reward -0.194, mean steps 6.67, speed 74.83 f/s, eps 0.86
138502: done 18800 games, mean reward -0.194, mean steps 6.68, speed 75.00 f/s, eps 0.86
139259: done 18900 games, mean reward -0.194, mean steps 6.68, speed 75.96 f/s, eps 0.86
140045: done 19000 games, mean reward -0.194, mean steps 6.68, speed 76.95 f/s, eps 0.86
140800: done 19100 games, mean reward -0.193, mean steps 6.68, speed 77.14 f/s, eps 0.86
141570: done 19200 games, mean reward -0.192, mean steps 6.69, speed 77.15 f/s, eps 0.86
142377: done 19300 games, mean reward -0.192, mean steps 6.70, speed 75.82 f/s, eps 0.86
143169: done 19400 games, mean reward -0.192, mean steps 6.71, speed 74.97 f/s, eps 0.86
143980: done 19500 games, mean reward -0.192, mean steps 6.71, speed 77.44 f/s, eps 0.86
144716: done 19600 games, mean reward -0.192, mean steps 6.71, speed 75.82 f/s, eps 0.86
145533: done 19700 ga

212477: done 27900 games, mean reward -0.189, mean steps 7.18, speed 74.88 f/s, eps 0.79
213424: done 28000 games, mean reward -0.190, mean steps 7.20, speed 75.99 f/s, eps 0.79
214309: done 28100 games, mean reward -0.191, mean steps 7.21, speed 75.19 f/s, eps 0.79
215180: done 28200 games, mean reward -0.191, mean steps 7.21, speed 76.40 f/s, eps 0.78
216031: done 28300 games, mean reward -0.192, mean steps 7.22, speed 75.53 f/s, eps 0.78
216836: done 28400 games, mean reward -0.191, mean steps 7.22, speed 75.70 f/s, eps 0.78
217710: done 28500 games, mean reward -0.191, mean steps 7.23, speed 75.00 f/s, eps 0.78
218541: done 28600 games, mean reward -0.192, mean steps 7.24, speed 73.52 f/s, eps 0.78
219409: done 28700 games, mean reward -0.191, mean steps 7.25, speed 74.86 f/s, eps 0.78
220160: done 28800 games, mean reward -0.191, mean steps 7.24, speed 74.75 f/s, eps 0.78
220979: done 28900 games, mean reward -0.191, mean steps 7.24, speed 75.77 f/s, eps 0.78
221910: done 29000 ga

295316: done 37200 games, mean reward -0.200, mean steps 7.96, speed 74.02 f/s, eps 0.70
296272: done 37300 games, mean reward -0.201, mean steps 7.96, speed 74.95 f/s, eps 0.70
297283: done 37400 games, mean reward -0.201, mean steps 7.97, speed 75.07 f/s, eps 0.70
298332: done 37500 games, mean reward -0.201, mean steps 7.99, speed 74.89 f/s, eps 0.70
299226: done 37600 games, mean reward -0.200, mean steps 8.00, speed 75.78 f/s, eps 0.70
300117: done 37700 games, mean reward -0.199, mean steps 8.00, speed 8.76 f/s, eps 0.70
301065: done 37800 games, mean reward -0.199, mean steps 8.00, speed 74.85 f/s, eps 0.70
302013: done 37900 games, mean reward -0.200, mean steps 8.02, speed 73.95 f/s, eps 0.70
302954: done 38000 games, mean reward -0.200, mean steps 8.02, speed 75.96 f/s, eps 0.70
303948: done 38100 games, mean reward -0.199, mean steps 8.03, speed 74.57 f/s, eps 0.70
304895: done 38200 games, mean reward -0.198, mean steps 8.04, speed 75.62 f/s, eps 0.70
305870: done 38300 gam

389151: done 46500 games, mean reward -0.190, mean steps 9.08, speed 75.38 f/s, eps 0.61
390288: done 46600 games, mean reward -0.190, mean steps 9.09, speed 75.94 f/s, eps 0.61
391390: done 46700 games, mean reward -0.191, mean steps 9.11, speed 75.95 f/s, eps 0.61
392402: done 46800 games, mean reward -0.190, mean steps 9.12, speed 75.14 f/s, eps 0.61
393440: done 46900 games, mean reward -0.190, mean steps 9.13, speed 69.58 f/s, eps 0.61
394563: done 47000 games, mean reward -0.189, mean steps 9.15, speed 73.80 f/s, eps 0.61
395651: done 47100 games, mean reward -0.189, mean steps 9.17, speed 70.84 f/s, eps 0.60
396661: done 47200 games, mean reward -0.189, mean steps 9.18, speed 72.69 f/s, eps 0.60
397616: done 47300 games, mean reward -0.188, mean steps 9.18, speed 75.20 f/s, eps 0.60
398785: done 47400 games, mean reward -0.189, mean steps 9.20, speed 73.51 f/s, eps 0.60
399866: done 47500 games, mean reward -0.189, mean steps 9.20, speed 75.41 f/s, eps 0.60
400835: done 47600 ga

496545: done 55700 games, mean reward -0.194, mean steps 10.63, speed 75.70 f/s, eps 0.50
497976: done 55800 games, mean reward -0.194, mean steps 10.65, speed 74.87 f/s, eps 0.50
499238: done 55900 games, mean reward -0.195, mean steps 10.67, speed 73.72 f/s, eps 0.50
500528: done 56000 games, mean reward -0.195, mean steps 10.70, speed 12.41 f/s, eps 0.50
501853: done 56100 games, mean reward -0.195, mean steps 10.73, speed 74.36 f/s, eps 0.50
503181: done 56200 games, mean reward -0.196, mean steps 10.76, speed 74.67 f/s, eps 0.50
504562: done 56300 games, mean reward -0.196, mean steps 10.79, speed 75.08 f/s, eps 0.50
505966: done 56400 games, mean reward -0.195, mean steps 10.82, speed 75.18 f/s, eps 0.49
507334: done 56500 games, mean reward -0.195, mean steps 10.85, speed 73.81 f/s, eps 0.49
508764: done 56600 games, mean reward -0.195, mean steps 10.88, speed 74.49 f/s, eps 0.49
510161: done 56700 games, mean reward -0.194, mean steps 10.91, speed 74.52 f/s, eps 0.49
511513: do

631492: done 64900 games, mean reward -0.191, mean steps 13.54, speed 73.63 f/s, eps 0.37
633299: done 65000 games, mean reward -0.191, mean steps 13.58, speed 74.32 f/s, eps 0.37
635021: done 65100 games, mean reward -0.193, mean steps 13.64, speed 74.49 f/s, eps 0.36
636638: done 65200 games, mean reward -0.192, mean steps 13.67, speed 74.65 f/s, eps 0.36
638471: done 65300 games, mean reward -0.193, mean steps 13.72, speed 74.41 f/s, eps 0.36
640311: done 65400 games, mean reward -0.193, mean steps 13.80, speed 74.84 f/s, eps 0.36
642120: done 65500 games, mean reward -0.194, mean steps 13.85, speed 74.03 f/s, eps 0.36
643869: done 65600 games, mean reward -0.195, mean steps 13.88, speed 74.00 f/s, eps 0.36
645777: done 65700 games, mean reward -0.194, mean steps 13.94, speed 73.71 f/s, eps 0.35
647339: done 65800 games, mean reward -0.194, mean steps 13.96, speed 73.44 f/s, eps 0.35
649250: done 65900 games, mean reward -0.194, mean steps 14.02, speed 74.22 f/s, eps 0.35
651233: do

861131: done 74100 games, mean reward -0.206, mean steps 23.33, speed 74.26 f/s, eps 0.14
865521: done 74200 games, mean reward -0.207, mean steps 23.60, speed 74.61 f/s, eps 0.13
870168: done 74300 games, mean reward -0.207, mean steps 23.89, speed 74.34 f/s, eps 0.13
874888: done 74400 games, mean reward -0.208, mean steps 24.19, speed 74.45 f/s, eps 0.13
880480: done 74500 games, mean reward -0.206, mean steps 24.59, speed 74.34 f/s, eps 0.12
885650: done 74600 games, mean reward -0.207, mean steps 24.93, speed 74.53 f/s, eps 0.11
891371: done 74700 games, mean reward -0.207, mean steps 25.33, speed 74.38 f/s, eps 0.11
897913: done 74800 games, mean reward -0.207, mean steps 25.82, speed 74.42 f/s, eps 0.10
904503: done 74900 games, mean reward -0.209, mean steps 26.31, speed 36.40 f/s, eps 0.10
910205: done 75000 games, mean reward -0.210, mean steps 26.70, speed 71.45 f/s, eps 0.10
916195: done 75100 games, mean reward -0.208, mean steps 27.12, speed 71.74 f/s, eps 0.10
922300: do

1400488: done 83100 games, mean reward -0.182, mean steps 56.82, speed 63.31 f/s, eps 0.10
1406639: done 83200 games, mean reward -0.183, mean steps 57.09, speed 147.27 f/s, eps 0.10
1412975: done 83300 games, mean reward -0.181, mean steps 57.36, speed 146.33 f/s, eps 0.10
1419502: done 83400 games, mean reward -0.180, mean steps 57.63, speed 145.60 f/s, eps 0.10
1425050: done 83500 games, mean reward -0.179, mean steps 57.82, speed 145.98 f/s, eps 0.10
1430551: done 83600 games, mean reward -0.182, mean steps 58.01, speed 147.36 f/s, eps 0.10
1436958: done 83700 games, mean reward -0.184, mean steps 58.31, speed 144.61 f/s, eps 0.10
1443174: done 83800 games, mean reward -0.185, mean steps 58.50, speed 147.39 f/s, eps 0.10
1448661: done 83900 games, mean reward -0.186, mean steps 58.63, speed 147.63 f/s, eps 0.10
1454700: done 84000 games, mean reward -0.187, mean steps 58.79, speed 147.44 f/s, eps 0.10
1460987: done 84100 games, mean reward -0.187, mean steps 58.99, speed 147.27 f/s

1949535: done 92100 games, mean reward -0.187, mean steps 59.80, speed 147.37 f/s, eps 0.10
1955687: done 92200 games, mean reward -0.187, mean steps 59.81, speed 146.97 f/s, eps 0.10
1961767: done 92300 games, mean reward -0.186, mean steps 59.80, speed 144.50 f/s, eps 0.10
1967647: done 92400 games, mean reward -0.186, mean steps 59.84, speed 137.82 f/s, eps 0.10
1974268: done 92500 games, mean reward -0.187, mean steps 59.91, speed 141.32 f/s, eps 0.10
1980492: done 92600 games, mean reward -0.187, mean steps 59.97, speed 145.67 f/s, eps 0.10
1987069: done 92700 games, mean reward -0.185, mean steps 60.03, speed 145.86 f/s, eps 0.10
1993669: done 92800 games, mean reward -0.187, mean steps 60.10, speed 147.23 f/s, eps 0.10
1999662: done 92900 games, mean reward -0.186, mean steps 60.12, speed 144.25 f/s, eps 0.10
2005052: done 93000 games, mean reward -0.187, mean steps 60.08, speed 67.43 f/s, eps 0.10
2010978: done 93100 games, mean reward -0.186, mean steps 60.05, speed 143.06 f/s

KeyboardInterrupt: 