In [None]:
#!/usr/bin/env python3
import os
import gym
from gym import wrappers
import ptan
import argparse
import numpy as np

import torch
import torch.optim as optim

from lib import environ, data, models, common, validation

from tensorboardX import SummaryWriter

# --- Data -------------------------------------------------------------------
# Default CSV files: one for training, a separate one for validation.
DEFAULT_STOCKS = "data/YNDX_160101_161231.csv"
DEFAULT_VAL_STOCKS = "data/YNDX_150101_151231.csv"

# --- Environment ------------------------------------------------------------
# Passed as `bars_count` to every StocksEnv created by the training script.
BARS_COUNT = 10

# --- Optimisation -----------------------------------------------------------
BATCH_SIZE = 32            # transitions sampled from the replay buffer per update
LEARNING_RATE = 1e-4       # Adam learning rate
GAMMA = 0.99               # per-step discount factor
REWARD_STEPS = 2           # `steps_count` of the experience source
TARGET_NET_SYNC = 1_000    # sync the target network every this many steps

# --- Replay buffer ----------------------------------------------------------
REPLAY_SIZE = 100_000      # buffer capacity
REPLAY_INITIAL = 10_000    # buffer length required before training starts

# --- Exploration (epsilon-greedy) -------------------------------------------
EPSILON_START = 1.0
EPSILON_STOP = 0.1         # lower bound for epsilon
EPSILON_STEPS = 1_000_000  # divisor of the step index in the linear decay

# --- Periodic reporting / persistence ---------------------------------------
STATES_TO_EVALUATE = 1_000       # states held out for the "values_mean" metric
EVAL_EVERY_STEP = 1_000          # how often "values_mean" is computed
VALIDATION_EVERY_STEP = 100_000  # how often validation runs are executed
CHECKPOINT_EVERY_STEP = 1_000_000  # how often a checkpoint is written


if __name__ == "__main__":
    # Train a simple feed-forward DQN on stock price bars.
    #
    # `data_in` stands in for the command-line options of the script version
    # (--cuda, --data, --year, --valdata, -r/--run) so that the code can be
    # run from a notebook cell without argparse:
    #   cuda - request GPU training
    #   data - stocks file or directory to train on
    #   year - if not None, training data is loaded via data.load_year_data
    #          and takes precedence over `data`
    #   val  - stocks file used for validation
    #   r    - run name (used for the saves directory and the writer comment)
    data_in = {
        'cuda': True,
        'data': DEFAULT_STOCKS,
        'year': None,
        'val': DEFAULT_VAL_STOCKS,
        'r': "test",
    }

    # Only use CUDA when it is actually available; otherwise moving the
    # network to the device would fail on CPU-only machines.
    use_cuda = data_in['cuda'] and torch.cuda.is_available()
    if data_in['cuda'] and not use_cuda:
        print("CUDA requested but not available, falling back to CPU")
    device = torch.device("cuda" if use_cuda else "cpu")

    saves_path = os.path.join("saves", data_in['r'])
    os.makedirs(saves_path, exist_ok=True)

    # Build the training environment (`env`) and a second environment over
    # the same data (`env_tst`) that is used for periodic test runs.
    if data_in['year'] is not None or os.path.isfile(data_in['data']):
        if data_in['year'] is not None:
            stock_data = data.load_year_data(data_in['year'])
            print("data not None","stosk =", stock_data )
        else:
            stock_data = {"YNDX": data.load_relative(data_in['data'])}
            print("data is None")
        # NOTE(review): `env` passes volumes=False explicitly while `env_tst`
        # relies on the StocksEnv default - confirm the two are meant to match.
        env = environ.StocksEnv(stock_data, bars_count=BARS_COUNT, reset_on_close=True, state_1d=False, volumes=False)
        env_tst = environ.StocksEnv(stock_data, bars_count=BARS_COUNT, reset_on_close=True, state_1d=False)
    elif os.path.isdir(data_in['data']):
        env = environ.StocksEnv.from_dir(data_in['data'], bars_count=BARS_COUNT, reset_on_close=True, state_1d=False)
        env_tst = environ.StocksEnv.from_dir(data_in['data'], bars_count=BARS_COUNT, reset_on_close=True, state_1d=False)
    else:
        raise RuntimeError("No data to train on")

    # Cap training episodes at 1000 steps.
    env = gym.wrappers.TimeLimit(env, max_episode_steps=1000)

    val_data = {"YNDX": data.load_relative(data_in['val'])}
    env_val = environ.StocksEnv(val_data, bars_count=BARS_COUNT, reset_on_close=True, state_1d=False)

    writer = SummaryWriter(comment="-simple-" + data_in['r'])
    net = models.SimpleFFDQN(env.observation_space.shape[0], env.action_space.n).to(device)
    tgt_net = ptan.agent.TargetNet(net)
    selector = ptan.actions.EpsilonGreedyActionSelector(EPSILON_START)
    agent = ptan.agent.DQNAgent(net, selector, device=device)
    exp_source = ptan.experience.ExperienceSourceFirstLast(env, agent, GAMMA, steps_count=REWARD_STEPS)
    buffer = ptan.experience.ExperienceReplayBuffer(exp_source, REPLAY_SIZE)
    optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

    # main training loop
    step_idx = 0
    eval_states = None   # fixed set of states used for the "values_mean" metric
    best_mean_val = None

    # np.inf is presumably the reward threshold at which the tracker would
    # signal "solved"; with infinity it never triggers - TODO confirm.
    with common.RewardTracker(writer, np.inf, group_rewards=100) as reward_tracker:
        # There is no exit condition: the loop runs until it is interrupted.
        while True:
            step_idx += 1
            buffer.populate(1)
            # Linear decay from EPSILON_START, clamped at EPSILON_STOP.
            selector.epsilon = max(EPSILON_STOP, EPSILON_START - step_idx / EPSILON_STEPS)

            new_rewards = exp_source.pop_rewards_steps()
            if new_rewards:
                reward_tracker.reward(new_rewards[0], step_idx, selector.epsilon)

            # Do not train until the replay buffer holds enough transitions.
            if len(buffer) < REPLAY_INITIAL:
                continue

            if eval_states is None:
                print("Initial buffer populated, start training")
                eval_states = buffer.sample(STATES_TO_EVALUATE)
                # np.asarray avoids a copy when possible; unlike
                # np.array(..., copy=False) it does not raise under NumPy 2.x
                # when a copy is unavoidable (stacking a list of arrays).
                eval_states = [np.asarray(transition.state) for transition in eval_states]
                eval_states = np.asarray(eval_states)

            if step_idx % EVAL_EVERY_STEP == 0:
                mean_val = common.calc_values_of_states(eval_states, net, device=device)
                writer.add_scalar("values_mean", mean_val, step_idx)
                # Save the weights whenever the metric reaches a new maximum.
                if best_mean_val is None or best_mean_val < mean_val:
                    if best_mean_val is not None:
                        print("%d: Best mean value updated %.3f -> %.3f" % (step_idx, best_mean_val, mean_val))
                    best_mean_val = mean_val
                    torch.save(net.state_dict(), os.path.join(saves_path, "mean_val-%.3f.data" % mean_val))

            optimizer.zero_grad()
            batch = buffer.sample(BATCH_SIZE)
            # The experience source was built with steps_count=REWARD_STEPS,
            # so the discount handed to the loss is GAMMA ** REWARD_STEPS.
            loss_v = common.calc_loss(batch, net, tgt_net.target_model, GAMMA ** REWARD_STEPS, device=device)
            loss_v.backward()
            optimizer.step()

            if step_idx % TARGET_NET_SYNC == 0:
                tgt_net.sync()

            if step_idx % CHECKPOINT_EVERY_STEP == 0:
                idx = step_idx // CHECKPOINT_EVERY_STEP
                # Zero-padded index: "%3d" would put spaces into the file name.
                torch.save(net.state_dict(), os.path.join(saves_path, "checkpoint-%03d.data" % idx))

            if step_idx % VALIDATION_EVERY_STEP == 0:
                # Log the results of a run on the training data ("_test")
                # and on the separate validation data ("_val").
                res = validation.validation_run(env_tst, net, device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_test", val, step_idx)
                res = validation.validation_run(env_val, net, device=device)
                for key, val in res.items():
                    writer.add_scalar(key + "_val", val, step_idx)


Reading data/YNDX_160101_161231.csv
Read done, got 131542 rows, 99752 filtered, 0 open prices adjusted
data is None
Reading data/YNDX_150101_151231.csv
Read done, got 130566 rows, 104412 filtered, 0 open prices adjusted
719: done 100 games, mean reward -0.199, mean steps 6.32, speed 436.55 f/s, eps 1.00
1365: done 200 games, mean reward -0.218, mean steps 5.94, speed 1179.21 f/s, eps 1.00
1979: done 300 games, mean reward -0.198, mean steps 5.70, speed 1169.90 f/s, eps 1.00
2649: done 400 games, mean reward -0.197, mean steps 5.74, speed 1111.47 f/s, eps 1.00
3314: done 500 games, mean reward -0.189, mean steps 5.74, speed 1101.35 f/s, eps 1.00
3975: done 600 games, mean reward -0.193, mean steps 5.75, speed 1034.77 f/s, eps 1.00
4653: done 700 games, mean reward -0.186, mean steps 5.77, speed 966.12 f/s, eps 1.00
5320: done 800 games, mean reward -0.186, mean steps 5.77, speed 987.01 f/s, eps 0.99
6064: done 900 games, mean reward -0.195, mean steps 5.85, speed 993.65 f/s, eps 0.99
66

  next_state_values[done_mask] = 0.0


10094: done 1500 games, mean reward -0.196, mean steps 5.84, speed 518.44 f/s, eps 0.99
10816: done 1600 games, mean reward -0.202, mean steps 5.87, speed 160.53 f/s, eps 0.99
11512: done 1700 games, mean reward -0.198, mean steps 5.88, speed 161.78 f/s, eps 0.99
12141: done 1800 games, mean reward -0.198, mean steps 5.86, speed 162.42 f/s, eps 0.99
12895: done 1900 games, mean reward -0.196, mean steps 5.90, speed 168.32 f/s, eps 0.99
13638: done 2000 games, mean reward -0.198, mean steps 5.93, speed 160.94 f/s, eps 0.99
14322: done 2100 games, mean reward -0.197, mean steps 5.93, speed 164.79 f/s, eps 0.99
15024: done 2200 games, mean reward -0.199, mean steps 5.94, speed 168.36 f/s, eps 0.98
15788: done 2300 games, mean reward -0.196, mean steps 5.98, speed 162.05 f/s, eps 0.98
