In [11]:
import pandas as pd
from stable_baselines3.common.logger import configure

import numpy as np
import datetime
import yfinance as yf

from finrl.meta.preprocessor.yahoodownloader import YahooDownloader
from finrl.meta.preprocessor.preprocessors import FeatureEngineer, data_split
from finrl import config_tickers
from finrl.config import INDICATORS

import itertools

from finrl.agents.stablebaselines3.models import DRLAgent
from finrl.config import INDICATORS, TRAINED_MODEL_DIR, RESULTS_DIR
from finrl.main import check_and_make_directories
from Envs.env_stocktrading import StockTradingEnv

# Prepare the output directory for trained models before any training runs
# (going by the helper's name, it creates the listed directories if missing —
# TODO confirm). Only TRAINED_MODEL_DIR is passed here; RESULTS_DIR is not.
check_and_make_directories([TRAINED_MODEL_DIR])

## Read data

We first read the .csv file containing our training data into a DataFrame.

In [12]:
# Load the training split from disk.
train = pd.read_csv('train_data.csv')

# The first CSV column holds the saved row index: promote it back to the index
# and give the index an empty name.
# If your data was not generated by part 1 of this tutorial, make sure its
# columns and index are already in the form the environment expects; in that
# case you can comment out and skip the following two lines.
train = train.set_index(keys=train.columns[0])
train.index.name = ''

In [13]:
#train

In [14]:
# Add the 'change' column: the opening price minus the closing price per row
# (positive when the close is below the open).
# NOTE(review): this is open - close, the reverse of the more common
# close - open convention — confirm the sign is intended.
train['change'] = train['open'].sub(train['close'])

In [15]:
#train

In [16]:
# Sentiment feature names handed to the environment as "sentiment_list".
# Left empty here, so it adds nothing to the state-space size.
SENTIMENT = list()

## Construct the environment

In [17]:
# Number of distinct tickers in the training data.
stock_dimension = len(train["tic"].unique())

# State size = 1 global slot +, for every ticker, 2 slots plus one slot per
# technical indicator and one per sentiment feature.
# (Presumably the global slot is cash and the 2 per-ticker slots are price and
# holdings, as in the standard FinRL env — TODO confirm against StockTradingEnv.)
state_space = 1 + stock_dimension * (2 + len(INDICATORS) + len(SENTIMENT))
print(f"Stock Dimension: {stock_dimension}, State Space: {state_space}")

Stock Dimension: 29, State Space: 291


In [9]:
# One transaction-cost rate (0.001) per ticker. Both names intentionally refer
# to the same list object, exactly as in the chained assignment this replaces.
sell_cost_list = [0.001] * stock_dimension
buy_cost_list = sell_cost_list

# Start with zero shares held for every ticker.
num_stock_shares = [0 for _ in range(stock_dimension)]

# Closing prices as a plain Python list (not used further in this cell).
price = train["close"].values.tolist()

# Keyword arguments forwarded to StockTradingEnv.
env_kwargs = dict(
    hmax=100,
    initial_amount=1_000_000,
    num_stock_shares=num_stock_shares,
    buy_cost_pct=buy_cost_list,
    sell_cost_pct=sell_cost_list,
    state_space=state_space,
    stock_dim=stock_dimension,
    tech_indicator_list=INDICATORS,
    sentiment_list=SENTIMENT,
    action_space=stock_dimension,  # one action component per ticker
    reward_scaling=1e-4,
)

# Build the training environment on the training DataFrame.
e_train_gym = StockTradingEnv(df=train, **env_kwargs)

## Environment for training

In [10]:
# Obtain the environment object handed to the agents. get_sb_env() returns a
# pair; only the first element is kept and the second is discarded into `_`.
env_train, _ = e_train_gym.get_sb_env()
# Sanity check: show which environment class was produced
# (the recorded output shows a Stable-Baselines3 DummyVecEnv).
print(type(env_train))

<class 'stable_baselines3.common.vec_env.dummy_vec_env.DummyVecEnv'>


# Part 3: Train DRL Agents
* Here, the DRL algorithms come from **[Stable Baselines 3](https://stable-baselines3.readthedocs.io/en/master/)**, a library that implements popular DRL algorithms in PyTorch and is the successor to the original Stable Baselines.
* Users are also encouraged to try **[ElegantRL](https://github.com/AI4Finance-Foundation/ElegantRL)** and **[Ray RLlib](https://github.com/ray-project/ray)**.

In [11]:
# Agent wrapper bound to the training environment.
agent = DRLAgent(env=env_train)

# Switches selecting which algorithms to run; all five are enabled.
# Set an individual flag to False below this line to skip that algorithm.
if_using_a2c = if_using_ddpg = if_using_ppo = if_using_td3 = if_using_sac = True

## Agent Training: 5 algorithms (A2C, DDPG, PPO, TD3, SAC)


### Agent 1: A2C

In [12]:
# Fresh agent wrapper and an A2C model; both are created regardless of the flag.
agent = DRLAgent(env=env_train)
model_a2c = agent.get_model("a2c")

if if_using_a2c:
    # Send A2C training logs to <RESULTS_DIR>/a2c in three formats:
    # console output, CSV and TensorBoard.
    tmp_path = f"{RESULTS_DIR}/a2c"
    new_logger_a2c = configure(tmp_path, ["stdout", "csv", "tensorboard"])
    # Attach the logger to the model so training uses it.
    model_a2c.set_logger(new_logger_a2c)

{'n_steps': 5, 'ent_coef': 0.01, 'learning_rate': 0.0007}
Using cpu device
Logging to results/a2c


In [13]:
# Train A2C for 5000 timesteps when enabled; otherwise record that no model
# was trained by leaving trained_a2c as None.
if if_using_a2c:
    trained_a2c = agent.train_model(
        model=model_a2c,
        tb_log_name='a2c',
        total_timesteps=5000,
    )
else:
    trained_a2c = None

------------------------------------------
| time/                 |                |
|    fps                | 288            |
|    iterations         | 100            |
|    time_elapsed       | 1              |
|    total_timesteps    | 500            |
| train/                |                |
|    entropy_loss       | -43.2          |
|    explained_variance | 5.96e-08       |
|    learning_rate      | 0.0007         |
|    n_updates          | 99             |
|    policy_loss        | -0.0989        |
|    reward             | -0.00060774904 |
|    std                | 1.07           |
|    value_loss         | 5.23e-06       |
------------------------------------------
------------------------------------------
| time/                 |                |
|    fps                | 295            |
|    iterations         | 200            |
|    time_elapsed       | 3              |
|    total_timesteps    | 1000           |
| train/                |                |
|    entrop