In [None]:
pip install git+https://github.com/notadamking/tensortrade.git


#or
#pip install git+https://github.com/notadamking/tensortrade.git#egg=tensortrade[tf,tensorforce,baselines,ccxt,ta,fbm] -U

In [None]:
# RL Gym Environment

from tensortrade.environments import TradingEnvironment

# NOTE: `exchange`, `action_scheme`, `reward_scheme` and `feature_pipeline`
# are assigned in later cells of this notebook, so those cells have to be
# executed before this one.
environment = TradingEnvironment(
    exchange=exchange,
    action_scheme=action_scheme,
    reward_scheme=reward_scheme,
    feature_pipeline=feature_pipeline,
)

In [None]:
# Exchanges

# Live-account alternative (left disabled):
# import ccxt
# from tensortrade.exchanges.live import CCXTExchange
# coinbase = ccxt.coinbasepro()
# exchange = CCXTExchange(exchange=coinbase, base_instrument='USD')

from tensortrade.exchanges.simulated import FBMExchange

# Simulated exchange used by default in this notebook.
exchange = FBMExchange(
    base_instrument='BTC',
    timeframe='1h',
)

In [None]:
# Reading The Data

import pandas as pd
from tensortrade.exchanges.simulated import SimulatedExchange

# NOTE(review): the "Strategy Evaluation" cell reads './btc_ohlcv_1h.csv'
# (different directory and spelling) -- confirm which file name is intended.
df = pd.read_csv('./data/btc_ohclv_1h.csv')

# Rebinds `exchange` to a simulated exchange backed by the loaded frame.
exchange = SimulatedExchange(
    data_frame=df,
    base_instrument='USD',
)

In [None]:
# Feature Pipelines

from tensortrade.features import FeaturePipeline
from tensortrade.features.scalers import MinMaxNormalizer
from tensortrade.features.stationarity import FractionalDifference
from tensortrade.features.indicators import SimpleMovingAverage

# Columns handed to both the normalizer and the moving-average step.
price_columns = ["open", "high", "low", "close"]

# Individual pipeline steps, created in the order they are applied below.
normalize_price = MinMaxNormalizer(price_columns)
moving_averages = SimpleMovingAverage(price_columns)
difference_all = FractionalDifference(difference_order=0.6)

feature_pipeline = FeaturePipeline(
    steps=[
        normalize_price,
        moving_averages,
        difference_all,
    ]
)

# Attach the pipeline to whichever `exchange` is currently bound.
exchange.feature_pipeline = feature_pipeline

In [None]:
# Action Schemes

from tensortrade.actions import DiscreteActions

# Discrete action scheme with 20 actions for the 'BTC' instrument symbol.
action_scheme = DiscreteActions(
    n_actions=20,
    instrument_symbol='BTC',
)

In [None]:
# Reward Schemes

from tensortrade.rewards import SimpleProfit

# Build a SimpleProfit reward scheme using its default constructor arguments.
reward_scheme = SimpleProfit()

In [None]:
# Learning Agents

from stable_baselines.common.policies import MlpLnLstmPolicy
from stable_baselines import PPO2

model = PPO2
policy = MlpLnLstmPolicy
params = { "learning_rate": 1e-5 }

# PPO2 takes its hyperparameters as ordinary keyword arguments and has no
# `model_kwargs` parameter, so the dict must be unpacked rather than passed
# under that name (which would raise a TypeError).
# NOTE(review): recurrent policies in PPO2 may additionally need
# `nminibatches` to match the number of environments -- confirm.
agent = model(policy, environment, **params)

In [None]:
# Tensorforce

from tensorforce.agents import Agent

# Agent-level settings: agent type, optimizer, discount and clipping ratio.
agent_spec = dict(
    type="ppo_agent",
    step_optimizer=dict(
        type="adam",
        learning_rate=1e-4,
    ),
    discount=0.99,
    likelihood_ratio_clipping=0.2,
)

# Two dense layers (64 then 32 units), both with tanh activation.
network_spec = [
    {"type": "dense", "size": 64, "activation": "tanh"},
    {"type": "dense", "size": 32, "activation": "tanh"},
]

# Rebinds `agent`, replacing the one created in the "Learning Agents" cell.
agent = Agent.from_spec(
    spec=agent_spec,
    kwargs={
        "network": network_spec,
        "states": environment.states,
        "actions": environment.actions,
    },
)

In [None]:
# Trading Strategy

# A `from ... import` list that spans several lines must be parenthesized;
# a bare trailing comma followed by a newline is a SyntaxError.
from tensortrade.strategies import (TensorforceTradingStrategy,
                                    StableBaselinesTradingStrategy)

# Strategy driven by the Tensorforce agent/network specs defined earlier.
a_strategy = TensorforceTradingStrategy(environment=environment,
                                        agent_spec=agent_spec,
                                        network_spec=network_spec)

# Strategy driven by Stable Baselines. `PPO2` and `MlpLnLstmPolicy` come from
# the imports in the "Learning Agents" cell; the policy name is spelled
# `MlpLnLstmPolicy` (the previous `MlpLnLSTMPolicy` raised a NameError).
b_strategy = StableBaselinesTradingStrategy(environment=environment,
                                            model=PPO2,
                                            policy=MlpLnLstmPolicy)

In [None]:
# Creating an Environment

from tensortrade.exchanges.simulated import FBMExchange
from tensortrade.features.scalers import MinMaxNormalizer
from tensortrade.features.stationarity import FractionalDifference
from tensortrade.features import FeaturePipeline
from tensortrade.rewards import SimpleProfit
from tensortrade.actions import DiscreteActions
from tensortrade.environments import TradingEnvironment

# Feature pipeline: normalize the price columns, then difference.
normalize_price = MinMaxNormalizer(["open", "high", "low", "close"])
difference = FractionalDifference(difference_order=0.6)
feature_pipeline = FeaturePipeline(
    steps=[
        normalize_price,
        difference,
    ]
)

# Simulated exchange that is given the pipeline built above.
exchange = FBMExchange(
    timeframe='1h',
    base_instrument='BTC',
    feature_pipeline=feature_pipeline,
)

reward_scheme = SimpleProfit()

action_scheme = DiscreteActions(
    n_actions=20,
    instrument_symbol='ETH/BTC',
)

# The same pipeline object is passed to the environment as well.
environment = TradingEnvironment(
    exchange=exchange,
    action_scheme=action_scheme,
    reward_scheme=reward_scheme,
    feature_pipeline=feature_pipeline,
)

In [None]:
# Defining the Agent

from stable_baselines.common.policies import MlpLnLstmPolicy
from stable_baselines import PPO2

# Model class, policy class and hyperparameters consumed by the strategy
# created in the next cell.
model = PPO2
policy = MlpLnLstmPolicy
params = dict(learning_rate=1e-5)

In [None]:
# Training a Strategy

from tensortrade.strategies import StableBaselinesTradingStrategy

# Combine the environment with the model class, policy class and
# hyperparameters chosen in the previous cell.
strategy = StableBaselinesTradingStrategy(
    environment=environment,
    model=model,
    policy=policy,
    model_kwargs=params,
)

In [None]:
# `stop_early_callback` is not defined in any cell of this notebook, so
# referencing it directly raises a NameError on a fresh kernel. Use it when
# the user has defined it, otherwise fall back to no callback.
# NOTE(review): assumes `run` accepts `episode_callback=None` -- confirm.
episode_callback = globals().get("stop_early_callback")

# Run the strategy for 100000 steps and keep the returned performance.
performance = strategy.run(steps=100000,
                           episode_callback=episode_callback)

In [None]:
# Saving and Restoring

from tensortrade.strategies import StableBaselinesTradingStrategy

# Single source of truth for the agent file, so that save and restore always
# point at the same location (previously the restore call used
# "../agents/ppo_btc/1h", which does not match the saved path).
AGENT_PATH = "../agents/ppo_btc_1h"

strategy.save_agent(path=AGENT_PATH)

# Restore the agent from the file into a freshly built strategy.
strategy = StableBaselinesTradingStrategy(environment=environment,
                                          model=model,
                                          policy=policy,
                                          model_kwargs=params)
strategy.restore_agent(path=AGENT_PATH)

In [None]:
# Tuning Your Strategy

from tensortrade.environments import TradingEnvironment
from tensortrade.exchanges.simulated import FBMExchange

# Fresh simulated exchange that reuses the existing feature pipeline.
exchange = FBMExchange(
    timeframe='1h',
    base_instrument='BTC',
    feature_pipeline=feature_pipeline,
)

# New environment around that exchange; the pipeline is supplied through the
# exchange only, not passed to the environment here.
environment = TradingEnvironment(
    exchange=exchange,
    action_scheme=action_scheme,
    reward_scheme=reward_scheme,
)

# Point the existing strategy at the new environment, then tune.
strategy.environment = environment
tuned_performance = strategy.tune(episodes=10)

In [None]:
# Strategy Evaluation

# pandas has no `pd` attribute to import; the conventional alias import is
# `import pandas as pd` (`from pandas import pd` raises ImportError).
import pandas as pd
from tensortrade.environments import TradingEnvironment
from tensortrade.exchanges.simulated import SimulatedExchange

# NOTE(review): the "Reading The Data" cell loads './data/btc_ohclv_1h.csv'
# (different directory and spelling) -- confirm which file name is intended.
df = pd.read_csv('./btc_ohlcv_1h.csv')

# Simulated exchange backed by the loaded frame, reusing the feature pipeline.
exchange = SimulatedExchange(data_frame=df,
                             base_instrument='BTC',
                             feature_pipeline=feature_pipeline)
environment = TradingEnvironment(exchange=exchange,
                                 action_scheme=action_scheme,
                                 reward_scheme=reward_scheme)

# Swap the strategy onto the evaluation environment and run one episode.
strategy.environment = environment
test_performance = strategy.run(episodes=1, testing=True)