# RL Trading

## Step 1 - Build the environment

In [18]:
from AlgorithmicTrading.models.metatrader import MqlAccountInfo
from AlgorithmicTrading.utils.trades import get_last_tick
import gym
from gym import spaces
from gym.utils.env_checker import check_env
import numpy as np

In [78]:
class TradingEnv(gym.Env):
    metadata: dict = {"render_modes": ["human", "rgb_array", "rgb_array_list"], "render_fps": 4}
    balance: float = 100_000
    margin_mode: ENUM_ACCOUNT_MARGIN_MODE = ENUM_ACCOUNT_MARGIN_MODE.ACCOUNT_MARGIN_MODE_RETAIL_NETTING
    
    def __init__(self, training_data: pd.DataFrame, render_mode=None):
        # Check if the render_mode is in the allowed list
        assert render_mode is None or render_mode in self.metadata["render_modes"]
        self.render_mode = render_mode

        # Get training data
        self.training_data: pd.DataFrame = training_data
        n_rates: int = len(training_data.columns)        

        # Get the max step
        self.total_steps: int = len(training_data) - 1 # There can not be an action when the last candle is reached 
        
         # Define observation space and action space
        self.observation_space = gym.spaces.Box(low=0, high=1, shape=(n_rates,), dtype=np.float32)
        self.action_space = gym.spaces.Discrete(3)  # 0: Buy, 1: Sell, 2: Hold

    def _reset_account(self) -> None:
        self.account: MqlAccountInfo = AccountBacktest.login(balance=self.balance, margin_mode=self.margin_mode)
    
    def reset(self):
        # Reset the environment to its initial state
        self.current_step = 0
        self.done = False
        self._reset_account()
        
        # Initialize state and other variables
        return self._get_obs()

    def _get_obs(self) -> pd.Series:
        return self.training_data.iloc[self.current_step].drop("time").values

    def step(self, action):
        # Take one step in the environment given the action and return the next observation, reward, done, and info
        if self.done:
            raise ValueError("[ERROR]: Episode is done. Call 'reset()' to start a new episode.")

        self.current_step += 1
        if self.current_step >= self.total_steps:
            self.done = True

        close = self.training_data.iloc[self.current_step].close        
        open = self.training_data.iloc[self.current_step].open        

        # Buy action
        if action == 0:
            reward = (close - open)
        elif action == 1:
            reward = (open - close)
        else:
            if len(self.account.positions):
                pass
            else:
                # Nothing happened
                reward = 0
        

        

        # Perform the action and update the environment state
        # Calculate the reward based on your trading strategy
        if len(self.account.history_deals):
            

        # Example reward calculation:
        reward = 1 if (action == 0 and self.data.iloc[self.current_step]['buy_signal']) else 0

        # Get the next observation after taking the action
        next_observation = self._get_obs()

        return next_observation, reward, self.done, {}


In [79]:
x = TradingEnv(n_rates)

x.reset()

[INFO]: Successfull backtest account create


array([139.459, 139.471, 139.446, 139.448, 209], dtype=object)

IndexError: list index out of range

In [356]:
from AlgorithmicTrading.strategies.perceptron import Strategy
from AlgorithmicTrading.account import AccountBacktest, AccountLive
from AlgorithmicTrading.models.metatrader import ENUM_ACCOUNT_MARGIN_MODE, ENUM_TIMEFRAME
from AlgorithmicTrading.rates import Rates
from AlgorithmicTrading.utils.files import write_file, read_file
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
import MetaTrader5 as mt5
import pandas as pd
import datetime
from AlgorithmicTrading.trade.trade import Trade

In [68]:
# Netting account
account = AccountLive.login(login=5013526569, server="MetaQuotes-Demo", password="h3xhhvuo")
# Backtest account
account = AccountBacktest.login(balance=100_000, margin_mode=ENUM_ACCOUNT_MARGIN_MODE.ACCOUNT_MARGIN_MODE_RETAIL_NETTING)
account

[INFO]: Successfull login to #5013526569 account
[INFO]: Successfull backtest account create


MqlAccountInfo(login=9999999, trade_mode=<ENUM_ACCOUNT_TRADE_MODE.ACCOUNT_TRADE_MODE_DEMO: 0>, leverage=100, limit_orders=200, margin_so_mode=<ENUM_ACCOUNT_STOPOUT_MODE.ACCOUNT_STOPOUT_MODE_PERCENT: 0>, trade_allowed=True, trade_expert=True, margin_mode=<ENUM_ACCOUNT_MARGIN_MODE.ACCOUNT_MARGIN_MODE_RETAIL_NETTING: 0>, currency_digits=2, fifo_close=False, balance=100000.0, credit=0.0, profit=0.0, equity=100000.0, margin=0.0, margin_free=100000.0, margin_level=0.0, margin_so_call=50.0, margin_so_so=30.0, margin_initial=0.0, margin_maintenance=0.0, assets=0.0, liabilities=0.0, commission_blocked=0.0, name='Backtest Name', server='Backtest Server', currency='USD', company='Backtest Company', orders=[], positions=[], history_deals=[], is_backtest_account=True)

In [380]:
account.positions[0].__dict__

{'ticket': 1690891258098218,
 'time': datetime.datetime(2023, 8, 1, 12, 0, 58, 98218, tzinfo=datetime.timezone.utc),
 'time_msc': datetime.datetime(2023, 8, 1, 12, 0, 58, 98218, tzinfo=datetime.timezone.utc),
 'time_update': datetime.datetime(2023, 8, 1, 12, 0, 58, tzinfo=datetime.timezone.utc),
 'time_update_msc': datetime.datetime(2023, 8, 1, 12, 0, 58, 98218, tzinfo=datetime.timezone.utc),
 'type': <ENUM_POSITION_TYPE.POSITION_TYPE_SELL: 1>,
 'magic': 9999,
 'identifier': 1690891258098218,
 'reason': <ENUM_POSITION_REASON.POSITION_REASON_EXPERT: 3>,
 'volume': 1.0,
 'price_open': 142.164,
 'sl': 0.0,
 'tp': 0.0,
 'price_current': 142.164,
 'swap': 0.0,
 'profit': 0.0,
 'symbol': 'USDJPY',
 'comment': '',
 'external_id': None}

In [378]:
a = Trade(account, 9999, backtest_financial_data=n_rates.iloc[:-100])
a.sell("USDJPY", 1)

True

In [354]:
# Reset data
n_rates = Rates.get_last_n_candles("USDJPY", timeframe=ENUM_TIMEFRAME.TIMEFRAME_M5).reset_index()
n_rates

Unnamed: 0,time,open,high,low,close,tick_volume
0,2023-06-13 06:45:00+00:00,139.419,139.439,139.404,139.405,189
1,2023-06-13 06:50:00+00:00,139.405,139.426,139.390,139.403,249
2,2023-06-13 06:55:00+00:00,139.403,139.405,139.385,139.392,180
3,2023-06-13 07:00:00+00:00,139.392,139.412,139.384,139.406,307
4,2023-06-13 07:05:00+00:00,139.405,139.426,139.392,139.423,288
...,...,...,...,...,...,...
9995,2023-08-01 02:00:00+00:00,142.251,142.271,142.228,142.264,121
9996,2023-08-01 02:05:00+00:00,142.264,142.312,142.264,142.290,116
9997,2023-08-01 02:10:00+00:00,142.289,142.356,142.282,142.354,213
9998,2023-08-01 02:15:00+00:00,142.353,142.353,142.301,142.322,115


In [65]:
n_rates = read_file("EnvironmentTrainingData.feather")
x = n_rates.iloc[0].drop("time")

# np.array([], dtype=np.float)

x["close"].reshape(-1)
# np.array(x, dtype=np.float64)

array([139.393])

In [16]:
data = Strategy.features_engineering(n_rates).dropna()
data

Unnamed: 0,open,high,low,close,tick_volume,Target - returns,Feature - 5 SMA,Feature - 10 SMA,Feature - 20 SMA,Feature - 30 SMA,...,Feature - 21 RSI SMA,Feature - MACD Line,Feature - MACD Signal Line,Feature - MACD Diff,Feature - Bollinger Bands High Band,Feature - Bollinger Bands Mid Band,Feature - Bollinger Bands Low Band,Feature - Bollinger Bands P Band,Feature - Bollinger Bands W Band,Feature - Acc/Dist Index
33,139.435,139.461,139.408,139.411,370,-0.044,139.432109,139.456566,139.483871,139.494184,...,45.334933,-0.019830,-0.005289,-0.014541,139.651661,139.51515,139.378639,0.118529,0.195694,-736.819301
34,139.412,139.415,139.353,139.368,455,0.002,139.410740,139.440449,139.472493,139.485170,...,43.801340,-0.026028,-0.009437,-0.016591,139.652226,139.50470,139.357174,0.036693,0.211500,-971.658010
35,139.365,139.387,139.352,139.367,223,0.003,139.396160,139.427085,139.462164,139.476786,...,42.507263,-0.030668,-0.013683,-0.016984,139.648212,139.49385,139.339488,0.089116,0.221318,-1003.515153
36,139.366,139.386,139.352,139.369,257,-0.004,139.387106,139.416517,139.453067,139.469188,...,41.476950,-0.033793,-0.017705,-0.016088,139.629981,139.48060,139.331219,0.126457,0.214196,-1003.515153
37,139.369,139.386,139.362,139.365,267,0.023,139.379738,139.407146,139.444489,139.461887,...,40.546347,-0.036176,-0.021399,-0.014777,139.608417,139.46740,139.326383,0.136923,0.202222,-1203.765153
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9994,141.125,141.165,141.120,141.162,126,-0.001,141.123056,141.103598,141.069691,141.028919,...,63.395164,0.050861,0.053157,-0.002296,141.167839,141.08780,141.007761,0.963524,0.113460,80812.296955
9995,141.163,141.172,141.147,141.162,188,0.005,141.136037,141.114216,141.078482,141.037505,...,63.746978,0.052272,0.052980,-0.000708,141.170129,141.09620,141.022271,0.945024,0.104792,80849.896955
9996,141.163,141.177,141.163,141.168,150,0.003,141.146691,141.123995,141.087008,141.045924,...,64.100195,0.053261,0.053036,0.000224,141.179176,141.09905,141.018924,0.930259,0.113574,80807.039812
9997,141.168,141.173,141.154,141.171,168,-0.009,141.154794,141.132541,141.095007,141.053993,...,64.431269,0.053667,0.053162,0.000505,141.186573,141.10125,141.015927,0.908743,0.120938,80939.671391


In [9]:
X = data.drop(columns=["Target - returns"]).values


p20 = data["Target - returns"].quantile(.2) 
p80 = data["Target - returns"].quantile(.8)
y = (data["Target - returns"] > 0).values
# y = (data["Target - returns"] > 0).values
y

array([False, False, False, ...,  True,  True, False])

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [14]:
model = MLPClassifier(hidden_layer_sizes=[50, 50, 50], random_state=42)
pipeline = make_pipeline(StandardScaler(), model)
pipeline.fit(X_train, y_train)
y_pred = pipeline.predict(X_test)
y_pred = (y_pred >= 0.5)

# mean_squared_error(y_pred, y_test, squared=False)
print(str(round(accuracy_score(y_pred=y_pred, y_true=y_test) * 100)) + "%")

50%


