In [1]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import datetime
import tensorflow as tf
from datetime import datetime, timedelta
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import Huber
from tensorflow.keras.callbacks import TensorBoard
from collections import deque
import random

In [11]:
class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)
        self.gamma = 0.95  # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential()
        model.add(LSTM(32, input_shape=(self.state_size, 1)))
        model.add(Dense(32, activation='relu'))
        model.add(Dense(self.action_size, activation='linear'))  # Output size set to action_size
        model.compile(optimizer=Adam(lr=self.learning_rate), loss=Huber())
        return model

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        act_values = self.model.predict(state)
        return np.argmax(act_values[0])

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                target = (reward + self.gamma * np.amax(self.model.predict(next_state)[0]))
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [12]:
import MetaTrader5 as mt5
mt5.initialize()

bars = mt5.copy_rates_from_pos("GOLD", mt5.TIMEFRAME_M15, 0, 99200)
df = pd.DataFrame(bars)

In [13]:
df

Unnamed: 0,time,open,high,low,close,tick_volume,spread,real_volume
0,1557813600,1299.15,1299.35,1298.16,1298.17,351,40,0
1,1557814500,1298.21,1299.25,1298.17,1298.45,349,40,0
2,1557815400,1298.40,1298.59,1297.69,1297.87,264,40,0
3,1557816300,1297.78,1297.94,1297.25,1297.30,280,40,0
4,1557817200,1297.25,1297.59,1296.75,1297.06,355,40,0
...,...,...,...,...,...,...,...,...
99195,1690212600,1960.22,1960.64,1959.41,1959.41,1088,29,0
99196,1690213500,1959.41,1960.28,1958.67,1959.55,1138,29,0
99197,1690214400,1959.57,1960.08,1958.22,1958.68,1409,29,0
99198,1690215300,1958.68,1960.31,1958.43,1959.69,1087,29,0


In [14]:
df['time'] = pd.to_datetime(df['time'], unit='s')
res = df.drop(['open', 'high', 'low', 'tick_volume', 'spread', 'real_volume'], axis=1)
price = np.array([float(bars[i][4]) for i in range(99200)])
price = price.reshape(99200,1)

In [15]:
scaler = StandardScaler()
scaler.fit(price[:74400])
price = scaler.transform(price)
df = pd.DataFrame(price.reshape(12400,8),columns=['A','B','C','D','E','F','G','Target'])

In [16]:
df

Unnamed: 0,A,B,C,D,E,F,G,Target
0,-2.504214,-2.502597,-2.505946,-2.509236,-2.510621,-2.508947,-2.503810,-2.503463
1,-2.504214,-2.507158,-2.510852,-2.510391,-2.509698,-2.509178,-2.506811,-2.505022
2,-2.507389,-2.507851,-2.509178,-2.507273,-2.516856,-2.513508,-2.509178,-2.500231
3,-2.499827,-2.500519,-2.498441,-2.497229,-2.507504,-2.503001,-2.504098,-2.505311
4,-2.508947,-2.514431,-2.515066,-2.518068,-2.512411,-2.520377,-2.529960,-2.526785
...,...,...,...,...,...,...,...,...
12395,1.325962,1.335025,1.342588,1.342934,1.334737,1.332312,1.324577,1.319959
12396,1.313897,1.333005,1.342414,1.343049,1.347148,1.347263,1.341837,1.345416
12397,1.347379,1.354826,1.355634,1.356962,1.344550,1.349515,1.348822,1.349515
12398,1.358347,1.345820,1.332254,1.333524,1.312974,1.323884,1.308009,1.316149


In [17]:
#75% train , 25% test

X_train = df.iloc[:9300,:7]
Y_train = df.iloc[:9300,-1]

X_test = df.iloc[9301:12400,:7]
Y_test = df.iloc[9301:12400,-1]

X_train = np.array(X_train)
Y_train = np.array(Y_train)
X_test = np.array(X_test)
Y_test = np.array(Y_test)

In [18]:
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

In [19]:

state_size = X_train.shape[1]
action_size = 3 # actions (buy, hold, sell)
agent = DQNAgent(state_size, action_size)


  super(Adam, self).__init__(name, **kwargs)


In [None]:
batch_size = 32
episodes = 100
for e in range(episodes):
    state = X_train[0]
    state = np.reshape(state, [1, state_size, 1])
    for t in range(1, len(X_train)):
        action = agent.act(state)
        next_state = np.reshape(X_train[t], [1, state_size, 1])
        reward = Y_train[t]
        done = False
        if t == len(X_train) - 1:
            done = True
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        if done:
            print("episode: {}/{}, score: {}, epsilon: {:.2}"
                  .format(e, episodes, t, agent.epsilon))
            break
    if len(agent.memory) > batch_size:
        agent.replay(batch_size)

episode: 0/100, score: 9299, epsilon: 1.0
episode: 1/100, score: 9299, epsilon: 0.99
episode: 2/100, score: 9299, epsilon: 0.99
episode: 3/100, score: 9299, epsilon: 0.99
episode: 4/100, score: 9299, epsilon: 0.98
episode: 5/100, score: 9299, epsilon: 0.98
episode: 6/100, score: 9299, epsilon: 0.97
episode: 7/100, score: 9299, epsilon: 0.97
episode: 8/100, score: 9299, epsilon: 0.96
episode: 9/100, score: 9299, epsilon: 0.96
episode: 10/100, score: 9299, epsilon: 0.95
episode: 11/100, score: 9299, epsilon: 0.95
episode: 12/100, score: 9299, epsilon: 0.94
episode: 13/100, score: 9299, epsilon: 0.94
episode: 14/100, score: 9299, epsilon: 0.93
episode: 15/100, score: 9299, epsilon: 0.93
episode: 16/100, score: 9299, epsilon: 0.92
episode: 17/100, score: 9299, epsilon: 0.92
episode: 18/100, score: 9299, epsilon: 0.91
episode: 19/100, score: 9299, epsilon: 0.91
episode: 20/100, score: 9299, epsilon: 0.9
episode: 21/100, score: 9299, epsilon: 0.9
episode: 22/100, score: 9299, epsilon: 0.9
ep

In [11]:
total_profit = 0
for t in range(len(X_test)):
    state = np.reshape(X_test[t], [1, state_size, 1])
    action = agent.act(state)
    reward = Y_test[t]
    total_profit += reward
    next_state = np.reshape(X_test[t+1], [1, state_size, 1]) if t < len(X_test)-1 else None
    done = True if t == len(X_test) - 1 else False
    agent.remember(state, action, reward, next_state, done)

print("Total profit: ", total_profit)

Total profit:  28979.789558722212


In [29]:
agent.model.save('trained_model.h5')  
print("Model saved successfully!")

Model saved successfully!
