In [1]:
import random
import math
import numpy as np
from collections import deque
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
import pdb
import csv
import random

In [2]:
class Environment:
	def __init__(self, csv_path):
		self.points = 0.00001
		self.spread_points = 5
		self.tp_points = 500
		self.sl_points = 500

		self.dataset = []
		self.first_tick = 10;
		self.next_index = self.first_tick;


		with open(csv_path, newline='') as csvfile:
			reader = csv.reader(csvfile, delimiter=';')
			for row in reader:
				date, hour, \
				op, high, low, close, \
				tick_vol, vol, spread = row
				
				entry = {
					'date': date,
					'hour': hour,
					'tick_vol': float(tick_vol),
					'vol': float(vol),
					'open': float(op),
					'close': float(close),
					'high': float(high),
					'low': float(low),
				}
				self.dataset.append(entry)

		self.input_size = len(self.__build_state(0)[0])
		self.action_space = [0, 1, 2] # 0: pass, 1: long, 2: short

	def report(self):
		return "Long Trades: {} ({} won), Short Trades: {} ({} won), won %: {}, profit: {} (gross gain: {}, gross loss: {})".format(
			self.long_trades, self.long_trades_won, self.short_trades, self.short_trades_won,
			(self.long_trades_won + self.short_trades_won) / (self.long_trades + self.short_trades),
			round(self.gross_profit + self.gross_loss), round(self.gross_profit), round(self.gross_loss)
		)

	def reset(self):
		self.next_index = self.first_tick
		self.short_trades = 0
		self.long_trades = 0
		self.long_trades_won = 0
		self.short_trades_won = 0
		self.short_trades_lost = 0
		self.gross_profit = 0
		self.gross_loss = 0

		return self.step(self.action_space[0])[0]

	def step(self, action):
		reward, nindex = self.__calculate_reward(action, self.next_index)

		distance = max(1, nindex - self.next_index)
		self.next_index = nindex
		next_state, has_next_state = self.__build_state(self.next_index)

		done = False
		if not has_next_state: done = True

		return (next_state, reward / distance, done)

	def __build_state(self, index):
		if index >= len(self.dataset):
			return (None, False)

		row = self.dataset[index]
		hour = tf.keras.utils.to_categorical(row['hour'].split(':')[0], 24)
		
		state = []

		for i in range(index,index-self.first_tick,-1):
			if not self.dataset[i]:
				state.append(self.dataset[i]['close'])
				state.append(self.dataset[i]['open'])
				state.append(self.dataset[i]['high'])
				state.append(self.dataset[i]['low'])
			else:
				state.append(row['close'])
				state.append(row['open'])
				state.append(row['high'])
				state.append(row['low'])

		return (np.concatenate((hour, state)), True)
		#return (state, True)

	def __calculate_reward(self, action, index):
		if action == 0: # pass
			reward = -1
			nindex = index + 1
		if action == 1: # long
			reward, nindex = self.__calculate_position_reward(True, index)
			self.long_trades += 1

			if reward > 0: self.long_trades_won += 1
			self.gross_profit += max(0, reward)
			self.gross_loss += min(0, reward)
		if action == 2: # short
			reward, nindex = self.__calculate_position_reward(False, index)
			self.short_trades += 1

			if reward > 0: self.short_trades_won += 1
			self.gross_profit += max(0, reward)
			self.gross_loss += min(0, reward)
		

		return (reward, nindex)


	def __calculate_position_reward(self, long, index):
		open_bid, open_ask = \
			(
				self.dataset[index]['close'] - self.__points(self.spread_points) / 2,
				self.dataset[index]['close'] + self.__points(self.spread_points) / 2
			)
		
		open_price = open_ask if long else open_bid
		if long:
			open_position_tp_price = open_bid + self.__points(self.tp_points) + self.__points(self.spread_points)
			open_position_sl_price = open_bid - self.__points(self.sl_points) + self.__points(self.spread_points)
		else:
			open_position_tp_price = open_ask - self.__points(self.tp_points) - self.__points(self.spread_points)
			open_position_sl_price = open_ask + self.__points(self.sl_points) - self.__points(self.spread_points)

		index += 1
		while index < len(self.dataset):
			if long:
				high_bid = self.dataset[index]['high'] - self.__points(self.spread_points) / 2
				low_bid = self.dataset[index]['low'] - self.__points(self.spread_points) / 2

				if low_bid <= open_position_sl_price:
					# print("LONG SL ({}):  From {} to {} ({})".format(index, open_price, open_position_sl_price, round((open_position_sl_price - open_price) / self.points)))
					return ((open_position_sl_price - open_price) / self.points, index + 1)
				if high_bid >= open_position_tp_price:
					# print("LONG TP ({}):  From {} to {} ({})".format(index, open_price, open_position_tp_price, round((open_position_tp_price - open_price) / self.points)))
					return ((open_position_tp_price - open_price) / self.points, index + 1)
			else:
				high_ask = self.dataset[index]['high'] + self.__points(self.spread_points) / 2
				low_ask = self.dataset[index]['low'] + self.__points(self.spread_points) / 2

				if high_ask >= open_position_sl_price:
					# print("SHORT SL ({}): From {} to {} ({})".format(index, open_price, open_position_sl_price, round((open_price - open_position_sl_price) / self.points)))
					return ((open_price - open_position_sl_price) / self.points, index + 1)
				if low_ask <= open_position_tp_price:
					# print("SHORT TP ({}): From {} to {} ({})".format(index, open_price, open_position_tp_price, round((open_price - open_position_tp_price) / self.points)))
					return ((open_price - open_position_tp_price) / self.points, index + 1)

			index += 1

		# if I'm here, it's end of the dataset
		return (0, index)


	def __points(self, val):
		return val * 0.00001



In [3]:
EPOCHS = 10
THRESHOLD = 195
MONITOR = True

class DQN:
	def __init__(self, env_string, batch_size=128):
		self.env = Environment('EURUSD_H1.csv')
		self.memory = deque(maxlen=1000)
		self.input_size = self.env.input_size
		self.action_size = len(self.env.action_space)
		self.batch_size = batch_size
		self.gamma = 0.0
		self.epsilon = 1.0
		self.epsilon_min = 0.0001
		self.epsilon_decay = 0.9998
		alpha=0.01
		alpha_decay=0.01
		dropout = 0.3

		# Init model
		self.model = Sequential()
		self.model.add(Dense(24 + 8, input_dim=self.input_size, activation='tanh', kernel_regularizer=l2(0.01), activity_regularizer=l2(0.01)))
		# self.model.add(Dropout(dropout))
		self.model.add(Dense(64, activation='tanh', kernel_regularizer=l2(0.01), activity_regularizer=l2(0.01)))
		self.model.add(Dropout(dropout))
		self.model.add(Dense(32, activation='tanh', kernel_regularizer=l2(0.01), activity_regularizer=l2(0.01)))
		self.model.add(Dropout(dropout))
		self.model.add(Dense(self.action_size, activation='tanh'))
		self.model.compile(loss='mse', optimizer=Adam(lr=alpha, decay=alpha_decay))

		self.model.summary()

	def remember(self, state, action, reward, next_state, done):
		self.memory.append((state, action, reward, next_state, done))

	def replay(self, batch_size):
		x_batch, y_batch = [], []
		minibatch = random.sample(self.memory, min(len(self.memory), batch_size))

		for state, action, reward, next_state, done in minibatch:
			y_target = self.model.predict(state) # y_target = [[1, 0, -1]]
			# print("action: {}, reward: {}".format(action, reward))
			# print(y_target[0])
			# print(reward)
			reward = max(min(reward, 1), -1)
			# print(y_target[0])
			# print("|")
			y_target[0][action] = reward if (done or self.gamma == 0) else reward + self.gamma * np.max(self.model.predict(next_state)[0])
			# print(y_target[0])
			# print(y_target[0][action])
			# print(self.model.predict(next_state))
			# y_target[0][action] = max(min(reward, 1), 0)
			# print(y_target[0])
			x_batch.append(state[0])
			y_batch.append(y_target[0])

		self.model.fit(np.array(x_batch), np.array(y_batch), batch_size=len(x_batch), verbose=1)

	def train(self):
		scores = deque(maxlen=100)
		avg_scores = []
		for e in range(EPOCHS):
			print("Training ({}):".format(e + 1))

			state = self.env.reset()
			state = self.preprocess_state(state)
			done = False

			while not done:
				action = self.choose_action(state, self.epsilon)


				# next_index = self.env.next_index
				next_state, reward, done = self.env.step(action)

				# if action == 1:
				# 	self.env.next_index = next_index
				# 	ns, rew, don = self.env.step(2)
				# 	print("1: {} => 2: {}".format(reward, rew))
				# elif action == 2:
				# 	self.env.next_index = next_index
				# 	ns, rew, don = self.env.step(1)
				# 	print("2: {} => 1: {}".format(reward, rew))


				next_state = self.preprocess_state(next_state)
				self.remember(state, action, reward, next_state, done)
				state = next_state
				self.epsilon = max(self.epsilon_min, self.epsilon_decay*self.epsilon)
				if self.env.next_index >= int(len(self.env.dataset) * 0.7): break

			self.replay(self.batch_size)
			print(self.env.report())

			# test on unoptimized data
			print("Evaluating: ")

			state = self.env.reset()
			self.env.next_index = int(len(self.env.dataset) * 0.7) + 1
			state = self.preprocess_state(state)
			done = False

			while not done:
				action = self.choose_action(state, -1)

				next_state, reward, done = self.env.step(action)
				if done: break

				state = self.preprocess_state(next_state)
				if self.env.next_index >= len(self.env.dataset) - 2: break

			print(self.env.report())
			print("Current epsilon: {}".format(self.epsilon))
			print()
			self.memory.clear()


		print('Did not solve after {} episodes :('.format(e))
		return avg_scores

	def choose_action(self, state, epsilon):
		# return random.choice(self.env.action_space)
		if np.random.random() <= epsilon:
			return random.choice(self.env.action_space)
		else:
			return np.argmax(self.model(state, training=epsilon>0))


	def preprocess_state(self, state):
		return np.reshape(state, [1, self.input_size])

env_string = 'CartPole-v0'
agent = DQN(env_string)
scores = agent.train()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 32)                2080      
_________________________________________________________________
dense_1 (Dense)              (None, 64)                2112      
_________________________________________________________________
dropout (Dropout)            (None, 64)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 32)                2080      
_________________________________________________________________
dropout_1 (Dropout)          (None, 32)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 3)                 99        
Total params: 6,371
Trainable params: 6,371
Non-trainable params: 0
______________________________________________________

In [None]:
EPOCHS = 100
scores = agent.train()

Training (1):
Long Trades: 4451 (2263 won), Short Trades: 946 (481 won), won %: 0.5084306095979247, profit: 45500 (gross gain: 1372000, gross loss: -1326500)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.0001

Training (2):
Long Trades: 4834 (2452 won), Short Trades: 563 (287 won), won %: 0.5075041689827682, profit: 40500 (gross gain: 1369500, gross loss: -1329000)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.0001

Training (3):
Long Trades: 5171 (2609 won), Short Trades: 233 (119 won), won %: 0.5048112509252406, profit: 26000 (gross gain: 1364000, gross loss: -1338000)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.0001

Trai

Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.0001

Training (22):
Long Trades: 5230 (2627 won), Short Trades: 170 (75 won), won %: 0.5003703703703704, profit: 2000 (gross gain: 1351000, gross loss: -1349000)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.0001

Training (23):
Long Trades: 5257 (2644 won), Short Trades: 140 (63 won), won %: 0.5015749490457662, profit: 8500 (gross gain: 1353500, gross loss: -1345000)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.0001

Training (24):
Long Trades: 5234 (2644 won), Short Trades: 165 (88 won), won %: 0.5060196332654195, profit: 32500 (gross gain: 1366000, gross loss: -1333500)
Evaluating: 
Long T

In [6]:
model2 = tf.keras.models.load_model('C:/MQLProjects/MQL5/Experts/Forex/forex.model')
agent.model = model2
EPOCHS = 100
scores = agent.train()

Training (1):
Long Trades: 3977 (1975 won), Short Trades: 1411 (658 won), won %: 0.4886785449146251, profit: -61000 (gross gain: 1316500, gross loss: -1377500)
Evaluating: 
Long Trades: 1153 (578 won), Short Trades: 0 (0 won), won %: 0.5013009540329575, profit: 2000 (gross gain: 289000, gross loss: -287000)
Current epsilon: 0.07101497595309675

Training (2):
Long Trades: 4471 (2235 won), Short Trades: 563 (257 won), won %: 0.4950337703615415, profit: -25000 (gross gain: 1246000, gross loss: -1271000)
Evaluating: 
Long Trades: 24 (11 won), Short Trades: 1138 (552 won), won %: 0.4845094664371773, profit: -18000 (gross gain: 281500, gross loss: -299500)
Current epsilon: 0.014557878177545953

Training (3):
Long Trades: 1856 (930 won), Short Trades: 3551 (1721 won), won %: 0.49029036434251894, profit: -52500 (gross gain: 1325500, gross loss: -1378000)
Evaluating: 
Long Trades: 0 (0 won), Short Trades: 1162 (565 won), won %: 0.48623063683304646, profit: -16000 (gross gain: 282500, gross loss

Long Trades: 0 (0 won), Short Trades: 1162 (565 won), won %: 0.48623063683304646, profit: -16000 (gross gain: 282500, gross loss: -298500)
Current epsilon: 0.01

Training (22):
Long Trades: 1528 (817 won), Short Trades: 3866 (1921 won), won %: 0.5076010381905821, profit: 41000 (gross gain: 1369000, gross loss: -1328000)
Evaluating: 
Long Trades: 158 (82 won), Short Trades: 1003 (490 won), won %: 0.4926787252368648, profit: -8500 (gross gain: 286000, gross loss: -294500)
Current epsilon: 0.01

Training (23):
Long Trades: 2766 (1372 won), Short Trades: 2642 (1247 won), won %: 0.4842825443786982, profit: -85000 (gross gain: 1309500, gross loss: -1394500)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.01

Training (24):
Long Trades: 3459 (1727 won), Short Trades: 1943 (924 won), won %: 0.4907441688263606, profit: -50000 (gross gain: 1325500, gross loss: -1375500)
Evaluat

Long Trades: 5160 (2597 won), Short Trades: 236 (110 won), won %: 0.5016679021497406, profit: 9000 (gross gain: 1353500, gross loss: -1344500)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.01

Training (43):
Long Trades: 5209 (2614 won), Short Trades: 182 (82 won), won %: 0.5000927471712113, profit: 500 (gross gain: 1348000, gross loss: -1347500)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.01

Training (44):
Long Trades: 5272 (2659 won), Short Trades: 121 (61 won), won %: 0.5043575004635639, profit: 23500 (gross gain: 1360000, gross loss: -1336500)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.01

Training (45):
Long Trades:

Long Trades: 3298 (1684 won), Short Trades: 2101 (1047 won), won %: 0.5058344137803297, profit: 31500 (gross gain: 1365500, gross loss: -1334000)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.01

Training (64):
Long Trades: 3620 (1837 won), Short Trades: 1767 (867 won), won %: 0.501949136810841, profit: 10500 (gross gain: 1352000, gross loss: -1341500)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.01

Training (65):
Long Trades: 3671 (1829 won), Short Trades: 1733 (822 won), won %: 0.4905625462620281, profit: -51000 (gross gain: 1325500, gross loss: -1376500)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.01

Training (66):
Lon

Long Trades: 5384 (2714 won), Short Trades: 16 (7 won), won %: 0.5038888888888889, profit: 21000 (gross gain: 1360500, gross loss: -1339500)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.01

Training (85):
Long Trades: 5364 (2704 won), Short Trades: 21 (10 won), won %: 0.5039925719591458, profit: 21500 (gross gain: 1357000, gross loss: -1335500)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.01

Training (86):
Long Trades: 5378 (2706 won), Short Trades: 17 (7 won), won %: 0.502873030583874, profit: 15500 (gross gain: 1356500, gross loss: -1341000)
Evaluating: 
Long Trades: 1156 (580 won), Short Trades: 0 (0 won), won %: 0.5017301038062284, profit: 2500 (gross gain: 290000, gross loss: -287500)
Current epsilon: 0.01

Training (87):
Long Trades: 538

In [20]:
state = agent.env.reset()
agent.env.next_index = 0
state = agent.preprocess_state(state)
done = False
long, short, pas = 0, 0 ,0
actions = []

while not done:
	action = agent.choose_action(state, -1)
	next_state, reward, done = agent.env.step(action)
	if done: break
	state = agent.preprocess_state(next_state)
	if action == 1:
		long += 1
	elif action == 0:
		pas += 1
	else:
		short += 1   
	actions.append(action)
	if agent.env.next_index >= len(agent.env.dataset) -1: break

print(agent.env.report())
print("pas: ", pas)
print("short: ", short)
print("long: ", long)

Long Trades: 5824 (2948 won), Short Trades: 729 (370 won), won %: 0.5063329772623226, profit: 42000 (gross gain: 1659000, gross loss: -1617000)
pas:  0
short:  729
long:  5823


In [30]:
agent.env.step(1)

(array([0.    , 0.    , 0.    , 0.    , 0.    , 0.    , 0.    , 0.    ,
        0.    , 0.    , 0.    , 0.    , 0.    , 0.    , 0.    , 1.    ,
        0.    , 0.    , 0.    , 0.    , 0.    , 0.    , 0.    , 0.    ,
        1.2299, 1.2266, 1.2312, 1.2263, 1.2299, 1.2266, 1.2312, 1.2263,
        1.2299, 1.2266, 1.2312, 1.2263, 1.2299, 1.2266, 1.2312, 1.2263,
        1.2299, 1.2266, 1.2312, 1.2263, 1.2299, 1.2266, 1.2312, 1.2263]),
 83.33333333333525,
 False)

In [None]:
agent.model.save('forex.model')