In [1]:
import pdb
import csv
import numpy as np
import tensorflow as tf
import random
import math
import numpy as np
from collections import deque
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
from environment import Environment

ModuleNotFoundError: No module named 'tensorflow'

In [2]:
class Environment:
	def __init__(self, csv_path):
		self.points = 0.00001
		self.spread_points = 5
		self.tp_points = 500
		self.sl_points = 500

		self.dataset = []
		self.first_tick = 2
		self.next_index = self.first_tick

		with open(csv_path, newline='') as csvfile:
			reader = csv.reader(csvfile, delimiter=';')
			for row in reader:
				date, hour, tick_count, tick_count_change_percent, \
				op, close, high, low, \
				price_change, price_change_normalized, \
				ma4, ma4_change, ma4_change_normalized, \
				ma12, ma12_change, ma12_change_normalized, \
				ma50, ma50_change, ma50_change_normalized, \
				ma200, ma200_change, ma200_change_normalized, \
				sar, sar_boolean, \
				sar_1, sar_1_boolean, \
				sar_2, sar_3_boolean, \
				sar_3, sar_2_boolean  = row
				
				entry = {
					'date': date,
					'hour': hour[0:1],
					'tick_count': tick_count,
					'tick_count_change_percent': float(tick_count_change_percent),
					'open': float(op),
					'close': float(close),
					'high': float(high),
					'low': float(low),
					'price_change': float(price_change),
					'price_change_normalized': float(price_change_normalized),
					'ma4': float(ma4),
					'ma4_change': float(ma4_change),
					'ma4_change_normalized': float(ma4_change_normalized),
					'ma12': float(ma12),
					'ma12_change': float(ma12_change),
					'ma12_change_normalized': float(ma12_change_normalized),
					'ma50': float(ma50),
					'ma50_change': float(ma50_change),
					'ma50_change_normalized': float(ma50_change_normalized),
					'ma200': float(ma200),
					'ma200_change': float(ma200_change),
					'ma200_change_normalized': float(ma200_change_normalized),
					'sar': float(sar),
					'sar_boolean': (1 if sar_boolean == 'true' else 0),
					'sar_1': float(sar_1),
					'sar_1_boolean': (1 if sar_1_boolean == 'true' else 0),
					'sar_2': float(sar_2),
					'sar_2_boolean': (1 if sar_3_boolean == 'true' else 0),
					'sar_3': float(sar_2),
					'sar_3_boolean': (1 if sar_3_boolean == 'true' else 0)
				}
				self.dataset.append(entry)

		self.input_size = len(self.__build_state(0)[0])
		self.action_space = [0, 1, 2] # 0: pass, 1: long, 2: short

	def report(self):
		return "Long Trades: {} ({} won), Short Trades: {} ({} won), won %: {}, profit: {} (gross gain: {}, gross loss: {})".format(
			self.long_trades, self.long_trades_won, self.short_trades, self.short_trades_won,
			(self.long_trades_won + self.short_trades_won) / (self.long_trades + self.short_trades),
			round(self.gross_profit + self.gross_loss), round(self.gross_profit), round(self.gross_loss)
		)

	def reset(self):
		self.next_index = self.first_tick
		self.short_trades = 0
		self.long_trades = 0
		self.long_trades_won = 0
		self.short_trades_won = 0
		self.short_trades_lost = 0
		self.gross_profit = 0
		self.gross_loss = 0

		return self.step(self.action_space[0])[0]

	def step(self, action):
		reward, nindex = self.__calculate_reward(action, self.next_index)

		distance = max(1, nindex - self.next_index)
		self.next_index = nindex
		next_state, has_next_state = self.__build_state(self.next_index)

		done = False
		if not has_next_state: done = True

		return (next_state, reward / distance, done)

	def __build_state(self, index):
		if index >= len(self.dataset):
			return (None, False)

		row = self.dataset[index]
		hour = tf.keras.utils.to_categorical(row['hour'], 24)
		
		state = [
			row['sar_boolean'],
			row['sar_1_boolean'],
			row['sar_2_boolean'],
			row['sar_3_boolean'],
			row['ma4_change_normalized'],
			row['ma12_change_normalized'],
			row['ma50_change_normalized'],
			row['ma200_change_normalized']
		]
		for i in range(index,index-self.first_tick,-1):
			if not self.dataset[i]:
				state.append(self.dataset[i]['close'])
				state.append(self.dataset[i]['open'])
				state.append(self.dataset[i]['high'])
				state.append(self.dataset[i]['low'])
			else:
				state.append(row['close'])
				state.append(row['open'])
				state.append(row['high'])
				state.append(row['low'])

		return (np.concatenate((hour, state)), True)
		# return (state, True)

	def __calculate_reward(self, action, index):
		if action == 0: # pass
			reward = -1
			nindex = index + 1
		if action == 1: # long
			reward, nindex = self.__calculate_position_reward(True, index)
			self.long_trades += 1

			if reward > 0: self.long_trades_won += 1
			self.gross_profit += max(0, reward)
			self.gross_loss += min(0, reward)
		if action == 2: # short
			reward, nindex = self.__calculate_position_reward(False, index)
			self.short_trades += 1

			if reward > 0: self.short_trades_won += 1
			self.gross_profit += max(0, reward)
			self.gross_loss += min(0, reward)
		

		return (reward, nindex)


	def __calculate_position_reward(self, long, index):
		open_bid, open_ask = \
			(
				self.dataset[index]['close'] - self.__points(self.spread_points) / 2,
				self.dataset[index]['close'] + self.__points(self.spread_points) / 2
			)
		
		open_price = open_ask if long else open_bid
		if long:
			open_position_tp_price = open_bid + self.__points(self.tp_points) + self.__points(self.spread_points)
			open_position_sl_price = open_bid - self.__points(self.sl_points) + self.__points(self.spread_points)
		else:
			open_position_tp_price = open_ask - self.__points(self.tp_points) - self.__points(self.spread_points)
			open_position_sl_price = open_ask + self.__points(self.sl_points) - self.__points(self.spread_points)

		index += 1
		while index < len(self.dataset):
			if long:
				high_bid = self.dataset[index]['high'] - self.__points(self.spread_points) / 2
				low_bid = self.dataset[index]['low'] - self.__points(self.spread_points) / 2

				if low_bid <= open_position_sl_price:
					# print("LONG SL ({}):  From {} to {} ({})".format(index, open_price, open_position_sl_price, round((open_position_sl_price - open_price) / self.points)))
					return ((open_position_sl_price - open_price) / self.points, index + 1)
				if high_bid >= open_position_tp_price:
					# print("LONG TP ({}):  From {} to {} ({})".format(index, open_price, open_position_tp_price, round((open_position_tp_price - open_price) / self.points)))
					return ((open_position_tp_price - open_price) / self.points, index + 1)
			else:
				high_ask = self.dataset[index]['high'] + self.__points(self.spread_points) / 2
				low_ask = self.dataset[index]['low'] + self.__points(self.spread_points) / 2

				if high_ask >= open_position_sl_price:
					# print("SHORT SL ({}): From {} to {} ({})".format(index, open_price, open_position_sl_price, round((open_price - open_position_sl_price) / self.points)))
					return ((open_price - open_position_sl_price) / self.points, index + 1)
				if low_ask <= open_position_tp_price:
					# print("SHORT TP ({}): From {} to {} ({})".format(index, open_price, open_position_tp_price, round((open_price - open_position_tp_price) / self.points)))
					return ((open_price - open_position_tp_price) / self.points, index + 1)

			index += 1

		# if I'm here, it's end of the dataset
		return (0, index)


	def __points(self, val):
		return val * 0.00001


# env = Environment('gbpusd_dump.csv')
# print(env.reset())
# # print(env.step(0))
# print(env.step(1))
# print(env.report())

In [3]:
EPOCHS = 20
THRESHOLD = 195
MONITOR = True

class DQN:
	def __init__(self, env_string, batch_size=128):
		self.memory = deque(maxlen=1000)
		self.env = Environment('eurousd_data.csv')
		self.input_size = self.env.input_size
		self.action_size = len(self.env.action_space)
		self.batch_size = batch_size
		self.gamma = 0.0 # we want to be greedy
		self.epsilon = 1.0
		self.epsilon_min = 0.01
		self.epsilon_decay = 0.9998
		alpha=0.01
		alpha_decay=0.01
		dropout = 0.3

		# Init model
		self.model = Sequential()
		self.model.add(Dense(24 + 8, input_dim=self.input_size, activation='tanh', kernel_regularizer=l2(0.01), activity_regularizer=l2(0.01)))
		# self.model.add(Dropout(dropout))
		self.model.add(Dense(32, activation='tanh', kernel_regularizer=l2(0.01), activity_regularizer=l2(0.01)))
		self.model.add(Dropout(dropout))
		self.model.add(Dense(32, activation='tanh', kernel_regularizer=l2(0.01), activity_regularizer=l2(0.01)))
		self.model.add(Dropout(dropout))
		self.model.add(Dense(self.action_size, activation='tanh'))
		self.model.compile(loss='mse', optimizer=Adam(lr=alpha, decay=alpha_decay))

		self.model.summary()

	def remember(self, state, action, reward, next_state, done):
		self.memory.append((state, action, reward, next_state, done))

	def replay(self, batch_size):
		x_batch, y_batch = [], []
		minibatch = random.sample(self.memory, min(len(self.memory), batch_size))

		for state, action, reward, next_state, done in minibatch:
			y_target = self.model.predict(state) # y_target = [[1, 0, -1]]
			# print("action: {}, reward: {}".format(action, reward))
			# print(y_target[0])
			# print(reward)
			reward = max(min(reward, 1), -1)
			# print(y_target[0])
			# print("|")
			y_target[0][action] = reward if (done or self.gamma == 0) else reward + self.gamma * np.max(self.model.predict(next_state)[0])
			# print(y_target[0])
			# print(y_target[0][action])
			# print(self.model.predict(next_state))
			# y_target[0][action] = max(min(reward, 1), 0)
			# print(y_target[0])
			x_batch.append(state[0])
			y_batch.append(y_target[0])

		self.model.fit(np.array(x_batch), np.array(y_batch), batch_size=len(x_batch), verbose=1)

	def train(self):
		scores = deque(maxlen=100)
		avg_scores = []
		for e in range(EPOCHS):
			print("Training ({}):".format(e + 1))

			state = self.env.reset()
			state = self.preprocess_state(state)
			done = False

			while not done:
				action = self.choose_action(state, self.epsilon)


				# next_index = self.env.next_index
				next_state, reward, done = self.env.step(action)

				# if action == 1:
				# 	self.env.next_index = next_index
				# 	ns, rew, don = self.env.step(2)
				# 	print("1: {} => 2: {}".format(reward, rew))
				# elif action == 2:
				# 	self.env.next_index = next_index
				# 	ns, rew, don = self.env.step(1)
				# 	print("2: {} => 1: {}".format(reward, rew))


				next_state = self.preprocess_state(next_state)
				self.remember(state, action, reward, next_state, done)
				state = next_state
				self.epsilon = max(self.epsilon_min, self.epsilon_decay*self.epsilon) # decrease epsilon
				if self.env.next_index >= int(len(self.env.dataset) * 0.7): break

			self.replay(self.batch_size)
			print(self.env.report())

			# test on unoptimized data
			print("Evaluating: ")

			state = self.env.reset()
			self.env.next_index = int(len(self.env.dataset) * 0.7) + 1
			state = self.preprocess_state(state)
			done = False

			while not done:
				action = self.choose_action(state, -1)

				next_state, reward, done = self.env.step(action)
				if done: break

				state = self.preprocess_state(next_state)
				if self.env.next_index >= len(self.env.dataset) - 2: break

			print(self.env.report())
			print("Current epsilon: {}".format(self.epsilon))
			print()
			self.memory.clear()


		print('Did not solve after {} episodes :('.format(e))
		return avg_scores

	def choose_action(self, state, epsilon):
		# return random.choice(self.env.action_space)
		if np.random.random() <= epsilon:
			return random.choice(self.env.action_space)
		else:
			return np.argmax(self.model(state, training=epsilon>0))


	def preprocess_state(self, state):
		return np.reshape(state, [1, self.input_size])

env_string = 'CartPole-v0'
agent = DQN(env_string)
scores = agent.train()


NameError: name 'deque' is not defined

In [8]:
EPOCHS = 20
scores = agent.train()

Training (1):
Long Trades: 4030 (2583 won), Short Trades: 2858 (1955 won), won %: 0.6588269454123112, profit: 1094000 (gross gain: 2269000, gross loss: -1175000)
Evaluating: 
Long Trades: 1085 (639 won), Short Trades: 682 (437 won), won %: 0.6089417091114884, profit: 193000 (gross gain: 538000, gross loss: -345000)
Current epsilon: 0.01

Training (2):
Long Trades: 4186 (2671 won), Short Trades: 2705 (1888 won), won %: 0.6615875780002902, profit: 1113500 (gross gain: 2279500, gross loss: -1166000)
Evaluating: 
Long Trades: 1150 (671 won), Short Trades: 616 (404 won), won %: 0.6087202718006794, profit: 192500 (gross gain: 537500, gross loss: -345000)
Current epsilon: 0.01

Training (3):
Long Trades: 4382 (2729 won), Short Trades: 2521 (1757 won), won %: 0.649862378675938, profit: 1034500 (gross gain: 2243000, gross loss: -1208500)
Evaluating: 
Long Trades: 1190 (691 won), Short Trades: 577 (384 won), won %: 0.608375778155065, profit: 192000 (gross gain: 537500, gross loss: -345500)
Curre

In [9]:
EPOCHS = 600
scores = agent.train()

Training (1):
Long Trades: 69 (53 won), Short Trades: 50 (36 won), won %: 0.7478991596638656, profit: 29500 (gross gain: 44500, gross loss: -15000)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (2):
Long Trades: 74 (58 won), Short Trades: 46 (36 won), won %: 0.7833333333333333, profit: 34000 (gross gain: 47000, gross loss: -13000)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (3):
Long Trades: 75 (57 won), Short Trades: 47 (35 won), won %: 0.7540983606557377, profit: 31000 (gross gain: 46000, gross loss: -15000)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (4):
Long Trades: 68 (54 won), Short Tra

Long Trades: 71 (54 won), Short Trades: 47 (34 won), won %: 0.7457627118644068, profit: 29000 (gross gain: 44000, gross loss: -15000)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 17 (6 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (24):
Long Trades: 72 (56 won), Short Trades: 48 (36 won), won %: 0.7666666666666667, profit: 32000 (gross gain: 46000, gross loss: -14000)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 17 (6 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (25):
Long Trades: 70 (54 won), Short Trades: 49 (36 won), won %: 0.7563025210084033, profit: 30500 (gross gain: 45000, gross loss: -14500)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 17 (6 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (26):
Long Trades: 65 (53 won), Short Trades: 53 (39

Long Trades: 75 (55 won), Short Trades: 43 (31 won), won %: 0.7288135593220338, profit: 27000 (gross gain: 43000, gross loss: -16000)
Evaluating: 
Long Trades: 26 (13 won), Short Trades: 16 (6 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (46):
Long Trades: 77 (59 won), Short Trades: 43 (34 won), won %: 0.775, profit: 33000 (gross gain: 46500, gross loss: -13500)
Evaluating: 
Long Trades: 27 (13 won), Short Trades: 16 (6 won), won %: 0.4418604651162791, profit: -2000 (gross gain: 9500, gross loss: -11500)
Current epsilon: 0.01

Training (47):
Long Trades: 76 (57 won), Short Trades: 42 (32 won), won %: 0.7542372881355932, profit: 30000 (gross gain: 44500, gross loss: -14500)
Evaluating: 
Long Trades: 28 (14 won), Short Trades: 15 (6 won), won %: 0.46511627906976744, profit: -1000 (gross gain: 10000, gross loss: -11000)
Current epsilon: 0.01

Training (48):
Long Trades: 77 (57 won), Short Trades: 44 (33 won), won %: 

Long Trades: 27 (13 won), Short Trades: 15 (5 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (67):
Long Trades: 75 (54 won), Short Trades: 43 (30 won), won %: 0.711864406779661, profit: 25000 (gross gain: 42000, gross loss: -17000)
Evaluating: 
Long Trades: 27 (13 won), Short Trades: 15 (5 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (68):
Long Trades: 72 (54 won), Short Trades: 46 (33 won), won %: 0.7372881355932204, profit: 28000 (gross gain: 43500, gross loss: -15500)
Evaluating: 
Long Trades: 27 (13 won), Short Trades: 15 (5 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (69):
Long Trades: 78 (57 won), Short Trades: 44 (32 won), won %: 0.7295081967213115, profit: 28000 (gross gain: 44500, gross loss: -16500)
Evaluating: 
Long Trades: 24 (11 won), Short Trades: 18 (6 w

Long Trades: 70 (56 won), Short Trades: 52 (40 won), won %: 0.7868852459016393, profit: 35000 (gross gain: 48000, gross loss: -13000)
Evaluating: 
Long Trades: 24 (12 won), Short Trades: 18 (7 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (89):
Long Trades: 64 (52 won), Short Trades: 54 (40 won), won %: 0.7796610169491526, profit: 33000 (gross gain: 46000, gross loss: -13000)
Evaluating: 
Long Trades: 24 (12 won), Short Trades: 18 (7 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (90):
Long Trades: 72 (53 won), Short Trades: 48 (34 won), won %: 0.725, profit: 27000 (gross gain: 43500, gross loss: -16500)
Evaluating: 
Long Trades: 24 (12 won), Short Trades: 18 (7 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (91):
Long Trades: 68 (54 won), Short Trades: 54 (40 won), won %: 0.

Long Trades: 66 (54 won), Short Trades: 54 (40 won), won %: 0.7833333333333333, profit: 34000 (gross gain: 47000, gross loss: -13000)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 17 (6 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (111):
Long Trades: 74 (56 won), Short Trades: 47 (35 won), won %: 0.7520661157024794, profit: 30500 (gross gain: 45500, gross loss: -15000)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 17 (6 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (112):
Long Trades: 69 (54 won), Short Trades: 49 (36 won), won %: 0.7627118644067796, profit: 31000 (gross gain: 45000, gross loss: -14000)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 17 (6 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (113):
Long Trades: 67 (53 won), Short Trades: 51 

Long Trades: 22 (11 won), Short Trades: 20 (8 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (132):
Long Trades: 61 (51 won), Short Trades: 60 (43 won), won %: 0.7768595041322314, profit: 33500 (gross gain: 47000, gross loss: -13500)
Evaluating: 
Long Trades: 24 (12 won), Short Trades: 18 (7 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (133):
Long Trades: 66 (55 won), Short Trades: 56 (43 won), won %: 0.8032786885245902, profit: 37000 (gross gain: 49000, gross loss: -12000)
Evaluating: 
Long Trades: 26 (12 won), Short Trades: 17 (6 won), won %: 0.4186046511627907, profit: -3000 (gross gain: 9000, gross loss: -12000)
Current epsilon: 0.01

Training (134):
Long Trades: 78 (55 won), Short Trades: 42 (30 won), won %: 0.7083333333333334, profit: 25000 (gross gain: 42500, gross loss: -17500)
Evaluating: 
Long Trades: 27 (13 won), Short Trades: 16 (6 

Long Trades: 73 (56 won), Short Trades: 48 (36 won), won %: 0.7603305785123967, profit: 31500 (gross gain: 46000, gross loss: -14500)
Evaluating: 
Long Trades: 24 (11 won), Short Trades: 18 (6 won), won %: 0.40476190476190477, profit: -3500 (gross gain: 8500, gross loss: -12000)
Current epsilon: 0.01

Training (176):
Long Trades: 74 (57 won), Short Trades: 44 (34 won), won %: 0.7711864406779662, profit: 32000 (gross gain: 45500, gross loss: -13500)
Evaluating: 
Long Trades: 23 (11 won), Short Trades: 19 (7 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (177):
Long Trades: 69 (54 won), Short Trades: 49 (36 won), won %: 0.7627118644067796, profit: 31000 (gross gain: 45000, gross loss: -14000)
Evaluating: 
Long Trades: 23 (11 won), Short Trades: 19 (7 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (178):
Long Trades: 68 (54 won), Short Trades: 52 

Long Trades: 79 (59 won), Short Trades: 42 (33 won), won %: 0.7603305785123967, profit: 31500 (gross gain: 46000, gross loss: -14500)
Evaluating: 
Long Trades: 26 (13 won), Short Trades: 16 (6 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (198):
Long Trades: 74 (57 won), Short Trades: 48 (36 won), won %: 0.7622950819672131, profit: 32000 (gross gain: 46500, gross loss: -14500)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (199):
Long Trades: 71 (56 won), Short Trades: 50 (38 won), won %: 0.7768595041322314, profit: 33500 (gross gain: 47000, gross loss: -13500)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 17 (6 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (200):
Long Trades: 73 (58 won), Short Trades: 48 (

Long Trades: 67 (52 won), Short Trades: 53 (37 won), won %: 0.7416666666666667, profit: 29000 (gross gain: 44500, gross loss: -15500)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 17 (6 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (220):
Long Trades: 70 (55 won), Short Trades: 50 (37 won), won %: 0.7666666666666667, profit: 32000 (gross gain: 46000, gross loss: -14000)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (221):
Long Trades: 69 (54 won), Short Trades: 51 (37 won), won %: 0.7583333333333333, profit: 31000 (gross gain: 45500, gross loss: -14500)
Evaluating: 
Long Trades: 24 (12 won), Short Trades: 18 (7 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (222):
Long Trades: 69 (52 won), Short Trades: 49 (

Long Trades: 64 (52 won), Short Trades: 54 (39 won), won %: 0.7711864406779662, profit: 32000 (gross gain: 45500, gross loss: -13500)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (242):
Long Trades: 67 (53 won), Short Trades: 53 (38 won), won %: 0.7583333333333333, profit: 31000 (gross gain: 45500, gross loss: -14500)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (243):
Long Trades: 67 (52 won), Short Trades: 53 (37 won), won %: 0.7416666666666667, profit: 29000 (gross gain: 44500, gross loss: -15500)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (244):
Long Trades: 72 (55 won), Short Trades: 49 

Long Trades: 75 (56 won), Short Trades: 44 (33 won), won %: 0.7478991596638656, profit: 29500 (gross gain: 44500, gross loss: -15000)
Evaluating: 
Long Trades: 24 (12 won), Short Trades: 18 (7 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (264):
Long Trades: 71 (54 won), Short Trades: 48 (35 won), won %: 0.7478991596638656, profit: 29500 (gross gain: 44500, gross loss: -15000)
Evaluating: 
Long Trades: 24 (12 won), Short Trades: 18 (7 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (265):
Long Trades: 72 (54 won), Short Trades: 50 (36 won), won %: 0.7377049180327869, profit: 29000 (gross gain: 45000, gross loss: -16000)
Evaluating: 
Long Trades: 24 (12 won), Short Trades: 18 (7 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (266):
Long Trades: 66 (54 won), Short Trades: 54 (40

Long Trades: 68 (51 won), Short Trades: 51 (35 won), won %: 0.7226890756302521, profit: 26500 (gross gain: 43000, gross loss: -16500)
Evaluating: 
Long Trades: 23 (11 won), Short Trades: 19 (7 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (286):
Long Trades: 70 (54 won), Short Trades: 51 (37 won), won %: 0.7520661157024794, profit: 30500 (gross gain: 45500, gross loss: -15000)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 17 (6 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (287):
Long Trades: 72 (57 won), Short Trades: 49 (38 won), won %: 0.7851239669421488, profit: 34500 (gross gain: 47500, gross loss: -13000)
Evaluating: 
Long Trades: 23 (11 won), Short Trades: 19 (7 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (288):
Long Trades: 67 (55 won), Short Trades: 53 

Long Trades: 68 (53 won), Short Trades: 51 (37 won), won %: 0.7563025210084033, profit: 30500 (gross gain: 45000, gross loss: -14500)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (308):
Long Trades: 71 (56 won), Short Trades: 48 (37 won), won %: 0.7815126050420168, profit: 33500 (gross gain: 46500, gross loss: -13000)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (309):
Long Trades: 70 (53 won), Short Trades: 49 (35 won), won %: 0.7394957983193278, profit: 28500 (gross gain: 44000, gross loss: -15500)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (310):
Long Trades: 71 (54 won), Short Trades: 47 

Long Trades: 73 (55 won), Short Trades: 48 (35 won), won %: 0.743801652892562, profit: 29500 (gross gain: 45000, gross loss: -15500)
Evaluating: 
Long Trades: 24 (12 won), Short Trades: 18 (7 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (330):
Long Trades: 70 (58 won), Short Trades: 51 (41 won), won %: 0.8181818181818182, profit: 38500 (gross gain: 49500, gross loss: -11000)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 18 (7 won), won %: 0.4418604651162791, profit: -2000 (gross gain: 9500, gross loss: -11500)
Current epsilon: 0.01

Training (331):
Long Trades: 67 (55 won), Short Trades: 51 (39 won), won %: 0.7966101694915254, profit: 35000 (gross gain: 47000, gross loss: -12000)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 18 (7 won), won %: 0.4418604651162791, profit: -2000 (gross gain: 9500, gross loss: -11500)
Current epsilon: 0.01

Training (332):
Long Trades: 68 (54 won), Short Trades: 53 (39 

Long Trades: 69 (54 won), Short Trades: 52 (38 won), won %: 0.7603305785123967, profit: 31500 (gross gain: 46000, gross loss: -14500)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 18 (7 won), won %: 0.4418604651162791, profit: -2000 (gross gain: 9500, gross loss: -11500)
Current epsilon: 0.01

Training (352):
Long Trades: 70 (52 won), Short Trades: 48 (35 won), won %: 0.7372881355932204, profit: 28000 (gross gain: 43500, gross loss: -15500)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 18 (7 won), won %: 0.4418604651162791, profit: -2000 (gross gain: 9500, gross loss: -11500)
Current epsilon: 0.01

Training (353):
Long Trades: 66 (52 won), Short Trades: 53 (38 won), won %: 0.7563025210084033, profit: 30500 (gross gain: 45000, gross loss: -14500)
Evaluating: 
Long Trades: 26 (13 won), Short Trades: 17 (7 won), won %: 0.46511627906976744, profit: -1000 (gross gain: 10000, gross loss: -11000)
Current epsilon: 0.01

Training (354):
Long Trades: 74 (54 won), Short Trades: 47 (

Long Trades: 24 (11 won), Short Trades: 18 (6 won), won %: 0.40476190476190477, profit: -3500 (gross gain: 8500, gross loss: -12000)
Current epsilon: 0.01

Training (373):
Long Trades: 68 (55 won), Short Trades: 50 (38 won), won %: 0.788135593220339, profit: 34000 (gross gain: 46500, gross loss: -12500)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 17 (6 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (374):
Long Trades: 75 (56 won), Short Trades: 46 (34 won), won %: 0.743801652892562, profit: 29500 (gross gain: 45000, gross loss: -15500)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 17 (6 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (375):
Long Trades: 77 (56 won), Short Trades: 42 (31 won), won %: 0.7310924369747899, profit: 27500 (gross gain: 43500, gross loss: -16000)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 17 (6

Long Trades: 72 (57 won), Short Trades: 48 (37 won), won %: 0.7833333333333333, profit: 34000 (gross gain: 47000, gross loss: -13000)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 18 (7 won), won %: 0.4418604651162791, profit: -2000 (gross gain: 9500, gross loss: -11500)
Current epsilon: 0.01

Training (417):
Long Trades: 73 (54 won), Short Trades: 47 (34 won), won %: 0.7333333333333333, profit: 28000 (gross gain: 44000, gross loss: -16000)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 18 (7 won), won %: 0.4418604651162791, profit: -2000 (gross gain: 9500, gross loss: -11500)
Current epsilon: 0.01

Training (418):
Long Trades: 73 (55 won), Short Trades: 48 (35 won), won %: 0.743801652892562, profit: 29500 (gross gain: 45000, gross loss: -15500)
Evaluating: 
Long Trades: 25 (12 won), Short Trades: 18 (7 won), won %: 0.4418604651162791, profit: -2000 (gross gain: 9500, gross loss: -11500)
Current epsilon: 0.01

Training (419):
Long Trades: 69 (54 won), Short Trades: 52 (38 

Long Trades: 66 (52 won), Short Trades: 52 (37 won), won %: 0.7542372881355932, profit: 30000 (gross gain: 44500, gross loss: -14500)
Evaluating: 
Long Trades: 24 (11 won), Short Trades: 18 (6 won), won %: 0.40476190476190477, profit: -3500 (gross gain: 8500, gross loss: -12000)
Current epsilon: 0.01

Training (439):
Long Trades: 71 (54 won), Short Trades: 47 (34 won), won %: 0.7457627118644068, profit: 29000 (gross gain: 44000, gross loss: -15000)
Evaluating: 
Long Trades: 24 (11 won), Short Trades: 18 (6 won), won %: 0.40476190476190477, profit: -3500 (gross gain: 8500, gross loss: -12000)
Current epsilon: 0.01

Training (440):
Long Trades: 64 (51 won), Short Trades: 56 (39 won), won %: 0.75, profit: 30000 (gross gain: 45000, gross loss: -15000)
Evaluating: 
Long Trades: 24 (11 won), Short Trades: 18 (6 won), won %: 0.40476190476190477, profit: -3500 (gross gain: 8500, gross loss: -12000)
Current epsilon: 0.01

Training (441):
Long Trades: 63 (52 won), Short Trades: 57 (41 won), won 

Long Trades: 69 (56 won), Short Trades: 52 (40 won), won %: 0.7933884297520661, profit: 35500 (gross gain: 48000, gross loss: -12500)
Evaluating: 
Long Trades: 24 (12 won), Short Trades: 18 (7 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (461):
Long Trades: 74 (56 won), Short Trades: 47 (35 won), won %: 0.7520661157024794, profit: 30500 (gross gain: 45500, gross loss: -15000)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (462):
Long Trades: 70 (54 won), Short Trades: 49 (36 won), won %: 0.7563025210084033, profit: 30500 (gross gain: 45000, gross loss: -14500)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (463):
Long Trades: 75 (57 won), Short Trades: 46 (

Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (482):
Long Trades: 73 (56 won), Short Trades: 47 (35 won), won %: 0.7583333333333333, profit: 31000 (gross gain: 45500, gross loss: -14500)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (483):
Long Trades: 71 (54 won), Short Trades: 47 (34 won), won %: 0.7457627118644068, profit: 29000 (gross gain: 44000, gross loss: -15000)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (484):
Long Trades: 71 (55 won), Short Trades: 47 (35 won), won %: 0.7627118644067796, profit: 31000 (gross gain: 45000, gross loss: -14000)
Evaluating: 
Long Trades: 26 (13 won), Short Trades: 16 

Long Trades: 71 (56 won), Short Trades: 47 (36 won), won %: 0.7796610169491526, profit: 33000 (gross gain: 46000, gross loss: -13000)
Evaluating: 
Long Trades: 26 (13 won), Short Trades: 16 (6 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (504):
Long Trades: 72 (55 won), Short Trades: 49 (36 won), won %: 0.7520661157024794, profit: 30500 (gross gain: 45500, gross loss: -15000)
Evaluating: 
Long Trades: 24 (11 won), Short Trades: 18 (6 won), won %: 0.40476190476190477, profit: -3500 (gross gain: 8500, gross loss: -12000)
Current epsilon: 0.01

Training (505):
Long Trades: 67 (55 won), Short Trades: 54 (41 won), won %: 0.7933884297520661, profit: 35500 (gross gain: 48000, gross loss: -12500)
Evaluating: 
Long Trades: 24 (11 won), Short Trades: 18 (6 won), won %: 0.40476190476190477, profit: -3500 (gross gain: 8500, gross loss: -12000)
Current epsilon: 0.01

Training (506):
Long Trades: 65 (54 won), Short Trades: 55 (

Long Trades: 72 (55 won), Short Trades: 48 (35 won), won %: 0.75, profit: 30000 (gross gain: 45000, gross loss: -15000)
Evaluating: 
Long Trades: 23 (11 won), Short Trades: 19 (7 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (526):
Long Trades: 68 (56 won), Short Trades: 52 (40 won), won %: 0.8, profit: 36000 (gross gain: 48000, gross loss: -12000)
Evaluating: 
Long Trades: 23 (11 won), Short Trades: 19 (7 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (527):
Long Trades: 64 (52 won), Short Trades: 57 (41 won), won %: 0.768595041322314, profit: 32500 (gross gain: 46500, gross loss: -14000)
Evaluating: 
Long Trades: 23 (11 won), Short Trades: 19 (7 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (528):
Long Trades: 65 (54 won), Short Trades: 55 (41 won), won %: 0.79166666666

Long Trades: 70 (54 won), Short Trades: 51 (37 won), won %: 0.7520661157024794, profit: 30500 (gross gain: 45500, gross loss: -15000)
Evaluating: 
Long Trades: 22 (11 won), Short Trades: 20 (8 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (548):
Long Trades: 64 (51 won), Short Trades: 57 (40 won), won %: 0.7520661157024794, profit: 30500 (gross gain: 45500, gross loss: -15000)
Evaluating: 
Long Trades: 21 (10 won), Short Trades: 21 (8 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (549):
Long Trades: 64 (52 won), Short Trades: 56 (40 won), won %: 0.7666666666666667, profit: 32000 (gross gain: 46000, gross loss: -14000)
Evaluating: 
Long Trades: 22 (10 won), Short Trades: 20 (7 won), won %: 0.40476190476190477, profit: -3500 (gross gain: 8500, gross loss: -12000)
Current epsilon: 0.01

Training (550):
Long Trades: 67 (54 won), Short Trades: 54 (

Long Trades: 69 (56 won), Short Trades: 51 (40 won), won %: 0.8, profit: 36000 (gross gain: 48000, gross loss: -12000)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (570):
Long Trades: 67 (54 won), Short Trades: 51 (38 won), won %: 0.7796610169491526, profit: 33000 (gross gain: 46000, gross loss: -13000)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (571):
Long Trades: 72 (57 won), Short Trades: 48 (37 won), won %: 0.7833333333333333, profit: 34000 (gross gain: 47000, gross loss: -13000)
Evaluating: 
Long Trades: 25 (13 won), Short Trades: 17 (7 won), won %: 0.47619047619047616, profit: -500 (gross gain: 10000, gross loss: -10500)
Current epsilon: 0.01

Training (572):
Long Trades: 67 (53 won), Short Trades: 51 (37 won), won %

Long Trades: 69 (53 won), Short Trades: 52 (37 won), won %: 0.743801652892562, profit: 29500 (gross gain: 45000, gross loss: -15500)
Evaluating: 
Long Trades: 23 (11 won), Short Trades: 19 (7 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (592):
Long Trades: 63 (50 won), Short Trades: 58 (40 won), won %: 0.743801652892562, profit: 29500 (gross gain: 45000, gross loss: -15500)
Evaluating: 
Long Trades: 23 (11 won), Short Trades: 19 (7 won), won %: 0.42857142857142855, profit: -2500 (gross gain: 9000, gross loss: -11500)
Current epsilon: 0.01

Training (593):
Long Trades: 64 (51 won), Short Trades: 56 (40 won), won %: 0.7583333333333333, profit: 31000 (gross gain: 45500, gross loss: -14500)
Evaluating: 
Long Trades: 22 (11 won), Short Trades: 20 (8 won), won %: 0.4523809523809524, profit: -1500 (gross gain: 9500, gross loss: -11000)
Current epsilon: 0.01

Training (594):
Long Trades: 65 (53 won), Short Trades: 56 (41

In [48]:
agent.model.save('forex.model')

Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: forex.model\assets


In [19]:
model2 = tf.keras.models.load_model('C:/MQLProjects/MQL5/Experts/Forex/forex.model')
agent.model = model2
agent.env.step(1)


(array([0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  , 0.  , 1.07, 0.  , 0.  , 0.  , 0.  , 3.2 , 3.8 , 3.4 , 1.4 ]),
 9.803921568627675,
 False)

In [43]:
state = agent.env.reset()
agent.env.next_index = 0
state = agent.preprocess_state(state)
done = False
long, short, pas = 0, 0 ,0
actions = []

while not done:
	action = agent.choose_action(state, -1)
	next_state, reward, done = agent.env.step(action)
	if done: break
	state = agent.preprocess_state(next_state)
	if action == 1:
		long += 1
	elif action == 0:
		pas += 1
	else:
		short += 1   
	actions.append(action)
	if agent.env.next_index >= len(agent.env.dataset) -1: break

print(agent.env.report())
print("pas: ", pas)
print("short: ", short)
print("long: ", long)

Long Trades: 5904 (3681 won), Short Trades: 2754 (2061 won), won %: 0.6632016632016632, profit: 1413500 (gross gain: 2871000, gross loss: -1457500)
pas:  0
short:  2754
long:  5903


In [18]:
EPOCHS = 10
scores = agent.tra0in()

Training (1):
Long Trades: 78 (49 won), Short Trades: 43 (25 won), won %: 0.6115702479338843, profit: 13500 (gross gain: 37000, gross loss: -23500)
Evaluating: 
Long Trades: 39 (19 won), Short Trades: 4 (1 won), won %: 0.46511627906976744, profit: -1000 (gross gain: 10000, gross loss: -11000)
Current epsilon: 0.29097339082339757

Training (2):
Long Trades: 72 (45 won), Short Trades: 44 (24 won), won %: 0.5948275862068966, profit: 11000 (gross gain: 34500, gross loss: -23500)
Evaluating: 
Long Trades: 41 (21 won), Short Trades: 2 (1 won), won %: 0.5116279069767442, profit: 1000 (gross gain: 11000, gross loss: -10000)
Current epsilon: 0.28339145701096863

Training (3):
Long Trades: 85 (56 won), Short Trades: 36 (24 won), won %: 0.6611570247933884, profit: 19500 (gross gain: 40000, gross loss: -20500)
Evaluating: 
Long Trades: 41 (21 won), Short Trades: 2 (1 won), won %: 0.5116279069767442, profit: 1000 (gross gain: 11000, gross loss: -10000)
Current epsilon: 0.2757863472580599

Training 

KeyboardInterrupt: 