In [37]:
import pandas as pd
import MetaTrader5 as mt5

# โหลดข้อมูลจากไฟล์ CSV
file_path = './EURUSD_H4_201901020400_202412312000.csv'
data = pd.read_csv(file_path, delimiter="\t")

# แปลงเวลาจาก timestamp หากยังไม่ได้แปลง
# data['time'] = pd.to_datetime(data['<DATE>'], unit='s')

# ดูตัวอย่างข้อมูล
# print(data.head())


# เชื่อมต่อกับ MT5
if not mt5.initialize():
    print("Failed to initialize MT5")
    mt5.shutdown()

print(mt5.account_info().name)



Suriya Meerua


In [41]:
import numpy as np

class ForexTradingEnv:
    def __init__(self, data, initial_balance=10000):
        self.data = data
        self.initial_balance = initial_balance
        self.balance = initial_balance
        self.position = 0  # 1: Long, -1: Short, 0: Neutral
        self.current_step = 0

    def reset(self):
        self.balance = self.initial_balance
        self.position = 0
        self.current_step = 0
        return self._get_observation()

    def step(self, action):
        current_price = self.data.iloc[self.current_step]['<CLOSE>']
        next_price = self.data.iloc[self.current_step + 1]['<CLOSE>']
        reward = 0

        if action == 1:  # Buy
            self.position = 1
            reward = next_price - current_price
        elif action == 2:  # Sell
            self.position = -1
            reward = current_price - next_price
        else:  # Hold
            reward = 0

        self.current_step += 1
        done = self.current_step >= len(self.data) - 1

        return self._get_observation(), reward, done, {}

    def _get_observation(self):
        return np.array([self.data.iloc[self.current_step]['<CLOSE>']])

    def render(self):
        print(f'Step: {self.current_step}, Balance: {self.balance}, Position: {self.position}')


In [49]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten

class DQNAgent:
    def __init__(self, state_size, action_size):
        self.state_size = state_size
        self.action_size = action_size
        self.memory = []
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_decay = 0.995
        self.epsilon_min = 0.01
        self.learning_rate = 0.001
        self.model = self._build_model()

    def _build_model(self):
        model = Sequential([
            Dense(24, input_dim=self.state_size, activation='relu'),
            Dense(24, activation='relu'),
            Dense(self.action_size, activation='linear')
        ])
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=self.learning_rate),
                      loss='mse')
        return model

    def act(self, state):
        if np.random.rand() <= self.epsilon:
            return np.random.choice(self.action_size)
        act_values = self.model.predict(state, verbose=0)
        return np.argmax(act_values[0])

    def remember(self, state, action, reward, next_state, done):
        self.memory.append((state, action, reward, next_state, done))

    def replay(self, batch_size):
        # if len(self.memory) < batch_size:
        #     return
        print(self.memory)
        # minibatch = np.random.choice(self.memory, batch_size)
        # for state, action, reward, next_state, done in minibatch:
        #     target = reward
        #     if not done:
        #         target += self.gamma * np.amax(self.model.predict(next_state, verbose=0)[0])
        #     target_f = self.model.predict(state, verbose=0)
        #     target_f[0][action] = target
        #     self.model.fit(state, target_f, epochs=1, verbose=0)

        # if self.epsilon > self.epsilon_min:
        #     self.epsilon *= self.epsilon_decay


In [50]:
env = ForexTradingEnv(data)
agent = DQNAgent(state_size=1, action_size=3)  # 3 Actions: Buy, Sell, Hold

episodes = 50
batch_size = 32

for episode in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, 1])
    total_reward = 0

    while True:
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, 1])
        agent.remember(state, action, reward, next_state, done)
        state = next_state
        total_reward += reward

        if done:
            print(f"Episode {episode + 1}/{episodes}, Total Reward: {total_reward}")
            break

    agent.replay(batch_size)


Episode 1/50, Total Reward: 0.17294999999999983
[(array([[1.14787]]), 0, 0, array([[1.14336]]), False), (array([[1.14336]]), 0, 0, array([[1.13713]]), False), (array([[1.13713]]), 1, -0.003649999999999931, array([[1.13348]]), False), (array([[1.13348]]), 1, 0.0008900000000000574, array([[1.13437]]), False), (array([[1.13437]]), 0, 0, array([[1.1348]]), False), (array([[1.1348]]), 1, 0.0012499999999999734, array([[1.13605]]), False), (array([[1.13605]]), 0, 0, array([[1.13754]]), False), (array([[1.13754]]), 0, 0, array([[1.1365]]), False), (array([[1.1365]]), 0, 0, array([[1.13984]]), False), (array([[1.13984]]), 2, 0.0005500000000000504, array([[1.13929]]), False), (array([[1.13929]]), 0, 0, array([[1.14036]]), False), (array([[1.14036]]), 2, 0.0013900000000000023, array([[1.13897]]), False), (array([[1.13897]]), 1, 0.0012999999999998568, array([[1.14027]]), False), (array([[1.14027]]), 0, 0, array([[1.1371]]), False), (array([[1.1371]]), 2, -0.0034000000000000696, array([[1.1405]]), 

KeyboardInterrupt: 