In [1]:
import pandas as pd
import numpy as np
import random
from datetime import datetime, timedelta

# Parameters
num_entries = 1000
currency_pairs = ['EUR/USD', 'USD/JPY', 'GBP/USD', 'USD/CHF', 'AUD/USD', 'USD/CAD']
seed = 42  # fixed so the synthetic quotes are the same on every run

# Dedicated generator: reproducible draws without reseeding the global `random` state
rng = random.Random(seed)

# Generate timestamps: one quote every 5 minutes, newest first, counting back from now.
# These are wall-clock based, so the timestamps themselves still differ between runs.
start_time = datetime.now()
timestamps = [start_time - timedelta(minutes=5 * i) for i in range(num_entries)]

# Generate synthetic data: one [timestamp, pair, bid, ask] record per timestamp
data = []
for timestamp in timestamps:
    pair = rng.choice(currency_pairs)
    bid_price = round(rng.uniform(1.0, 1.5), 4)
    # The ask is drawn 0.0001-0.0005 above the bid, then rounded to 4 decimals
    ask_price = round(bid_price + rng.uniform(0.0001, 0.0005), 4)
    data.append([timestamp, pair, bid_price, ask_price])

# Create DataFrame
columns = ['Timestamp', 'Currency Pair', 'Bid Price', 'Ask Price']
df = pd.DataFrame(data, columns=columns)

# Save to CSV (without the index, so the file holds exactly the four columns above)
df.to_csv('currency_arbitrage_data.csv', index=False)


In [2]:
import pandas as pd

# Read the synthetic quotes back from disk
csv_path = 'currency_arbitrage_data.csv'
df = pd.read_csv(csv_path)

# The CSV stores timestamps as text; parse that column into datetime values
df['Timestamp'] = df['Timestamp'].pipe(pd.to_datetime)

# Preview the first rows of the loaded frame
preview = df.head()
print(preview)


                   Timestamp Currency Pair  Bid Price  Ask Price
0 2024-07-19 11:38:48.716950       USD/CHF     1.2220     1.2224
1 2024-07-19 11:33:48.716950       USD/JPY     1.4628     1.4631
2 2024-07-19 11:28:48.716950       USD/JPY     1.3268     1.3271
3 2024-07-19 11:23:48.716950       USD/CHF     1.1257     1.1262
4 2024-07-19 11:18:48.716950       USD/CAD     1.3318     1.3319


In [3]:
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Replace each currency-pair label with the integer class index assigned by the encoder
le = LabelEncoder()
pair_codes = le.fit_transform(df['Currency Pair'])
df['Currency Pair'] = pair_codes

# Standardize the two price columns in one fit/transform pass
price_columns = ['Bid Price', 'Ask Price']
scaler = StandardScaler()
scaled_prices = scaler.fit_transform(df[price_columns])
df[price_columns] = scaled_prices

print(df.head())


                   Timestamp  Currency Pair  Bid Price  Ask Price
0 2024-07-19 11:38:48.716950              4  -0.173319  -0.172652
1 2024-07-19 11:33:48.716950              5   1.508608   1.508600
2 2024-07-19 11:28:48.716950              5   0.558683   0.558661
3 2024-07-19 11:23:48.716950              4  -0.845950  -0.844594
4 2024-07-19 11:18:48.716950              3   0.593606   0.592188


In [4]:
import numpy as np
import gym
from gym import spaces

class CurrencyArbitrageEnv(gym.Env):
    """Minimal trading environment that replays quote rows from a DataFrame.

    Each step exposes one row as the observation ``[bid, ask, pair]``. The agent
    can hold, buy (balance decreases by the ask) or sell (balance increases by
    the bid). The only state carried between steps is the cash balance and the
    row cursor ``current_step``.

    The classic gym call convention is used: ``reset()`` returns an observation
    and ``step()`` returns ``(obs, reward, done, info)``.
    """

    # Column order of the observation vector
    OBS_COLUMNS = ['Bid Price', 'Ask Price', 'Currency Pair']

    def __init__(self, df, initial_balance=10000):
        """Store the quote table and declare the action/observation spaces.

        Args:
            df: DataFrame holding the 'Bid Price', 'Ask Price' and
                'Currency Pair' columns. All three must be convertible to
                float (e.g. the pair already label-encoded), because
                observations are emitted as float32.
            initial_balance: Cash balance at construction and after every
                ``reset()``.
        """
        super().__init__()
        self.df = df
        self.initial_balance = initial_balance
        self.current_step = 0

        # Actions: 0 = hold, 1 = buy, 2 = sell
        self.action_space = spaces.Discrete(3)

        # Observation space: bid price, ask price, and currency pair
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(3,), dtype=np.float32)

        # Initial account balance
        self.balance = initial_balance

    def reset(self):
        """Rewind to the first row, restore the balance and return the first observation."""
        self.current_step = 0
        self.balance = self.initial_balance
        return self._next_observation()

    def _next_observation(self):
        """Return the row at ``current_step`` as a float32 vector ``[bid, ask, pair]``."""
        row = self.df.iloc[self.current_step]
        # Build the array explicitly so its dtype matches the declared float32 Box,
        # instead of the object array that `.values` yields for a mixed-type row.
        return np.array([row[name] for name in self.OBS_COLUMNS], dtype=np.float32)

    def step(self, action):
        """Apply ``action`` to the current row, advance, and return the new observation.

        Args:
            action: 0 = hold, 1 = buy at the current ask, 2 = sell at the current bid.

        Returns:
            Tuple ``(obs, reward, done, info)``: ``obs`` is the observation for the
            row the cursor now points at, ``reward`` is the balance after the
            action, ``done`` is True once the cursor reaches the last row, and
            ``info`` is an empty dict.
        """
        # Trade on the row the agent has just observed. Prices are read as plain
        # floats so the balance is not accumulated in float32 precision.
        row = self.df.iloc[self.current_step]
        if action == 1:  # Buy
            self.balance -= float(row['Ask Price'])  # Subtract ask price
        elif action == 2:  # Sell
            self.balance += float(row['Bid Price'])  # Add bid price

        # Advance, but never past the last row, so a call made after the terminal
        # step keeps returning the final observation instead of raising IndexError.
        last_step = len(self.df) - 1
        if self.current_step < last_step:
            self.current_step += 1

        # NOTE(review): the reward is the absolute balance, not the per-step change.
        # Kept as-is because callers read it as a balance; confirm the intended signal.
        reward = self.balance

        done = self.current_step >= last_step

        # Return the observation AFTER advancing so the caller receives a true next state
        return self._next_observation(), reward, done, {}

    def render(self, mode='human'):
        """Print the current step index and balance."""
        print(f'Step: {self.current_step}, Balance: {self.balance}')

# Create the environment
env = CurrencyArbitrageEnv(df)


In [6]:
import random
from collections import deque

class QLearningAgent:
    """Tabular Q-learning agent with epsilon-greedy exploration and experience replay.

    Q-values live in a dict keyed by the flattened state tuple; states that have
    never been updated are treated as all-zero. Transitions are buffered in a
    bounded deque and learned from in random minibatches.
    """

    def __init__(self, state_size, action_size):
        """
        Args:
            state_size: Number of features in a state (stored, not otherwise used here).
            action_size: Number of discrete actions; length of each Q-value vector.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.memory = deque(maxlen=2000)  # oldest transitions are dropped first
        self.gamma = 0.95  # discount rate
        self.epsilon = 1.0  # exploration rate
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.learning_rate = 0.001
        self.q_table = {}

    @staticmethod
    def _key(state):
        """Turn a state array into a hashable Q-table key."""
        return tuple(state.flatten())

    def get_qs(self, state):
        """Return the Q-value vector for ``state``, or a fresh zero vector if unseen."""
        return self.q_table.get(self._key(state), np.zeros(self.action_size))

    def remember(self, state, action, reward, next_state, done):
        """Append one transition to the replay memory."""
        self.memory.append((state, action, reward, next_state, done))

    def act(self, state):
        """Pick an action: random with probability ``epsilon``, otherwise greedy.

        Returns:
            The action index as a plain ``int`` on both branches.
        """
        if np.random.rand() <= self.epsilon:
            return random.randrange(self.action_size)
        qs = self.get_qs(state)
        return int(np.argmax(qs))

    def replay(self, batch_size):
        """Learn from a random minibatch of stored transitions, then decay epsilon.

        Does nothing (no update, no decay) while the memory holds fewer than
        ``batch_size`` transitions.
        """
        if len(self.memory) < batch_size:
            # random.sample would raise ValueError on an undersized population
            return

        minibatch = random.sample(self.memory, batch_size)
        for state, action, reward, next_state, done in minibatch:
            target = reward
            if not done:
                # Bootstrap from the best known value of the successor state
                target = reward + self.gamma * np.amax(self.get_qs(next_state))
            qs = self.get_qs(state)
            # Move the estimate toward the target by a learning-rate-sized step
            # instead of overwriting it outright.
            qs[action] += self.learning_rate * (target - qs[action])
            self.q_table[self._key(state)] = qs
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

# Initialize the agent
state_size = env.observation_space.shape[0]
action_size = env.action_space.n
agent = QLearningAgent(state_size, action_size)

# Train the agent
episodes = 300
batch_size = 32
max_steps_per_episode = 50  # each episode is cut off after this many steps

for e in range(episodes):
    state = env.reset()
    state = np.reshape(state, [1, state_size])

    for step_idx in range(max_steps_per_episode):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])

        agent.remember(state, action, reward, next_state, done)
        state = next_state

        if done:
            break

        # Start learning only once the memory can supply a full minibatch
        if len(agent.memory) > batch_size:
            agent.replay(batch_size)

    # Report after every episode. The step cap above normally ends the episode
    # before the environment signals `done`, so a summary printed only on `done`
    # would never appear. The score is read from the environment's balance.
    print(f"Episode: {e+1}/{episodes}, Score: {env.balance}, Epsilon: {agent.epsilon:.2}")


In [7]:
# Evaluate the agent
state = env.reset()
state = np.reshape(state, [1, state_size])
total_reward = 0  # sum of per-step rewards; this is not the account balance

# Act greedily during evaluation, then restore the exploration rate reached in training
trained_epsilon = agent.epsilon
agent.epsilon = 0.0
try:
    for step_idx in range(500):
        action = agent.act(state)
        next_state, reward, done, _ = env.step(action)
        next_state = np.reshape(next_state, [1, state_size])
        total_reward += reward
        state = next_state

        if done:
            break
finally:
    agent.epsilon = trained_epsilon

# Report unconditionally: the loop can end on its step limit before `done` is set.
# The balance is read from the environment rather than from the summed rewards.
print(f"Final Balance: {env.balance}")
