In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from collections import deque
import random

In [2]:
# Load the dataset
file_path = "/kaggle/input/gold-new/Gold.csv"
df = pd.read_csv(file_path)

In [3]:
states = df[['Close/Last', 'Volume', 'Open', 'High', 'Low', 'day', 'month', 'year']].values

In [4]:
# Add a decision column
df['Decision'] = 'Hold'  # Default to Hold

In [5]:
# Define parameters
action_space = ['Buy', 'Sell', 'Hold']
state_size = states.shape[1]
action_size = len(action_space)
episodes = 200
gamma = 0.95  # Discount factor
epsilon = 1.0  # Exploration rate
epsilon_min = 0.01
epsilon_decay = 0.995
learning_rate = 0.001

In [6]:
# Build the Deep Q-Learning model
def build_model():
    model = Sequential()
    model.add(Dense(64, input_dim=state_size, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(action_size, activation='linear'))
    model.compile(loss='mse', optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate))
    return model

In [7]:
# Initialize DQL
model = build_model()
target_model = build_model()
target_model.set_weights(model.get_weights())

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [8]:
# Experience replay buffer
memory = deque(maxlen=2000)

In [9]:
# Function to act
def act(state):
    if np.random.rand() <= epsilon:
        return random.randrange(action_size)
    q_values = model.predict(state, verbose=0)
    return np.argmax(q_values[0])

In [10]:
# Function to replay and train
def replay(batch_size=32):
    global epsilon
    minibatch = random.sample(memory, batch_size)
    for state, action, reward, next_state, done in minibatch:
        target = reward
        if not done:
            target += gamma * np.amax(target_model.predict(next_state, verbose=0)[0])
        target_f = model.predict(state, verbose=0)
        target_f[0][action] = target
        model.fit(state, target_f, epochs=1, verbose=0)
    if epsilon > epsilon_min:
        epsilon *= epsilon_decay

In [11]:
# Train the model
for e in range(episodes):
    state = np.reshape(states[0], [1, state_size])
    for time in range(len(states)):
        action = act(state)
        next_state = np.reshape(states[time], [1, state_size])
        reward = 0
        done = time == len(states) - 1

        # Define rewards
        if action == 0:  # Buy
            reward = 1 if df['Close/Last'].iloc[time] < df['Close/Last'].iloc[time - 1] else -1
        elif action == 1:  # Sell
            reward = 1 if df['Close/Last'].iloc[time] > df['Close/Last'].iloc[time - 1] else -1

        memory.append((state, action, reward, next_state, done))
        state = next_state

        if done:
            print(f"Episode {e+1}/{episodes} finished")
            break
    if len(memory) > 32:
        replay()

Episode 1/200 finished
Episode 2/200 finished
Episode 3/200 finished
Episode 4/200 finished
Episode 5/200 finished
Episode 6/200 finished
Episode 7/200 finished
Episode 8/200 finished
Episode 9/200 finished
Episode 10/200 finished
Episode 11/200 finished
Episode 12/200 finished
Episode 13/200 finished
Episode 14/200 finished
Episode 15/200 finished
Episode 16/200 finished
Episode 17/200 finished
Episode 18/200 finished
Episode 19/200 finished
Episode 20/200 finished
Episode 21/200 finished
Episode 22/200 finished
Episode 23/200 finished
Episode 24/200 finished
Episode 25/200 finished
Episode 26/200 finished
Episode 27/200 finished
Episode 28/200 finished
Episode 29/200 finished
Episode 30/200 finished
Episode 31/200 finished
Episode 32/200 finished
Episode 33/200 finished
Episode 34/200 finished
Episode 35/200 finished
Episode 36/200 finished
Episode 37/200 finished
Episode 38/200 finished
Episode 39/200 finished
Episode 40/200 finished
Episode 41/200 finished
Episode 42/200 finished
E

In [12]:
# Generate decisions
df['Decision'] = [action_space[act(np.reshape(state, [1, state_size]))] for state in states]

In [13]:
# Save the updated dataset
output_file = "Gold_decisions.csv"
df.to_csv(output_file, index=False)

print(f"Updated dataset saved to {output_file}")

Updated dataset saved to Gold_decisions.csv


In [14]:
# Evaluation metrics
def evaluate_trades(df):
    initial_balance = 10000  # Starting balance
    balance = initial_balance
    gold_owned = 0
    profits = []

    for i in range(len(df) - 1):
        decision = df['Decision'].iloc[i]
        price = df['Close/Last'].iloc[i]

        # Execute trades
        if decision == 'Buy' and balance >= price:  # Buy gold
            gold_owned += 1
            balance -= price
        elif decision == 'Sell' and gold_owned > 0:  # Sell gold
            gold_owned -= 1
            balance += price
            profits.append(balance - initial_balance)

    # Final balance after selling remaining gold
    final_price = df['Close/Last'].iloc[-1]
    balance += gold_owned * final_price
    profits.append(balance - initial_balance)

    # Metrics
    total_profit = balance - initial_balance
    win_rate = sum(1 for p in profits if p > 0) / len(profits) if profits else 0
    mean_return = np.mean(profits)
    std_return = np.std(profits)
    sharpe_ratio = mean_return / std_return if std_return != 0 else 0

    print("Evaluation Metrics:")
    print(f"Total Profit: ${total_profit:.2f}")
    print(f"Win Rate: {win_rate * 100:.2f}%")
    print(f"Sharpe Ratio: {sharpe_ratio:.2f}")
    print(f"Final Balance: ${balance:.2f}")

    return {
        "Total Profit": total_profit,
        "Win Rate": win_rate,
        "Sharpe Ratio": sharpe_ratio,
        "Final Balance": balance
    }

# Evaluate the model
results = evaluate_trades(df)

Evaluation Metrics:
Total Profit: $47.70
Win Rate: 85.57%
Sharpe Ratio: -0.17
Final Balance: $10047.70
