In [4]:
from utils import *
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Sequential
from collections import deque
import random
from sklearn.neighbors import NearestNeighbors

In [1]:
!pip install -U tensorflow
!pip install tensorflow-macos tensorflow-metal



# Configure Modeling Parameters and Fetch Data

Enter a ticker and the date range you would like to build the model on. This model takes a single ticker's data. Also enter a training size: the proportion of the data to include in the training set rather than the test set.

In [6]:
# --- Stock selection ---------------------------------------------------
# `ticker` is kept as a one-element list: the fetch cell passes the list
# to fetch_stock_data and then selects the result for ticker[0].
ticker = ['GOOG']
start_date, end_date = '2022-04-01', '2024-04-05'

# --- Modeling ----------------------------------------------------------
# Fraction of the data reserved for the training set.
train_size = 0.8

In [7]:
# Fetch the price history for the configured ticker and turn the index into
# a regular column (presumably the 'Date' index, since 'Date' is read next).
data = fetch_stock_data(ticker, start_date, end_date)[ticker[0]].reset_index(drop=False)

# Drop timezone information so 'Date' holds naive timestamps.
data['Date'] = pd.to_datetime(data['Date']).dt.tz_localize(None)

included_days = len(data)
print(data.shape)
data.head()

(504, 8)


Unnamed: 0,Date,Open,High,Low,Close,Volume,Dividends,Stock Splits
0,2022-04-01,140.009995,140.949997,138.796997,140.699997,23480000,0.0,0.0
1,2022-04-04,140.824493,144.043747,140.824493,143.642502,19076000,0.0,0.0
2,2022-04-05,143.399506,143.589996,140.943497,141.063004,19256000,0.0,0.0
3,2022-04-06,139.161499,139.848495,136.418106,137.175995,23574000,0.0,0.0
4,2022-04-07,136.617996,137.701508,134.857254,136.464996,19448000,0.0,0.0


The following section explains the Q-learning agent used in this notebook.

# Q-Learning Agent for Stock Trading

This Q-learning agent is designed to make stock trading decisions based on historical price data. The agent learns an optimal trading strategy through interaction with the market environment using the Q-learning algorithm.

## Agent Architecture

The agent consists of a deep neural network that takes in a state representation and outputs Q-values for each possible action. The state is represented by a sliding window of price differences over a specified window size. The neural network architecture includes an input layer, a hidden layer with ReLU activation, and an output layer with linear activation.

## Q-Learning Process

The Q-learning process follows these steps:

1. **Initialization**: The agent's neural network is initialized with random weights, and the replay memory is emptied.

2. **State Representation**: At each time step, the agent observes the current state of the market, which is represented by a sliding window of price differences.

3. **Action Selection**: The agent selects an action (buy, sell, or hold) based on an epsilon-greedy policy. With probability epsilon, the agent explores by selecting a random action, and with probability 1-epsilon, the agent exploits by selecting the action with the highest Q-value.

4. **State Transition**: The agent executes the selected action and observes the next state and the reward received from the market.

5. **Replay Memory**: The agent stores the transition (state, action, reward, next state, done) in the replay memory.

6. **Q-Value Update**: The agent samples a batch of transitions from the replay memory and updates the Q-values using the Q-learning update rule. The target Q-value for each transition is calculated based on the reward and the maximum Q-value of the next state.

7. **Neural Network Update**: The agent's neural network is updated using the sampled batch of transitions and the Q-learning loss function. The optimizer adjusts the network's weights to minimize the loss.

8. **Iteration**: Steps 3-7 are repeated for a specified number of episodes or until convergence.

## Trading Simulation

The trading simulation is performed using the trained Q-learning agent. The agent makes buy and sell decisions based on the current state of the market. The state is represented by a sliding window of price differences.

The agent's decisions are as follows:

- **Buy**: If the agent selects the buy action and there are sufficient funds, a unit of stock is purchased, and the inventory and balance are updated accordingly.

- **Sell**: If the agent selects the sell action and there is stock in the inventory, a unit of stock is sold, and the balance is updated based on the selling price.

- **Hold**: If the agent selects the hold action, no action is taken.

The simulation keeps track of the buying and selling states, total gains, investment percentage, and the number of shares held at the end.

## Usage

To use the Q-learning agent:

1. Prepare the historical price data as a list of closing prices.

2. Set the initial parameters, such as the initial money, window size, and skip size.

3. Create an instance of the `Agent` class with the desired state size, window size, trend, skip size, and batch size.

4. Call the `train` method to train the agent for a specified number of iterations.

5. Evaluate the agent's performance using the `buy` method, which returns the buy and sell states, total gains, and investment percentage.

The Q-learning agent provides a reinforcement learning approach to optimize a trading strategy based on historical price data. By interacting with the market environment and learning from its experiences, the agent aims to make profitable trading decisions.

In [8]:
# Closing prices as a plain Python list, plus the simulation settings.
close = data['Close'].tolist()
initial_money, window_size, skip = 10000, 30, 1

# Novelty-search settings.
novelty_search_threshold = 6
novelty_log_maxlen = 1000
novelty_log_add_amount = 3
backlog_maxsize = 500

In [9]:
class Agent:
    """Deep Q-learning agent that trades one unit of a single price series per step.

    The state is a window of consecutive price differences (see ``get_state``).
    Actions are integers: ``1`` buys one unit, ``2`` sells the oldest unit
    held, and any other value (``0``) does nothing.
    """

    def __init__(self, state_size, window_size, trend, skip, batch_size):
        """Store the configuration and build the Q-network.

        Args:
            state_size: Number of inputs of the Q-network.
            window_size: Number of price differences returned by ``get_state``.
            trend: Sequence of prices the agent trades on.
            skip: Step between consecutive time indices in ``buy`` / ``train``.
            batch_size: Maximum number of transitions used per ``replay`` call.
        """
        self.state_size = state_size
        self.window_size = window_size
        self.half_window = window_size // 2
        self.trend = trend
        self.skip = skip
        self.action_size = 3
        self.batch_size = batch_size
        self.memory = deque(maxlen=1000)
        self.inventory = []

        # Discount factor and epsilon-greedy exploration schedule.
        self.gamma = 0.95
        self.epsilon = 0.5
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.999

        # An explicit Input layer replaces `input_shape=` on the first Dense
        # layer, which newer Keras versions warn about.
        self.model = tf.keras.models.Sequential([
            tf.keras.Input(shape=(self.state_size,)),
            tf.keras.layers.Dense(256, activation='relu'),
            tf.keras.layers.Dense(self.action_size)
        ])

        self.model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
                           loss='mse')

    def act(self, state):
        """Pick an action for ``state`` with an epsilon-greedy policy.

        With probability ``self.epsilon`` a random action is returned,
        otherwise the index of the largest predicted Q-value.
        """
        if random.random() <= self.epsilon:
            return random.randrange(self.action_size)
        # verbose=0: predict is called once per step, so the default progress
        # bar would flood the notebook output.
        return np.argmax(self.model.predict(state, verbose=0)[0])

    def get_state(self, t):
        """Return the ``window_size`` price differences ending at index ``t``.

        When fewer than ``window_size + 1`` prices are available, the window is
        left-padded with the first price, which yields leading zeros.

        Returns:
            Array of shape ``(1, window_size)``.
        """
        span = self.window_size + 1
        start = t - span + 1
        if start >= 0:
            block = self.trend[start : t + 1]
        else:
            # list(...) keeps the padding a concatenation even when `trend`
            # is not a plain list (e.g. a NumPy array).
            block = -start * [self.trend[0]] + list(self.trend[0 : t + 1])
        return np.array([np.diff(block)])

    def replay(self, batch_size):
        """Fit the Q-network on the most recent ``batch_size`` transitions.

        Note that the batch is the tail of the replay memory, not a random
        sample. Epsilon is decayed once per call until it reaches
        ``epsilon_min``.

        Returns:
            The value returned by ``train_on_batch``, or NaN when the memory
            is empty.
        """
        # Clamp so a batch_size larger than the memory cannot produce
        # negative indices.
        first = max(len(self.memory) - batch_size, 0)
        mini_batch = [self.memory[i] for i in range(first, len(self.memory))]
        if not mini_batch:
            return float('nan')

        states = np.array([m[0][0] for m in mini_batch])
        new_states = np.array([m[3][0] for m in mini_batch])
        # Copy so the targets can be written in place.
        Q = np.array(self.model.predict(states, verbose=0))
        Q_new = self.model.predict(new_states, verbose=0)

        # Only the Q-value of the action actually taken is replaced by its
        # target; the other outputs keep the network's own predictions.
        for i, (_, action, reward, _, done) in enumerate(mini_batch):
            Q[i][action] = reward
            if not done:
                Q[i][action] += self.gamma * np.amax(Q_new[i])

        cost = self.model.train_on_batch(states, Q)

        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

        return cost

    def buy(self, initial_money):
        """Simulate trading over ``self.trend`` with the current policy.

        Each buy and sell is printed. Buying is disabled during the last
        ``half_window`` steps of the series.

        Args:
            initial_money: Starting cash balance.

        Returns:
            Tuple ``(states_buy, states_sell, total_gains, invest)``: the time
            indices of buys and sells, the change in cash balance, and that
            change as a percentage of the starting balance. Units still held
            at the end are not counted in the balance.
        """
        starting_money = initial_money
        balance = initial_money
        states_sell = []
        states_buy = []
        inventory = []
        last_buy_limit = len(self.trend) - self.half_window
        state = self.get_state(0)

        for t in range(0, len(self.trend) - 1, self.skip):
            action = self.act(state)
            next_state = self.get_state(t + 1)
            price = self.trend[t]

            if action == 1 and balance >= price and t < last_buy_limit:
                inventory.append(price)
                balance -= price
                states_buy.append(t)
                print('day %d: buy 1 unit at price %f, total balance %f' % (t, price, balance))

            elif action == 2 and len(inventory):
                bought_price = inventory.pop(0)
                balance += price
                states_sell.append(t)
                try:
                    invest = ((price - bought_price) / bought_price) * 100
                except ZeroDivisionError:
                    # A unit bought at price 0 has no defined percentage return.
                    invest = 0
                print('day %d, sell 1 unit at price %f, investment %f %%, total balance %f' % (t, price, invest, balance))

            state = next_state

        invest = ((balance - starting_money) / starting_money) * 100
        total_gains = balance - starting_money
        return states_buy, states_sell, total_gains, invest

    def train(self, iterations, checkpoint, initial_money):
        """Train the Q-network by repeatedly trading through ``self.trend``.

        After every step the transition is stored and ``replay`` is called.
        The reward is the relative change of the cash balance versus
        ``initial_money``; the stored ``done`` flag is True whenever the
        balance is below ``initial_money``.

        Args:
            iterations: Number of passes over the price series.
            checkpoint: Print a progress line every ``checkpoint`` passes.
            initial_money: Cash balance at the start of every pass.
        """
        last_buy_limit = len(self.trend) - self.half_window

        for i in range(iterations):
            total_profit = 0
            inventory = []
            state = self.get_state(0)
            starting_money = initial_money
            # Defined up front so the checkpoint print works even when the
            # series is too short to produce a single step.
            cost = float('nan')

            for t in range(0, len(self.trend) - 1, self.skip):
                action = self.act(state)
                next_state = self.get_state(t + 1)
                price = self.trend[t]

                if action == 1 and starting_money >= price and t < last_buy_limit:
                    inventory.append(price)
                    starting_money -= price

                elif action == 2 and len(inventory) > 0:
                    bought_price = inventory.pop(0)
                    total_profit += price - bought_price
                    starting_money += price

                invest = ((starting_money - initial_money) / initial_money)
                self.memory.append((state, action, invest, next_state, starting_money < initial_money))
                state = next_state

                batch_size = min(self.batch_size, len(self.memory))
                cost = self.replay(batch_size)

            if (i + 1) % checkpoint == 0:
                print('epoch: %d, total rewards: %.3f, cost: %f, total money: %f' % (i + 1, total_profit, cost, starting_money))

In [10]:
# `close`, `initial_money`, `window_size` and `skip` come from the
# configuration cell above; they are deliberately not redefined here so that
# edits made there are not silently overridden.

# Seed every random number generator in use (Python's `random` drives the
# epsilon-greedy exploration, TensorFlow the weight initialisation) so that
# repeated runs start from the same state. GPU execution may still introduce
# some run-to-run variation.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

batch_size = 32
agent = Agent(state_size = window_size,
              window_size = window_size,
              trend = close,
              skip = skip,
              batch_size = batch_size)
agent.train(iterations = 50, checkpoint = 10, initial_money = initial_money)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-05-07 13:03:59.328566: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Pro
2024-05-07 13:03:59.328621: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-05-07 13:03:59.328638: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-05-07 13:03:59.329039: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-05-07 13:03:59.329084: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [None]:

# Evaluate the trained Q-learning agent. `neural_evolve` / `fittest_net` are
# never defined in this notebook, so the original call raised NameError on a
# fresh kernel.
states_buy, states_sell, total_gains, invest = agent.buy(initial_money = initial_money)

In [None]:
import plotly.graph_objects as go

def visualize_stock_trading(close, states_buy, states_sell, total_gains, invest):
    """Plot the closing prices with buy and sell markers using Plotly.

    Args:
        close: Closing prices, indexable by integer day (``close[i]``).
        states_buy: Day indices at which a unit was bought.
        states_sell: Day indices at which a unit was sold.
        total_gains: Value shown in the figure title as "Total Gains".
        invest: Percentage shown in the figure title as "Total Investment".

    The figure is displayed with ``fig.show()``; nothing is returned.
    """
    days = list(range(len(close)))
    fig = go.Figure()

    # Plot the closing price
    fig.add_trace(go.Scatter(x=days, y=close, mode='lines', name='Closing Price', line=dict(color='blue', width=2)))

    def add_signals(states, name, symbol, color):
        """Add one marker trace for the days in ``states`` that exist in ``close``."""
        # A set makes each membership test O(1) instead of scanning the list
        # of signal days for every day of the series.
        wanted = set(states)
        indices = [i for i in days if i in wanted]
        prices = [close[i] for i in indices]
        fig.add_trace(go.Scatter(x=indices, y=prices, mode='markers', name=name, marker=dict(symbol=symbol, size=10, color=color)))

    # Plot the buying signals, then the selling signals
    add_signals(states_buy, 'Buying Signal', 'triangle-up', 'green')
    add_signals(states_sell, 'Selling Signal', 'triangle-down', 'red')

    # Customize the layout
    fig.update_layout(
        title=f'Total Gains: {total_gains:.2f}, Total Investment: {invest:.2f}%',
        xaxis_title='Day',
        yaxis_title='Price',
        template='plotly_dark',
        hovermode='x',
        legend=dict(x=0, y=1, orientation='h')
    )

    fig.show()

In [None]:
# Render the closing prices together with the buy and sell markers.
visualize_stock_trading(
    close=data.Close,
    states_buy=states_buy,
    states_sell=states_sell,
    total_gains=total_gains,
    invest=invest,
)