In [1]:
# import base stats libraries
import numpy as np
import pandas as pd
import random 
import seaborn as sb
import matplotlib.pyplot as plt

# import yfinance for stock data
import yfinance as yf 

# import torch for model creation and sklearn to scale within reasonable range
import torch
from sklearn.preprocessing import MinMaxScaler

# import progress bar
from tqdm import tqdm

# import decoupling method numpy array split
from collections import deque

#import datetime
#import cv2

In [2]:
# cuda setup
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

### Trading Environment

In [3]:
class TradingEnv:
    def __init__(self, data, window_size = 30, initial_balance = 10000):
        self.data = data
        self.window_size = window_size
        self.initial_balance = initial_balance
        self.reset()
    
    def reset(self):
        self.current_step = self.window_size
        self.balance = self.initial_balance
        self.shares_held = 0
        self.net_worth = self.initial_balance
        self.done = False
        return self.getStates()
    
    def getStates(self):
        state = self.data[self.current_step - self.window_size:self.current_step].flatten()
        # normalize balance and shares_held before 
        norm_balance = self.balance / self.initial_balance
        norm_shares = self.shares_held
        return np.concatenate((state,[norm_balance, norm_shares]))
    
    def step(self, action):
        price = self.data[self.current_step][3] # close
        prev_net = self.net_worth
        reward = 0

        if action == 1: # Buy
            if self.balance >= price:
                self.balance -= price
                self.shares_held += 1 
        elif action == 2: # sell
            if self.shares_held > 0:
                self.balance += price 
                self.shares_held -= 1
                reward = 1
    
        self.current_step += 1
        if self.current_step >= len(self.data):
            self.done = True
        self.net_worth = self.balance + self.shares_held * price
        reward = self.net_worth - prev_net

        return self.getStates(), reward, self.done

### Network architecture

In [4]:
N_features = 5 # OHLCV
window_size = 30 # 30-day trading window
N_states = N_features * window_size 

class TradingNet(torch.nn.Module):
    def __init__(self):
        super(TradingNet, self).__init__()
        self.fc1 = torch.nn.Linear(N_states+2,8192) # +2 for balance and shares_held
        self.fc2 = torch.nn.Linear(8192,1024)
        self.fc3 = torch.nn.Linear(1024,3)
        self.activ = torch.nn.LeakyReLU()
    
    def forward(self, x):
        x =self.activ(self.fc1(x))
        x = self.activ(self.fc2(x))
        return self.fc3(x)

### Replay Class

In [5]:
class ReplayBuffer:
    def __init__(self, capacity):
        self.buffer = deque(maxlen=capacity)

    def push(self, state, action, reward, next_state, done):
        self.buffer.append((np.array(state), action, reward, np.array(next_state), done))

    def sample(self, batch_size):
        batch = random.sample(self.buffer, batch_size)
        state, action, reward, next_state, done = map(np.array, zip(*batch))
        return state, action, reward, next_state, done

    def __len__(self):
        return len(self.buffer)



### Parameters/Settings

In [6]:
'''Initial model settings will be here '''
# model parameter settings
episodes = 50
gamma = 0.9999
epsilon = 1.0
epsilon_min = 0.01
epsilon_decay = 0.999
batch_size = 32
buffer_capacity = 10000
model_path = "../data"


### Get data

In [7]:
'''def stock_data(ticker='NVDA', start="2020-01-01", end="2023-12-31"):
    #load stock data while also cleaning columns of interest
    data = yf.download(ticker, start=start, end=end)[["Open","High","Low","Close","Volume"]].fillna(method='ffill')
    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(data)
    return scaled_data'''

'def stock_data(ticker=\'NVDA\', start="2020-01-01", end="2023-12-31"):\n    #load stock data while also cleaning columns of interest\n    data = yf.download(ticker, start=start, end=end)[["Open","High","Low","Close","Volume"]].fillna(method=\'ffill\')\n    scaler = MinMaxScaler()\n    scaled_data = scaler.fit_transform(data)\n    return scaled_data'

In [8]:
# Data loader
def split_data_by_year(ticker='NVDA', train_end_year=2022, test_start_year=2023):
    data = yf.download(ticker, start="2020-01-01", end="2023-12-31")[["Open", "High", "Low", "Close", "Volume"]]
    data = data.fillna(method='ffill')
    data['Year'] = data.index.year
    train_data = data[data['Year'] <= train_end_year].drop(columns='Year')
    test_data = data[data['Year'] >= test_start_year].drop(columns='Year')

    scaler = MinMaxScaler()
    scaled_train = scaler.fit_transform(train_data)
    scaled_test = scaler.transform(test_data)
    return scaled_train, scaled_test

### Training

In [9]:
def train(train_data):
    #data = stock_data()
    env = TradingEnv(train_data,30)#30 is for days
    action_size = 3 # buy,sell, hold
    model = TradingNet().to(device) # neural network

    #network setup
    target_model = TradingNet().to(device)
    target_model.load_state_dict(model.state_dict())

    net = TradingNet()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
    criterion = torch.nn.MSELoss()
    replay_buffer = ReplayBuffer(buffer_capacity)

    global epsilon # access epsilon variable from settings
    rewards_history = []

    pbar = tqdm(range(episodes), desc="Training Episodes")

    # Training Loop
    for episode in pbar:
        state = env.reset()
        total_reward = 0
        done = False

        while not done:
            if np.random.rand() <= epsilon:
                action = random.randrange(action_size)
            else:
                with torch.no_grad():
                    # convert to tensor and move to device.unsqueeze to make batch dimensions work.
                    action = torch.argmax(model(torch.FloatTensor(state).unsqueeze(0).to(device))).item()
            
            next_state, reward, done = env.step(action)
            replay_buffer.push(state, action, reward, next_state, done)
            state = next_state
            total_reward += reward

            # Replay
            if len(replay_buffer) >= batch_size:
                states, actions, rewards, next_states, dones = replay_buffer.sample(batch_size)
                states = torch.FloatTensor(states).to(device)
                next_states = torch.FloatTensor(next_states).to(device)
                actions = torch.LongTensor(actions).to(device)
                rewards = torch.FloatTensor(rewards).to(device)
                dones = torch.FloatTensor(dones).to(device)

                # values for model states
                q_values = model(states)
                next_q_values = target_model(next_states)

                # Q-values for next states
                max_next_q_values = torch.max(next_q_values, dim=1)[0]

                q_targets = rewards + gamma * max_next_q_values * (1 - dones)

                q_values_actions = q_values.gather(1, actions.unsqueeze(1)).squeeze()


                loss = criterion(q_values_actions, q_targets)


                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
        
            epsilon = max(epsilon_min, epsilon * epsilon_decay)
        target_model.load_state_dict(model.state_dict())
        rewards_history.append(total_reward)


        # progress bar
        pbar.set_postfix({
        'Reward': total_reward,
        'Epsilon': f"{epsilon:.3f}",
        'Loss': f"{loss.item():.4f}" if 'loss' in locals() else 'N/A'
        })
    
    # save model
    torch.save(model.state_dict(),model_path)
    print("Model saved to", model_path)

    # display progress
    plt.plot(rewards_history)
    plt.xlabel("Episode")
    plt.ylabel("Total Reward")
    plt.title("Reward per Episode")
    plt.grid()
    plt.show()


### Evaluation

In [10]:

# Evaluation
def evaluate(model, test_data, window_size=30):
    env = TradingEnv(test_data, window_size)
    state = env.reset()
    done = False
    total_reward = 0

    while not done:
        with torch.no_grad():
            action = torch.argmax(model(torch.FloatTensor(state).unsqueeze(0).to(device))).item()
        next_state, reward, done = env.step(action)
        state = next_state
        total_reward += reward

    print(f"Evaluation Total Reward: {total_reward}")
    return total_reward

In [11]:
#start program
if __name__ == "__main__":
    train_data, test_data = split_data_by_year()
    model = train(train_data)
    evaluate(model, test_data)


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
  data = data.fillna(method='ffill')
  train_data = data[data['Year'] <= train_end_year].drop(columns='Year')
  test_data = data[data['Year'] >= test_start_year].drop(columns='Year')
Training Episodes:   0%|          | 0/50 [00:25<?, ?it/s]


KeyboardInterrupt: 