In [36]:
import csv
import gym
from gym import spaces
import numpy as np

In [38]:
tickers = ["AAPL", "AMZN", "GOOGL", "MSFT", "NVDA", "TSLA"]

data = {i: {t: float(row[t]) for t in tickers} \
    for i, row in enumerate(csv.DictReader( \
    open("nasdaq_stock_prices.csv", mode='r'), delimiter=','))
}

In [None]:
class Environment(gym.Env):
    def __init__(self, data: dict, window_size: int, initial_balance: float):
        self.current_step = 0
        self.history_prices = data
        self.current_prices = self.history_prices[self.current_step]
        self.max_steps = len(data) - 1
        self.tickers = list(data[0].keys())
        self.window_size = window_size

        self.initial_balance = initial_balance
        self.history_balance = {0: self.initial_balance}
        self.current_balance = self.history_balance[self.current_step]

        self.initial_shares = {t: 0 for t in self.tickers}
        self.history_shares = {0: self.initial_shares}
        self.current_shares = self.history_shares[self.current_step]

        self.initial_value = 0
        self.history_value = {0: self.initial_value}
        self.current_value = self.history_value[self.current_step]

        self.action_space = spaces.Box(low = -1.0, high = 1.0, shape = (len(self.tickers),))
    
        self.observation_dimension = len(self.tickers) * (self.window_size + 1) + 2  #window of prices (n*w) + current_shares (n) + current_balance (1) + current_value (1)
        self.observation_space = spaces.Box(low = -np.inf, high = np.inf, shape = (self.observation_dimension,))

        self.done = False

    def reset(self):
        self.current_step = 0
        self.history_balance = {0: self.initial_balance}
        self.current_balance = self.initial_balance
        self.history_shares = {0: self.initial_shares}
        self.current_shares = self.initial_shares
        self.history_value = {0: self.initial_value}
        self.current_value = self.initial_value
        self.done = False
        return self._get_state()
    
    def render(self):
        return self.history_balance, self.history_shares, self.history_value
    
    def _get_state(self):
        start = max(0, self.current_step - self.window_size)
        end = self.current_step + 1
        return self.history_prices[start, end], self.history_balance[start, end], self.history_shares[start, end], self.history_value[start, end]
    
    def step(self, action: np.ndarray):
        if self.done:
            return self._get_state(), 0, self.done, {}

        if np.sum(action) > 1.0:
            raise ValueError(f"Invalid action: total buy fraction = {np.sum(action):.2f} > 1.0")
        
        if any([a < -1.0 for a in action]):
            raise ValueError(f"Invalid action: sell fraction < -1.0")

        for i, ticker in enumerate(self.tickers):
            act = action[i]
            if act < 0:
                shares_to_sell = self.current_shares[ticker] * (-act)
                proceeds = shares_to_sell * self.current_prices[ticker]
                self.current_balance += proceeds
                self.current_shares[ticker] -= shares_to_sell

            elif act > 0:
                amount_to_invest = self.current_balance * act
                shares_to_buy = amount_to_invest / self.current_prices[ticker]
                cost = shares_to_buy * self.current_prices[ticker]
                self.current_balance -= cost
                self.current_shares[ticker] += shares_to_buy

        self.current_value = self.current_balance + sum(self.current_shares[t] * self.current_prices[t] for t in self.tickers)

        self.current_step += 1
        self.done = self.current_step >= self.max_steps

        self.history_balance[self.current_step] = self.current_balance
        self.history_shares[self.current_step] = self.current_shares.copy()
        self.history_value[self.current_step] = self.current_value

        reward = self.current_value - self.initial_value

        return self._get_state(), reward, self.done, {}


In [40]:
environment = Environment(data, 10, initial_balance=1000)

In [42]:
environment.render()
environment.step(np.array([0.1, 0.1, 0.1, 0.1, 0.1, 0.1]))

KeyError: (0, 2)

In [None]:
class Agent: #To be defined
    def __init__(self, env):
        self.env = env

    def train(self):
        pass

    def test(self):
        pass