In [36]:
import csv
import gym
from gym import spaces
import numpy as np

In [6]:
tickers = ["AAPL", "AMZN", "GOOGL", "MSFT", "NVDA", "TSLA"]

# Build {row_index: {ticker: price}} with one entry per CSV row, keeping only
# the columns listed in `tickers` and converting each value to float.
# The `with` block guarantees the file handle is closed once parsing is done;
# newline='' is what the csv module expects so it can handle line endings itself.
with open("nasdaq_stock_prices.csv", mode='r', newline='') as price_file:
    data = {
        i: {ticker: float(row[ticker]) for ticker in tickers}
        for i, row in enumerate(csv.DictReader(price_file, delimiter=','))
    }

In [None]:
class Environment(gym.Env):
    """Skeleton of a multi-ticker trading environment.

    The environment keeps, per step index, the cash balance, the number of
    shares held for every ticker and the profit, alongside the price table
    passed in as ``data``. ``step`` is not implemented yet.
    """

    def __init__(self, data: dict, window_size: int, initial_balance: float):
        """Store the price table and set up the initial bookkeeping.

        Args:
            data: Mapping ``step index -> {ticker: price}``. The ticker list
                is taken from the keys of ``data[0]``.
            window_size: Number of past steps (in addition to the current
                one) included in the state returned by ``_get_state``.
            initial_balance: Balance recorded at step 0.

        Raises:
            ValueError: If ``data`` is empty.
        """
        if not data:
            raise ValueError("data must contain at least one row of prices")

        self.data = data
        self.max_steps = len(data) - 1
        self.tickers = list(data[0].keys())
        self.window_size = window_size

        self.initial_balance = initial_balance
        self.initial_shares = {ticker: 0 for ticker in self.tickers}
        self.initial_profit = 0

        # Size the spaces from this instance's tickers rather than from a
        # notebook-level global, so the class works on a fresh kernel and
        # with any ticker set.
        n_tickers = len(self.tickers)
        self.action_space = spaces.Box(low = -1, high = 1, shape = (n_tickers,), dtype = np.float16)
        # NOTE(review): this Box does not describe what _get_state returns
        # (a tuple of per-step dicts) -- reconcile once step() is written.
        self.observation_space = spaces.Box(low = 0, high = 1, shape = (n_tickers * 2,), dtype = np.float16)

        self._reset_bookkeeping()

    def _reset_bookkeeping(self):
        """Put step counter, histories and current values back to step 0."""
        self.current_step = 0

        self.history_balance = {0: self.initial_balance}
        self.current_balance = self.initial_balance

        # Copy the share dict: the history entry, the current holdings and
        # the pristine initial holdings must be three distinct objects, or an
        # in-place update of one would silently rewrite the others.
        self.history_shares = {0: dict(self.initial_shares)}
        self.current_shares = dict(self.initial_shares)

        self.history_profit = {0: self.initial_profit}
        self.current_profit = self.initial_profit

        self.done = False

    def reset(self):
        """Restart the episode at step 0 and return the initial state."""
        self._reset_bookkeeping()
        return self._get_state()

    def render(self):
        """Return the (balance, shares, profit) history dicts."""
        return self.history_balance, self.history_shares, self.history_profit

    def _get_state(self):
        """Return the recent window of prices, balances, shares and profits.

        The window covers step indices ``max(0, current_step - window_size)``
        through ``current_step`` inclusive.

        Returns:
            A 4-tuple ``(prices, balances, shares, profits)``; each element
            is a dict keyed by step index, restricted to the window.

        Raises:
            KeyError: If a step in the window has no entry in ``data`` or in
                one of the history dicts.
        """
        start = max(0, self.current_step - self.window_size)
        end = self.current_step + 1
        window = range(start, end)
        # The containers are dicts keyed by step index, so the window has to
        # be gathered key by key; dicts do not support slice/tuple indexing.
        return (
            {i: self.data[i] for i in window},
            {i: self.history_balance[i] for i in window},
            {i: self.history_shares[i] for i in window},
            {i: self.history_profit[i] for i in window},
        )

    def step(self, action):
        """Advance the environment by one step (TODO: not implemented; returns None)."""
        pass

In [31]:
# Instantiate the trading environment on the loaded price table.
environment = Environment(data=data, window_size=10, initial_balance=1000)

In [33]:
# Display the ticker list the environment took from the keys of data[0].
environment.tickers

['AAPL', 'AMZN', 'GOOGL', 'MSFT', 'NVDA', 'TSLA']

In [None]:
class Agent:
    """Placeholder agent bound to a single environment."""

    def __init__(self, env):
        """Remember the environment this agent will interact with."""
        self.env = env

    def train(self):
        """Training hook; not implemented yet, returns ``None``."""
        return None

    def test(self):
        """Evaluation hook; not implemented yet, returns ``None``."""
        return None