In [1]:
import os

# Replace with your local folder path
path = "archive (4)"

# Lazily build the full path of every file found under `path` (recursing into
# sub-folders in the order os.walk visits them), then print one path per line.
file_paths = (
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk(path)
    for name in names
)
for file_path in file_paths:
    print(file_path)


archive (4)\BNBUSDT.csv
archive (4)\BNBUSDT_norm.csv
archive (4)\BTCUSDT.csv
archive (4)\BTCUSDT_norm.csv
archive (4)\ETHUSDT.csv
archive (4)\ETHUSDT_norm.csv
archive (4)\XRPUSDT.csv
archive (4)\XRPUSDT_norm.csv


Environment

In [None]:
import numpy as np
import random
from collections import deque

class SimpleTradingEnv:
    """Simplified multi-asset trading environment driven by a table of prices.

    The environment walks forward through ``df`` one row per ``step``. An
    observation is the last ``lookback_window_size`` rows in chronological
    order (oldest first, newest last); an action is a vector of portfolio
    weights with one entry per column of ``df``.
    """

    def __init__(self, df, initial_balance=1000, lookback_window_size=50):
        """Store the configuration and reset to the initial state.

        Args:
            df: 2-D price data indexed as [time, asset]. It is converted with
                ``np.asarray`` so integer row indexing works for array-likes.
            initial_balance: starting net worth.
            lookback_window_size: number of past rows in each observation.
        """
        self.df = np.asarray(df)              # price dataset (shape: [time, assets])
        self.initial_balance = initial_balance
        self.lookback_window_size = lookback_window_size
        self.reset()

    def reset(self):
        """Reset the environment and return the first observation.

        Returns:
            Array holding the first ``lookback_window_size`` rows of ``df``,
            oldest first.

        Raises:
            IndexError: if ``df`` has fewer rows than the lookback window.
        """
        if self.lookback_window_size > len(self.df):
            raise IndexError(
                "lookback_window_size exceeds the number of rows in the price data"
            )

        # balance and quants are initialised here but are not read by
        # step() or render() in this class.
        self.balance = self.initial_balance   # start with all cash
        self.net_worth = self.initial_balance
        self.prev_net_worth = self.initial_balance
        # current_step is the index of the next row that step() will reveal.
        self.current_step = self.lookback_window_size
        self.quants = np.zeros(self.df.shape[1])  # no assets at start

        # Observation window, filled oldest -> newest so that rows appended by
        # step() extend it in the same order and the oldest row is evicted.
        self.history = deque(self.df[:self.current_step],
                             maxlen=self.lookback_window_size)

        return np.array(self.history)

    def step(self, action, transaction_cost=0.001):
        """Advance one row and apply the portfolio weights to its returns.

        Args:
            action: portfolio weights (e.g., [0.5, 0.5] for 2 assets).
            transaction_cost: proportional cost applied to net worth, scaled
                by the sum of absolute weights. It is charged on every step,
                not only on the change in weights.

        Returns:
            Tuple ``(obs, reward, done, net_worth)`` where ``obs`` is the
            updated observation window, ``reward`` is the log change in net
            worth, and ``done`` is True when net worth has fallen to half of
            the initial balance or less, or when the price data is exhausted.

        Raises:
            IndexError: if called after the last row has been consumed.
        """
        if self.current_step >= len(self.df):
            raise IndexError(
                "step() called after the end of the price data; call reset()"
            )

        # One-period return: the latest observed row -> the next row.
        prices_old = self.df[self.current_step - 1]
        prices_new = self.df[self.current_step]
        self.current_step += 1

        # asset returns
        returns = prices_new / prices_old - 1.0

        # portfolio return (dot product of weights and returns)
        portfolio_return = np.dot(action, returns)

        # update net worth
        self.prev_net_worth = self.net_worth
        self.net_worth *= (1 + portfolio_return)

        # transaction cost penalty
        cost = transaction_cost * self.net_worth * np.sum(np.abs(action))
        self.net_worth -= cost

        # reward: log change in net worth
        reward = np.log(self.net_worth / self.prev_net_worth)

        # done if net worth dropped to half the initial balance, or no rows remain
        out_of_data = self.current_step >= len(self.df)
        done = bool(self.net_worth <= self.initial_balance / 2 or out_of_data)

        # update history for next state
        self.history.append(prices_new)
        obs = np.array(self.history)

        return obs, reward, done, self.net_worth

    def render(self):
        """Print the current step index and net worth."""
        print(f"Step: {self.current_step}, Net Worth: {self.net_worth:.2f}")


Agent

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers

class SimpleAgent:
    """Simplified Actor-Critic trading agent.

    Holds two small Keras networks: an actor that maps a state to softmax
    portfolio weights and a critic that maps a state to a scalar value.
    """

    def __init__(self, state_size, action_size, lr=0.001):
        """Build and compile the actor and critic networks.

        Args:
            state_size: shape of one input state (lookback_window, features).
            action_size: number of assets, i.e. the actor's output size.
            lr: learning rate used by both Adam optimizers.
        """
        self.state_size = state_size    # shape of input (lookback_window, features)
        self.action_size = action_size  # number of assets
        self.lr = lr

        # Build Actor (policy network -> outputs portfolio weights)
        self.actor = models.Sequential([
            layers.Input(shape=self.state_size),
            layers.Flatten(),
            layers.Dense(64, activation="relu"),
            layers.Dense(self.action_size, activation="softmax")  # portfolio weights
        ])
        self.actor.compile(optimizer=optimizers.Adam(learning_rate=self.lr),
                           loss="categorical_crossentropy")

        # Build Critic (value network -> predicts expected return)
        self.critic = models.Sequential([
            layers.Input(shape=self.state_size),
            layers.Flatten(),
            layers.Dense(64, activation="relu"),
            layers.Dense(1, activation="linear")  # value estimate
        ])
        self.critic.compile(optimizer=optimizers.Adam(learning_rate=self.lr),
                            loss="mse")

    def act(self, state):
        """Return the actor's output (portfolio weights) for a single state."""
        state = np.expand_dims(state, axis=0)  # add batch dimension
        action_probs = self.actor.predict(state, verbose=0)[0]
        return action_probs

    def train(self, state, action_probs, reward, next_state, done, gamma=0.99):
        """One-step Actor-Critic update on a single transition.

        Args:
            state: state the action was taken in (no batch dimension).
            action_probs: weights that were used as the action.
            reward: scalar reward received for the transition.
            next_state: state reached after the action (no batch dimension).
            done: if true, the next state's value is not bootstrapped.
            gamma: discount factor applied to the next state's value.
        """
        state = np.expand_dims(state, axis=0)
        next_state = np.expand_dims(next_state, axis=0)

        # Critic estimates, reduced to plain scalars so the target and the
        # advantage have the same shape whether or not the episode is done.
        value = float(np.ravel(self.critic.predict(state, verbose=0))[0])
        if done:
            bootstrap = 0.0  # no forward pass needed for a terminal transition
        else:
            next_value = float(np.ravel(self.critic.predict(next_state, verbose=0))[0])
            bootstrap = gamma * next_value

        # Compute target for critic
        target = float(reward) + bootstrap
        advantage = target - value

        # Train critic: target is (batch=1, 1) to match the critic's output.
        self.critic.train_on_batch(state, np.array([[target]], dtype=np.float32))

        # Train actor (cross-entropy weighted by the advantage). sample_weight
        # is 1-D with one weight for the single sample in the batch.
        # NOTE(review): if action_probs is the actor's own unmodified output
        # for this state, the softmax/cross-entropy gradient (p - y) is zero
        # and this call would not move the actor; confirm whether actions are
        # meant to be sampled or perturbed before being passed in.
        self.actor.train_on_batch(state, np.expand_dims(action_probs, axis=0),
                                  sample_weight=np.array([advantage], dtype=np.float32))



In [4]:
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, backend as K

class SimpleActor:
    """Actor network: chooses actions (portfolio weights)."""

    def __init__(self, input_shape, action_space, lr=0.001):
        """Build and compile the policy network.

        Args:
            input_shape: shape of a single state (without batch dimension).
            action_space: number of outputs of the final softmax layer.
            lr: learning rate for the Adam optimizer.
        """
        self.action_space = action_space

        inputs = layers.Input(shape=input_shape)
        x = layers.Flatten()(inputs)
        x = layers.Dense(64, activation="relu")(x)
        x = layers.Dense(32, activation="relu")(x)
        outputs = layers.Dense(action_space, activation="softmax")(x)

        self.model = models.Model(inputs, outputs)
        self.model.compile(optimizer=optimizers.Adam(learning_rate=lr),
                           loss="categorical_crossentropy")

    def predict(self, state):
        """Return the model output for one state (batch dimension removed)."""
        return self.model.predict(np.expand_dims(state, axis=0), verbose=0)[0]

    def train(self, state, action, advantage):
        """Run one advantage-weighted cross-entropy update on a single sample.

        Args:
            state: single state without batch dimension.
            action: target vector for the cross-entropy loss.
            advantage: scalar or one-element array used as the sample weight.
        """
        state = np.expand_dims(state, axis=0)
        action = np.expand_dims(action, axis=0)
        # Flatten so the weight is 1-D (one entry per sample) even when the
        # advantage arrives as a one-element array rather than a scalar.
        # NOTE(review): if `action` is this model's own unmodified output for
        # `state`, the softmax/cross-entropy gradient (p - y) is zero and the
        # update would not change the weights; confirm against the caller.
        weight = np.asarray(advantage, dtype=float).reshape(-1)
        self.model.train_on_batch(state, action, sample_weight=weight)


class SimpleCritic:
    """Critic network: maps a state to a single value estimate."""

    def __init__(self, input_shape, lr=0.001):
        """Assemble and compile the value network (Flatten -> 64 -> 32 -> 1)."""
        state_in = layers.Input(shape=input_shape)
        hidden = layers.Flatten()(state_in)
        # Two ReLU hidden layers, widest first.
        for units in (64, 32):
            hidden = layers.Dense(units, activation="relu")(hidden)
        value_out = layers.Dense(1, activation="linear")(hidden)

        self.model = models.Model(state_in, value_out)
        self.model.compile(optimizer=optimizers.Adam(lr), loss="mse")

    def predict(self, state):
        """Return the model output for one state with the batch axis removed."""
        batch = np.expand_dims(state, axis=0)
        values = self.model.predict(batch, verbose=0)
        return values[0]

    def train(self, state, target):
        """Fit the model on a single (state, target) pair."""
        batch = np.expand_dims(state, axis=0)
        targets = np.array([target])
        self.model.train_on_batch(batch, targets)
