In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler

# Step 1: Load the data
df = pd.read_csv('/content/eth_usd_historical.csv')

# Step 2: Parse datetime and sort
df['Date'] = pd.to_datetime(df['Date'])
df.sort_values('Date', inplace=True)
df.reset_index(drop=True, inplace=True)

# Step 3: Drop rows with missing values
df.dropna(inplace=True)

# Step 4: Select only OHLCV features
feature_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
df = df[feature_cols]

# Step 5: Normalize features
scaler = MinMaxScaler()
df[feature_cols] = scaler.fit_transform(df[feature_cols])



print("Prepared data shape:", df.shape)  # (samples, window_size, num_features)
df

Prepared data shape: (1460, 5)


Unnamed: 0,Open,High,Low,Close,Volume
0,0.004090,0.003521,0.007358,0.004296,0.071038
1,0.004344,0.003099,0.006873,0.003574,0.072221
2,0.003618,0.003881,0.006772,0.005012,0.101882
3,0.005056,0.004194,0.008189,0.005203,0.064917
4,0.005248,0.004897,0.008623,0.005460,0.066080
...,...,...,...,...,...
1455,0.459993,0.451985,0.451029,0.451104,0.104209
1456,0.451306,0.476704,0.458582,0.482430,0.146596
1457,0.482969,0.487678,0.485310,0.475799,0.164793
1458,0.475872,0.475321,0.468929,0.465829,0.126883


In [2]:
df = df[['Open', 'High', 'Low', 'Close', 'Volume']]
df

Unnamed: 0,Open,High,Low,Close,Volume
0,0.004090,0.003521,0.007358,0.004296,0.071038
1,0.004344,0.003099,0.006873,0.003574,0.072221
2,0.003618,0.003881,0.006772,0.005012,0.101882
3,0.005056,0.004194,0.008189,0.005203,0.064917
4,0.005248,0.004897,0.008623,0.005460,0.066080
...,...,...,...,...,...
1455,0.459993,0.451985,0.451029,0.451104,0.104209
1456,0.451306,0.476704,0.458582,0.482430,0.146596
1457,0.482969,0.487678,0.485310,0.475799,0.164793
1458,0.475872,0.475321,0.468929,0.465829,0.126883


In [3]:
import gym
from gym import spaces

class CryptoTradingEnv(gym.Env):
    def __init__(self, df, initial_balance=10000, slippage_pct=0.001, fee_pct=0.001):
        super(CryptoTradingEnv, self).__init__()
        self.df = df.reset_index(drop=True)
        self.initial_balance = initial_balance
        self.current_step = 0
        self.slippage_pct = slippage_pct
        self.fee_pct = fee_pct

        # [Open, High, Low, Close, Volume, Balance, Holdings]
        self.observation_space = spaces.Box(
            low=0, high=np.inf, shape=(7,), dtype=np.float32
        )

        self.action_space = spaces.Discrete(3)
        self.reset()

    def reset(self):
        self.balance = self.initial_balance
        self.holdings = 0
        self.net_worth = self.initial_balance
        self.current_step = 0
        self.total_slippage = 0
        self.total_fees = 0
        return self._get_obs()

    def _get_obs(self):
        row = self.df.loc[self.current_step]
        return np.array([
            row['Open'], row['High'], row['Low'],
            row['Close'], row['Volume'],
            self.balance, self.holdings
        ], dtype=np.float32)

    def step(self, action):
        row = self.df.loc[self.current_step]
        price = row['Close']
        done = False
        reward = 0
        fee = 0
        slippage = 0

        if action == 1:  #Buy
          exec_price = price * (1 + self.slippage_pct)
          slippage = exec_price - price
          if self.balance > 0 and price > 0: # Added check for price > 0
                fee = self.balance * self.fee_pct
                self.holdings += (self.balance - fee) / exec_price
                self.balance = 0
        elif action == 2:  # Sell
              exec_price = price * (1 - self.slippage_pct)
              slippage = exec_price - price
              if self.holdings > 0 and price > 0: # Added check for price > 0
                proceeds = self.holdings * exec_price
                fee = proceeds * self.fee_pct
                self.balance += proceeds - fee
                self.holdings = 0

        self.net_worth = self.balance + self.holdings * price
        self.total_slippage += slippage
        self.total_fees += fee

        if self.current_step == len(self.df) - 1:
            done = True
            reward = self.net_worth - self.initial_balance
            next_obs = self._get_obs() # Get observation for the final step
        else:
            next_price = self.df.loc[self.current_step + 1, 'Close']
            new_net_worth = self.balance + self.holdings * next_price
            reward = new_net_worth - self.net_worth
            self.current_step += 1 # Increment step after calculating reward
            next_obs = self._get_obs()

        # Check for NaN in net_worth and reward
        if np.isnan(self.net_worth):
            self.net_worth = self.initial_balance # Reset net worth or handle as appropriate
            reward = -np.inf # Penalize for reaching an invalid state

        if np.isnan(reward):
            reward = -np.inf # Penalize for invalid reward

        return next_obs, reward, done, {}

    def render(self, mode='human'):
        print(f"Step: {self.current_step}, Net Worth: {self.net_worth:.2f}, Fees: {self.total_fees:.2f}, Slippage: {self.total_slippage:.2f}")

In [4]:
!pip install stable-baselines3 gym

Collecting stable-baselines3
  Downloading stable_baselines3-2.6.0-py3-none-any.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (

In [5]:
!pip install Shimmy

Collecting Shimmy
  Downloading Shimmy-2.0.0-py3-none-any.whl.metadata (3.5 kB)
Downloading Shimmy-2.0.0-py3-none-any.whl (30 kB)
Installing collected packages: Shimmy
Successfully installed Shimmy-2.0.0


In [6]:
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv

env = DummyVecEnv([lambda: CryptoTradingEnv(df)])
model = PPO("MlpPolicy", env, verbose=1)
model.learn(total_timesteps=10000)



Using cpu device
-----------------------------
| time/              |      |
|    fps             | 730  |
|    iterations      | 1    |
|    time_elapsed    | 2    |
|    total_timesteps | 2048 |
-----------------------------
-------------------------------------------
| time/                   |               |
|    fps                  | 621           |
|    iterations           | 2             |
|    time_elapsed         | 6             |
|    total_timesteps      | 4096          |
| train/                  |               |
|    approx_kl            | 9.4572315e-06 |
|    clip_fraction        | 0             |
|    clip_range           | 0.2           |
|    entropy_loss         | -1.1          |
|    explained_variance   | 0             |
|    learning_rate        | 0.0003        |
|    loss                 | 5.96e+10      |
|    n_updates            | 10            |
|    policy_gradient_loss | -9.13e-05     |
|    value_loss           | 1.26e+11      |
-------------------------

<stable_baselines3.ppo.ppo.PPO at 0x79870d0c6690>

In [7]:
obs = env.reset()
done = False
total_reward = 0

while not done:
    action, _ = model.predict(obs)
    obs, reward, done, _ = env.step(action)
    total_reward += reward

env.envs[0].render()
print(f"Total reward: {float(total_reward):.2f},Total reward: {float(total_reward):.2f}")

Step: 0, Net Worth: 10000.00, Fees: 0.00, Slippage: 0.00
Total reward: 65177.14


See here for more information: https://www.gymlibrary.ml/content/api/[0m
  deprecation(
  print(f"Total reward: {float(total_reward):.2f}")
