In [1]:
pip install python-binance


Collecting python-binance
  Downloading python_binance-1.0.29-py2.py3-none-any.whl.metadata (13 kB)
Collecting dateparser (from python-binance)
  Downloading dateparser-1.2.1-py3-none-any.whl.metadata (29 kB)
Collecting aiohttp (from python-binance)
  Downloading aiohttp-3.12.13-cp312-cp312-win_amd64.whl.metadata (7.9 kB)
Collecting pycryptodome (from python-binance)
  Downloading pycryptodome-3.23.0-cp37-abi3-win_amd64.whl.metadata (3.5 kB)
Collecting aiohappyeyeballs>=2.5.0 (from aiohttp->python-binance)
  Downloading aiohappyeyeballs-2.6.1-py3-none-any.whl.metadata (5.9 kB)
Collecting aiosignal>=1.1.2 (from aiohttp->python-binance)
  Downloading aiosignal-1.3.2-py2.py3-none-any.whl.metadata (3.8 kB)
Collecting frozenlist>=1.1.1 (from aiohttp->python-binance)
  Downloading frozenlist-1.7.0-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting multidict<7.0,>=4.5 (from aiohttp->python-binance)
  Downloading multidict-6.4.4-cp312-cp312-win_amd64.whl.metadata (5.5 kB)
Collecting propcac


[notice] A new release of pip is available: 24.2 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:
from binance.client import Client
import pandas as pd
import numpy as np

In [3]:
# Module-level Binance client; fetch_binance_data reads this global.
# No API key or secret is passed here — presumably this limits the client to
# public (unauthenticated) endpoints; confirm against the python-binance docs.
client = Client()


In [4]:
def fetch_binance_data(symbol="BTCUSDT", interval=Client.KLINE_INTERVAL_1HOUR, lookback="2 days ago UTC"):
    """Fetch historical klines and return the price/volume columns as floats.

    Args:
        symbol: Trading pair passed to ``client.get_historical_klines``.
        interval: Kline interval passed to ``client.get_historical_klines``.
        lookback: Start-of-range string passed to
            ``client.get_historical_klines``.

    Returns:
        pandas.DataFrame with the columns ``timestamp``, ``open``, ``high``,
        ``low``, ``close`` and ``volume``, every one of them cast to float.
    """
    # Labels for the twelve fields of each kline record, in order.
    kline_fields = [
        "timestamp", "open", "high", "low", "close", "volume",
        "close_time", "quote_asset_volume", "number_of_trades",
        "taker_buy_base_volume", "taker_buy_quote_volume", "ignore",
    ]
    # Only the first six fields are kept for the environment.
    kept_fields = kline_fields[:6]

    raw_klines = client.get_historical_klines(symbol, interval, lookback)
    frame = pd.DataFrame(raw_klines, columns=kline_fields)
    return frame[kept_fields].astype(float)


In [5]:
import gym
from gym import spaces

class BinanceTradingEnv(gym.Env):
    """Single-position trading environment driven by Binance candle data.

    Each step advances one row through the frame returned by
    ``fetch_binance_data``. An observation is that row's
    ``[open, high, low, close, volume]`` as a float32 vector.

    Actions (``Discrete(3)``):
        0 -- no-op while flat; closes the position when one is open
        1 -- open a long position (ignored unless flat)
        2 -- open a short position (ignored unless flat)

    The reward is non-zero only on the step that closes a position, where it
    equals ``(close - entry_price) * position``. A position still open when
    the episode ends is never realised.

    The class uses the classic gym API: ``reset()`` returns only the
    observation and ``step()`` returns ``(obs, reward, done, info)``.
    """

    def __init__(self, symbol="BTCUSDT"):
        """Fetch the initial candle data and declare the spaces.

        Args:
            symbol: Trading pair forwarded to ``fetch_binance_data``.
        """
        super().__init__()
        self.symbol = symbol
        self.data = fetch_binance_data(symbol)
        self.current_step = 0
        # Initialise trading state here as well as in reset(), so that calling
        # step() on a fresh instance does not fail with AttributeError.
        self.position = 0  # 1 = long, -1 = short, 0 = flat
        self.entry_price = 0
        # 0: no-op when flat / close open position, 1: Buy (long), 2: Sell (short)
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(5,), dtype=np.float32)

    def reset(self):
        """Re-fetch the candle data, clear trading state, return the first observation.

        NOTE(review): the data is downloaded again on every reset, so each
        episode may see a different window. The signature is intentionally
        left without ``seed``/``options``; wrappers may inspect it to decide
        which gym API the env follows — confirm before changing it.
        """
        self.data = fetch_binance_data(self.symbol)
        self.current_step = 0
        self.position = 0  # 1 = long, -1 = short, 0 = flat
        self.entry_price = 0
        return self._get_obs()

    def _get_obs(self):
        """Return the current row's open/high/low/close/volume as float32."""
        row = self.data.iloc[self.current_step]
        return np.array([row.open, row.high, row.low, row.close, row.volume], dtype=np.float32)

    def step(self, action):
        """Apply ``action`` at the current row's close and advance one row.

        Args:
            action: 0, 1 or 2 (see the class docstring).

        Returns:
            ``(obs, reward, done, info)``. ``obs`` is the next row's
            observation, or a float32 zero vector once the episode is done;
            ``reward`` is a Python float; ``info`` is an empty dict.
        """
        current_price = self.data.iloc[self.current_step].close
        reward = 0.0

        # Simulate trade
        if self.position == 0:
            if action == 1:  # Buy: open long
                self.position = 1
                self.entry_price = current_price
            elif action == 2:  # Sell: open short
                self.position = -1
                self.entry_price = current_price
        elif action == 0:  # Exit position and realise the price difference
            reward = float((current_price - self.entry_price) * self.position)
            self.position = 0
            self.entry_price = 0

        self.current_step += 1
        # The episode ends when the step index reaches the last row.
        done = self.current_step >= len(self.data) - 1
        if done:
            # Terminal placeholder; the dtype matches the declared observation space.
            obs = np.zeros(self.observation_space.shape, dtype=np.float32)
        else:
            obs = self._get_obs()

        return obs, reward, done, {}


In [6]:
from stable_baselines3 import PPO

# Build the trading environment for BTCUSDT (the constructor fetches candle data).
env = BinanceTradingEnv(symbol="BTCUSDT")
# PPO agent using the "MlpPolicy" policy; verbose=1 is set and the cell output
# shows the rollout/train log tables.
model = PPO("MlpPolicy", env, verbose=1)
# Train for 10,000 timesteps. As the last expression in the cell, the return
# value of learn() is what the notebook displays (the PPO object repr).
model.learn(total_timesteps=10000)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.




---------------------------------
| rollout/           |          |
|    ep_len_mean     | 47       |
|    ep_rew_mean     | 361      |
| time/              |          |
|    fps             | 91       |
|    iterations      | 1        |
|    time_elapsed    | 22       |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 47           |
|    ep_rew_mean          | 289          |
| time/                   |              |
|    fps                  | 84           |
|    iterations           | 2            |
|    time_elapsed         | 48           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0055185673 |
|    clip_fraction        | 0.0811       |
|    clip_range           | 0.2          |
|    entropy_loss         | -1.09        |
|    explained_variance   | -1.91e-06    |
|    learning_r

<stable_baselines3.ppo.ppo.PPO at 0x221e2c4d280>