In [5]:
# Use the %pip magic rather than !pip so the packages are installed into the
# environment of the running kernel, not whichever pip is first on PATH.
%pip install gymnasium numpy




[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip





[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import gym
from gym import spaces
import numpy as np
import pandas as pd
from enum import Enum

class Actions(Enum):
    """Discrete actions available to the agent; values index the action space."""

    Hold = 0   # do nothing this step
    Buy1 = 1   # request a one-unit long entry
    Buy2 = 2   # request a two-unit long entry
    Sell1 = 3  # request a one-unit long exit
    Sell2 = 4  # request a two-unit long exit

class TurtleTradingEnv(gym.Env):
    """Single-asset trading environment that gates agent actions with breakout rules.

    The agent picks one of the ``Actions`` each step. Buy actions are only
    executed when the current price is above the prior 20-step high, and sell
    actions only when it is below the prior 10-step low. Independently of the
    agent, the environment opens a one-unit short on a break below the prior
    20-step low and covers it on a break above the prior 10-step high.

    Observations are ``[price, position, cash, *signals]`` where ``signals`` is
    the row of ``signal_features`` for the current step or, when no features
    were supplied, ``[high_20, low_20, high_10, low_10, atr]``.

    Args:
        prices: Indexable, sliceable sequence of prices (one per step).
        signal_features: Optional 2-D array with one row per price. Columns 1
            and 2 are read as the bar's high and low when computing ATR
            (NOTE(review): column meaning is assumed from usage - confirm).
        window_size: Number of warm-up steps before the first tradable step.
        frame_bound: ``(start, end)`` indices into ``prices``; ``end`` may be
            ``None`` for "to the end".
    """

    def __init__(self, prices, signal_features=None, window_size=20, frame_bound=(20, None)):
        super(TurtleTradingEnv, self).__init__()

        self.original_prices = prices
        self.original_signals = signal_features
        self.window_size = window_size
        self.frame_bound = frame_bound

        # Keep `window_size` warm-up rows in front of the first tradable step.
        start = self.frame_bound[0] - self.window_size
        end = self.frame_bound[1] if self.frame_bound[1] else len(prices)

        self.prices = prices[start:end]
        self.signal_features = signal_features[start:end] if signal_features is not None else None

        self.initial_cash = 10000
        self.risk_per_trade = 0.01
        self._reset_account()

        # The built-in fallback signal vector has five entries (see
        # _get_signal_features), so the declared shape must account for five.
        n_signals = self.signal_features.shape[1] if self.signal_features is not None else 5
        obs_size = 3 + n_signals
        self.action_space = spaces.Discrete(len(Actions))
        self.observation_space = spaces.Box(low=0, high=np.inf, shape=(obs_size,), dtype=np.float32)

    def _reset_account(self):
        """Put every piece of per-episode state back to its starting value."""
        self.current_step = self.window_size
        self.cash = self.initial_cash
        self.position = 0
        self.short_position = 0
        self.short_entry_price = None
        self.total_reward = 0
        self.total_profit = self.initial_cash
        self.entry_prices = []
        self.last_action = None
        self.last_buy_price = None
        self.pyramid_price = None
        self.trade_log = []

    def reset(self, *, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``."""
        super().reset(seed=seed)
        self._reset_account()
        return self._get_obs(), {}

    def _log_trade(self, action_type, units, price):
        """Append a snapshot of the account after a trade to ``trade_log``."""
        self.trade_log.append({
            'step': self.current_step,
            'action': action_type,
            'units': units,
            'price': price,
            'cash': self.cash,
            'position': self.position,
            'short_position': self.short_position,
            'portfolio': self._get_portfolio_value(price)
        })

    def _long_entry(self, units, price):
        """Open a long position, but only from a completely flat account."""
        if self.position == 0 and self.short_position == 0:
            # Only arm pyramiding and log the trade if the buy actually filled.
            if self._buy(units, price):
                self.pyramid_price = price
                self._log_trade('long_entry', units, price)

    def _long_exit(self, units, price):
        """Sell ``units`` of the long position if that many are held."""
        if self.position >= units:
            self._sell(units, price)
            self._log_trade('long_exit', units, price)
            if self.last_buy_price and price < self.last_buy_price:
                print("💸 Sold at a loss")

    def _short(self, units, price):
        """Open or extend a short; cash is untouched until the short is covered."""
        self.short_position += units
        self.short_entry_price = price
        self._log_trade('short_entry', units, price)
        print(f"🔻 SHORT {units} @ {price:.2f} | ShortPOS: {self.short_position}, Portfolio: {self._get_portfolio_value(price):.2f}")

    def _cover(self, units, price):
        """Close ``units`` of the short and realise the gain or loss into cash."""
        if self.short_position >= units:
            gain = (self.short_entry_price - price) * units
            self.cash += gain
            self.short_position -= units
            self._log_trade('short_exit', units, price)
            print(f"✅ COVER {units} @ {price:.2f} | ShortPOS: {self.short_position}, Cash: {self.cash:.2f}, Portfolio: {self._get_portfolio_value(price):.2f}")
        else:
            print(f"❌ COVER FAILED: Tried {units}, Only Short {self.short_position}")

    def get_trade_log(self):
        """Return the logged trades as a ``pandas.DataFrame``."""
        return pd.DataFrame(self.trade_log)

    def step(self, action):
        """Apply ``action`` at the current price and advance one step.

        Returns:
            ``(obs, reward, terminated, truncated, info)``. ``terminated`` is
            always ``False``; the end of the price series is reported through
            ``truncated``.
        """
        price = self.prices[self.current_step]
        high_20, low_20, high_10, low_10, atr = self._get_signal_features()

        max_position = self._calculate_max_position(price, atr)
        requested_units = 1 if action == Actions.Buy1.value else 2 if action == Actions.Buy2.value else 0
        allowed_units = min(requested_units, max_position - self.position)

        prev_action = self.last_action
        self.last_action = action

        is_buy = action in [Actions.Buy1.value, Actions.Buy2.value]
        is_sell = action in [Actions.Sell1.value, Actions.Sell2.value]

        if is_buy:
            if price > high_20:
                if allowed_units > 0:
                    self._long_entry(allowed_units, price)
                else:
                    print(f"⛔ BUY SKIPPED: Price broke high_20 but allowed_units={allowed_units}, position={self.position}, max={max_position}")
            else:
                print("❌ Turtle BUY rule not triggered")

        # Pyramid: add one unit each time price advances half an ATR beyond the
        # last add. Requires an open long so that a stale pyramid_price cannot
        # trigger a buy while the account is flat or short.
        if 0 < self.position < max_position and self.pyramid_price:
            if price >= self.pyramid_price + 0.5 * atr:
                self._buy(1, price)
                self.pyramid_price = price

        if is_sell:
            if price < low_10:
                self._long_exit(1 if action == Actions.Sell1.value else 2, price)
            else:
                print("❌ Turtle SELL rule not triggered")

        if self.position == 0 and self.short_position == 0 and price < low_20:
            self._short(1, price)

        if self.short_position > 0 and price > high_10:
            self._cover(self.short_position, price)

        self.current_step += 1
        done = self.current_step >= len(self.prices) - 1

        self._update_profit(price)
        # Base reward: fractional return on the initial cash.
        reward = (self.total_profit - self.initial_cash) / self.initial_cash

        # Bonus/penalty for the sign of the open long position's unrealised P&L.
        if self.position > 0:
            current_profit = sum((price - entry) for entry in self.entry_prices)
            if current_profit > 0:
                reward += 0.1
            if current_profit < 0:
                reward -= 0.05

        if price > high_20 and is_buy:
            reward += 0.2

        if price < low_10 and is_sell:
            reward += 0.2

        # Penalize if agent sells at a loss
        if is_sell and self.last_buy_price and price < self.last_buy_price:
            reward -= 0.3

        # Discourage back-to-back non-hold actions.
        if prev_action is not None and action != Actions.Hold.value and prev_action != Actions.Hold.value:
            reward -= 0.1

        if self.position == 0 and action == Actions.Hold.value:
            reward -= 0.01

        # Buying below the breakout level is penalised in proportion to the gap.
        if is_buy and price <= high_20:
            distance = high_20 - price
            penalty = (distance / high_20) * 2
            reward -= penalty

        # Buying within one ATR of the last buy price is penalised.
        if is_buy and self.last_buy_price:
            diff = abs(price - self.last_buy_price)
            if diff < atr:
                penalty = max(0, (1 - (diff / atr)) * 2)
                reward -= penalty

        self.total_reward += reward

        obs = self._get_obs()
        print(f"Price: {price:.2f}, High_20: {high_20:.2f}, Low_10: {low_10:.2f}, ATR: {atr:.2f}, LastBuy: {self.last_buy_price}")

        return obs, reward, False, done, {}

    def _buy(self, units, price):
        """Buy ``units`` at ``price`` if cash allows; return whether it filled."""
        cost = units * price
        if self.cash >= cost:
            self.cash -= cost
            self.position += units
            self.last_buy_price = price
            # One entry per unit so unrealised P&L can be summed per unit.
            self.entry_prices.extend([price] * units)
            print(f"✅ BUY {units} @ {price:.2f} | POS: {self.position}, CASH: {self.cash:.2f}, Portfolio: {self._get_portfolio_value(price):.2f}")
            return True
        print(f"❌ BUY FAILED: Need ${cost:.2f}, Have ${self.cash:.2f}")
        return False

    def _sell(self, units, price):
        """Sell ``units`` at ``price`` if held; return whether it filled."""
        if self.position >= units:
            self.cash += units * price
            self.position -= units
            # Oldest entries are closed first.
            del self.entry_prices[:units]
            print(f"✅ SELL {units} @ {price:.2f} | POS: {self.position}, CASH: {self.cash:.2f}, Portfolio: {self._get_portfolio_value(price):.2f}")
            return True
        print(f"❌ SELL FAILED: Tried {units}, Only Have {self.position}")
        return False

    def _calculate_max_position(self, price, atr):
        """Return the unit cap: ``risk_per_trade`` of equity divided by ATR.

        ``price`` is accepted for interface compatibility but is not used.
        """
        if atr == 0:
            return 0
        risk_dollars = self.risk_per_trade * self.total_profit
        unit_risk = atr
        return int(risk_dollars / unit_risk)

    def _update_profit(self, price):
        """Refresh ``total_profit`` with the portfolio value at ``price``."""
        self.total_profit = self._get_portfolio_value(price)

    def _get_obs(self):
        """Build the float32 observation vector for the current step."""
        price = self.prices[self.current_step]
        if self.signal_features is not None:
            signals = self.signal_features[self.current_step]
        else:
            signals = self._get_signal_features()
        return np.array([price, self.position, self.cash, *signals], dtype=np.float32)

    def _get_signal_features(self):
        """Return ``[high_20, low_20, high_10, low_10, atr]`` for the current step.

        The high/low windows cover the 20 and 10 steps *before* the current
        step; the current price is not included.
        """
        high_20 = np.max(self.prices[self.current_step - 20:self.current_step])
        low_20 = np.min(self.prices[self.current_step - 20:self.current_step])
        high_10 = np.max(self.prices[self.current_step - 10:self.current_step])
        low_10 = np.min(self.prices[self.current_step - 10:self.current_step])
        atr = self._calculate_atr(self.current_step, period=14)
        return [high_20, low_20, high_10, low_10, atr]

    def _calculate_atr(self, idx, period=14):
        """Mean true range over the ``period`` steps ending at ``idx`` (inclusive).

        Returns ``0.0`` when fewer than ``period + 1`` steps precede ``idx``.
        """
        if idx < period + 1:
            return 0.0
        tr_list = []
        for i in range(idx - period + 1, idx + 1):
            prev_close = self.prices[i - 1]
            if self.signal_features is not None:
                # Index the sliced features so rows line up with self.prices.
                high = self.signal_features[i][1]
                low = self.signal_features[i][2]
            else:
                # No high/low data: true range reduces to |price - prev price|.
                high = low = self.prices[i]
            tr = max(high - low, abs(high - prev_close), abs(low - prev_close))
            tr_list.append(tr)
        return np.mean(tr_list)

    def _get_portfolio_value(self, price):
        """Cash plus long holdings at ``price`` plus unrealised short P&L."""
        short_value = self.short_position * (self.short_entry_price - price) if self.short_position > 0 else 0
        return self.cash + self.position * price + short_value


In [None]:
import time
import numpy as np
import alpaca_trade_api as tradeapi
from stable_baselines3 import A2C
import traceback
from enum import Enum

class Actions(Enum):
    """Action codes emitted by the trained policy."""

    Hold = 0   # no order
    Buy1 = 1   # buy one unit
    Buy2 = 2   # buy two units
    Sell1 = 3  # sell one unit
    Sell2 = 4  # sell two units

import os

# Trained A2C policy, loaded from the working directory.
model = A2C.load("xx.zip")

# Credentials are read from the environment so they never live in the notebook.
# Any key pair that was previously committed here should be revoked.
API_KEY = os.environ["APCA_API_KEY_ID"]
SECRET_KEY = os.environ["APCA_API_SECRET_KEY"]
BASE_URL = 'https://paper-api.alpaca.markets'

api = tradeapi.REST(API_KEY, SECRET_KEY, BASE_URL)

SYMBOL = 'BTC/USD'
WINDOW_SIZE = 20

last_position = 0  # position (in units) carried across loop iterations

def get_recent_data(symbol, window):
    """Fetch 1-minute crypto bars and return ``(closes, highs, lows)`` arrays.

    Args:
        symbol: Trading pair to request, e.g. ``'BTC/USD'``.
        window: Maximum number of bars to request.

    NOTE(review): no ``start`` is passed, so which bars the API returns for a
    given ``limit`` is decided server-side - confirm these are the most recent.
    """
    bars = api.get_crypto_bars(symbol, timeframe="1Min", limit=window).df
    prices = bars['close'].values
    highs = bars['high'].values
    lows = bars['low'].values
    return prices, highs, lows

def build_observation(prices, highs, lows, cash=10000, position=0):
    """Assemble the 14-element float32 observation fed to the policy.

    The first eight entries are ``[price, position, cash, high_20, low_20,
    high_10, low_10, atr]``; the remaining six are zero padding. The high/low
    windows include the latest price. ATR is the mean true range of the last
    14 bars, each measured against the previous close.

    Returns:
        ``(obs, high_20, low_10, atr)``.
    """
    last_20 = prices[-20:]
    last_10 = prices[-10:]
    high_20, low_20 = np.max(last_20), np.min(last_20)
    high_10, low_10 = np.max(last_10), np.min(last_10)

    true_ranges = []
    for bar_high, bar_low, prev_close in zip(highs[-14:], lows[-14:], prices[-15:-1]):
        true_ranges.append(
            max(bar_high - bar_low, abs(bar_high - prev_close), abs(bar_low - prev_close))
        )
    atr = np.mean(true_ranges)

    features = [prices[-1], position, cash, high_20, low_20, high_10, low_10, atr]
    obs = np.array(features, dtype=np.float32)
    obs = np.pad(obs, (0, 6), 'constant')  # (8,) -> (14,)
    return obs, high_20, low_10, atr

def buy_crypto(symbol, qty=0.001):
    """Submit a good-till-cancelled market buy order.

    Args:
        symbol: Trading pair to buy.
        qty: Quantity to buy; defaults to the example size of 0.001.
    """
    api.submit_order(
        symbol=symbol,
        qty=qty,
        side='buy',
        type='market',
        time_in_force='gtc'
    )
    print(f"📈 BUY: {qty} {symbol}")

def sell_crypto(symbol, amount):
    """Submit a good-till-cancelled market sell order for ``amount`` of ``symbol``."""
    order = {
        'symbol': symbol,
        'qty': amount,
        'side': 'sell',
        'type': 'market',
        'time_in_force': 'gtc',
    }
    api.submit_order(**order)
    print(f"📉 SELL: {amount} {symbol}")

# Live loop: poll bars, ask the policy for an action, turn the action into orders.
TRADE_QTY = 0.001  # quantity per unit; matches the size buy_crypto submits

while True:
    try:
        prices, highs, lows = get_recent_data(SYMBOL, WINDOW_SIZE + 15)
        if len(prices) < WINDOW_SIZE + 15:
            print("⏳ Yetersiz veri, bekleniyor...")
            time.sleep(5)
            continue

        # Feed the tracked position back in so the policy sees what is held.
        obs, high_20, low_10, atr = build_observation(prices, highs, lows, position=last_position)
        action, _ = model.predict(obs, deterministic=True)
        action = int(action)

        # Orders are driven by the policy's action. Position is updated right
        # after each accepted order so a later failure cannot desync it.
        if action in (Actions.Buy1.value, Actions.Buy2.value):
            units = 1 if action == Actions.Buy1.value else 2
            for _ in range(units):
                buy_crypto(SYMBOL)
                last_position += 1
        elif action in (Actions.Sell1.value, Actions.Sell2.value) and last_position > 0:
            # Never sell more units than this loop has bought.
            units = min(1 if action == Actions.Sell1.value else 2, last_position)
            sell_crypto(SYMBOL, TRADE_QTY * units)
            last_position -= units
        else:
            print("⏸ HOLD")

        print(f"✅ POS: {last_position} | ACTION: {action}")
        print(f"🎯 Price: {prices[-1]:.2f} | High_20: {high_20:.2f} | Low_10: {low_10:.2f} | ATR: {atr:.2f}\n")

    except Exception:
        # Keep the loop alive on API/model errors, but show the full traceback.
        traceback.print_exc()

    time.sleep(5)


Exception: code expected at most 16 arguments, got 18
Exception: code expected at most 16 arguments, got 18


⏸ HOLD
✅ POS: 0 | ACTION: 3
🎯 Price: 83491.12 | High_20: 83491.12 | Low_10: 83375.38 | ATR: 47.34

⏸ HOLD
✅ POS: 0 | ACTION: 3
🎯 Price: 83491.12 | High_20: 83491.12 | Low_10: 83375.38 | ATR: 47.34

⏸ HOLD
✅ POS: 0 | ACTION: 3
🎯 Price: 83491.12 | High_20: 83491.12 | Low_10: 83375.38 | ATR: 47.34

⏸ HOLD
✅ POS: 0 | ACTION: 3
🎯 Price: 83491.12 | High_20: 83491.12 | Low_10: 83375.38 | ATR: 47.34

⏸ HOLD
✅ POS: 0 | ACTION: 3
🎯 Price: 83491.12 | High_20: 83491.12 | Low_10: 83375.38 | ATR: 47.34

⏸ HOLD
✅ POS: 0 | ACTION: 3
🎯 Price: 83491.12 | High_20: 83491.12 | Low_10: 83375.38 | ATR: 47.34

⏸ HOLD
✅ POS: 0 | ACTION: 3
🎯 Price: 83491.12 | High_20: 83491.12 | Low_10: 83375.38 | ATR: 47.34

⏸ HOLD
✅ POS: 0 | ACTION: 3
🎯 Price: 83491.12 | High_20: 83491.12 | Low_10: 83375.38 | ATR: 47.34

⏸ HOLD
✅ POS: 0 | ACTION: 3
🎯 Price: 83491.12 | High_20: 83491.12 | Low_10: 83375.38 | ATR: 47.34

⏸ HOLD
✅ POS: 0 | ACTION: 3
🎯 Price: 83491.12 | High_20: 83491.12 | Low_10: 83375.38 | ATR: 47.34

⏸ HOLD
✅ P

KeyboardInterrupt: 

In [6]:
import time
import numpy as np
import alpaca_trade_api as tradeapi
from stable_baselines3 import A2C
import traceback
from enum import Enum

class Actions(Enum):
    """Integer codes for the five actions the policy can return."""

    Hold = 0   # stay put
    Buy1 = 1   # buy a single unit
    Buy2 = 2   # buy two units
    Sell1 = 3  # sell a single unit
    Sell2 = 4  # sell two units

import os

# Trained A2C policy, loaded from the working directory.
model = A2C.load("xx.zip")

# Credentials are read from the environment so they never live in the notebook.
# Any key pair that was previously committed here should be revoked.
API_KEY = os.environ["APCA_API_KEY_ID"]
SECRET_KEY = os.environ["APCA_API_SECRET_KEY"]
BASE_URL = 'https://paper-api.alpaca.markets'

api = tradeapi.REST(API_KEY, SECRET_KEY, BASE_URL)

SYMBOL = 'BTC'
WINDOW_SIZE = 20

last_position = 0  # position (in shares) carried across loop iterations

def get_recent_data(symbol, window):
    """Request up to ``window`` 1-minute bars for ``symbol``.

    Returns:
        ``(closes, highs, lows)`` as arrays taken from the bar DataFrame.
    """
    frame = api.get_bars(symbol, timeframe='1Min', limit=window).df
    closes, bar_highs, bar_lows = (frame[col].values for col in ('close', 'high', 'low'))
    return closes, bar_highs, bar_lows

def build_observation(prices, highs, lows, cash=10000, position=0):
    """Build the policy input from recent bars.

    Produces a float32 vector of length 14: ``[price, position, cash,
    high_20, low_20, high_10, low_10, atr]`` followed by six zeros. The
    20- and 10-bar extremes include the latest price; ATR averages the true
    range of the last 14 bars against each bar's previous close.

    Returns:
        ``(obs, high_20, low_10, atr)``.
    """
    window_20, window_10 = prices[-20:], prices[-10:]
    high_20 = np.max(window_20)
    low_20 = np.min(window_20)
    high_10 = np.max(window_10)
    low_10 = np.min(window_10)

    bars = zip(highs[-14:], lows[-14:], prices[-15:-1])
    atr = np.mean([
        max(hi - lo, abs(hi - prev), abs(lo - prev))
        for hi, lo, prev in bars
    ])

    core = np.array(
        [prices[-1], position, cash, high_20, low_20, high_10, low_10, atr],
        dtype=np.float32,
    )
    obs = np.pad(core, (0, 6), 'constant')  # (8,) -> (14,)
    return obs, high_20, low_10, atr


def buy_stock(symbol, qty=1):
    """Submit a good-till-cancelled market buy order.

    Args:
        symbol: Ticker to buy.
        qty: Number of shares to buy; defaults to 1.
    """
    api.submit_order(
        symbol=symbol,
        qty=qty,
        side='buy',
        type='market',
        time_in_force='gtc'
    )
    print(f"📈 BUY: {qty} adet {symbol}")

def sell_stock_directly(symbol, amount):
    """Submit a good-till-cancelled market sell order for ``amount`` shares."""
    order = {
        'symbol': symbol,
        'qty': amount,
        'side': 'sell',
        'type': 'market',
        'time_in_force': 'gtc',
    }
    api.submit_order(**order)
    print(f"📉 SELL: {amount} adet {symbol}")

# Live loop: poll bars, ask the policy for an action, turn the action into orders.
while True:
    try:
        prices, highs, lows = get_recent_data(SYMBOL, WINDOW_SIZE + 15)
        # build_observation needs at least 20 closes and 15 bars for ATR;
        # with fewer it would silently compute features over a short window.
        if len(prices) < WINDOW_SIZE + 15:
            print("⏳ Yetersiz veri, bekleniyor...")
            time.sleep(5)
            continue

        # Feed the tracked position back in so the policy sees what is held.
        obs, high_20, low_10, atr = build_observation(prices, highs, lows, position=last_position)
        action, _ = model.predict(obs, deterministic=True)
        action = int(action)

        # Orders are driven by the policy's action. Position is updated right
        # after each accepted order so a later failure cannot desync it.
        if action in (Actions.Buy1.value, Actions.Buy2.value):
            units = 1 if action == Actions.Buy1.value else 2
            for _ in range(units):
                buy_stock(SYMBOL)
                last_position += 1
        elif action in (Actions.Sell1.value, Actions.Sell2.value) and last_position > 0:
            # Never sell more shares than this loop has bought.
            units = min(1 if action == Actions.Sell1.value else 2, last_position)
            sell_stock_directly(SYMBOL, units)
            last_position -= units
        else:
            print("⏸ HOLD")

        print(f"✅ POS: {last_position} | ACTION: {action}\n")
        print(f"🎯 Price: {prices[-1]:.2f} | High_20: {high_20:.2f} | Low_10: {low_10:.2f} | ATR: {atr:.2f}")

    except Exception:
        # Keep the loop alive on API/model errors, but show the full traceback.
        traceback.print_exc()

    time.sleep(5)


Exception: code expected at most 16 arguments, got 18
Exception: code expected at most 16 arguments, got 18


⏸ HOLD
✅ POS: 0 | ACTION: 0

🎯 Price: 37.13 | High_20: 37.33 | Low_10: 37.05 | ATR: 0.03
⏸ HOLD
✅ POS: 0 | ACTION: 0

🎯 Price: 37.13 | High_20: 37.33 | Low_10: 37.05 | ATR: 0.03
⏸ HOLD
✅ POS: 0 | ACTION: 0

🎯 Price: 37.13 | High_20: 37.33 | Low_10: 37.05 | ATR: 0.03
⏸ HOLD
✅ POS: 0 | ACTION: 0

🎯 Price: 37.13 | High_20: 37.33 | Low_10: 37.05 | ATR: 0.03
⏸ HOLD
✅ POS: 0 | ACTION: 0

🎯 Price: 37.13 | High_20: 37.33 | Low_10: 37.05 | ATR: 0.03
⏸ HOLD
✅ POS: 0 | ACTION: 0

🎯 Price: 37.13 | High_20: 37.33 | Low_10: 37.05 | ATR: 0.03
⏸ HOLD
✅ POS: 0 | ACTION: 0

🎯 Price: 37.13 | High_20: 37.33 | Low_10: 37.05 | ATR: 0.03
⏸ HOLD
✅ POS: 0 | ACTION: 0

🎯 Price: 37.13 | High_20: 37.33 | Low_10: 37.05 | ATR: 0.03
⏸ HOLD
✅ POS: 0 | ACTION: 0

🎯 Price: 37.13 | High_20: 37.33 | Low_10: 37.05 | ATR: 0.03
⏸ HOLD
✅ POS: 0 | ACTION: 0

🎯 Price: 37.13 | High_20: 37.33 | Low_10: 37.05 | ATR: 0.03
⏸ HOLD
✅ POS: 0 | ACTION: 0

🎯 Price: 37.13 | High_20: 37.33 | Low_10: 37.05 | ATR: 0.03
⏸ HOLD
✅ POS: 0 | ACT

KeyboardInterrupt: 

Name: stable_baselines3Note: you may need to restart the kernel to use updated packages.

Version: 2.5.0a1
Summary: Pytorch version of Stable Baselines, implementations of reinforcement learning algorithms.
Home-page: https://github.com/DLR-RM/stable-baselines3
Author: Antonin Raffin
Author-email: antonin.raffin@dlr.de
License: MIT
Location: c:\programdata\radioconda\envs\finrl_env\lib\site-packages
Requires: cloudpickle, gymnasium, matplotlib, numpy, pandas, torch
Required-by: finrl


In [23]:
# Upgrade pip, setuptools and wheel
!pip install --upgrade pip setuptools wheel

# Install a compatible gym version
!pip install gym==0.21.0

# Install stable-baselines3 version 1.6.2
!pip install stable-baselines3==1.6.2


Collecting pip
  Downloading pip-25.0.1-py3-none-any.whl.metadata (3.7 kB)
Collecting setuptools
  Using cached setuptools-78.1.0-py3-none-any.whl.metadata (6.6 kB)
Collecting wheel
  Using cached wheel-0.45.1-py3-none-any.whl.metadata (2.3 kB)
Downloading pip-25.0.1-py3-none-any.whl (1.8 MB)
   ---------------------------------------- 0.0/1.8 MB ? eta -:--:--
    --------------------------------------- 0.0/1.8 MB 1.3 MB/s eta 0:00:02
   - -------------------------------------- 0.1/1.8 MB 1.1 MB/s eta 0:00:02
   ---- ----------------------------------- 0.2/1.8 MB 1.7 MB/s eta 0:00:01
   ------- -------------------------------- 0.3/1.8 MB 1.8 MB/s eta 0:00:01
   -------- ------------------------------- 0.4/1.8 MB 1.9 MB/s eta 0:00:01
   ------------ --------------------------- 0.6/1.8 MB 2.3 MB/s eta 0:00:01
   -------------- ------------------------- 0.6/1.8 MB 2.3 MB/s eta 0:00:01
   -------------- ------------------------- 0.7/1.8 MB 2.1 MB/s eta 0:00:01
   ---------------- ---------

ERROR: To modify pip, please run the following command:
C:\Python312\python.exe -m pip install --upgrade pip setuptools wheel

[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting gym==0.21.0
  Using cached gym-0.21.0.tar.gz (1.5 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'error'


  error: subprocess-exited-with-error
  
  Getting requirements to build wheel did not run successfully.
  exit code: 1
  
  [3 lines of output]
  error in gym setup command: 'extras_require' must be a dictionary whose values are strings or lists of strings containing valid project/version requirement specifiers.
  [end of output]
  
  note: This error originates from a subprocess, and is likely not a problem with pip.
error: subprocess-exited-with-error

Getting requirements to build wheel did not run successfully.
exit code: 1

See above for output.

note: This error originates from a subprocess, and is likely not a problem with pip.

[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Collecting stable-baselines3==1.6.2
  Using cached stable_baselines3-1.6.2-py3-none-any.whl.metadata (4.1 kB)
Collecting gym==0.21 (from stable-baselines3==1.6.2)
  Using cached gym-0.21.0.tar.gz (1.5 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'error'


  error: subprocess-exited-with-error
  
  Getting requirements to build wheel did not run successfully.
  exit code: 1
  
  [3 lines of output]
  error in gym setup command: 'extras_require' must be a dictionary whose values are strings or lists of strings containing valid project/version requirement specifiers.
  [end of output]
  
  note: This error originates from a subprocess, and is likely not a problem with pip.
error: subprocess-exited-with-error

Getting requirements to build wheel did not run successfully.
exit code: 1

See above for output.

note: This error originates from a subprocess, and is likely not a problem with pip.

[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
pip install gym==0.21.0 --only-binary :all:

Defaulting to user installation because normal site-packages is not writeable
Note: you may need to restart the kernel to use updated packages.


ERROR: Could not find a version that satisfies the requirement gym==0.21.0 (from versions: none)
ERROR: No matching distribution found for gym==0.21.0


In [17]:
!pip3 uninstall stable-baselines3

^C


In [22]:
!pip3 install stable-baselines3==1.6.2

Collecting stable-baselines3==1.6.2
  Using cached stable_baselines3-1.6.2-py3-none-any.whl.metadata (4.1 kB)
Collecting gym==0.21 (from stable-baselines3==1.6.2)
  Using cached gym-0.21.0.tar.gz (1.5 MB)
  Installing build dependencies: started
  Installing build dependencies: finished with status 'done'
  Getting requirements to build wheel: started
  Getting requirements to build wheel: finished with status 'error'


  error: subprocess-exited-with-error
  
  Getting requirements to build wheel did not run successfully.
  exit code: 1
  
  [3 lines of output]
  error in gym setup command: 'extras_require' must be a dictionary whose values are strings or lists of strings containing valid project/version requirement specifiers.
  [end of output]
  
  note: This error originates from a subprocess, and is likely not a problem with pip.
error: subprocess-exited-with-error

Getting requirements to build wheel did not run successfully.
exit code: 1

See above for output.

note: This error originates from a subprocess, and is likely not a problem with pip.

[notice] A new release of pip is available: 24.0 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip
