In [1]:
!pip install gymnasium stable-baselines3 pandas numpy

Collecting gymnasium
  Downloading gymnasium-1.2.3-py3-none-any.whl.metadata (10 kB)
Collecting stable-baselines3
  Downloading stable_baselines3-2.7.1-py3-none-any.whl.metadata (4.8 kB)
Collecting farama-notifications>=0.0.1 (from gymnasium)
  Downloading Farama_Notifications-0.0.4-py3-none-any.whl.metadata (558 bytes)
Collecting torch<3.0,>=2.3 (from stable-baselines3)
  Downloading torch-2.10.0-cp312-cp312-win_amd64.whl.metadata (31 kB)
Collecting sympy>=1.13.3 (from torch<3.0,>=2.3->stable-baselines3)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Downloading gymnasium-1.2.3-py3-none-any.whl (952 kB)
   ---------------------------------------- 0.0/952.1 kB ? eta -:--:--
   ---------------------- ----------------- 524.3/952.1 kB 2.8 MB/s eta 0:00:01
   ---------------------------------------- 952.1/952.1 kB 3.7 MB/s eta 0:00:00
Downloading stable_baselines3-2.7.1-py3-none-any.whl (188 kB)
Downloading Farama_Notifications-0.0.4-py3-none-any.whl (2.5 kB)
Downloading torc

In [1]:
# ============================================
# COMPLETE AI TRADING BOT - FRESH START
# ============================================

# Cell 1: Install Libraries
!pip install ccxt ta pandas numpy matplotlib stable-baselines3 gymnasium -q

# Cell 2: Import Libraries & Fetch Data
import ccxt
import pandas as pd
import numpy as np
import ta
from stable_baselines3 import PPO
import gymnasium as gym
from gymnasium import spaces
import warnings
warnings.filterwarnings('ignore')

print("ðŸ“¥ Fetching Bitcoin data from Binance...")
exchange = ccxt.binance()
ohlcv = exchange.fetch_ohlcv('BTC/USDT', timeframe='1h', limit=1000)
df = pd.DataFrame(ohlcv, columns=['timestamp', 'open', 'high', 'low', 'close', 'volume'])
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
print(f"âœ… Data loaded: {len(df)} rows")

# Cell 3: Add Technical Indicators
print("ðŸ“Š Adding Technical Indicators...")
df['rsi'] = ta.momentum.rsi(df['close'], window=14)
df['macd'] = ta.trend.macd(df['close'])
df['macd_signal'] = ta.trend.macd_signal(df['close'])
df['bollinger_mid'] = ta.volatility.bollinger_mavg(df['close'])
df['bollinger_high'] = ta.volatility.bollinger_hband(df['close'])
df['bollinger_low'] = ta.volatility.bollinger_lband(df['close'])
df['atr'] = ta.volatility.average_true_range(df['high'], df['low'], df['close'], window=14)
df = df.dropna().reset_index(drop=True)
print(f"âœ… Indicators added: {len(df)} rows")
print(f"ðŸ“‹ Columns: {list(df.columns)}")

# Cell 4: Define Trading Environment (FIXED - No bollinger_std)
class TradingEnv(gym.Env):
    metadata = {'render_modes': ['human']}

    def __init__(self, df):
        super(TradingEnv, self).__init__()
        self.df = df
        self.action_space = spaces.Discrete(3)  # 0=Hold, 1=Buy, 2=Sell
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=(8,), dtype=np.float32
        )
        self.step_count = 0
        self.position = 0
        self.buy_price = 0

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.step_count = 0
        self.position = 0
        self.buy_price = 0
        return self._get_observation(), {}

    def _get_observation(self):
        row = self.df.iloc[self.step_count]
        obs = np.array([
            row['close'] / 100000,        # Normalized price
            row['rsi'] / 100,             # Normalized RSI
            row['macd'] / 1000,           # Normalized MACD
            row['macd_signal'] / 1000,    # Normalized MACD Signal
            row['bollinger_mid'] / 100000,
            row['atr'] / 1000,            # Normalized ATR
            row['volume'] / 100000000,
            self.position                 # Current position
        ])
        return obs.astype(np.float32)

    def step(self, action):
        current_price = self.df.iloc[self.step_count]['close']
        reward = 0
        done = False

        if action == 1 and self.position == 0:
            self.position = 1
            self.buy_price = current_price
        elif action == 2 and self.position == 1:
            self.position = 0
            profit_pct = (current_price - self.buy_price) / self.buy_price
            reward = profit_pct * 10

        self.step_count += 1
        if self.step_count >= len(self.df) - 1:
            done = True

        return self._get_observation(), reward, done, False, {}

    def render(self):
        pass

print("âœ… Trading Environment defined!")

# Cell 5: Train the Model
print("ðŸš€ Starting Training...")
env = TradingEnv(df)
model = PPO("MlpPolicy", env, verbose=1, learning_rate=0.0003, n_steps=2048)
model.learn(total_timesteps=5000)
model.save("ppo_trading_agent")
print("âœ… Training Complete!")

ðŸ“¥ Fetching Bitcoin data from Binance...
âœ… Data loaded: 1000 rows
ðŸ“Š Adding Technical Indicators...
âœ… Indicators added: 967 rows
ðŸ“‹ Columns: ['timestamp', 'open', 'high', 'low', 'close', 'volume', 'rsi', 'macd', 'macd_signal', 'bollinger_mid', 'bollinger_high', 'bollinger_low', 'atr']
âœ… Trading Environment defined!
ðŸš€ Starting Training...
Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 966      |
|    ep_rew_mean     | -1.75    |
| time/              |          |
|    fps             | 851      |
|    iterations      | 1        |
|    time_elapsed    | 2        |
|    total_timesteps | 2048     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 966         |
|    ep_rew_mean          | -2.2        |
| time/                   |             |
|

In [2]:
print("Available columns:", list(df.columns))

Available columns: ['timestamp', 'open', 'high', 'low', 'close', 'volume', 'rsi', 'macd', 'macd_signal', 'bollinger_mid', 'bollinger_high', 'bollinger_low', 'atr']


In [3]:
# Test the model
obs, _ = env.reset()
total_profit = 0
buy_price = 0

for i in range(50):
    action, _states = model.predict(obs)
    obs, reward, done, _, _ = env.step(action)
    current_price = df.iloc[env.step_count]['close']
    
    if action == 1 and env.position == 1 and buy_price == 0:
        buy_price = current_price
        print(f"Step {i}: ðŸŸ¢ BUY at ${current_price:.2f}")
    elif action == 2 and env.position == 0 and buy_price > 0:
        profit = current_price - buy_price
        total_profit += profit
        print(f"Step {i}: ðŸ”´ SELL at ${current_price:.2f} | Profit: ${profit:.2f}")
        buy_price = 0
    
    if done:
        break

print("-" * 50)
print(f"ðŸ’° Total Profit: ${total_profit:.2f}")

Step 15: ðŸŸ¢ BUY at $92184.36
Step 16: ðŸ”´ SELL at $91942.00 | Profit: $-242.36
Step 18: ðŸŸ¢ BUY at $92269.42
Step 19: ðŸ”´ SELL at $92128.62 | Profit: $-140.80
Step 20: ðŸŸ¢ BUY at $92000.74
Step 24: ðŸ”´ SELL at $93449.99 | Profit: $1449.25
Step 28: ðŸŸ¢ BUY at $94226.84
Step 29: ðŸ”´ SELL at $94408.70 | Profit: $181.86
Step 32: ðŸŸ¢ BUY at $95414.00
Step 33: ðŸ”´ SELL at $95236.84 | Profit: $-177.16
Step 34: ðŸŸ¢ BUY at $95245.60
Step 36: ðŸ”´ SELL at $95720.99 | Profit: $475.39
Step 40: ðŸŸ¢ BUY at $95205.10
Step 41: ðŸ”´ SELL at $94900.01 | Profit: $-305.09
Step 46: ðŸŸ¢ BUY at $94998.19
--------------------------------------------------
ðŸ’° Total Profit: $1241.09


In [4]:
import os

# Create project folder
os.makedirs("AI_Trading_Bot", exist_ok=True)

# Save the model
model.save("AI_Trading_Bot/ppo_trading_agent")

# Save the data
df.to_csv("AI_Trading_Bot/btc_data.csv", index=False)

# Save environment code
with open("AI_Trading_Bot/trading_env.py", "w") as f:
    code = '''
import gymnasium as gym
from gymnasium import spaces
import pandas as pd
import numpy as np

class TradingEnv(gym.Env):
    metadata = {"render_modes": ["human"]}

    def __init__(self, df):
        super(TradingEnv, self).__init__()
        self.df = df
        self.action_space = spaces.Discrete(3)
        self.observation_space = spaces.Box(low=-np.inf, high=np.inf, shape=(8,), dtype=np.float32)
        self.step_count = 0
        self.position = 0
        self.buy_price = 0

    def reset(self, seed=None, options=None):
        super().reset(seed=seed)
        self.step_count = 0
        self.position = 0
        self.buy_price = 0
        return self._get_observation(), {}

    def _get_observation(self):
        row = self.df.iloc[self.step_count]
        obs = np.array([
            row["close"] / 100000,
            row["rsi"] / 100,
            row["macd"] / 1000,
            row["macd_signal"] / 1000,
            row["bollinger_mid"] / 100000,
            row["atr"] / 1000,
            row["volume"] / 100000000,
            self.position
        ])
        return obs.astype(np.float32)

    def step(self, action):
        current_price = self.df.iloc[self.step_count]["close"]
        reward = 0
        done = False

        if action == 1 and self.position == 0:
            self.position = 1
            self.buy_price = current_price
        elif action == 2 and self.position == 1:
            self.position = 0
            profit_pct = (current_price - self.buy_price) / self.buy_price
            reward = profit_pct * 10

        self.step_count += 1
        if self.step_count >= len(self.df) - 1:
            done = True

        return self._get_observation(), reward, done, False, {}

    def render(self):
        pass
'''
    f.write(code)

print("âœ… All files saved to 'AI_Trading_Bot' folder!")

âœ… All files saved to 'AI_Trading_Bot' folder!
