In [33]:
# data_exploration.ipynb (converted to script for easy viewing)

import requests
import pandas as pd
import time
import os
from datetime import datetime

# === CONFIG ===
API_KEY = "GlYxSZP9hnooNl6gGAjtkptkeqehSnk5C60Akhpw5zupBK6O"
BASE_URL = "https://api.datasource.cybotrade.rs/bybit-linear/candle"
SYMBOL = "BTCUSDT"
INTERVAL = "1m"  # Valid: 1m, 5m, 15m, 30m, 1h, etc.
LIMIT = 1000

# === SET TIME RANGE: from March 8 to April 8, 2025 ===
start_time = int(datetime(2025, 3, 8, 6, 0).timestamp() * 1000)
end_time = int(datetime(2025, 4, 8, 6, 0).timestamp() * 1000)

headers = {
    "X-API-KEY": API_KEY
}

all_candles = []
print(f"Fetching {INTERVAL} data for {SYMBOL} from {datetime.utcfromtimestamp(start_time/1000)} to {datetime.utcfromtimestamp(end_time/1000)}...")

while start_time < end_time:
    params = {
        "symbol": SYMBOL,
        "interval": INTERVAL,
        "start_time": start_time,
        "limit": LIMIT
    }

    response = requests.get(BASE_URL, headers=headers, params=params)

    if response.status_code == 200:
        data = response.json().get("data", [])
        if not data:
            print("No more data returned.")
            break

        all_candles.extend(data)
        print(f"✅ Retrieved {len(data)} candles. Total: {len(all_candles)}")
        start_time = data[-1]["start_time"] + 60 * 1000  # move to next minute
        time.sleep(0.5)  # prevent hitting rate limits
    else:
        print(f"❌ Error {response.status_code}: {response.text}")
        break

# === CONVERT TO DATAFRAME ===
df = pd.DataFrame(all_candles)
df["timestamp"] = pd.to_datetime(df["start_time"], unit="ms")
df.set_index("timestamp", inplace=True)
df = df[["open", "high", "low", "close", "volume"]]

# === Ensure parent directory exists ===
os.makedirs("../datasets", exist_ok=True)

# === SAVE TO CSV ===
csv_path = f"../datasets/{SYMBOL}_{INTERVAL}_march_to_april.csv"
df.to_csv(csv_path)
print(f"💾 Saved to {csv_path} with {len(df)} rows.")

# === OPTIONAL: Preview ===
print(df.tail())

  print(f"Fetching {INTERVAL} data for {SYMBOL} from {datetime.utcfromtimestamp(start_time/1000)} to {datetime.utcfromtimestamp(end_time/1000)}...")


Fetching 1m data for BTCUSDT from 2025-03-07 22:00:00 to 2025-04-07 22:00:00...
✅ Retrieved 1000 candles. Total: 1000
✅ Retrieved 1000 candles. Total: 2000
✅ Retrieved 1000 candles. Total: 3000
✅ Retrieved 1000 candles. Total: 4000
✅ Retrieved 1000 candles. Total: 5000
✅ Retrieved 1000 candles. Total: 6000
✅ Retrieved 1000 candles. Total: 7000
✅ Retrieved 1000 candles. Total: 8000
✅ Retrieved 1000 candles. Total: 9000
✅ Retrieved 1000 candles. Total: 10000
✅ Retrieved 1000 candles. Total: 11000
✅ Retrieved 1000 candles. Total: 12000
✅ Retrieved 1000 candles. Total: 13000
✅ Retrieved 1000 candles. Total: 14000
✅ Retrieved 1000 candles. Total: 15000
✅ Retrieved 1000 candles. Total: 16000
✅ Retrieved 1000 candles. Total: 17000
✅ Retrieved 1000 candles. Total: 18000
✅ Retrieved 1000 candles. Total: 19000
✅ Retrieved 1000 candles. Total: 20000
✅ Retrieved 1000 candles. Total: 21000
✅ Retrieved 1000 candles. Total: 22000
✅ Retrieved 1000 candles. Total: 23000
✅ Retrieved 1000 candles. Total:

In [None]:
#open terminal and install something before run this section
!pip install pandas_ta --no-cache-dir
!pip install numpy --upgrade --force-reinstall
!pip install ta

^C
Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: C:\Users\A\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


Defaulting to user installation because normal site-packages is not writeable
Collecting numpy
  Using cached numpy-2.2.4-cp312-cp312-win_amd64.whl.metadata (60 kB)
Using cached numpy-2.2.4-cp312-cp312-win_amd64.whl (12.6 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.2.4
    Uninstalling numpy-2.2.4:
      Successfully uninstalled numpy-2.2.4
Successfully installed numpy-2.2.4



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: C:\Users\A\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: C:\Users\A\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [None]:
### ⚙️ SECTION 2: FEATURE ENGINEERING
# This section transforms raw crypto price data into meaningful features
# that can help a machine learning model detect market trends.

# We compute common technical indicators used by traders:
# ---------------------------------------------------------
# - RSI (Relative Strength Index): Measures momentum (overbought/oversold signals)
# - MACD (Moving Average Convergence Divergence): Captures trend reversals
# - EMA-12 and EMA-26: Short and medium-term exponential moving averages
# - SMA-20: Simple average of past 20 close prices (trend direction)
# - Volatility: Rolling standard deviation over 20 periods

# We also calculate returns:
# - return_pct: % change from previous close to current
# - log_return: logarithmic return, useful for financial modeling

# After feature creation, we:
# - Drop any rows with NaN values (from rolling windows)
# - Save the engineered DataFrame as a CSV for model training


from ta.momentum import RSIIndicator
from ta.trend import MACD, EMAIndicator, SMAIndicator
from ta.volatility import BollingerBands
import numpy as np

print("🔧 Engineering features...")

# RSI
rsi = RSIIndicator(close=df["close"], window=14)
df["rsi"] = rsi.rsi()

# MACD
macd = MACD(close=df["close"])
df["macd"] = macd.macd()

# EMA
ema12 = EMAIndicator(close=df["close"], window=12)
ema26 = EMAIndicator(close=df["close"], window=26)
df["ema_12"] = ema12.ema_indicator()
df["ema_26"] = ema26.ema_indicator()

# SMA
sma20 = SMAIndicator(close=df["close"], window=20)
df["sma_20"] = sma20.sma_indicator()

# Volatility (standard deviation of closing price)
df["volatility"] = df["close"].rolling(window=20).std()

# Returns
df["return_pct"] = df["close"].pct_change() * 100
df["log_return"] = (df["close"] / df["close"].shift(1)).apply(lambda x: pd.NA if x <= 0 else np.log(x))

# Clean NA
df.dropna(inplace=True)

# Save to CSV
os.makedirs("../datasets", exist_ok=True)
out_path = "../datasets/BTCUSDT_features.csv"
df.to_csv(out_path)
print(f"💾 Features saved to {out_path} with {len(df)} rows.")

# Preview
print(df.tail())


🔧 Engineering features...
💾 Features saved to ../datasets/BTCUSDT_features.csv with 44950 rows.
                        open     high      low    close  volume        rsi  \
timestamp                                                                    
2025-04-08 03:55:00  79756.0  79777.0  79734.6  79770.0  18.796  39.087965   
2025-04-08 03:56:00  79770.0  79770.0  79724.0  79734.9  20.871  36.549372   
2025-04-08 03:57:00  79734.9  79776.0  79726.3  79776.0  16.550  41.352447   
2025-04-08 03:58:00  79776.0  79858.4  79776.0  79858.3  38.774  49.582580   
2025-04-08 03:59:00  79858.3  79867.0  79853.2  79867.0  30.839  50.375370   

                          macd        ema_12        ema_26     sma_20  \
timestamp                                                               
2025-04-08 03:55:00 -54.061874  79823.440784  79877.502659  79865.095   
2025-04-08 03:56:00 -57.120374  79809.819125  79866.939499  79853.825   
2025-04-08 03:57:00 -55.587057  79804.616183  79860.203239  79847