In [None]:
# Traning data from 2018 to 2021 january
import requests
import pandas as pd
import time
import os
from datetime import datetime

API_KEY = "GlYxSZP9hnooNl6gGAjtkptkeqehSnk5C60Akhpw5zupBK6O"
BASE_URL = "https://api.datasource.cybotrade.rs/coinbase/candle"
SYMBOL = "BTC-USD"
INTERVAL = "1h"
LIMIT = 1000

# === SET TIME RANGE: Jan 1, 2018 – Dec 31, 2020 ===
start_time = int(datetime(2018, 1, 1).timestamp() * 1000)
end_time = int(datetime(2020, 12, 31, 23, 59).timestamp() * 1000)

headers = {"X-API-KEY": API_KEY}

all_candles = []
print(f"Fetching {INTERVAL} data for {SYMBOL} from {datetime.utcfromtimestamp(start_time/1000)} to {datetime.utcfromtimestamp(end_time/1000)}...")

while start_time < end_time:
    params = {"symbol": SYMBOL, "interval": INTERVAL, "start_time": start_time, "limit": LIMIT}

    response = requests.get(BASE_URL, headers=headers, params=params)

    if response.status_code == 200:
        response_json = response.json()
        data = response_json.get("data", [])
        
        # Check data structure carefully here:
        if not data or not isinstance(data, list):
            print("No more data returned or invalid format.")
            break

        for candle in data:
            formatted_candle = {
                "timestamp": candle["start_time"],
                "open": candle["open"],
                "high": candle["high"],
                "low": candle["low"],
                "close": candle["close"],
                "volume": candle["volume"]
            }
            all_candles.append(formatted_candle)

        print(f"✅ Retrieved {len(data)} candles. Total: {len(all_candles)}")
        start_time = data[-1]["start_time"] + 60 * 60 * 1000  # 1 hour increment
        time.sleep(0.5)
    else:
        print(f"❌ Error {response.status_code}: {response.text}")
        break

# === CONVERT TO DATAFRAME ===
df = pd.DataFrame(all_candles)
df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms")
df.set_index("timestamp", inplace=True)

# === SAVE TO CSV ===
os.makedirs("../datasets", exist_ok=True)
csv_path = f"../datasets/{SYMBOL}_{INTERVAL}_Training data_2018_to_2020.csv"
df.to_csv(csv_path)
print(f"💾 Saved to {csv_path} with {len(df)} rows.")

# === OPTIONAL: Preview ===
print(df.tail())


  print(f"Fetching {INTERVAL} data for {SYMBOL} from {datetime.utcfromtimestamp(start_time/1000)} to {datetime.utcfromtimestamp(end_time/1000)}...")


Fetching 1h data for BTC-USD from 2017-12-31 16:00:00 to 2020-12-31 15:59:00...
✅ Retrieved 997 candles. Total: 997
✅ Retrieved 1000 candles. Total: 1997
✅ Retrieved 1000 candles. Total: 2997
✅ Retrieved 997 candles. Total: 3994
✅ Retrieved 1000 candles. Total: 4994
✅ Retrieved 985 candles. Total: 5979
✅ Retrieved 1000 candles. Total: 6979
✅ Retrieved 1000 candles. Total: 7979
✅ Retrieved 999 candles. Total: 8978
✅ Retrieved 1000 candles. Total: 9978
✅ Retrieved 1000 candles. Total: 10978
✅ Retrieved 999 candles. Total: 11977
✅ Retrieved 999 candles. Total: 12976
✅ Retrieved 1000 candles. Total: 13976
✅ Retrieved 1000 candles. Total: 14976
✅ Retrieved 1000 candles. Total: 15976
✅ Retrieved 999 candles. Total: 16975
✅ Retrieved 1000 candles. Total: 17975
✅ Retrieved 999 candles. Total: 18974
✅ Retrieved 1000 candles. Total: 19974
✅ Retrieved 1000 candles. Total: 20974
✅ Retrieved 1000 candles. Total: 21974
✅ Retrieved 1000 candles. Total: 22974
✅ Retrieved 999 candles. Total: 23973
✅ Re

In [None]:
# Backtest data from Jan 1, 2021 – Dec 31, 2023
import requests
import pandas as pd
import time
import os
from datetime import datetime

# === CONFIG ===
API_KEY = "GlYxSZP9hnooNl6gGAjtkptkeqehSnk5C60Akhpw5zupBK6O"
BASE_URL = "https://api.datasource.cybotrade.rs/coinbase/candle"
SYMBOL = "BTC-USD"
INTERVAL = "1h"
LIMIT = 1000

# === SET TIME RANGE: Jan 1, 2021 – Dec 31, 2023 ===
start_time = int(datetime(2021, 1, 1).timestamp() * 1000)
end_time = int(datetime(2023, 12, 31, 23, 59).timestamp() * 1000)

headers = {"X-API-KEY": API_KEY}
all_candles = []

print(f"Fetching {INTERVAL} data for {SYMBOL} from {datetime.utcfromtimestamp(start_time/1000)} to {datetime.utcfromtimestamp(end_time/1000)}...")

while start_time < end_time:
    params = {"symbol": SYMBOL, "interval": INTERVAL, "start_time": start_time, "limit": LIMIT}
    response = requests.get(BASE_URL, headers=headers, params=params)

    if response.status_code == 200:
        data = response.json().get("data", [])
        if not data or not isinstance(data, list):
            print("No more data returned or invalid format.")
            break

        for candle in data:
            formatted_candle = {
                "timestamp": candle["start_time"],
                "open": candle["open"],
                "high": candle["high"],
                "low": candle["low"],
                "close": candle["close"],
                "volume": candle["volume"]
            }
            all_candles.append(formatted_candle)

        print(f"✅ Retrieved {len(data)} candles. Total: {len(all_candles)}")
        start_time = data[-1]["start_time"] + 60 * 60 * 1000  # increment 1h
        time.sleep(0.5)
    else:
        print(f"❌ Error {response.status_code}: {response.text}")
        break

# === CONVERT TO DATAFRAME ===
df = pd.DataFrame(all_candles)
df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms")
df.set_index("timestamp", inplace=True)

# === SAVE TO CSV ===
os.makedirs("../datasets", exist_ok=True)
csv_path = f"../datasets/{SYMBOL}_{INTERVAL}_Backtest_2021_to_2023.csv"
df.to_csv(csv_path)
print(f"💾 Saved to {csv_path} with {len(df)} rows.")

# === OPTIONAL: Preview ===
print(df.tail())


  print(f"Fetching {INTERVAL} data for {SYMBOL} from {datetime.utcfromtimestamp(start_time/1000)} to {datetime.utcfromtimestamp(end_time/1000)}...")


Fetching 1h data for BTC-USD from 2020-12-31 16:00:00 to 2023-12-31 15:59:00...
✅ Retrieved 1000 candles. Total: 1000
✅ Retrieved 1000 candles. Total: 2000
✅ Retrieved 1000 candles. Total: 3000
✅ Retrieved 1000 candles. Total: 4000
✅ Retrieved 1000 candles. Total: 5000
✅ Retrieved 1000 candles. Total: 6000
✅ Retrieved 1000 candles. Total: 7000
✅ Retrieved 1000 candles. Total: 8000
✅ Retrieved 1000 candles. Total: 9000
✅ Retrieved 1000 candles. Total: 10000
✅ Retrieved 1000 candles. Total: 11000
✅ Retrieved 1000 candles. Total: 12000
✅ Retrieved 1000 candles. Total: 13000
✅ Retrieved 1000 candles. Total: 14000
✅ Retrieved 1000 candles. Total: 15000
✅ Retrieved 1000 candles. Total: 16000
✅ Retrieved 1000 candles. Total: 17000
✅ Retrieved 1000 candles. Total: 18000
✅ Retrieved 1000 candles. Total: 19000
✅ Retrieved 996 candles. Total: 19996
✅ Retrieved 1000 candles. Total: 20996
✅ Retrieved 1000 candles. Total: 21996
✅ Retrieved 1000 candles. Total: 22996
✅ Retrieved 1000 candles. Total: 

In [None]:
# === Forward Test Data===
import requests
import pandas as pd
import time
import os
from datetime import datetime

API_KEY = "GlYxSZP9hnooNl6gGAjtkptkeqehSnk5C60Akhpw5zupBK6O"
BASE_URL = "https://api.datasource.cybotrade.rs/bybit-linear/candle"
SYMBOL = "BTCUSDT"
INTERVAL = "1h"  # Changed to 1 hour
LIMIT = 1000

# === SET TIME RANGE: From Jan 1, 2024 to Current Time ===
start_time = int(datetime(2024, 1, 1).timestamp() * 1000)
end_time = int(datetime.now().timestamp() * 1000)

headers = {"X-API-KEY": API_KEY}

all_candles = []
print(f"Fetching {INTERVAL} data for {SYMBOL} from {datetime.utcfromtimestamp(start_time/1000)} to {datetime.utcfromtimestamp(end_time/1000)}...")

while start_time < end_time:
    params = {"symbol": SYMBOL, "interval": INTERVAL, "start_time": start_time, "limit": LIMIT}

    response = requests.get(BASE_URL, headers=headers, params=params)

    if response.status_code == 200:
        data = response.json().get("data", [])
        if not data:
            print("No more data returned.")
            break

        for candle in data:
            formatted_candle = {
                "timestamp": candle["start_time"],
                "open": candle["open"],
                "high": candle["high"],
                "low": candle["low"],
                "close": candle["close"],
                "volume": candle["volume"]
            }
            all_candles.append(formatted_candle)

        print(f"✅ Retrieved {len(data)} candles. Total: {len(all_candles)}")
        start_time = data[-1]["start_time"] + 60 * 60 * 1000  # 1 hour increment
        time.sleep(0.5)
    else:
        print(f"❌ Error {response.status_code}: {response.text}")
        break

# === CONVERT TO DATAFRAME ===
df = pd.DataFrame(all_candles)
df["timestamp"] = pd.to_datetime(df["timestamp"], unit="ms")
df.set_index("timestamp", inplace=True)

# === SAVE TO CSV ===
os.makedirs("../datasets", exist_ok=True)
csv_path = f"../datasets/{SYMBOL}_{INTERVAL}_ForwardTestData_2024_to_now.csv"
df.to_csv(csv_path)
print(f"💾 Saved to {csv_path} with {len(df)} rows.")

# === OPTIONAL: Preview ===
print(df.tail())


  print(f"Fetching {INTERVAL} data for {SYMBOL} from {datetime.utcfromtimestamp(start_time/1000)} to {datetime.utcfromtimestamp(end_time/1000)}...")


Fetching 1h data for BTCUSDT from 2023-12-31 16:00:00 to 2025-04-08 09:12:30.274000...
✅ Retrieved 1000 candles. Total: 1000
✅ Retrieved 1000 candles. Total: 2000
✅ Retrieved 1000 candles. Total: 3000
✅ Retrieved 1000 candles. Total: 4000
✅ Retrieved 1000 candles. Total: 5000
✅ Retrieved 1000 candles. Total: 6000
✅ Retrieved 1000 candles. Total: 7000
✅ Retrieved 1000 candles. Total: 8000
✅ Retrieved 1000 candles. Total: 9000
✅ Retrieved 1000 candles. Total: 10000
✅ Retrieved 1000 candles. Total: 11000
✅ Retrieved 129 candles. Total: 11129
❌ Error 400: {"error":"invalid 'start_time', must be UNIX timestamp (ms) not bigger than 2025-04-08 08:00:00 UTC"}
💾 Saved to ../datasets/BTCUSDT_1h_ForwardTestData_2024_to_now.csv with 11129 rows.
                        open     high      low    close    volume
timestamp                                                        
2025-04-08 04:00:00  79867.0  80368.5  79822.1  80249.9  2322.311
2025-04-08 05:00:00  80249.9  80268.8  79501.0  79822.6  33

In [None]:
#open terminal and install something before run this section
!pip install pandas_ta --no-cache-dir
!pip install numpy --upgrade --force-reinstall
!pip install ta

^C
Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: C:\Users\A\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


Defaulting to user installation because normal site-packages is not writeable
Collecting numpy
  Using cached numpy-2.2.4-cp312-cp312-win_amd64.whl.metadata (60 kB)
Using cached numpy-2.2.4-cp312-cp312-win_amd64.whl (12.6 MB)
Installing collected packages: numpy
  Attempting uninstall: numpy
    Found existing installation: numpy 2.2.4
    Uninstalling numpy-2.2.4:
      Successfully uninstalled numpy-2.2.4
Successfully installed numpy-2.2.4



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: C:\Users\A\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


Defaulting to user installation because normal site-packages is not writeable



[notice] A new release of pip is available: 24.2 -> 25.0.1
[notice] To update, run: C:\Users\A\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [None]:
### SECTION 2: FEATURE ENGINEERING (Forward Test Data)
print("🔧 Engineering features for forward test data...")

rsi = RSIIndicator(close=df["close"], window=14)
df["rsi"] = rsi.rsi()

macd = MACD(close=df["close"])
df["macd"] = macd.macd()

ema12 = EMAIndicator(close=df["close"], window=12)
ema26 = EMAIndicator(close=df["close"], window=26)
df["ema_12"] = ema12.ema_indicator()
df["ema_26"] = ema26.ema_indicator()

sma20 = SMAIndicator(close=df["close"], window=20)
df["sma_20"] = sma20.sma_indicator()

df["volatility"] = df["close"].rolling(window=20).std()

df["return_pct"] = df["close"].pct_change() * 100
df["log_return"] = (df["close"] / df["close"].shift(1)).apply(lambda x: pd.NA if x <= 0 else np.log(x))

df.dropna(inplace=True)

out_path = "../datasets/BTCUSDT_forward_features.csv"
df.to_csv(out_path)
print(f"💾 Forward test features saved to {out_path} with {len(df)} rows.")
print(df.tail())

🔧 Engineering features for forward test data...
💾 Forward test features saved to ../datasets/BTCUSDT_forward_features.csv with 11029 rows.
                        open     high      low    close    volume        rsi  \
timestamp                                                                      
2025-04-08 04:00:00  79867.0  80368.5  79822.1  80249.9  2322.311  57.239203   
2025-04-08 05:00:00  80249.9  80268.8  79501.0  79822.6  3343.819  54.378175   
2025-04-08 06:00:00  79822.6  79822.7  79242.6  79421.1  3119.643  51.760221   
2025-04-08 07:00:00  79421.1  79685.3  79188.9  79464.7  3311.373  52.030299   
2025-04-08 08:00:00  79464.7  79500.0  78915.8  79129.2  4114.861  49.723362   

                           macd        ema_12        ema_26     sma_20  \
timestamp                                                                
2025-04-08 04:00:00  322.946003  79388.459948  79065.513945  78631.030   
2025-04-08 05:00:00  333.656332  79455.250725  79121.594393  78809.725   
2025

In [None]:
### SECTION 2.1: FEATURE ENGINEERING (Backtest Data)
print("🔧 Engineering features for backtest data...")

backtest_df = pd.read_csv("../datasets/BTC-USD_1h_Backtest_2021_to_2023.csv", index_col="timestamp", parse_dates=True)

rsi = RSIIndicator(close=backtest_df["close"], window=14)
backtest_df["rsi"] = rsi.rsi()

macd = MACD(close=backtest_df["close"])
backtest_df["macd"] = macd.macd()

ema12 = EMAIndicator(close=backtest_df["close"], window=12)
ema26 = EMAIndicator(close=backtest_df["close"], window=26)
backtest_df["ema_12"] = ema12.ema_indicator()
backtest_df["ema_26"] = ema26.ema_indicator()

sma20 = SMAIndicator(close=backtest_df["close"], window=20)
backtest_df["sma_20"] = sma20.sma_indicator()

backtest_df["volatility"] = backtest_df["close"].rolling(window=20).std()

backtest_df["return_pct"] = backtest_df["close"].pct_change() * 100
backtest_df["log_return"] = (backtest_df["close"] / backtest_df["close"].shift(1)).apply(lambda x: pd.NA if x <= 0 else np.log(x))

backtest_df.dropna(inplace=True)

out_path_bt = "../datasets/BTCUSDT_backtest_features.csv"
backtest_df.to_csv(out_path_bt)
print(f"💾 Backtest features saved to {out_path_bt} with {len(backtest_df)} rows.")

🔧 Engineering features for backtest data...
💾 Backtest features saved to ../datasets/BTCUSDT_backtest_features.csv with 26971 rows.


In [None]:
### SECTION 2.2: FEATURE ENGINEERING (Training Data)
# This section transforms raw crypto price data into meaningful features
# that can help a machine learning model detect market trends.

# We compute common technical indicators used by traders:
# ---------------------------------------------------------
# - RSI (Relative Strength Index): Measures momentum (overbought/oversold signals)
# - MACD (Moving Average Convergence Divergence): Captures trend reversals
# - EMA-12 and EMA-26: Short and medium-term exponential moving averages
# - SMA-20: Simple average of past 20 close prices (trend direction)
# - Volatility: Rolling standard deviation over 20 periods

# We also calculate returns:
# - return_pct: % change from previous close to current
# - log_return: logarithmic return, useful for financial modeling

# After feature creation, we:
# - Drop any rows with NaN values (from rolling windows)
# - Save the engineered DataFrame as a CSV for model training
print("🔧 Engineering features for training data...")

train_df = pd.read_csv("../datasets/BTC-USD_1h_Training data_2018_to_2020.csv", index_col="timestamp", parse_dates=True)

rsi = RSIIndicator(close=train_df["close"], window=14)
train_df["rsi"] = rsi.rsi()

macd = MACD(close=train_df["close"])
train_df["macd"] = macd.macd()

ema12 = EMAIndicator(close=train_df["close"], window=12)
ema26 = EMAIndicator(close=train_df["close"], window=26)
train_df["ema_12"] = ema12.ema_indicator()
train_df["ema_26"] = ema26.ema_indicator()

sma20 = SMAIndicator(close=train_df["close"], window=20)
train_df["sma_20"] = sma20.sma_indicator()

train_df["volatility"] = train_df["close"].rolling(window=20).std()

train_df["return_pct"] = train_df["close"].pct_change() * 100
train_df["log_return"] = (train_df["close"] / train_df["close"].shift(1)).apply(lambda x: pd.NA if x <= 0 else np.log(x))

train_df.dropna(inplace=True)

out_path_train = "../datasets/BTCUSDT_train_features.csv"
train_df.to_csv(out_path_train)
print(f"💾 Training features saved to {out_path_train} with {len(train_df)} rows.")

🔧 Engineering features for training data...
💾 Training features saved to ../datasets/BTCUSDT_train_features.csv with 26947 rows.
