# Parameter and base dev data

In [None]:
import sqlite3
import pandas as pd
import json

# =============================================================================
# Load configuration
# =============================================================================
# Read the notebook settings from config.json in the working directory.
# The encoding is given explicitly so decoding does not depend on the
# platform's default locale encoding.
with open("config.json", "r", encoding="utf-8") as config_file:
    config = json.load(config_file)
DB_PATH = config["db_path"]  # path of the SQLite database opened below

# =============================================================================
# CONFIGURE TARGET PREPARATION PARAMETERS
# =============================================================================
TABLE_NAME     = "bchusdt_1m"        # Table to read candles from
OPEN_TIME_FROM = "2025-10-20 00:00"  # Lower bound of the interval (used with BETWEEN, so inclusive)
OPEN_TIME_TO   = "2025-10-21 00:00"  # Upper bound of the interval (used with BETWEEN, so inclusive)
ROLLING_WINDOW = 240                 # Look-ahead window in rows; 240 one-minute rows = 4 hours

# Name of the binary target column created in the "target" section
target = 'spike_flag'

# =============================================================================
# CONFIGURE INDICATORS AND THEIR PARAMETERS
# =============================================================================
# Layout: category -> indicator -> list of parameter sets.
# Every parameter set carries the output column "name"; an optional "window"
# is appended to that name to form the final column name.
indicator_config = {
    "momentum": {
        "rsi": [{"name": "rsi_backward", "window": 14}],
        "roc": [
            {"name": "roc", "window": 14},
            {"name": "roc", "window": 140},
        ],
    },
    "trend": {
        "macd": [{"name": "macd_diff"}],
        "sma": [
            {"name": "sma_ratio", "window": 14},
            {"name": "sma_ratio", "window": 140},
        ],
    },
    "volatility": {
        "bollinger_band": [
            {"name": "bb_width", "window": 14},
            {"name": "bb_width", "window": 140},
        ],
    },
}

# Flatten the config into the ordered list of feature column names:
# "<name>_<window>" when a window is configured, otherwise just "<name>".
features = [
    f"{spec['name']}_{spec['window']}" if "window" in spec else spec["name"]
    for indicator_group in indicator_config.values()
    for spec_list in indicator_group.values()
    for spec in spec_list
]

print("Features list:", features)

# =============================================================================
# Fetch data from database
# =============================================================================
# SQL query to fetch open_time/close within the specified interval, oldest
# row first. The table name is interpolated into the statement (identifiers
# cannot be bound as SQL parameters); the interval bounds are bound via
# `params`.
query = f"""
    SELECT t.open_time, t.close
    FROM {TABLE_NAME} t
    WHERE open_time BETWEEN ? AND ?
    ORDER BY open_time ASC
"""

conn = sqlite3.connect(DB_PATH)
try:
    df = pd.read_sql_query(query, conn, params=(OPEN_TIME_FROM, OPEN_TIME_TO))
finally:
    # Always release the database handle, even if the query fails
    conn.close()

# =============================================================================
# Ensure open_time is in datetime format and set it as the index
# =============================================================================
df["open_time"] = pd.to_datetime(df["open_time"])
df = df.set_index("open_time")

display(df.tail())

Features list: ['rsi_backward_14', 'roc_14', 'roc_140', 'macd_diff', 'sma_ratio_14', 'sma_ratio_140', 'bb_width_14', 'bb_width_140']


Unnamed: 0_level_0,close
open_time,Unnamed: 1_level_1
2025-10-20 23:56:00,481.5
2025-10-20 23:57:00,481.1
2025-10-20 23:58:00,481.4
2025-10-20 23:59:00,481.4
2025-10-21 00:00:00,480.9


# target

In [2]:
# =============================================================================
# Calculate rolling max, ratio, and 90th percentile target (spike_flag)
# =============================================================================

# Forward-looking rolling maximum: a trailing window over the reversed series
# covers the current row plus the rows that follow it in time.
# min_periods=1 lets the window shrink near the end of the data, so the last
# rows are computed from fewer than ROLLING_WINDOW observations.
# NOTE(review): confirm that labels built from these truncated look-ahead
# windows are acceptable for the final rows.
close_reversed = df["close"].iloc[::-1]
forward_max_reversed = close_reversed.rolling(window=ROLLING_WINDOW, min_periods=1).max()
df["rolling_max"] = forward_max_reversed.iloc[::-1]  # restore chronological order

# Ratio of the upcoming maximum to the current close price
df["ratio"] = df["rolling_max"].div(df["close"])

# Threshold: 90th percentile of the ratio over the loaded interval
percentile_90 = df["ratio"].quantile(0.9)

# Flag rows whose ratio reaches the threshold (1 = spike, 0 = no spike)
df[target] = df["ratio"].ge(percentile_90).astype(int)

# =============================================================================
# Display the final DataFrame
# =============================================================================
display(df.tail())  # rows are in ascending open_time order, so tail() shows the latest ones

Unnamed: 0_level_0,close,rolling_max,ratio,spike_flag
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2025-10-20 23:56:00,481.5,481.5,1.0,0
2025-10-20 23:57:00,481.1,481.4,1.000624,0
2025-10-20 23:58:00,481.4,481.4,1.0,0
2025-10-20 23:59:00,481.4,481.4,1.0,0
2025-10-21 00:00:00,480.9,480.9,1.0,0


# variables

In [3]:
import ta

# =============================================================================
# CALCULATE INDICATORS
# =============================================================================
# Every indicator below is derived from the close price only.
close_series = df["close"]

# --- Momentum: RSI, one column per configured window ---
for spec in indicator_config["momentum"]["rsi"]:
    window = spec["window"]
    rsi_calc = ta.momentum.RSIIndicator(close=close_series, window=window)
    df[f"{spec['name']}_{window}"] = rsi_calc.rsi()

# --- Momentum: rate of change, one column per configured window ---
for spec in indicator_config["momentum"]["roc"]:
    window = spec["window"]
    roc_calc = ta.momentum.ROCIndicator(close=close_series, window=window)
    df[f"{spec['name']}_{window}"] = roc_calc.roc()

# --- Trend: MACD difference (no window in the config; library defaults apply) ---
for spec in indicator_config["trend"]["macd"]:
    macd_calc = ta.trend.MACD(close=close_series)
    df[spec["name"]] = macd_calc.macd_diff()

# --- Trend: close price divided by its simple moving average ---
for spec in indicator_config["trend"]["sma"]:
    window = spec["window"]
    sma_calc = ta.trend.SMAIndicator(close=close_series, window=window)
    df[f"{spec['name']}_{window}"] = close_series / sma_calc.sma_indicator()

# --- Volatility: Bollinger band width ---
for spec in indicator_config["volatility"]["bollinger_band"]:
    window = spec["window"]
    bb_calc = ta.volatility.BollingerBands(close=close_series, window=window)
    df[f"{spec['name']}_{window}"] = bb_calc.bollinger_wband()

# =============================================================================
# FINAL FEATURES + TARGET AND MISSING ROWS
# =============================================================================
df.tail()

Unnamed: 0_level_0,close,rolling_max,ratio,spike_flag,rsi_backward_14,roc_14,roc_140,macd_diff,sma_ratio_14,sma_ratio_140,bb_width_14,bb_width_140
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
2025-10-20 23:56:00,481.5,481.5,1.0,0,44.366243,-0.04152,-0.475403,0.114712,1.00046,0.995594,0.361156,1.013629
2025-10-20 23:57:00,481.1,481.4,1.000624,0,37.910544,-0.145289,-0.578632,0.09562,0.999733,0.994808,0.341891,1.028687
2025-10-20 23:58:00,481.4,481.4,1.0,0,44.440293,-0.124481,-0.557736,0.101909,1.000445,0.995468,0.296172,1.039586
2025-10-20 23:59:00,481.4,481.4,1.0,0,44.440293,-0.062279,-0.598802,0.104398,1.00049,0.995511,0.276801,1.04955
2025-10-21 00:00:00,480.9,480.9,1.0,0,36.932598,-0.083108,-0.78399,0.072229,0.99951,0.994533,0.280362,1.062923
