In [13]:
import requests
import pandas as pd
import time
from datetime import datetime, timedelta
import pandas_ta as ta
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
# ... any other libraries you'll use ...

### Cell 2: Data Fetching Function (get_kraken_ohlc)

In [14]:
def get_kraken_ohlc(pair="XXBTZUSD", interval=240, days=1825):
    """
    Fetch OHLC candles from Kraken's public API and return them as a DataFrame.

    The endpoint is polled repeatedly, moving ``since`` just past the newest
    candle received, until the API reports an error, returns nothing, stops
    delivering newer candles, or the requested window is covered.

    NOTE: Kraken documents that this endpoint serves at most the 720 most
    recent candles for a pair/interval and always includes the current,
    not-yet-committed candle regardless of ``since``. The returned history can
    therefore be shorter than ``days``.

    :param pair: Trading pair (e.g., "XXBTZUSD" for BTC/USD)
    :param interval: Candle width in minutes (240 = 4-hour)
    :param days: How many days before now the first request's ``since`` is set
    :return: Pandas DataFrame with columns time (datetime) and open, high, low,
             close, volume (floats), one row per distinct candle time; an empty
             frame with those same columns when nothing could be fetched
    """
    base_url = "https://api.kraken.com/0/public/OHLC"
    # Positional layout of one Kraken candle entry.
    ohlc_fields = ["time", "open", "high", "low", "close", "vwap", "volume", "count"]
    price_fields = ["open", "high", "low", "close", "volume"]

    end_time = int(datetime.now().timestamp())
    start_time = end_time - (days * 24 * 60 * 60)

    all_data = []

    while start_time < end_time:
        params = {
            "pair": pair,
            "interval": interval,
            "since": start_time
        }

        # A timeout keeps a stalled connection from hanging the notebook forever.
        response = requests.get(base_url, params=params, timeout=30)
        data = response.json()

        if data.get("error"):
            print(f"Error fetching data at start_time {start_time}:", data["error"])
            break

        result = data.get("result") or {}
        candles = result.get(pair)
        if candles is None:
            # The result may be keyed by Kraken's canonical pair name rather than
            # the alias that was requested; take the only non-"last" entry.
            candles = next(
                (rows for key, rows in result.items() if key != "last"), []
            )

        if not candles:
            break

        all_data.extend(candles)

        next_start = int(candles[-1][0]) + 1
        if next_start <= start_time:
            # Only the still-open candle came back again: there is nothing newer
            # to fetch, and continuing would loop on the same response.
            break
        start_time = next_start

        time.sleep(1)

    if not all_data:
        # Nothing fetched: hand back a correctly-shaped empty frame instead of
        # failing on the missing "time" column below.
        empty_columns = {"time": pd.Series(dtype="datetime64[ns]")}
        for col in price_fields:
            empty_columns[col] = pd.Series(dtype=float)
        return pd.DataFrame(empty_columns)

    # Build without column names, then label by position.
    df = pd.DataFrame(all_data).rename(columns=dict(enumerate(ohlc_fields)))

    # 'vwap' and 'count' are not used downstream.
    df = df.drop(columns=["vwap", "count"], errors="ignore")

    # Convert timestamp and ensure numeric types
    df["time"] = pd.to_datetime(df["time"], unit="s")
    for col in price_fields:
        if col in df.columns:  # Check if column exists before converting
            df[col] = df[col].astype(float)

    # The open candle can be delivered more than once; keep its latest version.
    df = df.drop_duplicates(subset="time", keep="last").reset_index(drop=True)

    return df

### Cell 3: Fetch Data (Run Only Once)


In [None]:
# Fetch data from Kraken and save to CSV.
# Run this cell ONLY the first time, or when you need to refresh the data.
btc_4h_data = get_kraken_ohlc(days=1825)
btc_4h_data.to_csv("kraken_btc_4h_5years.csv", index=False)
# Index by time so a fresh fetch yields the same frame layout as the
# CSV-loading cell (which reads with index_col='time'); otherwise later cells
# see 'time' as a column after fetching but as the index after reloading.
btc_4h_data = btc_4h_data.set_index("time")
print(btc_4h_data.head())

### Cell 4: Load Data from CSV (Use for Subsequent Runs)

In [None]:
# Reload the previously saved candles from disk, using the 'time' column
# (parsed as datetimes) as the index.
# On later runs, execute this cell in place of the fetch cell.
btc_4h_data = pd.read_csv(
    "kraken_btc_4h_5years.csv",
    index_col='time',
    parse_dates=['time'],
)
print(btc_4h_data.head())

### Cell 5: Feature Engineering Function (add_indicators)

In [None]:
def add_indicators(df):
    """
    Placeholder for the feature-engineering step.

    The original cell contained only a comment pointing at
    feature_engineering.py, which is not valid Python (a function needs a
    body). Until the real implementation is pasted in, this raises a clear
    error instead of making the cell unparseable.

    :param df: DataFrame to be enriched with indicator columns
    :raises NotImplementedError: always, until the implementation is supplied
    """
    # ... (code from feature_engineering.py) ...
    raise NotImplementedError(
        "add_indicators is a placeholder: paste the implementation from "
        "feature_engineering.py"
    )

### Cell 6: Apply Feature Engineering

In [None]:
# Run the feature-engineering step over the loaded candles and keep the result
# under the same name the later cells read from.
btc_4h_data = btc_4h_data.pipe(add_indicators)
print(btc_4h_data.head())  # Quick look at the first rows of the result

### Cell 7: Clustering Model Function (clustering_model)

In [None]:
def clustering_model(df_cluster, n_clusters=3):
    """
    Placeholder for the clustering step.

    The original cell contained only a comment pointing at models.py, which is
    not valid Python (a function needs a body). Until the real implementation
    is pasted in, this raises a clear error instead of making the cell
    unparseable.

    :param df_cluster: DataFrame holding the feature columns to cluster on
    :param n_clusters: Number of clusters requested (default 3)
    :raises NotImplementedError: always, until the implementation is supplied
    """
    # ... (code from models.py) ...
    raise NotImplementedError(
        "clustering_model is a placeholder: paste the implementation from models.py"
    )

### Cell 8: Perform Clustering and Add to DataFrame

In [None]:
# Columns handed to the clustering step (example set – adjust as needed)
cluster_features = ['RSI', 'MACD', 'volume']
df_cluster = btc_4h_data.loc[:, cluster_features]

# Compute the cluster assignments, then store them on the main DataFrame.
# NOTE(review): indicator columns often start with NaN warm-up rows — confirm
# that clustering_model copes with them.
clusters = clustering_model(df_cluster)
btc_4h_data['Cluster'] = clusters

print(btc_4h_data.head())  # Inspect the new 'Cluster' column