# In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import pytz
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report
from sklearn.ensemble import HistGradientBoostingClassifier

# hourly_bitcoin_prediction.py



# Look-ahead used to build the target. build_features() applies it as a ROW
# offset (shift), so it equals 60 minutes only if each row is a 1-minute bar
# -- TODO confirm against the data source.
HORIZON_MIN = 60

# Model input columns, in the order they are handed to the classifier.
FEATURES = [
    # returns and momentum
    "return_1",
    "momentum_5",
    "momentum_10",
    # distance of close from its moving averages
    "close_vs_ma5",
    "close_vs_ma10",
    # volatility and oscillator
    "volatility_10",
    "rsi",
    # candle shape
    "body",
    "upper_wick",
    "lower_wick",
    # activity; "volume" and "trade_count" are not created by
    # build_features() and must already be present in the input frame
    "volume",
    "volume_delta",
    "trade_count",
]

def build_features(df: pd.DataFrame, *, horizon=None) -> pd.DataFrame:
    """Return a copy of *df* with engineered feature columns and the target.

    Args:
        df: Candle data with ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns. Rows are assumed to be ordered oldest to
            newest, because every shift, diff and rolling window below is
            positional.
        horizon: Number of rows to look ahead when building ``target_1h``.
            ``None`` (the default) uses the module-level ``HORIZON_MIN``.

    Returns:
        A new DataFrame (the input is not modified) with the added columns
        ``return_1``, ``momentum_5``, ``momentum_10``, ``close_vs_ma5``,
        ``close_vs_ma10``, ``volatility_10``, ``rsi``, ``body``,
        ``upper_wick``, ``lower_wick``, ``volume_delta`` and ``target_1h``.
        Leading rows carry NaN in the windowed features until each window
        has filled.

    Raises:
        ValueError: If *horizon* is smaller than 1.

    NOTE: ``target_1h`` is an int 0/1 column. For the final *horizon* rows
    there is no future close, the comparison evaluates to False, and those
    rows are labelled 0 even though their outcome is unknown. The column is
    deliberately kept NaN-free so the most recent rows survive ``dropna()``
    and can still be used for prediction; code that trains on this column
    must exclude those trailing rows itself.
    """
    if horizon is None:
        horizon = HORIZON_MIN
    if horizon < 1:
        raise ValueError(f"horizon must be >= 1, got {horizon!r}")

    out = df.copy()
    close = out["close"]
    open_ = out["open"]

    # Returns over 1, 5 and 10 rows.
    out["return_1"] = close.pct_change()
    out["momentum_5"] = close.pct_change(5)
    out["momentum_10"] = close.pct_change(10)

    # Relative distance of the close from its simple moving averages.
    out["close_vs_ma5"] = close / close.rolling(5).mean() - 1
    out["close_vs_ma10"] = close / close.rolling(10).mean() - 1

    # Rolling standard deviation of the 1-row return.
    out["volatility_10"] = out["return_1"].rolling(10).std()

    # 14-period RSI from simple rolling means of gains and losses.
    # clip() keeps the NaN of the first diff (where() would turn it into a
    # fake "no change"), so the first RSI value is only emitted once 14 real
    # price changes are available.
    delta = close.diff()
    avg_gain = delta.clip(lower=0).rolling(14).mean()
    avg_loss = (-delta).clip(lower=0).rolling(14).mean()
    rs = avg_gain / avg_loss  # inf when there were no losses -> RSI 100
    out["rsi"] = 100 - (100 / (1 + rs))

    # Candle geometry, all scaled by the open price.
    body_top = out[["close", "open"]].max(axis=1)
    body_bottom = out[["close", "open"]].min(axis=1)
    out["body"] = (close - open_) / open_
    out["upper_wick"] = (out["high"] - body_top) / open_
    out["lower_wick"] = (body_bottom - out["low"]) / open_

    out["volume_delta"] = out["volume"].diff()

    # 1 when the close `horizon` rows ahead is strictly higher than now.
    out["target_1h"] = (close.shift(-horizon) > close).astype(int)
    return out

def train_model(df: pd.DataFrame):
    """Fit an up/down classifier on a chronological 80/20 split and report it.

    Features and the target come from ``build_features``. The first 80% of
    the usable rows train a ``HistGradientBoostingClassifier``; the remaining
    20% are used to print accuracy, ROC AUC, the confusion matrix and the
    classification report.

    Args:
        df: Raw candle data accepted by ``build_features``; it must also
            contain every raw column listed in ``FEATURES``.

    Returns:
        ``(model, featured)`` where ``featured`` is
        ``build_features(df).dropna()``. It still includes the most recent
        rows, whose outcome is not yet known, so callers can predict on them.

    Raises:
        ValueError: If there are too few labelled rows to form a training set.
    """
    built = build_features(df)
    featured = built.dropna()

    # target_1h compares each close with the close HORIZON_MIN rows ahead.
    # Where that future close does not exist the comparison is False, so the
    # row would be labelled 0 ("down") although its outcome is unknown.
    # Keep those rows out of fitting and scoring. The mask is positional, so
    # it is applied before dropna() changes the row count.
    has_future = built["close"].shift(-HORIZON_MIN).notna().to_numpy()
    labelled = built.loc[has_future].dropna()

    X = labelled[FEATURES]
    y = labelled["target_1h"]

    # Chronological split: no shuffling, the test set is the latest 20%.
    # NOTE(review): labels of the last HORIZON_MIN training rows are derived
    # from closes inside the test window; consider leaving a gap at the
    # boundary if a stricter evaluation is needed.
    split_idx = int(len(labelled) * 0.8)
    if split_idx == 0:
        raise ValueError(
            f"Not enough labelled rows to train on: {len(labelled)} usable "
            f"row(s) after feature warm-up and the {HORIZON_MIN}-row look-ahead"
        )
    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
    y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

    model = HistGradientBoostingClassifier(
        learning_rate=0.05,
        max_depth=6,
        max_iter=300,
        l2_regularization=0.0,
        random_state=42,
    )
    model.fit(X_train, y_train)

    preds = model.predict(X_test)
    proba = model.predict_proba(X_test)[:, 1]  # probability of class 1 (up)

    print("Accuracy:", accuracy_score(y_test, preds))
    print("ROC AUC:", roc_auc_score(y_test, proba))
    print("Confusion Matrix:\n", confusion_matrix(y_test, preds))
    print("Classification Report:\n", classification_report(y_test, preds))
    return model, featured

def nyc_hour_rounded():
    """Return the current New York time, cut down to the hour, as ``"HH:MM"``.

    Despite the name, the time is truncated rather than rounded: minutes,
    seconds and microseconds are set to zero, so the minutes part of the
    result is always ``00``.
    """
    new_york = pytz.timezone("America/New_York")
    top_of_hour = datetime.now(new_york).replace(
        minute=0,
        second=0,
        microsecond=0,
    )
    return top_of_hour.strftime("%H:%M")

def predict_latest(model, df: pd.DataFrame):
    """Print the model's UP/DOWN call for the last complete row of *df*.

    Rows containing any missing value are ignored; the last remaining row is
    handed to ``predict_manual``, which does the prediction and printing, so
    both entry points always produce the same output format.

    Args:
        model: Fitted classifier exposing ``predict_proba``.
        df: Frame containing the ``FEATURES`` columns.

    Raises:
        IndexError: If *df* has no row free of missing values.
    """
    complete = df.dropna()
    if complete.empty:
        # Same exception type .iloc[-1] would raise, with a clearer message.
        raise IndexError(
            "predict_latest: df has no complete (NaN-free) row to predict on"
        )
    predict_manual(model, complete.iloc[-1])

def predict_manual(model, row: pd.Series):
    """Print the model's UP/DOWN call for one row of feature values.

    Args:
        model: Fitted classifier exposing ``predict_proba``.
        row: Series holding at least the ``FEATURES`` entries.

    The printed line starts with the current New York hour, followed by the
    direction and the predicted probability of an upward move.
    """
    # The model expects a 2-D input: turn the selected values into a
    # one-row frame whose columns are the feature names.
    feature_values = row[FEATURES]
    single_row_frame = feature_values.to_frame().transpose()

    class_probabilities = model.predict_proba(single_row_frame)
    p_up = class_probabilities[0, 1]  # first (only) row, class 1

    if p_up >= 0.5:
        direction = "UP"
    else:
        direction = "DOWN"
    print(f"{nyc_hour_rounded()} -> {direction} (P(UP)={p_up:.2%})")
    