# BITCOIN PRICE PREDICTION - LOGISTIC REGRESSION

**Objectif**: Pr√©dire si le prix du BTC va monter ou descendre dans 1h

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    classification_report, 
    roc_auc_score, 
    confusion_matrix,
    accuracy_score,
    precision_score,
    recall_score
)
import joblib
import os
import json
import warnings
warnings.filterwarnings('ignore')

print("=" * 70)
print(" " * 15 + "BITCOIN PRICE PREDICTION")
print(" " * 15 + "LOGISTIC REGRESSION MODEL")
print("=" * 70)

# ‚úÖ SAFE ROC-AUC FUNCTION
def safe_roc_auc(y_true, y_proba):
    """Compute ROC AUC safely; return NaN if only one class present."""
    try:
        if len(np.unique(y_true)) < 2:
            return float('nan')
        return roc_auc_score(y_true, y_proba)
    except Exception as e:
        print(f"‚ö†Ô∏è  ROC-AUC error: {e}")
        return float('nan')

               BITCOIN PRICE PREDICTION
               LOGISTIC REGRESSION MODEL


## √âTAPE 1 : CHARGEMENT DES DONN√âES

In [2]:
print("\n[1/10] Loading dataset...")

df = pd.read_csv('data/features/btc_features_complete.csv', index_col=0, parse_dates=True)

print(f"‚úì Dataset loaded successfully")
print(f"  - Total rows: {df.shape[0]:,}")
print(f"  - Total columns: {df.shape[1]}")
print(f"  - Date range: {df.index.min()} ‚Üí {df.index.max()}")
print(f"  - Frequency: {pd.infer_freq(df.index) or 'Irregular'}")


[1/10] Loading dataset...
‚úì Dataset loaded successfully
  - Total rows: 51,443
  - Total columns: 86
  - Date range: 2020-01-31 00:00:00 ‚Üí 2025-12-14 18:00:00
  - Frequency: Irregular


## √âTAPE 2-5 : TARGET, FEATURES, SPLIT & SCALING

In [3]:
print("\n[2/10] Creating target variable...")
y = (df['target_direction_1h'] > 0).astype(int)
class_distribution = y.value_counts().sort_index()
print(f"‚úì Target variable created: 'price_direction_1h'")
print(f"  - Class 0 (DOWN): {class_distribution[0]:,} samples ({class_distribution[0]/len(y)*100:.1f}%)")
print(f"  - Class 1 (UP):   {class_distribution[1]:,} samples ({class_distribution[1]/len(y)*100:.1f}%)")
balance_ratio = min(class_distribution) / max(class_distribution)
print(f"  - Balance ratio: {balance_ratio:.2f} (1.0 = perfectly balanced)")

print("\n[3/10] Preparing features...")
drop_cols = [c for c in df.columns if 'target' in c.lower()] + [
    'Datetime', 'Close', 'future_return_1h', 'future_return_6h', 'future_return_24h'
]
X = df.drop(columns=drop_cols, errors='ignore')

if 'fear_greed_classification' in X.columns:
    print("‚úì Encoding 'fear_greed_classification' feature...")
    sentiment_mapping = {'Extreme Fear': 0, 'Fear': 1, 'Neutral': 2, 'Greed': 3, 'Extreme Greed': 4}
    X['fear_greed_classification_num'] = X['fear_greed_classification'].map(sentiment_mapping)
    X = X.drop(columns=['fear_greed_classification'])

X = X.select_dtypes(include=[np.number])
print(f"‚úì Feature selection completed")
print(f"  - Number of features: {X.shape[1]}")
print(f"  - Sample features: {list(X.columns[:5])}")

print("\n[4/10] Splitting data (chronological split)...")
n = len(X)
test_size = int(n * 0.15)
val_size = int(n * 0.15)
train_size = n - test_size - val_size

X_train = X.iloc[:train_size].copy()
y_train = y.iloc[:train_size].copy()
X_val = X.iloc[train_size:train_size+val_size].copy()
y_val = y.iloc[train_size:train_size+val_size].copy()
X_test = X.iloc[train_size+val_size:].copy()
y_test = y.iloc[train_size+val_size:].copy()

print(f"‚úì Data split completed (70% / 15% / 15%)")
print(f"\n  Training set: {X_train.shape[0]:,} samples | {X_train.index.min()} ‚Üí {X_train.index.max()}")
print(f"  Validation set: {X_val.shape[0]:,} samples | {X_val.index.min()} ‚Üí {X_val.index.max()}")
print(f"  Test set: {X_test.shape[0]:,} samples | {X_test.index.min()} ‚Üí {X_test.index.max()}")

print("\n[5/10] Cleaning and scaling data...")
for df_part in [X_train, X_val, X_test]:
    df_part.replace([np.inf, -np.inf], np.nan, inplace=True)

nan_counts = X_train.isna().sum()
total_nans = nan_counts.sum()
features_with_nan = (nan_counts > 0).sum()
print(f"‚úì Infinite values replaced with NaN")
print(f"  - Total NaN values: {total_nans:,}")
print(f"  - Features with NaN: {features_with_nan}/{X_train.shape[1]}")

# ‚úÖ COMPUTE AND SAVE MEDIANS
medians = X_train.median()
os.makedirs('models', exist_ok=True)
joblib.dump(medians, 'models/medians.pkl')

X_train.fillna(medians, inplace=True)
X_val.fillna(medians, inplace=True)
X_test.fillna(medians, inplace=True)
print(f"‚úì Missing values imputed using training set medians (saved to models/medians.pkl)")

scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_val_s = scaler.transform(X_val)
X_test_s = scaler.transform(X_test)

print(f"‚úì Features standardized (StandardScaler)")


[2/10] Creating target variable...
‚úì Target variable created: 'price_direction_1h'
  - Class 0 (DOWN): 25,320 samples (49.2%)
  - Class 1 (UP):   26,123 samples (50.8%)
  - Balance ratio: 0.97 (1.0 = perfectly balanced)

[3/10] Preparing features...
‚úì Encoding 'fear_greed_classification' feature...
‚úì Feature selection completed
  - Number of features: 79
  - Sample features: ['Open', 'High', 'Low', 'Volume', 'returns']

[4/10] Splitting data (chronological split)...
‚úì Data split completed (70% / 15% / 15%)

  Training set: 36,011 samples | 2020-01-31 00:00:00 ‚Üí 2024-03-11 18:00:00
  Validation set: 7,716 samples | 2024-03-11 19:00:00 ‚Üí 2025-01-27 06:00:00
  Test set: 7,716 samples | 2025-01-27 07:00:00 ‚Üí 2025-12-14 18:00:00

[5/10] Cleaning and scaling data...
‚úì Infinite values replaced with NaN
  - Total NaN values: 0
  - Features with NaN: 0/79
‚úì Missing values imputed using training set medians (saved to models/medians.pkl)
‚úì Features standardized (StandardScale

## √âTAPE 6-7 : TRAINING & VALIDATION EVAL

In [4]:
print("\n[6/10] Training Logistic Regression model...")
clf = LogisticRegression(
    class_weight='balanced',
    max_iter=5000,
    C=1.0,
    solver='saga',
    tol=1e-4,
    random_state=42,
    verbose=0,
    n_jobs=-1
)
print("  Training in progress...")
clf.fit(X_train_s, y_train)
print(f"‚úì Model trained successfully | Converged in: {clf.n_iter_[0]} iterations")

print("\n[7/10] Evaluating model on validation set...")
y_val_pred = clf.predict(X_val_s)
y_val_proba = clf.predict_proba(X_val_s)[:,1]

# ‚úÖ USE zero_division AND safe_roc_auc
val_accuracy = accuracy_score(y_val, y_val_pred)
val_precision = precision_score(y_val, y_val_pred, zero_division=0)
val_recall = recall_score(y_val, y_val_pred, zero_division=0)
val_roc_auc = safe_roc_auc(y_val, y_val_proba)
cm_val = confusion_matrix(y_val, y_val_pred)

print("\n" + "="*70)
print(" " * 20 + "VALIDATION SET RESULTS")
print("="*70)
print("\nClassification Report:")
print(classification_report(y_val, y_val_pred, digits=4))
print("Performance Metrics:")
print(f"  ‚Ä¢ Accuracy:  {val_accuracy:.4f}")
print(f"  ‚Ä¢ Precision: {val_precision:.4f} (of predicted UPs, how many were correct)")
print(f"  ‚Ä¢ Recall:    {val_recall:.4f} (of actual UPs, how many we caught)")
print(f"  ‚Ä¢ ROC-AUC:   {val_roc_auc:.4f}" if not np.isnan(val_roc_auc) else f"  ‚Ä¢ ROC-AUC:   N/A")

print("\nConfusion Matrix:")
print(f"                    Predicted")
print(f"                    DOWN      UP")
print(f"  Actual DOWN     {cm_val[0,0]:6d}   {cm_val[0,1]:6d}")
print(f"  Actual UP       {cm_val[1,0]:6d}   {cm_val[1,1]:6d}")

if np.isnan(val_roc_auc):
    performance = "‚ö†Ô∏è  N/A - Check data"
elif val_roc_auc < 0.55:
    performance = "‚ö†Ô∏è  POOR - Barely better than random"
elif val_roc_auc < 0.65:
    performance = "‚ö° FAIR - Moderate power"
elif val_roc_auc < 0.75:
    performance = "‚úì GOOD - Decent ability"
else:
    performance = "üåü EXCELLENT - Strong power"
print(f"\nOverall Performance: {performance}")


[6/10] Training Logistic Regression model...
  Training in progress...
‚úì Model trained successfully | Converged in: 5000 iterations

[7/10] Evaluating model on validation set...

                    VALIDATION SET RESULTS

Classification Report:
              precision    recall  f1-score   support

           0     0.5164    0.6166    0.5621      3785
           1     0.5460    0.4439    0.4897      3931

    accuracy                         0.5286      7716
   macro avg     0.5312    0.5303    0.5259      7716
weighted avg     0.5315    0.5286    0.5252      7716

Performance Metrics:
  ‚Ä¢ Accuracy:  0.5286
  ‚Ä¢ Precision: 0.5460 (of predicted UPs, how many were correct)
  ‚Ä¢ Recall:    0.4439 (of actual UPs, how many we caught)
  ‚Ä¢ ROC-AUC:   0.5432

Confusion Matrix:
                    Predicted
                    DOWN      UP
  Actual DOWN       2334     1451
  Actual UP         2186     1745

Overall Performance: ‚ö†Ô∏è  POOR - Barely better than random


## √âTAPE 8 : BACKTEST FUNCTION & VALIDATION

In [5]:
print("\n[8/10] Running realistic backtest on validation set...")

# ‚úÖ FIXED realistic_backtest FUNCTION
def realistic_backtest(y_true, y_pred, prices, initial_capital=10000, transaction_fee=0.001):
    """Simule une strat√©gie de trading bas√©e sur les pr√©dictions"""
    cash = initial_capital
    btc_held = 0
    portfolio_values = []
    trades = []
    num_trades = 0
    
    for i in range(len(y_pred)):
        current_price = prices.iloc[i]
        timestamp = prices.index[i]
        
        # Signal d'ACHAT (pr√©diction UP)
        if y_pred[i] == 1 and cash > 0:
            btc_to_buy = cash / current_price
            btc_fee = btc_to_buy * transaction_fee
            btc_held = btc_to_buy - btc_fee
            usd_fee = btc_fee * current_price
            trades.append({'timestamp': timestamp, 'action': 'BUY', 'price': current_price, 'btc_amount': btc_held, 'fee': usd_fee})
            cash = 0
            num_trades += 1
            
        # Signal de VENTE (pr√©diction DOWN)
        elif y_pred[i] == 0 and btc_held > 0:
            cash_from_sale = btc_held * current_price
            usd_fee = cash_from_sale * transaction_fee
            trades.append({'timestamp': timestamp, 'action': 'SELL', 'price': current_price, 'btc_amount': btc_held, 'fee': usd_fee})
            cash = cash_from_sale - usd_fee
            btc_held = 0
            num_trades += 1
        
        total_value = cash + (btc_held * current_price)
        portfolio_values.append(total_value)
    
    final_price = prices.iloc[-1]
    final_value = cash + (btc_held * final_price)
    total_return = (final_value - initial_capital) / initial_capital * 100
    
    btc_bought_hold = initial_capital / prices.iloc[0]
    buy_hold_value = btc_bought_hold * final_price
    buy_hold_return = (buy_hold_value - initial_capital) / initial_capital * 100
    
    returns = pd.Series(portfolio_values).pct_change().dropna()
    sharpe_ratio = (returns.mean() / returns.std()) * np.sqrt(252*24) if returns.std() > 0 else 0
    
    cumulative = pd.Series(portfolio_values)
    running_max = cumulative.expanding().max()
    drawdown = (cumulative - running_max) / running_max
    max_drawdown = drawdown.min() * 100
    
    return {
        'initial_capital': initial_capital, 'final_value': final_value, 'total_return': total_return,
        'buy_hold_value': buy_hold_value, 'buy_hold_return': buy_hold_return, 'outperformance': total_return - buy_hold_return,
        'portfolio_values': portfolio_values, 'num_trades': num_trades, 'sharpe_ratio': sharpe_ratio, 'max_drawdown': max_drawdown, 'trades': trades
    }

val_prices = df.loc[X_val.index, 'Close']
val_backtest = realistic_backtest(y_val, y_val_pred, val_prices, initial_capital=10000, transaction_fee=0.001)

print("\n" + "="*70)
print(" " * 20 + "VALIDATION BACKTEST RESULTS")
print("="*70)
print(f"\n  Starting Capital:       ${val_backtest['initial_capital']:,.2f}")
print(f"  Final Portfolio Value:  ${val_backtest['final_value']:,.2f}")
print(f"  Total Return:           {val_backtest['total_return']:+.2f}%")
print(f"  Number of Trades:       {val_backtest['num_trades']}")
print(f"  Sharpe Ratio:           {val_backtest['sharpe_ratio']:.2f}")
print(f"  Max Drawdown:           {val_backtest['max_drawdown']:.2f}%")
print(f"\n  Buy & Hold Benchmark:")
print(f"  Final Value:            ${val_backtest['buy_hold_value']:,.2f}")
print(f"  Total Return:           {val_backtest['buy_hold_return']:+.2f}%")
print(f"\n  Strategy Performance:")
print(f"  Outperformance:         {val_backtest['outperformance']:+.2f}%")


[8/10] Running realistic backtest on validation set...

                    VALIDATION BACKTEST RESULTS

  Starting Capital:       $10,000.00
  Final Portfolio Value:  $2,920.76
  Total Return:           -70.79%
  Number of Trades:       1555
  Sharpe Ratio:           -3.22
  Max Drawdown:           -71.14%

  Buy & Hold Benchmark:
  Final Value:            $13,749.66
  Total Return:           +37.50%

  Strategy Performance:
  Outperformance:         -108.29%


## √âTAPE 9 : TEST SET EVALUATION

In [6]:
print("\n[9/10] Final evaluation on test set...")

y_test_pred = clf.predict(X_test_s)
y_test_proba = clf.predict_proba(X_test_s)[:,1]

# ‚úÖ USE zero_division AND safe_roc_auc FOR TEST SET
test_accuracy = accuracy_score(y_test, y_test_pred)
test_precision = precision_score(y_test, y_test_pred, zero_division=0)
test_recall = recall_score(y_test, y_test_pred, zero_division=0)
test_roc_auc = safe_roc_auc(y_test, y_test_proba)
cm_test = confusion_matrix(y_test, y_test_pred)

print("\n" + "="*70)
print(" " * 22 + "TEST SET RESULTS")
print("="*70)
print("\nClassification Report:")
print(classification_report(y_test, y_test_pred, digits=4))
print("Performance Metrics:")
print(f"  ‚Ä¢ Accuracy:  {test_accuracy:.4f}")
print(f"  ‚Ä¢ Precision: {test_precision:.4f}")
print(f"  ‚Ä¢ Recall:    {test_recall:.4f}")
print(f"  ‚Ä¢ ROC-AUC:   {test_roc_auc:.4f}" if not np.isnan(test_roc_auc) else f"  ‚Ä¢ ROC-AUC:   N/A")

print("\nConfusion Matrix:")
print(f"                    Predicted")
print(f"                    DOWN      UP")
print(f"  Actual DOWN     {cm_test[0,0]:6d}   {cm_test[0,1]:6d}")
print(f"  Actual UP       {cm_test[1,0]:6d}   {cm_test[1,1]:6d}")

test_prices = df.loc[X_test.index, 'Close']
test_backtest = realistic_backtest(y_test, y_test_pred, test_prices, initial_capital=10000, transaction_fee=0.001)

print("\n" + "="*70)
print(" " * 22 + "TEST BACKTEST RESULTS")
print("="*70)
print(f"\n  Starting Capital:       ${test_backtest['initial_capital']:,.2f}")
print(f"  Final Portfolio Value:  ${test_backtest['final_value']:,.2f}")
print(f"  Total Return:           {test_backtest['total_return']:+.2f}%")
print(f"  Number of Trades:       {test_backtest['num_trades']}")
print(f"  Sharpe Ratio:           {test_backtest['sharpe_ratio']:.2f}")
print(f"  Max Drawdown:           {test_backtest['max_drawdown']:.2f}%")
print(f"  Buy & Hold Return:      {test_backtest['buy_hold_return']:+.2f}%")
print(f"  Outperformance:         {test_backtest['outperformance']:+.2f}%")


[9/10] Final evaluation on test set...

                      TEST SET RESULTS

Classification Report:
              precision    recall  f1-score   support

           0     0.5150    0.6681    0.5816      3838
           1     0.5345    0.3773    0.4423      3878

    accuracy                         0.5219      7716
   macro avg     0.5247    0.5227    0.5120      7716
weighted avg     0.5248    0.5219    0.5116      7716

Performance Metrics:
  ‚Ä¢ Accuracy:  0.5219
  ‚Ä¢ Precision: 0.5345
  ‚Ä¢ Recall:    0.3773
  ‚Ä¢ ROC-AUC:   0.5344

Confusion Matrix:
                    Predicted
                    DOWN      UP
  Actual DOWN       2564     1274
  Actual UP         2415     1463

                      TEST BACKTEST RESULTS

  Starting Capital:       $10,000.00
  Final Portfolio Value:  $3,113.85
  Total Return:           -68.86%
  Number of Trades:       1503
  Sharpe Ratio:           -4.22
  Max Drawdown:           -71.96%
  Buy & Hold Return:      -10.15%
  Outperformance: 

## √âTAPE 10 : SAVE MODEL & METRICS

In [7]:
print("\n[10/10] Saving model and artifacts...")

joblib.dump(clf, 'models/logistic_regression.pkl')
joblib.dump(scaler, 'models/scaler.pkl')

feature_names = X_train.columns.tolist()
joblib.dump(feature_names, 'models/feature_names.pkl')

metrics = {
    'model_type': 'Logistic Regression',
    'training_date': pd.Timestamp.now().strftime('%Y-%m-%d %H:%M:%S'),
    'data_info': {'total_samples': len(df), 'num_features': X_train.shape[1], 'train_samples': len(X_train), 'val_samples': len(X_val), 'test_samples': len(X_test)},
    'validation': {
        'accuracy': float(val_accuracy), 'precision': float(val_precision), 'recall': float(val_recall),
        'roc_auc': float(val_roc_auc) if not np.isnan(val_roc_auc) else None,
        'backtest_return': float(val_backtest['total_return']), 'buy_hold_return': float(val_backtest['buy_hold_return']),
        'outperformance': float(val_backtest['outperformance']), 'num_trades': int(val_backtest['num_trades']),
        'sharpe_ratio': float(val_backtest['sharpe_ratio']), 'max_drawdown': float(val_backtest['max_drawdown'])
    },
    'test': {
        'accuracy': float(test_accuracy), 'precision': float(test_precision), 'recall': float(test_recall),
        'roc_auc': float(test_roc_auc) if not np.isnan(test_roc_auc) else None,
        'backtest_return': float(test_backtest['total_return']), 'buy_hold_return': float(test_backtest['buy_hold_return']),
        'outperformance': float(test_backtest['outperformance']), 'num_trades': int(test_backtest['num_trades']),
        'sharpe_ratio': float(test_backtest['sharpe_ratio']), 'max_drawdown': float(test_backtest['max_drawdown'])
    },
    'model_params': clf.get_params()
}

with open('models/metrics.json', 'w') as f:
    json.dump(metrics, f, indent=4)

print("‚úì Model saved:           models/logistic_regression.pkl")
print("‚úì Scaler saved:          models/scaler.pkl")
print("‚úì Feature names saved:   models/feature_names.pkl")
print("‚úì Medians saved:         models/medians.pkl")
print("‚úì Metrics saved:         models/metrics.json")

print("\n" + "="*70)
print(" " * 20 + "‚úì TRAINING COMPLETED!")
print("="*70)


[10/10] Saving model and artifacts...
‚úì Model saved:           models/logistic_regression.pkl
‚úì Scaler saved:          models/scaler.pkl
‚úì Feature names saved:   models/feature_names.pkl
‚úì Medians saved:         models/medians.pkl
‚úì Metrics saved:         models/metrics.json

                    ‚úì TRAINING COMPLETED!


## BONUS : PREDICTION UTILITIES + DATE TEST (2024-12-12)

In [8]:
# ‚úÖ ROBUST predict_for_date FUNCTION
def predict_for_date(date_str, model, scaler, dataframe):
    """Faire une pr√©diction pour une date sp√©cifique"""
    target_date = pd.to_datetime(date_str)
    
    drop_cols = [c for c in dataframe.columns if 'target' in c.lower()] + [
        'Datetime', 'Close', 'future_return_1h', 'future_return_6h', 'future_return_24h'
    ]
    X = dataframe.drop(columns=drop_cols, errors='ignore')
    
    if 'fear_greed_classification' in X.columns:
        mapping = {'Extreme Fear':0, 'Fear':1, 'Neutral':2, 'Greed':3, 'Extreme Greed':4}
        X['fear_greed_classification_num'] = X['fear_greed_classification'].map(mapping)
        X = X.drop(columns=['fear_greed_classification'])
    
    X = X.select_dtypes(include=[np.number])
    X_pred = X.loc[dataframe.index.date == target_date.date()]
    
    if X_pred.empty:
        print(f"‚ùå No data available for {date_str}")
        return None
    
    X_pred = X_pred.replace([np.inf, -np.inf], np.nan)
    
    # Load saved medians and feature names
    medians = None
    feature_names = None
    try:
        medians = joblib.load('models/medians.pkl')
    except Exception:
        medians = None
    try:
        feature_names = joblib.load('models/feature_names.pkl')
    except Exception:
        feature_names = None
    
    if feature_names is not None:
        for col in feature_names:
            if col not in X_pred.columns:
                fill_val = medians[col] if (medians is not None and col in medians.index) else 0
                X_pred[col] = fill_val
        X_pred = X_pred[feature_names]
    
    if medians is not None:
        X_pred = X_pred.fillna(medians)
    else:
        X_pred = X_pred.fillna(X_pred.median())
    
    X_pred_s = scaler.transform(X_pred)
    predictions = model.predict(X_pred_s)
    probabilities = model.predict_proba(X_pred_s)[:,1]
    
    results = pd.DataFrame({
        'timestamp': X_pred.index,
        'prediction': ['UP' if p == 1 else 'DOWN' for p in predictions],
        'probability_up': probabilities,
        'confidence': [prob if pred == 1 else 1-prob for pred, prob in zip(predictions, probabilities)]
    })
    
    return results

In [9]:
# ‚úÖ TEST FUNCTION FOR SPECIFIC DATES
def run_test_for_date(date_str, model, scaler, dataframe, transaction_fee=0.001):
    """Test predictions and backtest for a specific date"""
    print("\n" + "="*70)
    print(f" TEST FOR DATE: {date_str} ")
    print("="*70)
    
    preds = predict_for_date(date_str, model, scaler, dataframe)
    if preds is None:
        return None
    
    mask = dataframe.index.date == pd.to_datetime(date_str).date()
    y_true = (dataframe.loc[mask, 'target_direction_1h'] > 0).astype(int)
    
    preds = preds.set_index('timestamp')
    preds = preds.loc[preds.index.isin(y_true.index)]
    y_true = y_true.loc[preds.index]
    
    if len(preds) == 0:
        print("No overlapping timestamps")
        return None
    
    y_pred_bin = (preds['prediction'] == 'UP').astype(int).values
    y_proba = preds['probability_up'].values
    
    acc = accuracy_score(y_true, y_pred_bin)
    prec = precision_score(y_true, y_pred_bin, zero_division=0)
    rec = recall_score(y_true, y_pred_bin, zero_division=0)
    auc = safe_roc_auc(y_true, y_proba)
    
    print(f"\n  üìä Metrics:")
    print(f"    ‚Ä¢ Samples: {len(y_true)}\")\n    ‚Ä¢ Accuracy: {acc:.4f}\")\n    ‚Ä¢ Precision: {prec:.4f}\")\n    ‚Ä¢ Recall: {rec:.4f}\")\n    ‚Ä¢ ROC-AUC: {auc:.4f}" if not np.isnan(auc) else f"    ‚Ä¢ ROC-AUC: N/A")
    
    prices = dataframe.loc[preds.index, 'Close']
    bt = realistic_backtest(y_true.values, y_pred_bin, prices, initial_capital=10000, transaction_fee=transaction_fee)
    
    print(f"\n  üí∞ Backtest Results:\")\n    ‚Ä¢ Final Value: ${bt['final_value']:,.2f}\")\n    ‚Ä¢ Total Return: {bt['total_return']:+.2f}%\")\n    ‚Ä¢ Trades: {bt['num_trades']}\")\n    ‚Ä¢ Sharpe: {bt['sharpe_ratio']:.2f}\")\n    ‚Ä¢ Max DD: {bt['max_drawdown']:.2f}%\")\n    ‚Ä¢ Outperformance: {bt['outperformance']:+.2f}%")
    
    if bt['num_trades'] > 0 and len(bt['trades']) > 0:
        print(f"\n  üìà First 5 Trades:")
        for t in bt['trades'][:5]:
            print(f"    - {t['timestamp']} {t['action']} {t['btc_amount']:.6f} BTC @ ${t['price']:.2f} (fee ${t['fee']:.2f})")
    
    return {'metrics': {'accuracy': acc, 'precision': prec, 'recall': rec, 'roc_auc': auc}, 'backtest': bt, 'predictions': preds}

# üéØ RUN THE REQUESTED TEST FOR 2024-12-12
print("\n\nüîç RUNNING REQUESTED TEST FOR 2024-12-12...\n")
test_result_2024_12_12 = run_test_for_date('2024-12-12', clf, scaler, df)

print("\n" + "="*70)
print("‚úÖ ALL OPERATIONS COMPLETED SUCCESSFULLY!")
print("="*70)



üîç RUNNING REQUESTED TEST FOR 2024-12-12...


 TEST FOR DATE: 2024-12-12 

  üìä Metrics:
    ‚Ä¢ Samples: 24")
    ‚Ä¢ Accuracy: 0.5417")
    ‚Ä¢ Precision: 0.4000")
    ‚Ä¢ Recall: 0.2000")
    ‚Ä¢ ROC-AUC: 0.5357

  üí∞ Backtest Results:")
    ‚Ä¢ Final Value: $9,892.03")
    ‚Ä¢ Total Return: -1.08%")
    ‚Ä¢ Trades: 4")
    ‚Ä¢ Sharpe: -9.40")
    ‚Ä¢ Max DD: -1.83%")
    ‚Ä¢ Outperformance: -0.32%

  üìà First 5 Trades:
    - 2024-12-12 00:00:00 BUY 0.099134 BTC @ $100772.46 (fee $10.00)
    - 2024-12-12 01:00:00 SELL 0.099134 BTC @ $100496.30 (fee $9.96)
    - 2024-12-12 18:00:00 BUY 0.098582 BTC @ $100857.59 (fee $9.95)
    - 2024-12-12 22:00:00 SELL 0.098582 BTC @ $100444.00 (fee $9.90)

‚úÖ ALL OPERATIONS COMPLETED SUCCESSFULLY!
