In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import pickle
import json
import requests
import os
from datetime import datetime, timedelta
from xgboost import XGBClassifier # Needed for loading XGBoost
from includes.technical_indicators import calculate_rsi, calculate_macd, calculate_williams_r, calculate_bollinger_b, calculate_natr, calculate_volume_roc

class CryptoPredictor:
    """Next-day UP/DOWN predictor driven by a saved model, scaler and config.

    Typical use: construct with the artifact ``prefix``, call
    :meth:`load_artifacts` once, then call :meth:`predict_next_day`.
    """

    # Columns handed to the scaler/model, in this exact order.
    FEATURE_COLS = ['Close', 'RSI', 'MACD_Line', 'Signal_Line',
                    'Williams_R', 'Bollinger_B', 'NATR', 'Vol_ROC']

    def __init__(self, prefix="btc_lstm", window_size=7):
        """
        Args:
            prefix: File-name prefix of the artifact set inside
                ``saved_artifacts/`` (``<prefix>_model.*``,
                ``<prefix>_scaler.pkl``, ``<prefix>_config.json``).
            window_size: Number of most recent rows fed to the model.
                Must match the window used at training time (e.g. 7 or 14).
        """
        self.prefix = prefix
        self.model = None          # set by load_artifacts()
        self.scaler = None         # set by load_artifacts()
        self.threshold = 0.5       # replaced by config["optimal_threshold"]
        self.window_size = window_size
        self.model_type = "keras"  # default; detected in load_artifacts()

    def load_artifacts(self):
        """Smart loader: detects whether the model is Keras, XGBoost, or Pickle.

        Looks for ``<prefix>_model.keras``, then ``.json``, then ``.pkl`` in
        ``saved_artifacts/`` and loads the first one that exists, followed by
        the scaler pickle and the JSON config. Instance state is only updated
        once every artifact has loaded, so a failure never leaves the
        predictor half-initialised.

        Raises:
            ValueError: If none of the three model files exists.
            OSError: If the scaler or config file cannot be opened.
            KeyError: If the config has no ``"optimal_threshold"`` entry.
        """
        print(f"Loading system for prefix: {self.prefix}...")

        # 1. Define paths
        folder = "saved_artifacts"
        path_keras = f"{folder}/{self.prefix}_model.keras"  # LSTM
        path_json = f"{folder}/{self.prefix}_model.json"    # XGBoost
        path_pkl = f"{folder}/{self.prefix}_model.pkl"      # Random Forest / sklearn

        # 2. Detect and load the model (into locals first, see docstring)
        if os.path.exists(path_keras):
            print(f"Detected Keras model: {path_keras}")
            model = tf.keras.models.load_model(path_keras)
            model_type = "keras"

        elif os.path.exists(path_json):
            print(f"Detected XGBoost model: {path_json}")
            model = XGBClassifier()
            model.load_model(path_json)
            model_type = "xgboost"

        elif os.path.exists(path_pkl):
            print(f"Detected Pickle model (RF/Sklearn): {path_pkl}")
            # SECURITY: pickle.load executes arbitrary code from the file.
            # Only load artifacts produced by a trusted training run.
            with open(path_pkl, "rb") as f:
                model = pickle.load(f)
            model_type = "sklearn"

        else:
            raise ValueError(f"No model found for prefix '{self.prefix}' in '{folder}/'. Checked .keras, .json, and .pkl")

        # 3. Load scaler (same pickle trust caveat as above)
        with open(f"{folder}/{self.prefix}_scaler.pkl", "rb") as f:
            scaler = pickle.load(f)

        # 4. Load config
        with open(f"{folder}/{self.prefix}_config.json", "r") as f:
            config = json.load(f)
        threshold = float(config["optimal_threshold"])

        # 5. Commit everything at once
        self.model = model
        self.model_type = model_type
        self.scaler = scaler
        self.threshold = threshold

        print(f"System loaded. Type: {self.model_type}. Threshold: {self.threshold:.4f}")

    def fetch_recent_data(self, ticker="BTC-USD", target_date_str=None,
                          lookback_days=180, timeout=10):
        """Fetch daily OHLCV bars directly from the Yahoo chart API.

        Args:
            ticker: Symbol appended to the chart endpoint URL.
            target_date_str: Last day to include, as ``YYYY-MM-DD``.
                Defaults to today (local clock).
            lookback_days: Length of the requested window in days.
            timeout: Seconds to wait for the HTTP request before giving up,
                so a stalled connection cannot hang the notebook.

        Returns:
            DataFrame indexed by ``Date`` with ``Open``, ``High``, ``Low``,
            ``Close`` and ``Volume`` columns; rows containing nulls are dropped.

        Raises:
            ValueError: On any request/parsing failure, or if no rows remain.
        """
        # 1. Calculate time window
        if target_date_str:
            target_dt = datetime.strptime(target_date_str, "%Y-%m-%d")
        else:
            target_dt = datetime.now()

        # END is 23:59:59 of the target date; START is lookback_days earlier.
        end_dt = target_dt.replace(hour=23, minute=59, second=59)
        start_dt = end_dt - timedelta(days=lookback_days)
        period1 = int(start_dt.timestamp())
        period2 = int(end_dt.timestamp())

        print(f"Fetching {ticker} data ending on {target_dt.strftime('%Y-%m-%d')}...")

        # 2. Define API endpoint
        url = f"https://query1.finance.yahoo.com/v8/finance/chart/{ticker}"
        params = {
            "period1": period1,
            "period2": period2,
            "interval": "1d",
            "events": "div,split"
        }
        headers = {'User-Agent': 'Mozilla/5.0'}

        # 3. Perform request and parse the payload
        try:
            response = requests.get(url, params=params, headers=headers, timeout=timeout)
            response.raise_for_status()
            data = response.json()

            result = data['chart']['result'][0]
            timestamps = result['timestamp']
            quote = result['indicators']['quote'][0]

            df = pd.DataFrame({
                'Date': pd.to_datetime(timestamps, unit='s'),
                'Open': quote['open'],
                'High': quote['high'],
                'Low': quote['low'],
                'Close': quote['close'],
                'Volume': quote['volume']
            })
            df = df.set_index('Date').dropna()

        except Exception as e:
            # Callers rely on ValueError; chain the cause to keep the traceback.
            raise ValueError(f"API Error: {e}") from e

        if df.empty:
            raise ValueError("API returned empty data.")

        return df

    def preprocess_live_data(self, df):
        """Calculate indicators, scale, and reshape data based on model type.

        The input frame is copied first, so the caller's DataFrame is left
        untouched. Indicator values come from the helpers imported from
        ``includes.technical_indicators``.

        Args:
            df: OHLCV frame such as the one returned by
                :meth:`fetch_recent_data`.

        Returns:
            Tuple ``(final_input, last_price, last_date)`` where
            ``final_input`` has shape ``(1, window_size, n_features)`` for
            Keras models and ``(1, window_size * n_features)`` otherwise.

        Raises:
            RuntimeError: If the scaler has not been loaded.
            ValueError: If fewer than ``window_size`` complete rows remain.
        """
        if self.scaler is None:
            raise RuntimeError("Scaler not loaded. Call load_artifacts() first.")

        # Work on a copy: the indicator columns must not leak into the caller's frame.
        df = df.copy()

        # 1. Calculate indicators
        df['RSI'] = calculate_rsi(df)
        df['MACD_Line'], df['Signal_Line'] = calculate_macd(df)
        df['Williams_R'] = calculate_williams_r(df)
        df['Bollinger_B'] = calculate_bollinger_b(df)
        df['NATR'] = calculate_natr(df)
        df['Vol_ROC'] = calculate_volume_roc(df)

        # 2. Drop rows with NaNs (e.g. indicator warm-up rows)
        df = df.dropna()

        # 3./4. Select features and take the LAST window
        last_window = df[self.FEATURE_COLS].tail(self.window_size).to_numpy()

        if len(last_window) < self.window_size:
            raise ValueError(f"Not enough data. Needed {self.window_size}, got {len(last_window)}.")

        # 5. Scale
        scaled_window = np.asarray(self.scaler.transform(last_window))

        # 6. Reshape based on model type
        # Keras/LSTM wants 3D: (1, window_size, n_features)
        # XGBoost/sklearn want 2D: (1, window_size * n_features) [flattened]
        if self.model_type == "keras":
            final_input = np.expand_dims(scaled_window, axis=0)
        else:
            final_input = scaled_window.reshape(1, -1)

        last_date = df.index[-1]
        last_price = df['Close'].iloc[-1]

        return final_input, last_price, last_date

    def predict_next_day(self, date_str=None):
        """Main execution method with probability and confidence reporting.

        Fetches recent data, builds the model input, compares the predicted
        probability of class 1 ("up") against ``self.threshold``, prints a
        report and returns the decision.

        Args:
            date_str: Optional ``YYYY-MM-DD`` end date passed to
                :meth:`fetch_recent_data`; defaults to today.

        Returns:
            ``1`` if the probability is above the threshold, else ``0``.

        Raises:
            RuntimeError: If :meth:`load_artifacts` has not been called.
            ValueError: Propagated from fetching or preprocessing.
        """
        # Fail fast, before any network traffic, if nothing is loaded.
        if self.model is None or self.scaler is None:
            raise RuntimeError("Artifacts not loaded. Call load_artifacts() before predict_next_day().")

        # 1. Get data
        df = self.fetch_recent_data(target_date_str=date_str)

        # 2. Process
        X_input, current_price, data_date = self.preprocess_live_data(df)

        # 3. Get probability (universal logic)
        if self.model_type == "keras":
            # Keras returns [[0.45]]
            prob = float(self.model.predict(X_input, verbose=0)[0][0])
        else:
            # XGBoost/sklearn return [[0.55, 0.45]] (class 0, class 1);
            # we want the probability of class 1 (up).
            prob = float(self.model.predict_proba(X_input)[0][1])

        # 4. Apply optimum threshold
        prediction = 1 if prob > self.threshold else 0
        # Emoji are written as escapes so the source stays ASCII and cannot be
        # mis-decoded again: green circle / red circle.
        direction = "UP \U0001F7E2" if prediction == 1 else "DOWN \U0001F534"

        # 5. Calculate confidence from the distance to the threshold
        distance = abs(prob - self.threshold)

        if distance < 0.02:
            confidence = "Very Low (Risky)"
        elif distance < 0.05:
            confidence = "Low"
        elif distance < 0.10:
            confidence = "Medium"
        else:
            confidence = "High \U0001F525"  # fire emoji

        # 6. Report
        print("\n--- PREDICTION REPORT ---")
        print(f"Data Date:       {data_date.strftime('%Y-%m-%d')}")
        print(f"Input Price:     ${current_price:,.2f} (Reference Level)")
        print("---------------------------")
        print(f"Probability (Up): {prob*100:.2f}%")
        print(f"Threshold Used:   {self.threshold*100:.2f}%")
        print(f"Margin:           {distance*100:.2f}% pts")
        print("---------------------------")
        print(f"Prediction:       {direction}")
        print(f"Confidence:       {confidence}")

        return prediction


2025-12-15 14:47:39.499395: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# --- Usage Example ---
if __name__ == "__main__":
    # The prefix selects which saved artifact set is loaded; it must match
    # the model you trained, e.g. "btc_lstm", "btc_xgboost", "btc_rf",
    # "btc_svm" or "btc_knn".
    bot = CryptoPredictor(prefix="btc_knn")

    # Load model + scaler + config, then print the next-day report.
    bot.load_artifacts()
    bot.predict_next_day()

Loading system for prefix: btc_knn...
Detected Pickle model (RF/Sklearn): saved_artifacts/btc_knn_model.pkl
System loaded. Type: sklearn. Threshold: 0.4800
Fetching BTC-USD data ending on 2025-12-15...

--- PREDICTION REPORT ---
Data Date:       2025-12-15
Input Price:     $86,206.04 (Reference Level)
---------------------------
Probability (Up): 40.66%
Threshold Used:   48.00%
Margin:           7.34% pts
---------------------------
Prediction:       DOWN 🔴
Confidence:       Medium
