In [None]:
import os
os.environ['TF_USE_LEGACY_KERAS'] = '1'

import tensorflow as tf
import time
import pandas as pd
from IPython.display import display
from trading_utils import preprocess_data, SYMBOLS
from trading_env import CryptoTradingEnvironment

# Data and Model Paths
DATA_FILEPATH = 'data/ohlcv.csv.gz'  # gzip-compressed CSV, read with pandas below
POLICY_SAVE_PATH = 'policy'  # directory handed to tf.saved_model.load below

# Model Hyperparameters
CONTEXT_LENGTH = 10  # forwarded to the environment as `context_len`
# Used below as the row offset at which the hold-out ("live") slice begins.
# NOTE(review): presumably this must equal the number of rows consumed by the
# training run that produced the saved policy -- confirm against the trainer.
NUM_TRAINING_STEPS = 1000
# NOTE(review): not referenced anywhere else in this cell; presumably kept in
# sync with the training notebook -- confirm whether it is still needed here.
ALPHA = 1.0 # LinUCB exploration parameter

# --- Main Deployment Logic ---
print("--- Starting Bandit Deployment Simulation ---")

# 1. Restore the previously exported policy from disk
print(f"Loading trained policy from {POLICY_SAVE_PATH}...")
loaded_policy = tf.saved_model.load(POLICY_SAVE_PATH)

# 2. Read the raw CSV, index it by timestamp, then preprocess it
df = pd.read_csv(
    DATA_FILEPATH,
    compression='gzip',
    parse_dates=['timestamp'],
)
df = df.set_index('timestamp')
all_data = preprocess_data(df)

# Rows from NUM_TRAINING_STEPS onward form the hold-out "live" stream
live_data_stream = all_data.iloc[NUM_TRAINING_STEPS:]

# 3. Build the real trading environment on top of the hold-out stream,
# so the simulation loop only has to call reset()/step() on it.
live_env = CryptoTradingEnvironment(
    data=live_data_stream,
    symbols=SYMBOLS,
    context_len=CONTEXT_LENGTH,
)

# 4. Define helper for action names
def get_action_name(action, symbols=None):
    """Return a human-readable label such as ``"BUY BTC"`` for an action index.

    Actions are laid out in pairs per symbol: the symbol index is
    ``action // 2``, an even action means BUY and an odd action means SELL.

    Args:
        action: Integer action index (a plain ``int`` or a NumPy integer).
        symbols: Optional sequence of symbol names to index into. Defaults to
            the module-level ``SYMBOLS`` when omitted.

    Returns:
        The string ``"<BUY|SELL> <symbol>"``.

    Raises:
        IndexError: If ``action // 2`` is outside the range of the symbol list.
    """
    names = SYMBOLS if symbols is None else symbols
    crypto_index, is_sell = divmod(action, 2)
    action_type = "SELL" if is_sell else "BUY"
    # The original computed the action type but dropped it from the label.
    return f"{action_type} {names[crypto_index]}"

# 5. Drive the environment with the loaded policy until the episode ends
expected_steps = len(live_data_stream) - CONTEXT_LENGTH - 5
print(f"\n--- Starting Live Inference Simulation ({expected_steps} steps) ---")

steps = 0
time_step = live_env.reset()
while True:
    if time_step.is_last():
        break

    # Ask the policy for its decision on the current observation
    action_step = loaded_policy.action(time_step)
    action = action_step.action.numpy()[0]

    # A real bot would place the order at this point
    print(f"Step {steps+1}: Policy chose action: {get_action_name(action)}")

    # Advance the environment to obtain the next time step
    time_step = live_env.step(action)
    steps += 1

    # time.sleep(0.1) # Simulate waiting for the next data candle

print("\nLive simulation finished.")

2025-07-14 16:49:59.342821: I tensorflow/core/util/port.cc:113] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-07-14 16:49:59.368379: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-07-14 16:49:59.368396: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-07-14 16:49:59.369070: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-07-14 16:49:59.373217: I tensorflow/core/platform/cpu_feature_guar

Loading trained policy from policy...


2025-07-14 16:50:00.592170: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-07-14 16:50:00.599213: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2025-07-14 16:50:00.602288: I external/local_xla/xla/stream_executor/cuda/cuda_executor.cc:901] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-

Loading data from data/ohlcv.csv.gz...
Processing symbols: ['BTC', 'DOGE', 'XRP', 'ETH', 'SOL']
Data prepared. Shape: (13093, 15)

--- Starting Live Inference Simulation (100 steps) ---
Step 10: Policy chose action: BUY ETH
Step 11: Policy chose action: SELL DOGE
Step 12: Policy chose action: BUY XRP
Step 13: Policy chose action: BUY BTC
Step 14: Policy chose action: SELL BTC
Step 15: Policy chose action: SELL ETH
Step 16: Policy chose action: BUY SOL
Step 17: Policy chose action: BUY DOGE
Step 18: Policy chose action: BUY DOGE
Step 19: Policy chose action: SELL BTC
Step 20: Policy chose action: BUY BTC
Step 21: Policy chose action: SELL XRP
Step 22: Policy chose action: BUY XRP
Step 23: Policy chose action: BUY SOL
Step 24: Policy chose action: BUY BTC
Step 25: Policy chose action: SELL SOL
Step 26: Policy chose action: BUY BTC
Step 27: Policy chose action: SELL BTC
Step 28: Policy chose action: SELL XRP
Step 29: Policy chose action: SELL DOGE
Step 30: Policy chose action: BUY BTC
Ste