## Deploy the trained bandit policy

<a target="_blank" href="https://colab.research.google.com/github/Pappa/bandido/blob/main/notebooks/crypto/03-tf-agents-bandit-deploy.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
</a>

In [None]:
# Install tf-agents and tf-keras only when tf_agents cannot be imported
# (for example on a fresh hosted runtime); otherwise this cell is a no-op
# apart from importing tf_agents.
try:
  import tf_agents
except ImportError:
  # IPython magics: these lines only work inside a notebook kernel.
  %pip install tf-agents
  %pip install tf-keras

In [None]:
import os
os.environ['TF_USE_LEGACY_KERAS'] = '1'

import tensorflow as tf
import time
import pandas as pd
from IPython.display import display
from trading_utils import preprocess_data, SYMBOLS
from trading_env import CryptoTradingEnvironment

# Data and Model Paths
# DATA_FILEPATH is read below as a gzip-compressed CSV with a 'timestamp' column.
DATA_FILEPATH = 'data/ohlcv.csv.gz'
# POLICY_SAVE_PATH is the directory handed to tf.saved_model.load below.
POLICY_SAVE_PATH = 'policy'

# Model Hyperparameters
# CONTEXT_LENGTH is forwarded to the environment as `context_len`.
CONTEXT_LENGTH = 10
# In this notebook NUM_TRAINING_STEPS is used as the row offset at which the
# hold-out ("live") data begins.
# NOTE(review): presumably this must match the value used when the policy was
# trained, otherwise the hold-out slice overlaps the training data - confirm.
NUM_TRAINING_STEPS = 1000  # Increased for more meaningful training
# ALPHA is not referenced anywhere in the code visible here.
ALPHA = 1.0 # LinUCB exploration parameter

# --- Main Deployment Logic ---
print("--- Starting Bandit Deployment Simulation ---")

# 1. Load the trained policy
print(f"Loading trained policy from {POLICY_SAVE_PATH}...")
loaded_policy = tf.saved_model.load(POLICY_SAVE_PATH)

# 2. Load data and create a hold-out set for "live" simulation
# The CSV is indexed by its parsed 'timestamp' column before preprocessing.
df = pd.read_csv(DATA_FILEPATH, compression='gzip', parse_dates=['timestamp']).set_index('timestamp')
all_data = preprocess_data(df)
# Everything from row NUM_TRAINING_STEPS onward is treated as unseen data.
live_data_stream = all_data.iloc[NUM_TRAINING_STEPS:]

# 3. Instantiate the *actual* environment with the live data
# This handles all state, buffering, and observation logic for us.
live_env = CryptoTradingEnvironment(data=live_data_stream, symbols=SYMBOLS, context_len=CONTEXT_LENGTH)

# 4. Define helper for action names
def get_action_name(action, symbols=None):
    """Return a human-readable label such as ``"BUY BTC"`` for an action index.

    Actions are decoded two per symbol: ``action // 2`` selects the symbol,
    an even index means BUY and an odd index means SELL.

    Args:
        action: Integer action index (a plain int or a NumPy integer scalar).
        symbols: Optional sequence of symbol names to index into. Defaults to
            the module-level ``SYMBOLS``.

    Returns:
        A string of the form ``"<BUY|SELL> <symbol>"``.

    Raises:
        IndexError: If ``action // 2`` is outside the range of ``symbols``.
    """
    if symbols is None:
        # Looked up at call time so the default follows the imported SYMBOLS.
        symbols = SYMBOLS
    crypto_index, side = divmod(action, 2)
    action_type = "SELL" if side else "BUY"
    # Include the side in the label; previously it was computed but dropped,
    # so BUY and SELL actions for one symbol printed identically.
    return f"{action_type} {symbols[crypto_index]}"

# 5. Run the live simulation loop
# Repeatedly asks the loaded policy for an action, prints it, and steps the
# environment until the environment reports the last time step.
# NOTE(review): the step count printed here is an estimate; the "- 5" offset
# is not explained by anything in this notebook - confirm against the
# environment's episode-length logic.
print(f"\n--- Starting Live Inference Simulation ({len(live_data_stream) - CONTEXT_LENGTH - 5} steps) ---")

time_step = live_env.reset()
steps = 0  # number of environment steps taken so far (used for 1-based display)
while not time_step.is_last():
    # Get an action from the loaded policy
    action_step = loaded_policy.action(time_step)
    # Take the first element of the returned action array
    # (presumably the batch dimension - TODO confirm).
    action = action_step.action.numpy()[0]
    
    # In a real bot, you would execute the trade here
    print(f"Step {steps+1}: Policy chose action: {get_action_name(action)}")
    
    # Step the environment to get the next state
    time_step = live_env.step(action)
    steps += 1
    
    # time.sleep(0.1) # Simulate waiting for the next data candle

print("\nLive simulation finished.")