In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

df_es = pd.DataFrame()
for i in range(1, 6):
    df_temp = pd.read_csv(f'ES_part_{i}.csv')
    df_es = pd.concat([df_es, df_temp], ignore_index=True)

df_surprise = pd.read_csv('US_economic_releases_events.csv')

df_surprise.drop(columns=['S', 'Month', 'Surv(A)', 'Surv(H)', 'Surv(L)',], inplace=True)

df_surprise.drop(columns=['Flag', 'Country/Region', 'Day', 'C', 'Category','Subcategory', 'Std Dev', 'Period', 'Actual'])

# Dropping all rows for which surprise column has NaN or 0 value
df_surprise.dropna(subset=['Surprise'], inplace=True)
df_surprise = df_surprise[df_surprise['Surprise'] != 0]

df_surprise.replace("--", pd.NA, inplace=True)

# Redoing dropping all rows for which surprise column has NaN or 0 value
df_surprise.dropna(subset=['Surprise'], inplace=True)
df_surprise = df_surprise[df_surprise['Surprise'] != 0]

# Convert 'Surprise' column to float
df_surprise['Surprise'] = pd.to_numeric(df_surprise['Surprise'], errors='coerce')

# Again filtering out rows where 'Surprise' is 0 or NaN
df_surprise = df_surprise[df_surprise['Surprise'] != 0].dropna(subset=['Surprise'])

df_surprise.dropna(subset=['Time'], inplace=True)

# Wincorsizing to get results between 0.5% and 99.5% percentile for Surprise values
lower_bound = df_surprise['Surprise'].quantile(0.005)
upper_bound = df_surprise['Surprise'].quantile(0.995)

df_surprise = df_surprise[(df_surprise['Surprise'] >= lower_bound) & (df_surprise['Surprise'] <= upper_bound)]

# Step 1: Ensure columns are strings
df_surprise['Date'] = df_surprise['Date'].astype(str)
df_surprise['Time'] = df_surprise['Time'].astype(str)

# Step 2: Handle missing times (if any)
df_surprise['Time'] = df_surprise['Time'].fillna('00:00:00')

# Step 3: Combine Date and Time into DateTime
df_surprise['DateTime'] = pd.to_datetime(
    df_surprise['Date'].str[:10] + ' ' + df_surprise['Time'],
    format='%Y-%m-%d %H:%M:%S',
    errors='coerce'  # Converts invalid parsing to NaT instead of raising error
)

# First we drop Date and DateTime and change the column name for Unnamed: 0 to Date

df_surprise.drop(columns=['Date', 'DateTime'], inplace=True)
df_surprise.rename(columns={'Unnamed: 0': 'Date'}, inplace=True)

# Now let's again create a DateTime column with the new Date column and check for number of NaN values
# Step 1: Ensure columns are strings
df_surprise['Date'] = df_surprise['Date'].astype(str)
df_surprise['Time'] = df_surprise['Time'].astype(str)

# Step 2: Handle missing times (if any)
df_surprise['Time'] = df_surprise['Time'].fillna('00:00:00')

# Step 3: Combine Date and Time into DateTime
df_surprise['DateTime'] = pd.to_datetime(
    df_surprise['Date'].str[:10] + ' ' + df_surprise['Time'],
    format='%Y-%m-%d %H:%M:%S',
    errors='coerce'  # Converts invalid parsing to NaT instead of raising error
)

# Step 1: Ensure columns are strings
df_es['Date'] = df_es['Date'].astype(str)
df_es['Time'] = df_es['Time'].astype(str)

# Step 2: Handle missing times (if any) and pad with seconds
df_es['Time'] = df_es['Time'].fillna('00:00')  # Fill missing times
df_es['Time'] = df_es['Time'] + ':00'  # Add seconds to make HH:MM:SS format

# Step 3: Combine Date and Time into DateTime with correct format
df_es['DateTime'] = pd.to_datetime(
    df_es['Date'] + ' ' + df_es['Time'],
    format='%m/%d/%Y %H:%M:%S',  # Matches MM/DD/YYYY date and HH:MM:SS time
    errors='coerce'
)

# Now we create the merged dataframe for our analysis - to allow us to match times of surprise with the price of the future at the time
df_combined = pd.merge(
    df_es,
    df_surprise,
    on='DateTime',
    how='outer',
    suffixes=('_es', '_surprise'),
    indicator=True  # this shows the source of each using suffix
)

# Some surprise announcements might have come before the starting point for the data on the futures, these would be meaningless for our analysis and should thus
# be removed by removing all rows with NaN values for Open

df_combined.dropna(subset=['Open'], inplace=True)


  df_surprise = pd.read_csv('US_economic_releases_events.csv')
  df_surprise.replace("--", pd.NA, inplace=True)


In [4]:
df_combined

Unnamed: 0,Date_es,Time_es,Open,High,Low,Close,Volume,DateTime,Date_surprise,Period,...,Subcategory,R,Day,Surv(M),# Ests.,Std Dev,Surprise,Country/Region,Flag,_merge
2,09/10/1997,00:01:00,0.00,0.00,0.00,0.00,0.0,1997-09-10 00:01:00,,,...,,,,,,,,,,left_only
3,09/10/1997,00:02:00,0.00,0.00,0.00,0.00,0.0,1997-09-10 00:02:00,,,...,,,,,,,,,,left_only
4,09/10/1997,00:03:00,0.00,0.00,0.00,0.00,0.0,1997-09-10 00:03:00,,,...,,,,,,,,,,left_only
5,09/10/1997,00:04:00,0.00,0.00,0.00,0.00,0.0,1997-09-10 00:04:00,,,...,,,,,,,,,,left_only
6,09/10/1997,00:05:00,0.00,0.00,0.00,0.00,0.0,1997-09-10 00:05:00,,,...,,,,,,,,,,left_only
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9695761,12/19/2024,15:56:00,5941.75,5942.00,5941.25,5941.75,318.0,2024-12-19 15:56:00,,,...,,,,,,,,,,left_only
9695762,12/19/2024,15:57:00,5941.75,5942.00,5941.25,5941.50,386.0,2024-12-19 15:57:00,,,...,,,,,,,,,,left_only
9695763,12/19/2024,15:58:00,5941.50,5941.50,5940.75,5941.00,484.0,2024-12-19 15:58:00,,,...,,,,,,,,,,left_only
9695764,12/19/2024,15:59:00,5940.75,5941.25,5940.75,5941.00,6462.0,2024-12-19 15:59:00,,,...,,,,,,,,,,left_only


ANN

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam # Keep Adam import
from tensorflow.keras.callbacks import EarlyStopping
# from tensorflow.keras.wrappers.scikit_learn import KerasClassifier # Older TF versions
# Use scikeras.wrappers for newer TF versions if using GridSearchCV
from sklearn.model_selection import train_test_split # We will split manually by time
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report, accuracy_score

# --- FIX for Eager Execution Error ---
# Explicitly enable eager execution which might be needed in some environments/versions
# Or when certain operations trigger issues in graph mode during fit/evaluate
try:
    tf.config.run_functions_eagerly(True)
    print("TensorFlow Eager Execution enabled.")
except AttributeError:
    print("tf.config.run_functions_eagerly is not available (likely TF 1.x). Ensure TF 2.x is installed for default eager execution.")

# Print TensorFlow version
print(f"TensorFlow Version: {tf.__version__}")
# --------------------------------------


# Install ta if you don't have it: pip install ta
# Or use another library or manual calculations for technical indicators
try:
    import ta
except ImportError:
    print("Technical Analysis library 'ta' not found. Installing...")
    # Attempt to install 'ta' using pip within the environment if possible
    # Note: Direct pip install within the script might not work in all environments.
    # If this fails, please install it manually using: pip install ta
    import subprocess
    import sys
    try:
        subprocess.check_call([sys.executable, "-m", "pip", "install", "ta"])
        import ta
        print("Successfully installed and imported 'ta'.")
    except Exception as e:
        print(f"Failed to install 'ta': {e}")
        print("Please install 'ta' manually: pip install ta")
        # As a fallback, provide dummy functions or stop execution if 'ta' is critical
        # For this example, we'll stop if 'ta' cannot be loaded or installed.
        sys.exit("Required library 'ta' could not be loaded or installed.")
try:
    import matplotlib.pyplot as plt
    PLOTTING_ENABLED = True
except ImportError:
    print("\nMatplotlib not found. Cannot generate plots. Install with: pip install matplotlib")
    PLOTTING_ENABLED = False


# --- Configuration ---
# Define the lookback periods for indicators
RSI_PERIOD = 14
SMA_PERIOD = 50
ADX_PERIOD = 20
CORR_PERIOD = 24
MOM_PERIOD = 20
# Define the prediction/holding windows after a surprise
PREDICTION_WINDOWS = [10, 20, 40] # minutes - Updated to list

# Define ANN parameters (based on the paper/prompt)
N_NEURONS = 100
ACTIVATION_HIDDEN = 'relu'
ACTIVATION_OUTPUT = 'sigmoid'
# OPTIMIZER = Adam() # REMOVE optimizer instance creation from global scope
LOSS_FUNCTION = 'binary_crossentropy'
METRICS = ['accuracy']
EPOCHS = 20 # Max epochs
BATCH_SIZE = 32 # Standard batch size
EARLY_STOPPING_PATIENCE = 5 # Stop if validation loss doesn't improve for 5 epochs

# Define data split percentages
TRAIN_PCT = 0.30
VALIDATE_PCT = 0.20
# TEST_PCT = 0.50 # Remainder

# --- 1. Load Data ---
# Load the dataset provided by the user.
# The user specified using 'df_combined' which should be loaded from the CSV.
try:
    # Assuming the CSV file 'combined_datalast 10percdent.csv' is in the accessible path
    df_full = pd.read_csv('combined_datalast 10percdent.csv')
    print("Successfully loaded 'combined_datalast 10percdent.csv'.")
except FileNotFoundError:
    print("Error: 'combined_datalast 10percdent.csv' not found.")
    print("Please ensure the file is uploaded or in the correct directory.")
    # Exit or handle error appropriately
    # For demonstration, create a dummy dataframe if file not found
    # NOTE: Replace this with actual error handling if file is essential
    print("Creating dummy data for demonstration purposes.")
    dates = pd.to_datetime(pd.date_range(start='2023-01-01 09:00', periods=50000, freq='T'))
    price = 100 + np.random.randn(50000).cumsum() * 0.1
    volume = np.random.randint(100, 1000, 50000)
    df_full = pd.DataFrame({
        'DateTime': dates, # Ensure dummy data has 'DateTime' column
        'Open': price - np.random.rand(50000) * 0.1,
        'High': price + np.random.rand(50000) * 0.1,
        'Low': price - np.random.rand(50000) * 0.1,
        'Close': price,
        'Volume': volume,
        'Ticker': 'Dummy', # Assume a single instrument for simplicity
        'Event': np.nan,
        'Surprise': np.nan
    })
    # Add some dummy surprises
    surprise_indices = np.random.choice(df_full.index, size=100, replace=False)
    # Need to reset index if DateTime is not the index yet in dummy data
    if not isinstance(df_full.index, pd.DatetimeIndex):
         df_full = df_full.set_index('DateTime')
    df_full.loc[surprise_indices, 'Event'] = 'Dummy Event'
    df_full.loc[surprise_indices, 'Surprise'] = np.random.randn(100) * 0.5 # Standardized surprises


# --- 2. Preprocessing & Feature Engineering ---

# *** Explicitly use 'DateTime' column ***
datetime_col = 'DateTime'
print(f"Using specified column '{datetime_col}' for datetime index.")

# Check if the specified datetime column exists
if datetime_col not in df_full.columns:
    # If 'DateTime' is already the index (e.g. from dummy data creation)
    if df_full.index.name == datetime_col:
         print(f"Column '{datetime_col}' is already the index.")
         # No action needed if it's already the index
    else:
        raise ValueError(f"Specified datetime column '{datetime_col}' not found in the DataFrame. Available columns: {df_full.columns.tolist()}")
else:
    # Convert the specified column to datetime and set as index
    try:
        df_full[datetime_col] = pd.to_datetime(df_full[datetime_col])
        df_full = df_full.set_index(datetime_col)
        print(f"Column '{datetime_col}' successfully converted and set as index.")
    except Exception as e:
        raise ValueError(f"Could not parse the '{datetime_col}' column as datetime. Error: {e}")

df_full = df_full.sort_index() # Ensure chronological order

# Select necessary columns (adjust if names differ)
# Assuming standard OHLCV columns exist
ohlcv_cols = ['Open', 'High', 'Low', 'Close', 'Volume']
surprise_cols = ['Ticker', 'Event', 'Surprise'] # Keep Ticker if multiple instruments exist

# Check for required columns
required_cols = ohlcv_cols + surprise_cols
missing_cols = [col for col in required_cols if col not in df_full.columns]
if missing_cols:
    raise ValueError(f"Missing required columns: {missing_cols}")

# Ensure numeric types for OHLCV and Surprise
for col in ohlcv_cols:
    df_full[col] = pd.to_numeric(df_full[col], errors='coerce')
df_full['Surprise'] = pd.to_numeric(df_full['Surprise'], errors='coerce')

# Drop rows with NaN in essential OHLCV columns
df_full.dropna(subset=ohlcv_cols, inplace=True)

print(f"Data shape after initial loading & cleaning: {df_full.shape}")
print(f"Time range: {df_full.index.min()} to {df_full.index.max()}")

# Calculate Technical Indicators using the 'ta' library
print("Calculating technical indicators...")
# RSI
df_full['RSI'] = ta.momentum.RSIIndicator(close=df_full['Close'], window=RSI_PERIOD).rsi()
# SMA
df_full['SMA'] = ta.trend.SMAIndicator(close=df_full['Close'], window=SMA_PERIOD).sma_indicator()
# ADX
adx_indicator = ta.trend.ADXIndicator(high=df_full['High'], low=df_full['Low'], close=df_full['Close'], window=ADX_PERIOD)
df_full['ADX'] = adx_indicator.adx()
# Momentum
df_full['Momentum'] = ta.momentum.ROCIndicator(close=df_full['Close'], window=MOM_PERIOD).roc()
# Correlation (e.g., with a benchmark or rolling self-correlation - let's use rolling return correlation)
# Calculate rolling return correlation with itself (measures serial correlation)
# Or potentially correlate with another asset if available. Using % change correlation.
df_full['Return'] = df_full['Close'].pct_change()
# Simple rolling std dev as a proxy if correlation is complex/unclear
df_full['Volatility'] = df_full['Return'].rolling(window=CORR_PERIOD).std()
# Using Volatility as 'Correlation' feature placeholder - replace if a specific correlation is needed
df_full['Correlation'] = df_full['Volatility']

# Previous Day's Price Differences
# Ensure we have daily data points or resample/forward-fill previous day's data
df_daily = df_full['Close'].resample('D').last()
df_daily_open = df_full['Open'].resample('D').first()
df_daily_high = df_full['High'].resample('D').max()
df_daily_low = df_full['Low'].resample('D').min()

# Calculate previous day's values - shift(1) gets the prior day
prev_close = df_daily.shift(1)
prev_open = df_daily_open.shift(1)
prev_high = df_daily_high.shift(1)
prev_low = df_daily_low.shift(1)

# Map previous day's values back to the minute data
df_full['Date'] = df_full.index.date
df_full['Prev_Close'] = df_full['Date'].map(prev_close)
df_full['Prev_Open'] = df_full['Date'].map(prev_open)
df_full['Prev_High'] = df_full['Date'].map(prev_high)
df_full['Prev_Low'] = df_full['Date'].map(prev_low)

# Calculate difference features
df_full['Prev_OC_Diff'] = df_full['Prev_Open'] - df_full['Prev_Close']
df_full['Prev_CH_Diff'] = df_full['Prev_Close'] - df_full['Prev_High']
df_full['Prev_CL_Diff'] = df_full['Prev_Close'] - df_full['Prev_Low']

# ---- Define Target Variables for Multiple Windows ----
print("Calculating target variables for multiple windows...")
for window in PREDICTION_WINDOWS:
    # Calculate the closing price 'window' minutes into the future
    df_full[f'Future_Close_{window}'] = df_full['Close'].shift(-window)
    # Calculate the price change percentage
    df_full[f'Future_Return_{window}'] = (df_full[f'Future_Close_{window}'] - df_full['Close']) / df_full['Close']
    # Create binary target: 1 if price goes up, 0 otherwise
    df_full[f'Target_{window}'] = (df_full[f'Future_Return_{window}'] > 0).astype(int)

# Filter for Surprise Events
# A surprise event is where the 'Surprise' column is not NaN
df_surprises = df_full.dropna(subset=['Surprise']).copy()

# --- Loop through each prediction window ---
results_summary = {} # Dictionary to store results for each window

for window in PREDICTION_WINDOWS:
    print(f"\n--- Processing for {window}-minute window ---")

    # Define Features (X) and Target (y) for the surprise events
    # Features: 9 indicators + Surprise value
    feature_cols = [
        'RSI', 'SMA', 'ADX', 'Volume', 'Correlation',
        'Prev_OC_Diff', 'Prev_CH_Diff', 'Prev_CL_Diff', 'Momentum',
        'Surprise' # Include the surprise value itself
    ]
    target_col = f'Target_{window}'
    future_return_col = f'Future_Return_{window}'

    # Select only the rows corresponding to surprises and the relevant columns
    # Include all feature cols, the specific target, and the specific future return
    relevant_cols = feature_cols + [target_col, future_return_col]
    df_model_data = df_surprises[relevant_cols].copy()

    # Drop rows with NaNs introduced by indicators or THIS SPECIFIC target calculation
    df_model_data.dropna(inplace=True)

    print(f"Shape of data for {window}-min window modeling: {df_model_data.shape}")

    if df_model_data.empty:
        print(f"Warning: No valid data points remaining for {window}-min window after NaN removal. Skipping this window.")
        results_summary[window] = {'status': 'Skipped - No data'}
        continue

    # Prepare X and y
    X = df_model_data[feature_cols]
    y = df_model_data[target_col]
    # Keep future returns for backtesting P&L calculation
    future_returns_for_backtest = df_model_data[future_return_col]
    timestamps_for_backtest = df_model_data.index # Keep timestamps for yearly analysis

    # --- 3. Data Splitting (Chronological) ---
    n_total = len(X)
    n_train = int(n_total * TRAIN_PCT)
    n_val = int(n_total * VALIDATE_PCT)
    n_test = n_total - n_train - n_val

    print(f"Total samples: {n_total}, Training: {n_train}, Validation: {n_val}, Test: {n_test}")

    if n_train == 0 or n_test == 0 or n_val == 0:
         print(f"Warning: Not enough data for the specified train/validation/test splits for {window}-min window. Skipping.")
         results_summary[window] = {'status': f'Skipped - Not enough data for split ({n_total} samples)'}
         continue

    X_train, X_val, X_test = X[:n_train], X[n_train:n_train+n_val], X[n_train+n_val:]
    y_train, y_val, y_test = y[:n_train], y[n_train:n_train+n_val], y[n_train+n_val:]
    timestamps_test = timestamps_for_backtest[n_train+n_val:]
    future_returns_test = future_returns_for_backtest[n_train+n_val:]

    print("Data split into training, validation, and test sets chronologically.")

    # --- 4. Scaling ---
    scaler = MinMaxScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    X_test_scaled = scaler.transform(X_test)

    print("Features scaled using MinMaxScaler.")

    # --- 5. ANN Model Definition ---
    def build_classifier(input_dim, window_suffix):
        model = Sequential(name=f"Macro_Surprise_ANN_{window_suffix}min")
        model.add(Input(shape=(input_dim,))) # Define input shape explicitly
        model.add(Dense(N_NEURONS, activation=ACTIVATION_HIDDEN))
        # Optional: Add dropout for regularization if needed
        # model.add(Dropout(0.2))
        model.add(Dense(1, activation=ACTIVATION_OUTPUT)) # Output layer for binary classification

        # *** FIX: Create a NEW optimizer instance HERE ***
        optimizer_instance = Adam()
        # *** --------------------------------------- ***

        model.compile(optimizer=optimizer_instance, # Use the new instance
                      loss=LOSS_FUNCTION,
                      metrics=METRICS)
        return model

    # Get input dimensions from scaled training data
    input_dim = X_train_scaled.shape[1]
    # Build a new model instance for this window
    tf.keras.backend.clear_session() # Clear previous models from memory
    model = build_classifier(input_dim, window)
    print(f"\nModel Summary for {window}-min window:")
    model.summary() # Print model architecture

    # --- 6. Model Training ---
    # Implement early stopping
    early_stopping = EarlyStopping(monitor='val_loss', # Monitor validation loss
                                 patience=EARLY_STOPPING_PATIENCE,
                                 restore_best_weights=True, # Restore weights from the best epoch
                                 verbose=1)

    print(f"Starting model training for {window}-min window...")
    # *** This is where the error occurred ***
    history = model.fit(X_train_scaled, y_train,
                        epochs=EPOCHS,
                        batch_size=BATCH_SIZE,
                        validation_data=(X_val_scaled, y_val),
                        callbacks=[early_stopping],
                        verbose=1) # Set verbose=1 or 2 for progress

    print(f"Model training finished for {window}-min window.")

    # --- 7. Evaluation ---
    print(f"\nEvaluating model on test data for {window}-min window...")
    loss, accuracy = model.evaluate(X_test_scaled, y_test, verbose=0)
    print(f"Test Loss: {loss:.4f}")
    print(f"Test Accuracy: {accuracy:.4f}")

    # Generate predictions for classification report
    y_pred_proba = model.predict(X_test_scaled)
    y_pred_binary = (y_pred_proba > 0.5).astype(int) # Threshold at 0.5

    print("\nClassification Report:")
    # Use try-except for report generation in case of issues with few samples
    try:
        report = classification_report(y_test, y_pred_binary, target_names=['Down (0)', 'Up (1)'], output_dict=True, zero_division=0)
        print(classification_report(y_test, y_pred_binary, target_names=['Down (0)', 'Up (1)'], zero_division=0))
    except Exception as e:
        print(f"Could not generate classification report: {e}")
        report = {"Error": str(e)}


    # --- 8. Backtesting Simulation ---
    print(f"\nSimulating trading strategy on test data for {window}-min window...")

    # Create a DataFrame for backtesting results
    backtest_results = pd.DataFrame({
        'Timestamp': timestamps_test,
        'Actual_Direction': y_test,
        'Predicted_Probability': y_pred_proba.flatten(),
        'Predicted_Direction': y_pred_binary.flatten(),
        'Future_Return': future_returns_test # Use the correct future return for this window
    })

    # Check if backtest_results is empty before proceeding
    if backtest_results.empty:
        print("No trades to simulate in backtest.")
        total_return = 0
        yearly_returns_dict = {}
    else:
        # Trading Logic:
        # Go Long if Predicted_Direction is 1 (Prob > 0.5)
        # Go Short if Predicted_Direction is 0 (Prob < 0.5)
        # P&L Calculation: Uses the future return corresponding to THIS window
        backtest_results['Strategy_Return'] = np.where(
            backtest_results['Predicted_Direction'] == 1, # Condition for Long
            backtest_results['Future_Return'],            # P&L if Long
            -backtest_results['Future_Return']            # P&L if Short
        )

        # Calculate Cumulative Returns
        backtest_results['Cumulative_Strategy_Return'] = (1 + backtest_results['Strategy_Return']).cumprod() - 1
        total_return = backtest_results['Cumulative_Strategy_Return'].iloc[-1]

        # Calculate Yearly Returns (if data spans multiple years)
        backtest_results['Year'] = backtest_results['Timestamp'].dt.year
        yearly_returns = backtest_results.groupby('Year')['Strategy_Return'].apply(lambda x: (1 + x).prod() - 1)
        yearly_returns_dict = {}
        if len(yearly_returns) > 0:
            print("\nApproximate Yearly Returns:")
            for year, ret in yearly_returns.items():
                print(f"  {year}: {ret:.2%}")
                yearly_returns_dict[year] = ret
        else:
            print("\nCould not calculate yearly returns (test data might not span a full year or more).")


    # --- 9. Results Storage ---
    print(f"\n--- Backtesting Results ({window}-min window) ---")
    print(f"Total Strategy Return on Test Set: {total_return:.2%}")

    # Store results
    results_summary[window] = {
        'status': 'Completed',
        'test_accuracy': accuracy,
        'classification_report': report,
        'total_return': total_return,
        'yearly_returns': yearly_returns_dict,
        'n_test_trades': len(backtest_results)
    }

    # Plotting cumulative returns (optional, requires matplotlib)
    if PLOTTING_ENABLED and not backtest_results.empty:
        plt.figure(figsize=(12, 6))
        plt.plot(backtest_results['Timestamp'], backtest_results['Cumulative_Strategy_Return'], label=f'Strategy Cumulative Return ({window} min)')
        plt.title(f'ANN Strategy Cumulative Returns ({window}-min window)')
        plt.xlabel('Date')
        plt.ylabel('Cumulative Return')
        plt.legend()
        plt.grid(True)
        # plt.show() # Display the plot
        # Instead of showing, save to file
        plot_filename = f'cumulative_returns_plot_{window}min.png'
        plt.savefig(plot_filename)
        print(f"\nCumulative returns plot saved to '{plot_filename}'.")
        plt.close() # Close the plot figure


# --- Final Summary ---
print("\n\n--- Overall Summary Across Windows ---")
for window, results in results_summary.items():
    print(f"\n--- Window: {window} minutes ---")
    if results['status'] == 'Completed':
        print(f"  Status: {results['status']}")
        print(f"  Test Accuracy: {results['test_accuracy']:.4f}")
        print(f"  Total Strategy Return: {results['total_return']:.2%}")
        print(f"  Number of Test Trades: {results['n_test_trades']}")
        print( "  Yearly Returns:")
        if results.get('yearly_returns'): # Use .get for safety
            for year, ret in results['yearly_returns'].items():
                print(f"    {year}: {ret:.2%}")
        else:
            print("    N/A")
        # Optionally print parts of the classification report again if needed
        # print(f"  Precision (Up): {results['classification_report']['Up (1)']['precision']:.2f}")
        # print(f"  Recall (Up): {results['classification_report']['Up (1)']['recall']:.2f}")
    else:
        print(f"  Status: {results['status']}")


print("\n--- Code and Strategy Explanation ---")
print("""
The code trains and evaluates separate Artificial Neural Network (ANN) models for predicting market direction over three different time horizons following a macroeconomic surprise: 10, 20, and 40 minutes.

**Error Fix 1**: Added `tf.config.run_functions_eagerly(True)` near the start to resolve a `NotImplementedError` related to NumPy conversion during model fitting.
**Error Fix 2**: Moved the `Adam()` optimizer instantiation *inside* the `build_classifier` function (within the processing loop). This ensures that each new model compiled gets a fresh optimizer instance, preventing the `ValueError: Unknown variable...` caused by optimizer state conflicts between models for different time windows.

1.  **Data Loading & Preparation**: Loads data, specifically looks for the 'DateTime' column to use as the time index, checks for its existence, parses it, sets it as the index, and sorts the data chronologically. Cleans data by ensuring numeric types and handling NaNs in OHLCV.
2.  **Feature Engineering**:
    * **Technical Indicators & Previous Day Features**: Calculated exactly as before (RSI, SMA, ADX, etc.). These features capture the market state *before* the surprise.
    * **Multiple Target Variables**: Instead of one target, the code now calculates three target variables (`Target_10`, `Target_20`, `Target_40`) and their corresponding future returns (`Future_Return_10`, etc.). Each target indicates whether the price went up (1) or down (0) over its specific time window (10, 20, or 40 minutes) after the surprise.
3.  **Processing Loop**: The core logic (data preparation for modeling, splitting, scaling, training, evaluation, backtesting) is placed inside a loop that iterates through the specified `PREDICTION_WINDOWS` (10, 20, 40).
4.  **Window-Specific Modeling**: Within each loop iteration:
    * The data (`df_model_data`) is prepared using the features and the *specific* target variable (`Target_window`) and future return (`Future_Return_window`) for the current window. Crucially, rows with NaNs resulting from the lookahead required for *that specific window* are dropped.
    * The data is split chronologically (Train/Val/Test) and scaled.
    * A *new instance* of the ANN model (with the same architecture: 100 neurons, ReLU/Sigmoid) is built. Crucially, a **new `Adam()` optimizer instance** is created and used when compiling this specific model. `tf.keras.backend.clear_session()` is also used to help ensure separation between models.
    * The model is evaluated on the test set for accuracy and other classification metrics.
5.  **Window-Specific Backtesting**:
    * The model trained for a specific window (e.g., 10 minutes) makes predictions on the test set.
    * The trading simulation (Long if P(Up) > 0.5, else Short) uses the predictions from the window-specific model.
    * The Profit & Loss (P&L) for each simulated trade is calculated using the actual price return over *that specific window* (e.g., `Future_Return_10` for the 10-minute model).
    * Cumulative and yearly returns are calculated for each window's strategy.
6.  **Results**: The script outputs the evaluation metrics (accuracy, classification report) and backtesting results (total and yearly returns) separately for each of the 10, 20, and 40-minute strategies. Plots for cumulative returns for each strategy are also saved if Matplotlib is installed. This allows comparing the effectiveness of the strategy across different holding periods.
""")

TensorFlow Eager Execution enabled.
TensorFlow Version: 2.19.0
Successfully loaded 'combined_datalast 10percdent.csv'.
Using specified column 'DateTime' for datetime index.
Column 'DateTime' successfully converted and set as index.
Data shape after initial loading & cleaning: (9695665, 30)
Time range: 1997-09-10 00:01:00 to 2024-12-19 16:00:00
Calculating technical indicators...
Calculating target variables for multiple windows...

--- Processing for 10-minute window ---
Shape of data for 10-min window modeling: (23037, 12)
Total samples: 23037, Training: 6911, Validation: 4607, Test: 11519
Data split into training, validation, and test sets chronologically.
Features scaled using MinMaxScaler.

Model Summary for 10-min window:


Starting model training for 10-min window...
Epoch 1/20
[1m  1/216[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m22s[0m 107ms/step - accuracy: 0.4062 - loss: 0.7485



[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 27ms/step - accuracy: 0.5017 - loss: 0.6997 - val_accuracy: 0.5405 - val_loss: 0.6906
Epoch 2/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 26ms/step - accuracy: 0.5379 - loss: 0.6891 - val_accuracy: 0.5179 - val_loss: 0.6933
Epoch 3/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 27ms/step - accuracy: 0.5462 - loss: 0.6884 - val_accuracy: 0.5220 - val_loss: 0.6930
Epoch 4/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 27ms/step - accuracy: 0.5464 - loss: 0.6885 - val_accuracy: 0.5257 - val_loss: 0.6907
Epoch 5/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 26ms/step - accuracy: 0.5321 - loss: 0.6896 - val_accuracy: 0.4886 - val_loss: 0.6989
Epoch 6/20
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 27ms/step - accuracy: 0.5430 - loss: 0.6887 - val_accuracy: 0.5264 - val_loss: 0.6910
Epoch 6: early stopping
Restoring

Starting model training for 20-min window...
Epoch 1/20




ValueError: Unknown variable: <Variable path=Macro_Surprise_ANN_20min/dense/kernel, shape=(10, 100), dtype=float32, value=[[-0.15912457 -0.07778201 -0.11036867 -0.2084291   0.01118602  0.07709253
   0.14044595  0.12906677 -0.141143   -0.10201868 -0.06379631 -0.18756813
  -0.14059012 -0.21918686  0.03818512  0.16077071 -0.04443537  0.11455986
  -0.1637147  -0.0924841  -0.00245337 -0.07138312 -0.01098305  0.05244443
  -0.19269794 -0.02330846 -0.22716634  0.01932731  0.16841102 -0.05899431
   0.02472252  0.08458424 -0.1998723  -0.0263721  -0.22157125 -0.05893917
   0.05949757  0.22655344 -0.22641698  0.23168838 -0.09623949 -0.22026505
  -0.13087073 -0.03688776  0.21862108  0.1650708  -0.2318468   0.15136436
  -0.00342833 -0.15685278  0.00820594  0.1165579  -0.21798173  0.08096775
  -0.23066443  0.03088737  0.04200688 -0.01644334 -0.13526514  0.14054757
   0.13499653  0.01369642 -0.06750844 -0.0062526   0.18552127  0.17012522
   0.21003205  0.0147178  -0.20078377  0.07293499  0.11151463  0.03271702
   0.11758497  0.03443545 -0.06683709 -0.01768745 -0.09401286  0.13644883
  -0.06640488  0.21335784 -0.14673236 -0.12463578  0.1301502   0.11335486
   0.02112219  0.02707371  0.07507277  0.11227897 -0.09508224 -0.18217453
   0.18073803  0.09973073 -0.1731191  -0.14247146  0.15628809 -0.01864626
  -0.07992411 -0.09096669 -0.05395937  0.21948147]
 [ 0.13559228 -0.00771844  0.14471346 -0.20546767 -0.07091577 -0.21540184
   0.1747159  -0.19240655  0.18048912 -0.17660683  0.01847893  0.0543007
   0.15602896  0.13838509  0.14074147  0.17107537  0.11527759  0.08038232
   0.01192632  0.11650595  0.0837391   0.03178585  0.06711549 -0.00212792
  -0.00511411 -0.02846995  0.05585214  0.10849312 -0.18795533  0.21815857
   0.04996616  0.02368692  0.12712184  0.10630691  0.04822662  0.18679953
  -0.14401549  0.11091459 -0.01371852  0.18439075  0.17881414  0.05175316
   0.13164878  0.03545773 -0.07839975  0.09533465 -0.13743196  0.11968371
  -0.17247747  0.16446665  0.0487569   0.0923793  -0.02993563  0.16644487
   0.06486964 -0.15953533 -0.19999419 -0.03827459  0.15468487  0.19738874
   0.09284508 -0.19570568  0.1586439  -0.18535724 -0.20493501 -0.00268479
  -0.12323263 -0.03516316 -0.03069018 -0.13938832 -0.1508306   0.22956136
  -0.06292506 -0.18274552 -0.10880611  0.09140843 -0.09881933  0.02417767
   0.23238042 -0.0051385   0.04901773  0.11084837 -0.07644445 -0.17934173
   0.1724534  -0.14102343 -0.1362525   0.05480069 -0.1045333   0.16841593
  -0.17096697  0.09395584 -0.03351735 -0.19997147 -0.22202466  0.03001586
   0.15614495  0.1312384   0.02940318  0.15969989]
 [-0.05464867  0.02677473  0.04102314 -0.14022589  0.00128438 -0.14870018
   0.14159358  0.18561852  0.2003665   0.22459555 -0.02575347  0.02145523
  -0.19514641  0.16763553  0.05474836  0.03946164 -0.0506099   0.17941359
   0.04226443  0.22492841 -0.04534438 -0.18811955 -0.17822242  0.13778305
   0.14946419  0.20567876 -0.17816645  0.00216706 -0.14564198 -0.02845319
  -0.06880736  0.12799323 -0.03237112  0.16160455  0.01673695  0.20655465
   0.10986838 -0.07395376 -0.04244344 -0.03587139  0.03480908  0.08645818
  -0.00497714  0.05853626  0.03668848  0.01817513  0.12389898  0.17376184
  -0.19488454 -0.0047465  -0.11849494  0.14003524 -0.05729431 -0.0409071
   0.22976679  0.2312907   0.14489466  0.15175274  0.20209643  0.07005382
   0.06378052 -0.11236078 -0.05749728  0.2161353  -0.08392134  0.11488819
   0.07814968  0.13069266  0.09547764  0.22193632 -0.00640807  0.12734246
   0.09927565  0.20066014 -0.09092431  0.21239564 -0.07493456  0.10277998
   0.11715275 -0.04520395  0.228118   -0.2195057   0.02778584 -0.15917045
  -0.11938285 -0.15692851 -0.20488897 -0.10226347  0.01182893  0.00567484
   0.2274867  -0.15205956  0.07476005 -0.20046978  0.22501215 -0.21692726
   0.10890749 -0.02048652 -0.13754633  0.09965417]
 [ 0.09210199  0.21284243  0.02937502  0.09001991 -0.0243697  -0.07979147
  -0.21101806  0.18328574  0.18184546  0.13808697  0.20232502  0.10345784
  -0.10760771  0.08945686  0.07069623  0.17707291 -0.14850199 -0.18262196
   0.16466877  0.18116918 -0.11810059  0.04921582  0.20815292  0.17077681
  -0.19872151 -0.14083807  0.02640373  0.13164973 -0.03194416  0.13787821
  -0.04950571 -0.2065189   0.14738807 -0.0250237   0.0432469  -0.17287132
  -0.14099193  0.07403588 -0.17146516 -0.12542    -0.16405243 -0.18947041
   0.04947871 -0.14807035 -0.1906438   0.11741886 -0.1127897  -0.05467752
  -0.05535205  0.10098866 -0.15678507 -0.19598833  0.0396201   0.08508757
  -0.2042303  -0.17959388 -0.05159448  0.08899742  0.1094906   0.09802973
   0.02158013  0.10539633  0.02696419  0.11996722 -0.16204244 -0.14175484
  -0.11953397 -0.10612427 -0.03415614 -0.17043999  0.01458555 -0.08374639
  -0.02259649  0.08932418 -0.06710865 -0.05263585 -0.03229769 -0.20522685
  -0.17956208 -0.07702571 -0.05621675  0.08161247 -0.02182335  0.14283812
   0.03290519 -0.16879818 -0.0120354   0.2065899   0.22301722 -0.13832635
   0.12246907  0.22806221 -0.22174753  0.1678831   0.08206806  0.22869867
   0.01441528  0.18605089 -0.20249222  0.04053897]
 [-0.16375607  0.12920886  0.1847415  -0.01392049  0.05680129  0.1377964
   0.2267324  -0.15053982 -0.13256748  0.22310257  0.23223662  0.1714352
   0.05783755  0.0889616  -0.07658622  0.07645676  0.01607884  0.02375942
   0.18133917  0.14065433 -0.22695252  0.10581619 -0.22442532 -0.16969335
  -0.03516516 -0.11743256  0.23005316 -0.09028603  0.19393882  0.20043859
   0.17934167  0.16098258  0.08913034  0.03851715  0.22108737 -0.02282296
  -0.05521658 -0.1663138  -0.18997344  0.04073817  0.07298076 -0.18409069
  -0.12630513  0.08088675 -0.03294778  0.06311822 -0.11114038 -0.13198504
   0.1431196   0.1111156   0.13505566 -0.0224167   0.02620733  0.19270417
   0.18960583 -0.19150527  0.1491518   0.02696252  0.14968574  0.03248355
  -0.01135786  0.15695217  0.11359075 -0.20903382 -0.12329589  0.16221032
  -0.21975127  0.07127821  0.03664166  0.21847892  0.04753456  0.00340973
  -0.07645959 -0.13045049  0.20201418 -0.02855247 -0.23061827  0.18334082
  -0.0839043  -0.11232909  0.02229831 -0.06059189 -0.05243757 -0.0933508
   0.05281135  0.2326298  -0.06721812  0.10993636 -0.10021695  0.05017027
   0.15127572  0.12637103 -0.229132   -0.05083597 -0.14445537 -0.03472048
  -0.15900145  0.03788733 -0.1341978  -0.05315416]
 [-0.00631446  0.09030619 -0.02687792  0.11319    -0.10396038  0.00192071
   0.05291176 -0.13681974  0.1660234  -0.1605707  -0.20753556  0.03217635
  -0.06066707  0.23351422 -0.16535679  0.21083006  0.18323976  0.00349948
  -0.20260069  0.16143984  0.17423686  0.16093627 -0.19655105  0.18229878
   0.18466184 -0.20498976 -0.16927417 -0.09331377  0.17615914  0.01787671
   0.1887291  -0.22702235 -0.00745228  0.10351503  0.0175094  -0.14653552
   0.11437899 -0.21195737  0.21987137 -0.14786398 -0.03248249 -0.09212394
   0.05487874 -0.21103242 -0.12955032  0.09930411 -0.14766353 -0.06369179
  -0.23322745  0.01718113 -0.17535682  0.09357125  0.1114493  -0.01980518
   0.04371768  0.1777555  -0.2070896   0.17000699 -0.05589724 -0.17180783
   0.06153464 -0.2286931   0.07990801  0.14929843 -0.16648474 -0.16193904
   0.18115693  0.18624872  0.17287126 -0.08367567  0.03226426 -0.1759754
  -0.21374923 -0.16105413 -0.08236134 -0.18300666 -0.18701275  0.05647355
   0.03049502  0.04554525 -0.12888062  0.0197663   0.22654504  0.06321999
  -0.12883908 -0.17516677  0.15608358  0.00493236 -0.17235826 -0.14290072
  -0.03234217  0.2039764   0.18754506  0.07246649  0.15664124 -0.1260808
  -0.00220224  0.13035333 -0.21649668  0.06035563]
 [-0.10628608  0.22102031 -0.19985554 -0.20289464  0.15609688  0.13244525
   0.18801576  0.08511215 -0.15938555 -0.22497351 -0.17841268 -0.02000463
  -0.11448885 -0.08729392  0.01092114  0.13958487  0.07945693 -0.06774905
   0.13801831  0.02491853 -0.22530381 -0.1344809  -0.11023464 -0.19229402
   0.17304432  0.1178453  -0.07435881 -0.10454132  0.21786469 -0.11652099
  -0.16059771  0.1528179   0.04274061  0.08246368 -0.21208721 -0.20586531
  -0.02562858 -0.074665    0.01876688 -0.04154144  0.10691696  0.23284474
  -0.13929355  0.00771393  0.03655523  0.05134568 -0.11090267 -0.13470364
   0.15798375 -0.12169456 -0.20463845 -0.1394057   0.00933892  0.13980058
   0.17282683  0.15136337 -0.01612039 -0.0748689  -0.11514752 -0.20601599
   0.07936835 -0.13135582 -0.13918407  0.18585554  0.06851047 -0.08504596
   0.14189655  0.125469   -0.11264426 -0.16862251 -0.05353107  0.04031926
   0.15361238 -0.00738908 -0.11820672  0.05626023 -0.22147988 -0.11218598
   0.21505609 -0.02812393 -0.07780254  0.09866741 -0.01664045 -0.08088753
   0.14844209  0.23220608 -0.02970237  0.0626581   0.1015      0.11883694
  -0.1222671  -0.18356231 -0.1695067   0.16631526 -0.15677905  0.03313833
   0.06374329 -0.14307049 -0.00248845  0.21843916]
 [-0.10632578 -0.22327787  0.0079924   0.10463154 -0.06797758  0.17857444
  -0.21734305  0.1662525   0.1616348  -0.10080458  0.07664502 -0.02558598
  -0.03153844  0.17064723 -0.20951547 -0.1571661   0.07545358 -0.11955569
  -0.12290193 -0.04065235  0.16034156 -0.00812726 -0.04549991  0.14282784
  -0.1992736   0.18129742  0.16089946 -0.0321186   0.05664757  0.20627406
  -0.04450291 -0.1391378  -0.15802951 -0.20482928 -0.18631826 -0.20803677
   0.10360652 -0.23181473  0.12579814  0.15612033 -0.06692289  0.13527036
   0.04136452 -0.05644605 -0.14420736 -0.11110279 -0.13136217  0.05290347
   0.11985654 -0.22334796  0.04418018  0.03657806 -0.19049607  0.19886658
   0.1759724  -0.07000865  0.18472001 -0.19326112 -0.04905452 -0.14967723
  -0.02028644 -0.13099217 -0.12756202  0.00553541  0.17835554 -0.00807413
  -0.11737415 -0.11546775 -0.18748254  0.14848384 -0.08499184  0.01973993
   0.0359005   0.1781426   0.16608527  0.12669274 -0.11589483  0.1675601
   0.01894522  0.12290031 -0.08643296  0.18975312 -0.10086066  0.0873746
   0.11606389 -0.15357992  0.16645288  0.20536935  0.00885759  0.21139753
   0.16621038 -0.17852071 -0.00658603  0.01212795 -0.14206186  0.0966095
   0.01357485 -0.19708216 -0.20309532 -0.21773772]
 [-0.06486481  0.05218521 -0.13839038 -0.13702828 -0.22543551 -0.006915
   0.02982739 -0.15398857 -0.06282721  0.0748708  -0.07084128  0.193679
  -0.18511975 -0.13225816  0.18669194  0.15770963  0.2038579  -0.0020543
  -0.18930162 -0.1984573  -0.06376809 -0.19166887 -0.00418343  0.1549879
  -0.19832449 -0.05571103 -0.15945932 -0.02101249 -0.09980519 -0.1889883
   0.20500144 -0.00082844  0.19289666  0.23000097 -0.1477935   0.08229157
  -0.02464934  0.04785255  0.07375965 -0.18092965 -0.11949371  0.13513634
   0.06472364  0.01492043 -0.1277952   0.00723116  0.16772267  0.08338255
   0.05270085 -0.17231348  0.21956217  0.01540855  0.00983733  0.06116688
   0.0047456  -0.22790581 -0.05244631  0.21094981 -0.2196374   0.18554634
  -0.00134017  0.2050533  -0.12951134 -0.08303092 -0.14277962  0.18292385
   0.14953253 -0.1715404   0.1728319   0.0597693  -0.1761058   0.22304851
   0.20793632 -0.16834003 -0.13829404  0.19003397 -0.16253129 -0.09105472
   0.12481952  0.03161049  0.15667063 -0.16200837 -0.12481407  0.19992632
  -0.13205114  0.1643554   0.22365072  0.21727192  0.18474942 -0.23043291
  -0.05532905  0.18539944  0.19972807 -0.1287079   0.20807952  0.13193527
  -0.02723691  0.19615236 -0.2313829   0.0622068 ]
 [ 0.01106252  0.15737334 -0.17479348  0.06740901  0.1578005   0.01815134
  -0.04865305 -0.20071334  0.21867955  0.05215353  0.17317232  0.17973351
   0.23094618 -0.16438407  0.08258814 -0.05786172  0.14917153  0.09700653
   0.05009645  0.14533249 -0.05471799  0.07131526 -0.22184727 -0.13670191
   0.00294149  0.11098245 -0.17498402 -0.17736724  0.13629377  0.03480884
  -0.02087356 -0.08967981 -0.16013236 -0.23135556  0.06220022  0.09227568
   0.08539107  0.04654318  0.20976582  0.11166623 -0.02619042 -0.02605739
  -0.19160761  0.11887124  0.08498466  0.16664928 -0.02459745  0.2191506
   0.13644165 -0.10082251  0.21851316 -0.10301535 -0.2216173  -0.17417975
   0.04422092 -0.10870031 -0.1364127  -0.08466598  0.12196168 -0.00212568
  -0.21041518 -0.0673379  -0.05438946  0.07771239 -0.07983029 -0.20288862
   0.09353432 -0.1673393   0.14667183 -0.02547662  0.0549455  -0.08526857
   0.15315479 -0.22939989 -0.1490013  -0.16630305  0.209108   -0.11497162
   0.20121324  0.23312256  0.02933443 -0.10594396  0.05978817  0.10426518
  -0.06032395 -0.11357599 -0.02587514 -0.15372357 -0.21716437 -0.11090244
  -0.22098112  0.22349748 -0.06296408 -0.03760178 -0.2253913  -0.0175364
   0.22352648 -0.06220736  0.12262836  0.03443128]]>. This optimizer can only be called for the variables it was originally built with. When working with a new set of variables, you should recreate a new optimizer instance.