In [None]:
# !pip install --upgrade pandas-ta scikit-learn numpy

import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, metrics
import numpy as np
import pandas as pd
import pandas_ta as ta
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import os
import math


In [None]:
class Config:
    """Single place for the data, windowing and training settings of this notebook."""

    # ---- Data ----
    # CSV files to load; the names must match the uploaded files exactly.
    DATA_FILES = ['AAPL.csv', 'AMZN.csv', 'MSFT.csv']
    # The forecasting target is the 'returns' column rather than the raw price.
    TARGET_COL = 'returns'
    # Inputs fed to the main forecaster: Close, RSI, MACD, MACDs, MACDh.
    NUM_FEATURES = 5

    # ---- Windowing ----
    # Window length in time steps; chosen to be divisible by 4.
    # NOTE(review): the decoder built in build_nvae determines the output
    # length, so confirm the two agree before changing this value.
    SEQUENCE_LENGTH = 16

    # ---- Model / optimisation (stable starting hyperparameters) ----
    LATENT_DIM = 64
    BATCH_SIZE = 16
    EPOCHS = 40
    LEARNING_RATE = 1e-4
    DIFFUSION_STEPS = 100

In [None]:
def _prepare_stock_frame(path, feature_cols, target_cols):
    """Load one CSV and return its chronologically ordered feature/target rows.

    Column names are lower-cased, rows are sorted by parsed date, daily
    returns and the RSI/MACD indicators are computed, and only the rows in
    which every required column is present are kept.
    """
    df = pd.read_csv(path)
    df.columns = df.columns.str.lower()
    # Parse before sorting so the order is chronological rather than lexical.
    df['date'] = pd.to_datetime(df['date'])
    # Rebuild a unique 0..n-1 index: computing the indicators on a frame with
    # repeated index labels is what produced the recorded
    # "Cannot get left slice bound for non-unique label" failure.
    df = df.sort_values(by='date').reset_index(drop=True)
    df['returns'] = df['close'].pct_change()
    df.ta.rsi(close='close', length=14, append=True)
    df.ta.macd(close='close', fast=12, slow=26, signal=9, append=True)
    # reindex() yields all-NaN columns for any indicator that was not
    # appended, so such a frame ends up empty here instead of raising KeyError.
    needed = df.reindex(columns=feature_cols + target_cols)
    return needed.dropna().reset_index(drop=True)


def load_and_preprocess_data(file_list, sequence_length):
    """
    Build scaled (input window, target window) pairs for predicting returns.

    Each file is processed on its own, so returns and indicators never mix
    rows from different stocks. The scalers are then fitted once on the
    rows of all files together and applied to each stock.

    Args:
        file_list: paths of the CSV files; each needs 'date' and 'close'
            columns (matched case-insensitively).
        sequence_length: number of time steps in both the input window and
            the target window that directly follows it.

    Returns:
        A tuple (x, y, target_scaler):
          x - float32 array, (n_windows, sequence_length, 5) scaled features
              in the order close, RSI, MACD, MACD signal, MACD histogram.
          y - float32 array, (n_windows, sequence_length, 1) scaled returns
              for the sequence_length steps after each input window.
          target_scaler - the fitted MinMaxScaler for returns (range -1..1),
              needed to map predictions back to returns.
        Files with fewer than 2 * sequence_length usable rows contribute no
        windows; if none qualify, x and y are empty arrays.

    Raises:
        ValueError: if file_list is empty or no file yields any usable row.
        FileNotFoundError: if a listed file does not exist.

    NOTE(review): the scalers are fitted on every row, including rows that
    later end up in a test split — confirm whether that look-ahead is
    acceptable for the intended evaluation.
    """
    if not file_list:
        raise ValueError("The DATA_FILES list in the Config class is empty.")

    for path in file_list:
        if not os.path.exists(path):
            raise FileNotFoundError(f"The file '{path}' was not found. Please ensure it has been uploaded.")

    feature_cols = ['close', 'RSI_14', 'MACD_12_26_9', 'MACDs_12_26_9', 'MACDh_12_26_9']
    target_cols = ['returns']

    # Read and process each file exactly once; the frames are reused for both
    # scaler fitting and window extraction.
    frames = [_prepare_stock_frame(path, feature_cols, target_cols) for path in file_list]

    usable = [frame for frame in frames if not frame.empty]
    if not usable:
        raise ValueError("No usable rows remain after computing indicators; "
                         "the input files are too short or contain only missing values.")
    combined = pd.concat(usable, ignore_index=True)

    feature_scaler = MinMaxScaler(feature_range=(0, 1)).fit(combined[feature_cols])
    # Returns are small numbers centered around 0, so they are scaled to -1..1.
    target_scaler = MinMaxScaler(feature_range=(-1, 1)).fit(combined[target_cols])

    all_x, all_y = [], []
    window_span = 2 * sequence_length  # input window plus target window
    for frame in frames:
        if len(frame) < window_span:
            continue

        scaled_features = feature_scaler.transform(frame[feature_cols])
        scaled_target = target_scaler.transform(frame[target_cols])

        # "+ 1" keeps the final window, whose target ends on the last row.
        for start in range(len(frame) - window_span + 1):
            split = start + sequence_length
            all_x.append(scaled_features[start:split])
            all_y.append(scaled_target[split:start + window_span])

    return np.array(all_x, dtype=np.float32), np.array(all_y, dtype=np.float32), target_scaler

In [None]:
def cosine_beta_schedule(timesteps, s=0.008):
    """Return `timesteps` noise variances (betas) from a squared-cosine curve.

    The cumulative signal level is cos^2 evaluated on an evenly spaced grid
    of timesteps + 1 points (offset by `s`), normalised by its first value.
    Each beta is one minus the ratio of consecutive cumulative levels,
    clipped to [0.0001, 0.9999].
    """
    horizon = float(timesteps)
    grid = tf.linspace(0.0, horizon, timesteps + 1)
    cumulative = tf.cos(((grid / horizon) + s) / (1 + s) * math.pi * 0.5) ** 2
    cumulative = cumulative / cumulative[0]
    step_ratio = cumulative[1:] / cumulative[:-1]
    betas = 1 - step_ratio
    return tf.clip_by_value(betas, 0.0001, 0.9999)

def get_diffusion_variables(beta_schedule):
    """Precompute the per-step lookup tables derived from a beta schedule.

    Returns a dict with the betas themselves, the running product of
    (1 - beta), and the square roots of that product and of its complement.
    """
    cumulative_alpha = tf.math.cumprod(1.0 - beta_schedule, axis=0)
    tables = {}
    tables["betas"] = beta_schedule
    tables["alphas_cumprod"] = cumulative_alpha
    tables["sqrt_alphas_cumprod"] = tf.sqrt(cumulative_alpha)
    tables["sqrt_one_minus_alphas_cumprod"] = tf.sqrt(1.0 - cumulative_alpha)
    return tables

def forward_diffusion(x0, t, diff_vars):
    """Noise `x0` to diffusion step `t` and return (noisy sample, noise used).

    `t` holds one step index per batch element. The looked-up coefficients
    are reshaped to (batch, 1, 1), so they broadcast over a rank-3 `x0`
    whose first axis is the batch.
    """
    # Draw the noise first so the order of random ops stays unchanged.
    noise = tf.random.normal(shape=tf.shape(x0))

    def _coefficient(key):
        # Pick the value for each sample's step and make it broadcastable.
        return tf.reshape(tf.gather(diff_vars[key], t), [-1, 1, 1])

    signal_scale = _coefficient("sqrt_alphas_cumprod")
    noise_scale = _coefficient("sqrt_one_minus_alphas_cumprod")
    noisy = signal_scale * x0 + noise_scale * noise
    return noisy, noise

In [None]:
# --- TensorFlow/Keras D-Va Model Architecture ---

class SE(layers.Layer):
    """Squeeze-and-excitation gate that rescales each channel of a sequence.

    The input is averaged over time, passed through a two-layer bottleneck
    (in_channels // r units, then in_channels units with a sigmoid), and the
    resulting per-channel weights multiply the input at every time step.
    """

    def __init__(self, in_channels, r=4, **kwargs):
        super().__init__(**kwargs)
        self.squeeze = layers.GlobalAveragePooling1D()
        bottleneck = layers.Dense(in_channels // r, activation='relu')
        gate = layers.Dense(in_channels, activation='sigmoid')
        self.excitation = models.Sequential([bottleneck, gate])

    def call(self, inputs):
        pooled = self.squeeze(inputs)
        channel_weights = self.excitation(pooled)
        # Insert a time axis so the weights broadcast across all steps.
        channel_weights = tf.expand_dims(channel_weights, axis=1)
        return inputs * channel_weights

In [None]:
# Encoder Residual Cell
class EncoderResidualCell(layers.Layer):
    """Residual block: two BN -> swish -> Conv1D stages plus an SE gate.

    The first convolution applies `stride`. The skip path is a 1x1
    convolution whenever the stride or channel count changes the shape,
    and an identity mapping otherwise.
    """

    def __init__(self, out_channels, stride=1, **kwargs):
        super().__init__(**kwargs)
        self.out_channels = out_channels
        self.stride = stride
        # Both paths are created in build(), once the input width is known.
        self.conv_layers = None
        self.shortcut = None

    def build(self, input_shape):
        main_path = [
            layers.BatchNormalization(),
            layers.Activation('swish'),
            layers.Conv1D(self.out_channels, 3, padding='same', strides=self.stride),
            layers.BatchNormalization(),
            layers.Activation('swish'),
            layers.Conv1D(self.out_channels, 3, padding='same'),
            SE(self.out_channels),
        ]
        self.conv_layers = models.Sequential(main_path)

        shape_changes = self.stride > 1 or input_shape[-1] != self.out_channels
        if shape_changes:
            # Project the input so it can be added to the main-path output.
            self.shortcut = layers.Conv1D(self.out_channels, 1, strides=self.stride)
        else:
            self.shortcut = layers.Lambda(lambda x: x)

    def call(self, inputs):
        transformed = self.conv_layers(inputs)
        skipped = self.shortcut(inputs)
        return transformed + skipped

In [None]:
# Decoder Residual Cell
class DecoderResidualCell(layers.Layer):
    """Upsampling residual block used by the decoder.

    The input is first repeated `scale_factor` times along the time axis.
    The upsampled tensor then goes through two BN -> swish -> Conv1D stages
    with an SE gate, and a 1x1 convolution of the same upsampled tensor is
    added as the skip connection.
    """

    def __init__(self, out_channels, scale_factor=2, **kwargs):
        super().__init__(**kwargs)
        self.out_channels = out_channels
        self.scale_factor = scale_factor
        self.upsample = layers.UpSampling1D(size=self.scale_factor)
        # Created in build().
        self.conv_layers = None
        self.shortcut = None

    def build(self, input_shape):
        main_path = [
            layers.BatchNormalization(),
            layers.Activation('swish'),
            layers.Conv1D(self.out_channels, 3, padding='same'),
            layers.BatchNormalization(),
            layers.Activation('swish'),
            layers.Conv1D(self.out_channels, 3, padding='same'),
            SE(self.out_channels),
        ]
        self.conv_layers = models.Sequential(main_path)
        # The skip path always projects to out_channels.
        self.shortcut = layers.Conv1D(self.out_channels, 1)

    def call(self, inputs):
        upsampled = self.upsample(inputs)
        transformed = self.conv_layers(upsampled)
        skipped = self.shortcut(upsampled)
        return transformed + skipped

In [None]:
# Sampling layer for the VAE reparameterization trick
class Sampling(layers.Layer):
    """Reparameterisation step: z = z_mean + exp(0.5 * z_log_var) * epsilon.

    `inputs` is the pair (z_mean, z_log_var); epsilon is standard normal
    noise drawn with the (batch, dim) shape of z_mean.
    """

    def call(self, inputs):
        z_mean, z_log_var = inputs
        mean_shape = tf.shape(z_mean)
        epsilon = tf.random.normal(shape=(mean_shape[0], mean_shape[1]))
        std = tf.exp(0.5 * z_log_var)
        return z_mean + std * epsilon

In [None]:
def build_nvae(sequence_length, latent_dim, num_features):
    """Build the encoder/decoder pair of a small convolutional VAE.

    Encoder: (sequence_length, num_features) -> three residual cells
    (32, 64, 128 channels; the last two use stride 2) -> global average
    pooling -> z_mean, z_log_var and a sampled z, each of size latent_dim.

    Decoder: latent vector -> Dense(128) -> one time step -> three
    upsampling residual cells -> 1x1 convolution, giving an output of shape
    (sequence_length, 1).

    The decoder grows from a single time step, upsampling by
    sequence_length // 4, then 2, then 2. The first factor used to be a
    fixed 4, which produced 16 steps regardless of sequence_length.

    Args:
        sequence_length: number of time steps; must be a positive multiple of 4.
        latent_dim: size of the latent vector.
        num_features: number of channels in the encoder input.

    Returns:
        (encoder, decoder) Keras models. The encoder outputs
        [z_mean, z_log_var, z].

    Raises:
        ValueError: if sequence_length is not a positive multiple of 4.
    """
    if sequence_length < 4 or sequence_length % 4 != 0:
        raise ValueError(
            f"sequence_length must be a positive multiple of 4, got {sequence_length}."
        )
    # The last two decoder cells each double the length, so the first one
    # has to supply the remaining factor.
    first_scale = sequence_length // 4

    encoder_input = layers.Input(shape=(sequence_length, num_features))
    x = EncoderResidualCell(32)(encoder_input)
    x = EncoderResidualCell(64, stride=2)(x)
    x = EncoderResidualCell(128, stride=2)(x)
    x = layers.GlobalAveragePooling1D()(x)
    z_mean = layers.Dense(latent_dim, name="z_mean")(x)
    z_log_var = layers.Dense(latent_dim, name="z_log_var")(x)
    z = Sampling()([z_mean, z_log_var])
    encoder = models.Model(encoder_input, [z_mean, z_log_var, z], name="encoder")

    latent_input = layers.Input(shape=(latent_dim,))
    x = layers.Dense(128)(latent_input)
    x = layers.Reshape((1, 128))(x)
    x = DecoderResidualCell(64, scale_factor=first_scale)(x)
    x = DecoderResidualCell(32, scale_factor=2)(x)
    x = DecoderResidualCell(16, scale_factor=2)(x)
    # Collapse the channels to a single value per time step.
    decoder_output = layers.Conv1D(1, 1)(x)
    decoder = models.Model(latent_input, decoder_output, name="decoder")
    return encoder, decoder

In [None]:
# The complete D-Va Model with custom training logic
class DVaModel(models.Model):
    """Forecaster VAE plus denoiser VAE, trained with a custom step.

    One encoder/decoder pair maps a noised feature window to a target
    window; a second pair (single input channel) takes that prediction and
    estimates the noise, which is subtracted at prediction time.
    """

    def __init__(self, sequence_length, latent_dim, num_features, diff_vars, **kwargs):
        super().__init__(**kwargs)
        self.sequence_length = sequence_length
        self.latent_dim = latent_dim
        self.num_features = num_features
        self.diff_vars = diff_vars

        forecaster = build_nvae(sequence_length, latent_dim, num_features)
        self.nvae_encoder, self.nvae_decoder = forecaster
        denoiser = build_nvae(sequence_length, latent_dim, 1)
        self.denoise_encoder, self.denoise_decoder = denoiser

        # Running means reported in the training logs.
        self.total_loss_tracker = metrics.Mean(name="total_loss")
        self.mse_loss_tracker = metrics.Mean(name="mse_loss")
        self.kl_loss_tracker = metrics.Mean(name="kl_loss")
        self.dsm_loss_tracker = metrics.Mean(name="dsm_loss")

    @property
    def metrics(self):
        """Trackers updated by train_step, in reporting order."""
        return [
            self.total_loss_tracker,
            self.mse_loss_tracker,
            self.kl_loss_tracker,
            self.dsm_loss_tracker,
        ]

    def train_step(self, data):
        """Run one optimisation step on an (inputs, targets) batch.

        Inputs and targets are noised to the same randomly drawn diffusion
        step per sample. The total loss is
        mse + 0.1 * kl + 0.5 * dsm, where mse compares the forecast with the
        noised target, kl regularises the forecaster's latent, and dsm
        compares the denoiser output with the noise added to the target.
        """
        inputs, targets = data
        num_steps = len(self.diff_vars['betas'])

        with tf.GradientTape() as tape:
            batch_size = tf.shape(inputs)[0]
            t = tf.random.uniform([batch_size], minval=0, maxval=num_steps, dtype=tf.int32)
            noisy_inputs, _ = forward_diffusion(inputs, t, self.diff_vars)
            noisy_targets, target_noise = forward_diffusion(targets, t, self.diff_vars)

            z_mean, z_log_var, z = self.nvae_encoder(noisy_inputs, training=True)
            forecast = self.nvae_decoder(z, training=True)

            denoiser_input = tf.reshape(forecast, [-1, self.sequence_length, 1])
            denoise_latent = self.denoise_encoder(denoiser_input, training=True)[2]
            noise_estimate = self.denoise_decoder(denoise_latent, training=True)

            mse_loss_fn = tf.keras.losses.MeanSquaredError()
            loss_mse = mse_loss_fn(noisy_targets, forecast)
            kl_terms = 1 + z_log_var - tf.square(z_mean) - tf.exp(z_log_var)
            loss_kl = -0.5 * tf.reduce_mean(tf.reduce_sum(kl_terms, axis=1))
            loss_dsm = mse_loss_fn(target_noise, noise_estimate)
            total_loss = loss_mse + 0.1 * loss_kl + 0.5 * loss_dsm

        weights = self.trainable_weights
        grads = tape.gradient(total_loss, weights)
        self.optimizer.apply_gradients(zip(grads, weights))

        tracked_values = (
            (self.total_loss_tracker, total_loss),
            (self.mse_loss_tracker, loss_mse),
            (self.kl_loss_tracker, loss_kl),
            (self.dsm_loss_tracker, loss_dsm),
        )
        for tracker, value in tracked_values:
            tracker.update_state(value)

        return {tracker.name: tracker.result() for tracker in self.metrics}

    def predict_step(self, x):
        """Forecast from `x`, then subtract the denoiser's noise estimate."""
        forecast_latent = self.nvae_encoder(x, training=False)[2]
        y_pred_noisy = self.nvae_decoder(forecast_latent, training=False)
        denoise_latent = self.denoise_encoder(y_pred_noisy, training=False)[2]
        noise_est = self.denoise_decoder(denoise_latent, training=False)
        return y_pred_noisy - noise_est

In [None]:
# --- Main Training and Evaluation Script (CORRECTED) ---
if __name__ == '__main__':
    # Train one D-Va model on all configured stocks, then report plots and
    # error metrics for the last 20% of each stock's windows.
    cfg = Config()
    tf.random.set_seed(42)
    np.random.seed(42)

    # --- Part 1: Training ---
    try:
        x, y, scaler = load_and_preprocess_data(cfg.DATA_FILES, cfg.SEQUENCE_LENGTH)
        # NOTE(review): this chronological split is taken over the windows of
        # all stocks concatenated in file order, while evaluation below uses
        # the last 20% of each stock separately. The test windows of the
        # earlier files may therefore have been seen in training — confirm
        # whether a per-stock split is intended.
        x_train, _, y_train, _ = train_test_split(x, y, test_size=0.2, shuffle=False)
        train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(1024).batch(cfg.BATCH_SIZE)
    except Exception as e:
        print(f"Error during data loading: {e}")
        # exit() does not stop a notebook cell, so execution used to continue
        # and fail later with "NameError: name 'train_dataset' is not defined".
        # Re-raise so the real error ends the run.
        raise

    betas = cosine_beta_schedule(timesteps=cfg.DIFFUSION_STEPS)
    diff_vars = get_diffusion_variables(betas)
    dva_model = DVaModel(cfg.SEQUENCE_LENGTH, cfg.LATENT_DIM, cfg.NUM_FEATURES, diff_vars)
    dva_model.compile(optimizer=optimizers.AdamW(learning_rate=cfg.LEARNING_RATE))

    print("--- Starting Training (Predicting Returns) ---")
    dva_model.fit(train_dataset, epochs=cfg.EPOCHS)

    # --- Part 2: Evaluation ---
    print("\n--- Evaluating on Test Set for Each Stock Separately ---")

    for stock_file in cfg.DATA_FILES:
        # Best effort per stock: a failure is reported and the loop moves on.
        try:
            # splitext keeps names that contain dots intact (e.g. 'BRK.B.csv').
            stock_name = os.path.splitext(os.path.basename(stock_file))[0]
            print(f"\n--- Results for {stock_name} ---")

            single_stock_list = [stock_file]
            # NOTE(review): this call re-fits the scalers on this stock alone,
            # so the inputs are scaled differently from the training data.
            # The loader does not expose its feature scaler, so this cannot
            # be corrected here — confirm and address in the loader.
            x_stock, y_stock, stock_scaler = load_and_preprocess_data(single_stock_list, cfg.SEQUENCE_LENGTH)

            if len(x_stock) == 0:
                print(f"Skipping {stock_file}: Not enough data.")
                continue

            _, x_test_stock, _, y_test_stock = train_test_split(x_stock, y_stock, test_size=0.2, shuffle=False)

            if len(x_test_stock) == 0:
                print(f"Skipping {stock_file}: No test data available after split.")
                continue

            predicted_y_scaled = dva_model.predict_step(x_test_stock).numpy()
            actual_y_scaled = y_test_stock

            # The scalers were fitted on a single column, so values are passed
            # as one column and reshaped back to (n_windows, SEQUENCE_LENGTH).
            # Predictions are in the scale the model was trained on, so they
            # are inverted with the training scaler. The actual values were
            # scaled by this stock's scaler and are inverted with that one.
            predicted_returns = scaler.inverse_transform(
                predicted_y_scaled.reshape(-1, 1)
            ).reshape(-1, cfg.SEQUENCE_LENGTH)
            actual_returns = stock_scaler.inverse_transform(
                actual_y_scaled.reshape(-1, 1)
            ).reshape(-1, cfg.SEQUENCE_LENGTH)

            # Only the first step of each forecast window is evaluated.
            predicted_next_day_return = predicted_returns[:, 0]
            actual_next_day_return = actual_returns[:, 0]

            # --- Plotting ---
            plt.style.use('seaborn-v0_8-darkgrid')
            plt.figure(figsize=(18, 9))
            plt.plot(actual_next_day_return, color='blue', label='Actual Next-Day Return', alpha=0.7)
            plt.plot(predicted_next_day_return, color='red', label='Predicted Next-Day Return', linestyle='--', alpha=0.7)
            plt.title(f'D-Va Model: Predicted vs. Actual Returns for {stock_name}', fontsize=18)
            plt.xlabel(f'Time (Test Set for {stock_name})', fontsize=14)
            plt.ylabel('Percentage Return', fontsize=14)
            plt.axhline(0, color='grey', linestyle='--')
            plt.legend()
            plt.show()

            # --- Metrics ---
            errors = predicted_next_day_return - actual_next_day_return
            mae = np.mean(np.abs(errors))
            rmse = np.sqrt(np.mean(errors ** 2))
            print(f"Metrics for {stock_name}:")
            print(f"  Mean Absolute Error (MAE): {mae:.6f}")
            print(f"  Root Mean Squared Error (RMSE): {rmse:.6f}")

            # Long when the forecast is positive, short when negative.
            trading_signals = np.sign(predicted_next_day_return)
            strategy_returns = trading_signals * actual_next_day_return

            strategy_std = np.std(strategy_returns)
            if strategy_std > 0:
                # Annualised with 252 trading days per year.
                sharpe_ratio = (np.mean(strategy_returns) / strategy_std) * np.sqrt(252)
            else:
                sharpe_ratio = 0.0
            print(f"  Annualized Sharpe Ratio: {sharpe_ratio:.4f}")

        except Exception as e:
            print(f"\nCould not process or plot for {stock_file}. Error: {e}")


Error during data loading: 'Cannot get left slice bound for non-unique label: np.int64(25)'
--- Starting Training (Predicting Returns) ---


NameError: name 'train_dataset' is not defined