# 🧠 Multiscale CNN for Financial Time-Series Classification
This notebook covers data preprocessing, image tensor generation, and model training using a multi-scale CNN with SE attention and dual outputs.

In [1]:
# 📦 1. Import Libraries
import os
import numpy as np
import pandas as pd
import zipfile
from sklearn.model_selection import train_test_split
from scipy.stats import zscore

import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Conv2D, Concatenate, GlobalAveragePooling2D, Dense,
    Reshape, Multiply
)
from tensorflow.keras.callbacks import EarlyStopping

2025-05-22 20:34:18.041215: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-22 20:34:18.084445: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# 📂 2. Data Loading & Preprocessing
# def load_data(base_path, k):
#     data_frames = []
#     for i in range(k):
#         file_path = os.path.join(base_path, f"train_{i}.csv")
#         df = pd.read_csv(file_path)
#         data_frames.append(df)
#     return pd.concat(data_frames, ignore_index=True)
def load_data_from_zip(zip_file_path, k):
    data_frames = []
    with zipfile.ZipFile(zip_file_path, 'r') as zf:
        for i in range(k):
            # Assuming files are named train_0.csv, train_1.csv, etc. within the zip
            file_name_in_zip = f"train_{i}.csv"
            try:
                with zf.open(file_name_in_zip) as f:
                    df = pd.read_csv(f)
                    data_frames.append(df)
            except KeyError:
                print(f"Warning: {file_name_in_zip} not found in zip file.")
                continue # Skip to the next file if not found

    if not data_frames:
        raise ValueError("No data files found in the zip archive.")

    return pd.concat(data_frames, ignore_index=True)

def preprocess_data(df, target_col='responder_6'):
    missing_ratio = df.isnull().mean()
    df.drop(columns=missing_ratio[missing_ratio > 0.4].index, inplace=True)
    df.fillna(df.median(numeric_only=True), inplace=True)
    feature_cols = [col for col in df.columns if col.startswith('feature_')]
    return df, feature_cols, target_col

def mark_noise(features, window_size=20, step=10, std_multiplier=3):
    n_samples, _ = features.shape
    noise_flags = np.zeros(n_samples, dtype=bool)
    for i in range(0, n_samples - window_size + 1, step):
        window = features[i:i + window_size]
        stds = np.std(window, axis=0)
        if np.any(stds > std_multiplier * np.std(features, axis=0)):
            noise_flags[i:i + window_size] = True
    return noise_flags

In [3]:
# 📈 3. Image Tensor Construction
def create_image_data(df, feature_cols, target_col, window_size=60, step=30, img_size=(60, 60)):
    features = df[feature_cols].apply(zscore).clip(-3, 3).values
    targets = df[target_col].values
    noise_labels = mark_noise(features, window_size, step)

    X_images, y_target, y_noise = [], [], []
    for i in range(0, len(df) - window_size + 1, step):
        window_feat = features[i:i + window_size]
        if window_feat.shape != img_size:
            window_feat = np.resize(window_feat, img_size)
        X_images.append(window_feat)
        y_target.append(targets[i + window_size - 1])
        y_noise.append(int(np.any(noise_labels[i:i + window_size])))

    X_images = np.array(X_images)[..., np.newaxis]
    return np.array(X_images), np.array(y_target), np.array(y_noise)

In [4]:
# 🧠 4. Multiscale CNN Model with SE and Dual Outputs
def build_multiscale_cnn(input_shape):
    inputs = Input(shape=input_shape)
    feature_small = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs)
    feature_large = Conv2D(32, (7, 7), activation='relu', padding='same')(inputs)
    combined = Concatenate(axis=-1)([feature_small, feature_large])

    se = GlobalAveragePooling2D()(combined)
    se = Dense(32, activation='relu')(se)
    se = Dense(64, activation='sigmoid')(se)
    se = Reshape((1, 1, 64))(se)
    weighted = Multiply()([combined, se])

    refined = Conv2D(64, (3, 3), activation='relu', padding='same')(weighted)
    pooled = GlobalAveragePooling2D()(refined)

    out_target = Dense(1, activation='sigmoid', name='target_output')(pooled)
    out_noise = Dense(1, activation='sigmoid', name='noise_output')(pooled)
    return Model(inputs, outputs=[out_target, out_noise])

In [5]:
# 🚀 5. Main Training Pipeline
base_path = "your_dataset_path"  # TODO: Replace with your actual path
df = load_data(base_path, k=2)
df, feature_cols, target_col = preprocess_data(df)
X, y_target, y_noise = create_image_data(df, feature_cols, target_col)

X_train, X_test, y_train_t, y_test_t, y_train_n, y_test_n = train_test_split(
    X, y_target, y_noise, test_size=0.2, random_state=42
)

model = build_multiscale_cnn(input_shape=(60, 60, 1))
model.compile(
    optimizer='adam',
    loss={'target_output': 'binary_crossentropy', 'noise_output': 'binary_crossentropy'},
    loss_weights={'target_output': 1.0, 'noise_output': 0.3},
    metrics={'target_output': 'accuracy', 'noise_output': 'accuracy'}
)
model.summary()
model.fit(
    X_train, {'target_output': y_train_t, 'noise_output': y_train_n},
    validation_data=(X_test, {'target_output': y_test_t, 'noise_output': y_test_n}),
    epochs=10,
    batch_size=64,
    callbacks=[EarlyStopping(patience=3, restore_best_weights=True)],
    verbose=1
)

NameError: name 'load_data' is not defined