In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import warnings
warnings.filterwarnings('ignore')

import tensorflow as tf
tf.get_logger().setLevel('ERROR')


In [3]:
import numpy as np
import pandas as pd
from sklearn.utils import shuffle
from sklearn.preprocessing import StandardScaler

signal_real = pd.read_csv("./combined-dataset/real-signal-injected.csv", header=None).astype(float)
signal_syn = pd.read_csv("./combined-dataset/synthetic-signal-injected.csv", header=None).astype(float)
noise_real = pd.read_csv("./combined-dataset/real-noise.csv", header=None).astype(float)
noise_syn = pd.read_csv("./combined-dataset/synthetic-noise.csv", header=None).astype(float)

signal_real["label"] = 1
signal_syn["label"] = 1
noise_real["label"] = 0
noise_syn["label"] = 0

train_data = pd.concat([signal_real, signal_syn, noise_real, noise_syn], axis = 0)
train_data = shuffle(train_data, random_state = 42)

X = train_data.iloc[:, :-1].values
y = train_data["label"].values.astype(np.float32)

noise_all = pd.concat([noise_real, noise_syn], axis = 0)

scaler = StandardScaler()
scaler.fit(noise_all.iloc[:, :-1])
X = scaler.transform(X)

X = np.expand_dims(X, axis=-1)

In [4]:
import numpy as np
import tensorflow as tf
from sklearn.metrics import roc_auc_score, classification_report
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input, Dense, Dropout, LayerNormalization, Conv1D, MaxPooling1D,
    GlobalAveragePooling1D, Add, MultiHeadAttention, BatchNormalization
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# ========= ✅ 1. Preprocessing =========
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X.squeeze(-1))  # -> (N, 16384)
X_scaled = X_scaled[..., np.newaxis]            # -> (N, 16384, 1)

X_train, X_val, y_train, y_val = train_test_split(
    X_scaled, y, test_size=0.2, stratify=y, random_state=42
)

# ========= ✅ 2. Transformer Block =========
def transformer_block(x, head_size=32, num_heads=2, ff_dim=64, dropout=0.1):
    # Self-attention
    attn = MultiHeadAttention(num_heads=num_heads, key_dim=head_size)(x, x)
    x = Add()([x, attn])
    x = LayerNormalization()(x)

    # Feed-forward
    ff = Dense(ff_dim, activation='relu')(x)
    ff = Dense(x.shape[-1])(ff)
    x = Add()([x, ff])
    x = LayerNormalization()(x)
    return x

# ========= ✅ 3. Model Builder =========
def build_cnn_transformer(input_shape=(16384, 1)):
    inp = Input(shape=input_shape)

    # --- CNN feature extractor ---
    x = Conv1D(32, 16, strides=4, padding='same')(inp)   # (4096, 32)
    x = BatchNormalization()(x)
    x = tf.keras.layers.ReLU()(x)
    x = MaxPooling1D(pool_size=4)(x)                     # (1024, 32)

    x = Conv1D(64, 8, strides=2, padding='same')(x)      # (512, 64)
    x = BatchNormalization()(x)
    x = tf.keras.layers.ReLU()(x)
    x = MaxPooling1D(pool_size=2)(x)                     # (256, 64)

    # --- Transformer block ---
    x = transformer_block(x, head_size=32, num_heads=2, ff_dim=64, dropout=0.1)

    # --- Classification ---
    x = GlobalAveragePooling1D()(x)
    x = Dropout(0.3)(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.3)(x)
    out = Dense(1, activation='sigmoid')(x)

    model = Model(inputs=inp, outputs=out)
    model.compile(optimizer=tf.keras.optimizers.Adam(1e-4),
                  loss='binary_crossentropy',
                  metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])
    return model

# ========= ✅ 4. Train =========
model = build_cnn_transformer()
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    batch_size=8,
    callbacks=[tf.keras.callbacks.EarlyStopping(patience=5, restore_best_weights=True)],
    verbose=1
)

# ========= ✅ 5. Evaluate =========
y_pred = model.predict(X_val).ravel()
print("ROC AUC:", roc_auc_score(y_val, y_pred))
print(classification_report(y_val, (y_pred > 0.5).astype(int)))


Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
ROC AUC: 0.915999276297433
              precision    recall  f1-score   support

         0.0       0.75      0.94      0.83      1792
         1.0       0.92      0.68      0.78      1792

    accuracy                           0.81      3584
   macro avg       0.83      0.81      0.81      3584
weighted avg       0.83      0.81      0.81      3584



In [6]:
model.save("final_cnn_transformer_model.h5")