### Project Phase 1

Since Spark cannot read the .h5 file format (HDF5, Hierarchical Data Format version 5), I was forced to mount the storage account directly in Databricks, instead of granting read access the normal way we usually do with Spark.

I took the solution from: "https://www.databricks.com/notebooks/cnn-car-class/load-images-in-hdf5.html"

When mounting it, I ran into a problem: mounting a drive (dbutils.fs.mount) changes the file system for everyone on the cluster. Therefore, for security reasons, Databricks blocks this command on clusters shared by multiple users unless the cluster is "Dedicated" (assigned to a single user).

So I went back a step, changed the access mode to single user on the cluster settings page, and assigned the cluster to my account.
After that I was able to mount the storage and interact with the data.

In [0]:
# Storage account, container and SAS token used to mount the blob container.
# NOTE(review): the SAS token is written inline here; reading it from a
# Databricks secret scope (dbutils.secrets.get) would keep it out of the notebook.
MOUNTPOINT = "/mnt/databricks/demo"
STORAGE_ACCOUNT = "scadatalake"
CONTAINER = "lakehouse"
SASTOKEN = "**********D"


# Derived from the values above -- do not change these.
SOURCE = f"wasbs://{CONTAINER}@{STORAGE_ACCOUNT}.blob.core.windows.net/"
URI = f"fs.azure.sas.{CONTAINER}.{STORAGE_ACCOUNT}.blob.core.windows.net"

try:
    dbutils.fs.mount(
        source=SOURCE,
        mount_point=MOUNTPOINT,
        extra_configs={URI: SASTOKEN},
    )
except Exception as e:
    # An existing mount is not an error for this cell; anything else is re-raised.
    if "Directory already mounted" not in str(e):
        raise e
print("Success.")


Success.


### Project Phase 2
In this part we will build the model and train it.


Imports and Environment Setup

In [0]:
%pip install tensorflow

Collecting tensorflow
  Downloading tensorflow-2.20.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.5 kB)
Collecting absl-py>=1.0.0 (from tensorflow)
  Downloading absl_py-2.3.1-py3-none-any.whl.metadata (3.3 kB)
Collecting flatbuffers>=24.3.25 (from tensorflow)
  Downloading flatbuffers-25.9.23-py2.py3-none-any.whl.metadata (875 bytes)
Collecting gast!=0.5.0,!=0.5.1,!=0.5.2,>=0.2.1 (from tensorflow)
  Downloading gast-0.7.0-py3-none-any.whl.metadata (1.5 kB)
Collecting google_pasta>=0.1.1 (from tensorflow)
  Downloading google_pasta-0.2.0-py3-none-any.whl.metadata (814 bytes)
Collecting libclang>=13.0.0 (from tensorflow)
  Downloading libclang-18.1.1-py2.py3-none-manylinux2010_x86_64.whl.metadata (5.2 kB)
Collecting opt_einsum>=2.3.2 (from tensorflow)
  Downloading opt_einsum-3.4.0-py3-none-any.whl.metadata (6.3 kB)
Collecting protobuf>=5.28.0 (from tensorflow)
  Downloading protobuf-6.33.1-cp39-abi3-manylinux2014_x86_64.whl.metadata (593 bytes)
Collecting ter

In [0]:
%restart_python

In [0]:
import os
import sys
import h5py
import numpy as np
import tensorflow as tf

from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.layers import (Flatten, Dense, Input, Conv1D, MaxPooling1D, 
                                     GlobalAveragePooling1D, GlobalMaxPooling1D, 
                                     AveragePooling1D, BatchNormalization, Activation, Add, add)
from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import to_categorical

# Report the TensorFlow and Keras versions in use so the run can be reproduced.
print(f"TensorFlow Version: {tf.__version__}")
print(f"Keras Version: {tf.keras.__version__}")

TensorFlow Version: 2.20.0
Keras Version: 3.12.0


Configuration & Parameters

In [0]:
# --- CRITICAL FIX: Disable HDF5 File Locking ---
# This prevents the 'errno = 22' crash on Databricks/Azure storage
# NOTE(review): h5py is already imported by the time this cell runs; some HDF5
# builds may read this variable only once at library start-up -- confirm the
# setting takes effect here, or set it before `import h5py`.
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"

# --- File Paths ---
# ASCAD database, addressed through the /dbfs view of the /mnt/databricks/demo mount.
ASCAD_DB_FILE = "/dbfs/mnt/databricks/demo/raw/ASCAD_data/ASCAD_databases/ASCAD.h5"
# Checkpoint file, relative to the current working directory.
MODEL_SAVE_FILE = "./best_cnn_model.h5"

# --- Hyperparameters ---
# NOTE(review): the RUN cells further down hard-code their own batch size,
# epoch count and validation split instead of reading these constants.
NETWORK_TYPE = "cnn_best" 
BATCH_SIZE = 200
EPOCHS = 2
VALIDATION_SPLIT = 0.1 # Use 10% of data for validation
EARLY_STOPPING = True  # Stop if model stops improving

Helper Functions (Data Loading)

In [0]:
def check_file_exists(file_path):
    """Raise ``FileNotFoundError`` unless ``file_path`` exists.

    Returns ``None`` when the path is present.
    """
    if os.path.exists(file_path):
        return
    raise FileNotFoundError(f"Error: provided file path '{file_path}' does not exist!")

def load_ascad(ascad_database_file, load_metadata=False):
    """Load the profiling and attack sets from an ASCAD HDF5 database.

    Args:
        ascad_database_file: Path to the ``.h5`` database.
        load_metadata: When true, also return the two ``metadata`` datasets.

    Returns:
        ``((X_profiling, Y_profiling), (X_attack, Y_attack))`` as NumPy arrays
        (traces converted to ``float32``). With ``load_metadata`` a third item
        ``(profiling_metadata, attack_metadata)`` is appended; these are the
        datasets taken straight from the open file, so the file handle is
        deliberately left open in that case.

    Raises:
        FileNotFoundError: If the path does not exist.
        ValueError: If the file cannot be opened as HDF5.
    """
    check_file_exists(ascad_database_file)
    try:
        in_file = h5py.File(ascad_database_file, "r")
    except OSError as err:
        # Only open failures are translated; KeyboardInterrupt etc. propagate.
        raise ValueError(
            f"Error: can't open HDF5 file '{ascad_database_file}' (it might be malformed)"
        ) from err

    keep_open = False
    try:
        # Load profiling traces (Training Data)
        X_profiling = np.array(in_file['Profiling_traces/traces'], dtype=np.float32)  # float32 for TF
        Y_profiling = np.array(in_file['Profiling_traces/labels'])

        # Load attacking traces (Test Data - optional for training but good to have)
        X_attack = np.array(in_file['Attack_traces/traces'], dtype=np.float32)
        Y_attack = np.array(in_file['Attack_traces/labels'])

        print(f"Loaded Profiling Traces: {X_profiling.shape}")
        print(f"Loaded Profiling Labels: {Y_profiling.shape}")

        if not load_metadata:
            return (X_profiling, Y_profiling), (X_attack, Y_attack)

        metadata = (in_file['Profiling_traces/metadata'], in_file['Attack_traces/metadata'])
        # The returned datasets read lazily from the file, so it must stay open.
        keep_open = True
        return (X_profiling, Y_profiling), (X_attack, Y_attack), metadata
    finally:
        # Everything else is already copied into memory: release the handle.
        if not keep_open:
            in_file.close()

# Helper for handling multi-label outputs (needed for ResNet architectures)
def multilabel_to_categorical(Y):
    """Build the dictionary of one-hot targets for the multi-output model.

    Reads the ``alpha_mask``, ``beta_mask``, ``sbox_masked`` and ``perm_index``
    fields of ``Y`` and returns a dict keyed by output name: ``alpha_output``,
    ``beta_output``, ``sbox_<i>_output`` and ``permind_<i>_output`` for
    ``i`` in 0..15. All targets use 256 classes except the ``permind`` ones,
    which use 16.
    """
    targets = {
        'alpha_output': to_categorical(Y['alpha_mask'], num_classes=256),
        'beta_output': to_categorical(Y['beta_mask'], num_classes=256),
    }
    # One target per column of the masked S-box outputs.
    targets.update({
        f'sbox_{col}_output': to_categorical(Y['sbox_masked'][:, col], num_classes=256)
        for col in range(16)
    })
    # One target per column of the permutation indices (16 possible values).
    targets.update({
        f'permind_{col}_output': to_categorical(Y['perm_index'][:, col], num_classes=16)
        for col in range(16)
    })
    return targets

Model Architectures

In [0]:
### CNN Best model (Optimized for SCA)
def cnn_best(classes=256, input_dim=700):
    """Build and compile the ``cnn_best`` 1-D convolutional classifier.

    Architecture: five blocks of ``Conv1D`` (kernel 11, ReLU, same padding)
    followed by ``AveragePooling1D`` (pool 2, stride 2) with 64, 128, 256, 512
    and 512 filters, then ``Flatten``, two 4096-unit ReLU dense layers and a
    softmax layer with ``classes`` units.

    Args:
        classes: Number of output classes.
        input_dim: Length of each input trace; the model input is
            ``(input_dim, 1)``.

    Returns:
        The model compiled with RMSprop (learning rate 1e-5), categorical
        cross-entropy loss and the accuracy metric.
    """
    trace_input = Input(shape=(input_dim, 1))

    # Convolutional feature extractor: block1 .. block5.
    features = trace_input
    for block_no, n_filters in enumerate((64, 128, 256, 512, 512), start=1):
        features = Conv1D(
            n_filters, 11, activation='relu', padding='same',
            name=f'block{block_no}_conv1',
        )(features)
        features = AveragePooling1D(2, strides=2, name=f'block{block_no}_pool')(features)

    # Classification head.
    features = Flatten(name='flatten')(features)
    for layer_name in ('fc1', 'fc2'):
        features = Dense(4096, activation='relu', name=layer_name)(features)
    predictions = Dense(classes, activation='softmax', name='predictions')(features)

    model = Model(trace_input, predictions, name='cnn_best')
    model.compile(
        loss='categorical_crossentropy',
        optimizer=RMSprop(learning_rate=0.00001),
        metrics=['accuracy'],
    )
    return model

Training Execution Logic

In [0]:
def train_and_save(X_profiling, Y_profiling, network_type, save_path, epochs=75, batch_size=200):
    """Train the selected network on the profiling set, checkpointing the best model.

    Args:
        X_profiling: 2-D array of traces, one row per trace.
        Y_profiling: Labels, one-hot encoded here into 256 classes.
        network_type: One of ``"mlp"``, ``"cnn_best"``, ``"cnn_best2"`` or
            ``"multi_resnet"``.
        save_path: File path handed to ``ModelCheckpoint``.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        ``(history, model)`` on success. For an unknown ``network_type`` an
        error is printed and ``None`` is returned instead of a tuple.

    NOTE(review): ``mlp_best``, ``cnn_best2`` and ``resnet_v1`` are not defined
    in the visible part of this notebook -- confirm they exist before selecting
    those network types.
    """
    # 1. The trace length fixes the model's input dimension.
    input_dim = len(X_profiling[0])
    print(f"Detected Input Dimension (Trace Length): {input_dim}")

    # 2. Convolutional networks expect (samples, time steps, channels); the MLP takes 2-D input.
    if network_type in ("cnn", "cnn_best", "cnn_best2", "multi_resnet"):
        n_traces, n_samples = X_profiling.shape[0], X_profiling.shape[1]
        Reshaped_X = X_profiling.reshape((n_traces, n_samples, 1))
    else:
        Reshaped_X = X_profiling

    # 3. One-hot encode the labels (256 possible byte values).
    y_encoded = to_categorical(Y_profiling, num_classes=256)

    # 4. Pick the model builder. Lambdas keep name lookup lazy, so only the
    #    selected architecture has to be defined.
    builders = {
        "mlp": lambda: mlp_best(input_dim=input_dim),
        "cnn_best": lambda: cnn_best(input_dim=input_dim),
        "cnn_best2": lambda: cnn_best2(input_dim=input_dim),
        "multi_resnet": lambda: resnet_v1((input_dim, 1), 19),
    }
    build_model = builders.get(network_type)
    if build_model is None:
        print(f"Error: Unknown network type {network_type}")
        return None
    model = build_model()

    model.summary()

    # 5. Keep the checkpoint with the best validation accuracy; optionally stop early.
    checkpoint = ModelCheckpoint(
        save_path, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max'
    )
    callbacks = [checkpoint]
    if EARLY_STOPPING:
        callbacks.append(EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True))

    # 6. Train
    print(f"Starting training for {epochs} epochs...")
    history = model.fit(
        x=Reshaped_X,
        y=y_encoded,
        batch_size=batch_size,
        verbose=1,
        validation_split=VALIDATION_SPLIT,
        epochs=epochs,
        callbacks=callbacks,
    )

    print(f"Training complete. Model saved to {save_path}")
    return history, model

Training Execution Logic updated to handle the .keras format correctly

In [0]:
def train_and_save(X_profiling, Y_profiling, network_type, save_path, epochs=75, batch_size=200):
    """Train the selected network and checkpoint the best model to ``save_path``.

    This definition replaces any earlier ``train_and_save`` in the session.

    Args:
        X_profiling: 2-D array of traces, one row per trace.
        Y_profiling: Labels; they are one-hot encoded into 256 classes.
        network_type: ``"mlp"``, ``"cnn_best"``, ``"cnn_best2"`` or ``"multi_resnet"``.
        save_path: Checkpoint path passed to ``ModelCheckpoint``.
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size.

    Returns:
        ``(history, model)``, or ``None`` (after printing an error) when
        ``network_type`` is not recognised.

    NOTE(review): ``mlp_best``, ``cnn_best2`` and ``resnet_v1`` are not defined
    in the visible part of this notebook -- confirm before using those types.
    """
    # 1. Input dimension = number of samples in one trace.
    input_dim = len(X_profiling[0])

    # 2. Add a trailing channel axis for the convolutional architectures.
    needs_channel_axis = network_type in ("cnn", "cnn_best", "cnn_best2", "multi_resnet")
    if needs_channel_axis:
        Reshaped_X = X_profiling.reshape((X_profiling.shape[0], X_profiling.shape[1], 1))
    else:
        Reshaped_X = X_profiling

    # 3. One-hot encode the labels.
    y_encoded = to_categorical(Y_profiling, num_classes=256)

    # 4. Instantiate the requested model; the factories are resolved lazily.
    factories = {
        "mlp": lambda: mlp_best(input_dim=input_dim),
        "cnn_best": lambda: cnn_best(input_dim=input_dim),
        "cnn_best2": lambda: cnn_best2(input_dim=input_dim),
        "multi_resnet": lambda: resnet_v1((input_dim, 1), 19),
    }
    if network_type not in factories:
        print(f"Error: Unknown network type {network_type}")
        return None
    model = factories[network_type]()

    # 5. Callbacks: best-validation-accuracy checkpoint written to save_path,
    #    plus optional early stopping on validation loss.
    callbacks = [
        ModelCheckpoint(save_path, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max'),
    ]
    if EARLY_STOPPING:
        early_stop = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
        callbacks.append(early_stop)

    # 6. Train
    print(f"Starting training for {epochs} epochs...")
    fit_kwargs = dict(
        x=Reshaped_X,
        y=y_encoded,
        batch_size=batch_size,
        verbose=1,
        validation_split=VALIDATION_SPLIT,
        epochs=epochs,
        callbacks=callbacks,
    )
    history = model.fit(**fit_kwargs)

    print(f"Training complete. Model saved to {save_path}")
    return history, model

RUN

In [0]:
import os
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import to_categorical

# 1. Disable HDF5 file locking again, in case this cell runs in a fresh session.
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"

# 2. Checkpoint in the .keras format.
# The checkpoint is written under /tmp/ so that it lands on the local driver
# disk and training never writes to Azure Blob storage.
SAFE_MODEL_PATH = "/tmp/best_cnn_model.keras" 

print(f"Force-saving model to: {SAFE_MODEL_PATH}")

# 3. Re-define the training logic to ensure it uses SAFE_MODEL_PATH
def run_safe_training(*, epochs=2, batch_size=200, validation_split=0.1):
    """Train ``cnn_best`` on the ASCAD profiling set, checkpointing to ``SAFE_MODEL_PATH``.

    Relies on ``load_ascad``, ``cnn_best``, ``ASCAD_DB_FILE`` and
    ``SAFE_MODEL_PATH`` being defined by earlier cells.

    Args:
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size.
        validation_split: Fraction of the profiling set held out for validation.

    Returns:
        The history object returned by ``model.fit``.
    """
    # Only the profiling set is used for training; the attack set is ignored here.
    print("Loading Data...")
    (X_profiling, Y_profiling), _ = load_ascad(ASCAD_DB_FILE)

    # Add the channel axis expected by Conv1D and one-hot encode the labels.
    input_dim = len(X_profiling[0])
    Reshaped_X = X_profiling.reshape((X_profiling.shape[0], X_profiling.shape[1], 1))
    y_encoded = to_categorical(Y_profiling, num_classes=256)

    # Re-initialize model
    print("Initializing CNN...")
    model = cnn_best(input_dim=input_dim)

    callbacks = [
        # Keep the best model (by validation accuracy) at the .keras path.
        ModelCheckpoint(SAFE_MODEL_PATH, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max'),
        # Stop once validation loss has not improved for 10 epochs.
        EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True),
    ]

    print("Starting Training...")
    history = model.fit(
        x=Reshaped_X,
        y=y_encoded,
        batch_size=batch_size,
        verbose=1,
        validation_split=validation_split,
        epochs=epochs,
        callbacks=callbacks,
    )
    return history

# 4. Execute
history = run_safe_training()

# 5. Copy the checkpoint from the driver's local disk to permanent storage.
# `final_storage_path` is the local /dbfs view of the mount, which is the form
# the `os` file APIs need.
final_storage_path = "/dbfs/mnt/databricks/demo/raw/ASCAD_data/trained_models/best_cnn_model.keras"

# Create the target directory if needed (no error when it already exists).
os.makedirs(os.path.dirname(final_storage_path), exist_ok=True)

# dbutils.fs resolves scheme-less paths against the DBFS root, so passing the
# "/dbfs/..." path would write to dbfs:/dbfs/mnt/... instead of the mounted
# container. Swap the "/dbfs" prefix for the "dbfs:" scheme.
dbfs_destination = "dbfs:" + final_storage_path[len("/dbfs"):]

print(f"Moving model to permanent storage: {final_storage_path}")
dbutils.fs.cp(f"file:{SAFE_MODEL_PATH}", dbfs_destination)
print("Success!")

Force-saving model to: /tmp/best_cnn_model.keras
Loading Data...
Loaded Profiling Traces: (50000, 700)
Loaded Profiling Labels: (50000,)
Initializing CNN...
Starting Training...
Epoch 1/2
  1/225 ━━━━━━━━━━━━━━━━━━━━ 16:13 4s/step - accuracy: 0.0100 - loss: 5.6035
  2/225 ━━━━━━━━━━━━━━━━━━━━ 8:46 2s/step - accuracy: 0.0075 - loss: 5.5906
  3/225 ━━━━━━━━━━━━━━━━━━━━ 8:33 2s/step - accuracy: 0.0067 - loss: 5.5841
  4/225 ━━━━━━━━━━━━━━━━━━━━ 8:23 2s/step - accuracy: 0.0059 - loss: 5.5827
  5/225 ━━━━━━━━━━━━━━━━━━━━ 8:17 2s/step - accuracy: 0.0055 - loss: 5.5824

In [0]:
import os
import tensorflow as tf
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
from tensorflow.keras.utils import to_categorical

# 1. Disable HDF5 file locking again, in case this cell runs in a fresh session.
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"

# 2. Checkpoint in the .keras format.
# The checkpoint is written under /tmp/ so that it lands on the local driver
# disk and training never writes to Azure Blob storage.
SAFE_MODEL_PATH = "/tmp/best_cnn_model.keras" 

print(f"Force-saving model to: {SAFE_MODEL_PATH}")

# 3. Re-define the training logic to ensure it uses SAFE_MODEL_PATH
def run_safe_training(*, epochs=75, batch_size=200, validation_split=0.1):
    """Train ``cnn_best`` on the ASCAD profiling set, checkpointing to ``SAFE_MODEL_PATH``.

    Relies on ``load_ascad``, ``cnn_best``, ``ASCAD_DB_FILE`` and
    ``SAFE_MODEL_PATH`` being defined by earlier cells.

    Args:
        epochs: Maximum number of training epochs.
        batch_size: Mini-batch size.
        validation_split: Fraction of the profiling set held out for validation.

    Returns:
        The history object returned by ``model.fit``.
    """
    # Only the profiling set is used for training; the attack set is ignored here.
    print("Loading Data...")
    (X_profiling, Y_profiling), _ = load_ascad(ASCAD_DB_FILE)

    # Add the channel axis expected by Conv1D and one-hot encode the labels.
    input_dim = len(X_profiling[0])
    Reshaped_X = X_profiling.reshape((X_profiling.shape[0], X_profiling.shape[1], 1))
    y_encoded = to_categorical(Y_profiling, num_classes=256)

    # Re-initialize model
    print("Initializing CNN...")
    model = cnn_best(input_dim=input_dim)

    callbacks = [
        # Keep the best model (by validation accuracy) at the .keras path.
        ModelCheckpoint(SAFE_MODEL_PATH, monitor='val_accuracy', verbose=1, save_best_only=True, mode='max'),
        # Stop once validation loss has not improved for 10 epochs.
        EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True),
    ]

    print("Starting Training...")
    history = model.fit(
        x=Reshaped_X,
        y=y_encoded,
        batch_size=batch_size,
        verbose=1,
        validation_split=validation_split,
        epochs=epochs,
        callbacks=callbacks,
    )
    return history

# 4. Execute
history = run_safe_training()

# 5. Copy the checkpoint from the driver's local disk to permanent storage.
# `final_storage_path` is the local /dbfs view of the mount, which is the form
# the `os` file APIs need.
final_storage_path = "/dbfs/mnt/databricks/demo/raw/ASCAD_data/trained_models/best_cnn_model_75.keras"

# Create the target directory if needed (no error when it already exists).
os.makedirs(os.path.dirname(final_storage_path), exist_ok=True)

# dbutils.fs resolves scheme-less paths against the DBFS root, so passing the
# "/dbfs/..." path would write to dbfs:/dbfs/mnt/... instead of the mounted
# container. Swap the "/dbfs" prefix for the "dbfs:" scheme.
dbfs_destination = "dbfs:" + final_storage_path[len("/dbfs"):]

print(f"Moving model to permanent storage: {final_storage_path}")
dbutils.fs.cp(f"file:{SAFE_MODEL_PATH}", dbfs_destination)
print("Success!")

Force-saving model to: /tmp/best_cnn_model.keras
Loading Data...
Loaded Profiling Traces: (50000, 700)
Loaded Profiling Labels: (50000,)
Initializing CNN...
Starting Training...
Epoch 1/75
  1/225 ━━━━━━━━━━━━━━━━━━━━ 19:21 5s/step - accuracy: 0.0050 - loss: 5.5478
  2/225 ━━━━━━━━━━━━━━━━━━━━ 8:17 2s/step - accuracy: 0.0037 - loss: 5.5561
  3/225 ━━━━━━━━━━━━━━━━━━━━ 8:10 2s/step - accuracy: 0.0031 - loss: 5.5674
  4/225 ━━━━━━━━━━━━━━━━━━━━ 8:26 2s/step - accuracy: 0.0026 - loss: 5.5727
  5/225 ━━━━━━━━━━━━━━━━━━━━ 8:20 2s/step - accuracy: 0.0025 - loss: 5.5760