# Segmentation methodology for Chameleon

In [1]:
# This code has been tested on Python 3.11.7
! pip install -r requirements.txt




[notice] A new release of pip is available: 25.0.1 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


In [7]:
import numpy as np
from tqdm.auto import tqdm

from CNN.train import train 
from inference_pipeline.sliding_window_classification import getModule, classifyTrace, saveClassification
from inference_pipeline.segmentation import *
from CNN.build_dataset_chameleon import createSubsets
from inference_pipeline.debug import *
from matplotlib import pyplot as plt
from inference_pipeline.heuristic import removeFalseNegatives, removeFalsePositives, removeFalsePositives_basic

## CNN dataset creation

In [3]:
# Folder of the Chameleon traces to use; replace "BASE" with another pattern
# name to switch dataset.
chameleon_path = "datasets/{}/".format("BASE") # Patterns: BASE DFS RD MRP CHF
# Folder handed to createSubsets() as its output location.
dataset_out_path = "temporary_files"

In [4]:
# Build the CNN dataset from the traces under `chameleon_path`; `dataset_out_path`
# is the output folder. The call is unconditional, so it runs again on every
# execution of this cell (see the TODO).
createSubsets(chameleon_path, dataset_out_path) # TODO add check if subset already exists

Creating dataset:   0%|          | 0/240 [00:00<?, ?it/s]

## CNN training

Each CNN is configured through a YAML configuration file.  
You can set different module hyper-parameters as well as the dataset, the logger, and the experiment configurations.  
Default configurations, both for the Neptune logger and for the experiment, are in the `CNN/configs` directory. 

> Some mandatory YAML parameters must be set for each experiment.  
> - Set `log_dir` in the `experiment.yaml` files.  
> - Set `dataset_dir` in the `data.yaml` files to the output folder used by `createSubsets()`.  

> Neptune logging and Neptune.ai account are optional and can be enabled or disabled as desired.

In [11]:
# Folder holding the configuration files of the experiment to run.
# NOTE(review): this selects the "chameleon_mrp" configs while `chameleon_path`
# in the dataset-creation cell selects "BASE" — confirm the two are meant to match.
config_folder = "CNN/configs/chameleon_mrp/" # /path/to/experiment/config/folder/
# Train the CNN described by the configuration in `config_folder`.
# NOTE(review): the recorded run of this cell stopped with a
# NeptuneInvalidApiTokenException — presumably the Neptune logger must be given
# a valid API token or be disabled in the configs before running; confirm.
train(config_folder)

Seed set to 314159
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
`Trainer(limit_train_batches=1.0)` was configured so 100% of the batches per epoch will be used..
`Trainer(limit_val_batches=1.0)` was configured so 100% of the batches will be used..


temporary_files\ResNet_v1_2025-11-05_22-21-56


NeptuneInvalidApiTokenException: 

----NeptuneInvalidApiTokenException------------------------------------------------

The provided API token is invalid.
Make sure you copied and provided your API token correctly.

You can get it or check if it is correct here:
    - https://app.neptune.ai/get_my_api_token

There are two options to add it:
    - specify it in your code
    - set it as an environment variable in your operating system.

CODE
Pass the token to the init_run() function via the api_token argument:
    neptune.init_run(project='WORKSPACE_NAME/PROJECT_NAME', api_token='YOUR_API_TOKEN')

ENVIRONMENT VARIABLE (Recommended option)
or export or set an environment variable depending on your operating system:

    Linux/Unix
    In your terminal run:
        export NEPTUNE_API_TOKEN="YOUR_API_TOKEN"

    Windows
    In your CMD run:
        set NEPTUNE_API_TOKEN="YOUR_API_TOKEN"

and skip the api_token argument of the init_run() function:
    neptune.init_run(project='WORKSPACE_NAME/PROJECT_NAME')

You may also want to check the following docs page:
    - https://docs.neptune.ai/setup/setting_api_token/

Need help?-> https://docs.neptune.ai/getting_help


## Sliding Window Classification

In [None]:
# Placeholder to replace before running; presumably the Neptune run ID of the
# trained model — confirm against getModule().
SID = "<Neptune_SID>"
# Trace file that is classified below and whose ground truth is loaded later.
chameleon_file = "temporary_files/trace_file.h5"
# Destination passed to saveClassification() for the classification result.
output_file = "temporary_files/output_file.npy"

# Obtain the module identified by SID; it is passed to classifyTrace() below.
module = getModule(SID)

Compute the sliding-window classification of the trace and save it to the file `output_file`.  
Function _classifyTrace_ has a few parameters to configure based on the experiment:

- `stride`: Define the stride to use for the sliding window.
- `window_size`: Define the size of the sliding window itself.

In [None]:
# Size of the sliding window given to classifyTrace().
# presumably expressed in trace samples — TODO confirm
window_size = 20_000
# Stride of the sliding window. Later cells also use it to map ground-truth
# positions onto classification indices (sample // stride).
stride = 100

In [None]:
# Classify the trace file with the sliding window defined above, using `module`
# and a batch size of 1024.
classifications = classifyTrace(chameleon_file, module, stride, window_size, batch_size=1024)
# Save the classification result to `output_file`.
saveClassification(classifications, output_file)

## Screening

Compute the segmentation and find the starting sample of each cryptographic operation (CO).

In [None]:
# Load the classification from the same path it was saved to in the
# "Sliding Window Classification" section. The previous hard-coded value
# ("temporary_files/classification_output_file") did not match `output_file`,
# and np.load() needs the exact file name, so a fresh run could not find it.
# Point this at another file only to screen a classification computed elsewhere.
classification_file = output_file

# Ground-truth labels of the trace file; each label is indexed by 'start' and 'end' below.
labels = loaderGt(chameleon_file)
# Memory-map the saved classification read-only instead of loading it all into RAM.
classifications = np.load(classification_file, mmap_mode='r')

# Ground-truth start and end entries, one per label.
gts_starts = []
gts_ends = []
for label in labels:
    gts_starts.append(label['start'])
    gts_ends.append(label['end'])

# Passed to segment() as its last argument.
# presumably the initial minimum distance between two COs, in samples — TODO confirm
init_min_distance = 150_000

In [None]:
# Filter size handed to segment() as its second argument.
major_filter_size = 150

# Segment every classified trace. The ground-truth lists stay in the zip (their
# values are unused) so the iteration still stops at the shortest sequence.
CPs = [
    segment(trace_classification, major_filter_size, stride, init_min_distance)
    for trace_classification, _gt_start, _gt_end in tqdm(
        zip(classifications, gts_starts, gts_ends), total=len(classifications)
    )
]

Remove false positives and false negatives from the identified COs.

In [None]:
# Cleaned start/end positions, one entry per segmented trace.
starts, ends = [], []

for idx, change_points in enumerate(tqdm(CPs)):
    # First drop false positives, then recover false negatives using the
    # classification of the same trace.
    kept_starts, kept_ends = removeFalsePositives_basic(change_points['starts'], change_points['ends'])
    kept_starts, kept_ends = removeFalseNegatives(kept_starts, kept_ends, classifications[idx])
    starts.append(kept_starts)
    ends.append(kept_ends)

In [None]:
# Per-trace false-positive / false-negative rates, for CO starts (_s) and ends (_e).
fprs_s, fnrs_s = [], []
fprs_e, fnrs_e = [], []

n_traces = len(CPs)
for idx in tqdm(range(n_traces)):
    # Starts: ground truth vs. detected positions of the same trace.
    start_fpr, start_fnr = errorRate(gts_starts[idx], np.asarray(starts[idx]), stride)
    fprs_s.append(start_fpr)
    fnrs_s.append(start_fnr)
    # Ends: same comparison on the end positions.
    end_fpr, end_fnr = errorRate(gts_ends[idx], np.asarray(ends[idx]), stride)
    fprs_e.append(end_fpr)
    fnrs_e.append(end_fnr)

# Mean over all traces, reported as a percentage with two decimals.
print(f"Total start mean FPR: {round(np.mean(fprs_s)*100,2)}%")
print(f"Total start mean FNR: {round(np.mean(fnrs_s)*100,2)}%")
print(f"Total end mean FPR: {round(np.mean(fprs_e)*100,2)}%")
print(f"Total end mean FNR: {round(np.mean(fnrs_e)*100,2)}%")

## Visualization

Visualize the detected COs and ground truth (GT) for a given trace.

**First Subplot**: Draws black vertical lines for detected COs' _starts_ and red dashed vertical lines for GT.  
**Second Subplot**: Draws grey vertical lines for detected COs' _ends_ and yellow dashed vertical lines for GT.  
**Third Subplot**: Plots classification for each point in the trace.

In [None]:
# View settings. The x axis is in classification indices: ground-truth
# positions are divided by `stride` before being drawn.
n_trace = 0        # index of the trace to display
center = 100_000   # centre of the displayed x range
margin = 20_000    # half-width of the displayed x range
lim = (center-margin, center+margin)


def _mark_positions(axis, positions, label, **line_style):
    """Draw one vertical line per position on `axis`.

    Only the first line carries `label`, so the legend shows a single entry.
    Nothing is drawn when `positions` is empty, which avoids the IndexError
    raised by indexing element 0 of an empty list.
    """
    for i, position in enumerate(positions):
        legend_kw = {'label': label} if i == 0 else {}
        axis.axvline(x=position, **line_style, **legend_kw)


# Set the font size BEFORE creating the figure: rcParams are read when artists
# are created, so updating them afterwards can make the first run of this cell
# render differently from later runs.
plt.rcParams.update({'font.size': 18})
fig, ax = plt.subplots(3, figsize=(13, 7))
fig.tight_layout(pad=2.0)

# First subplot: detected CO starts vs. ground-truth starts.
ax[0].set_xlim(lim)
_mark_positions(ax[0], starts[n_trace], "Found CO's start", color='black', linestyle='-')
_mark_positions(ax[0], [sample//stride for sample in gts_starts[n_trace]], 'Start GT', color='r', linestyle='--')
ax[0].legend(loc='lower right', bbox_to_anchor=(1, 0.95))

# Second subplot: detected CO ends vs. ground-truth ends.
ax[1].set_xlim(lim)
_mark_positions(ax[1], ends[n_trace], "Found CO's end", color='grey', linestyle='-')
_mark_positions(ax[1], [sample//stride for sample in gts_ends[n_trace]], 'End GT', color='y', linestyle='--')
ax[1].legend(loc='lower right', bbox_to_anchor=(1, 0.95))

# Third subplot: arg-max over axis 1 of the classification, per index.
ax[2].set_xlim(lim)
ax[2].plot(np.argmax(classifications[n_trace], axis=1));