The first step is to import the required packages.

In [88]:
from typing import List

import numpy as np
import tensorflow as tf
import h5py
import os
from tensorflow.python.keras.utils.np_utils import to_categorical
from keras.src.backend.tensorflow.sparse import zeros_int8
from tensorflow import Tensor

Next we load our dataset. It consists of 256 traces per key for 256 different keys (65,536 traces in total). This is small enough to fit in memory on most machines, which makes it suitable for our experiments. If the dataset grows, we will have to
load it in smaller pieces.

In [89]:
# Location of the captured TinyAES dataset. Set the TINYAES_DATASET_PATH
# environment variable to point at your own copy; when it is not set, the
# original hard-coded location below is used (replace it with wherever you
# stored the file).
file_path = os.environ.get(
    "TINYAES_DATASET_PATH",
    "C:\\Users\\Kaspar\\ChipWhisperer5_64\\cw\\home\\portable\\chipwhisperer\\jupyter\\courses\\sca101\\TINYAES_test_set.hdf5",
)

# Upper bound on the rows read from every dataset: 256 keys x 256 traces per key.
num_rows_to_load = 256 * 256

# Slicing an h5py dataset copies the selected rows into memory, so the arrays
# stay usable after the file is closed at the end of the `with` block.
with h5py.File(file_path, "r") as h5_file:
    plaintext_set = h5_file['data'][:num_rows_to_load]
    trace_set = h5_file['trace'][:num_rows_to_load]
    key_set = h5_file['key'][:num_rows_to_load]
    sub_byte_out_set = h5_file['sub_byte_out'][:num_rows_to_load]

Before saving the dataset into HDF5 groups, we feature-scale the traces to values between -1 and 1. This is a tip from SCAAML that helps the models converge. We also add a trailing dimension to the traces so that they fit a Conv1D layer.


In [90]:
# Feature-scale the traces and append a trailing axis in one expression.
# The scale/offset pair maps 0 -> -1 and 255 -> +1; the extra last axis is the
# channel dimension added so the traces fit a Conv1D layer.
trace_set = tf.expand_dims(
    tf.keras.layers.Rescaling(scale=1. / 127.5, offset=-1)(trace_set),
    axis=-1,
)

We precompute our attack points: the key, the sub-byte input (`sub_byte_in`) and the sub-byte output (`sub_byte_out`). The key and `sub_byte_out` are already part of the capture, but `sub_byte_in` has to be computed as plaintext XOR key.

In [91]:
num_traces = len(trace_set)
num_bytes = 16

# sub_byte_in is plaintext XOR key, byte by byte. NumPy applies the XOR to the
# whole (num_traces, num_bytes) block at once instead of looping over every
# trace and byte in Python.
sub_byte_in_set = np.bitwise_xor(
    plaintext_set[:num_traces, :num_bytes],
    key_set[:num_traces, :num_bytes],
).astype(np.uint8)

# Slicing silently truncates when an input is shorter than expected, so verify
# that every trace received a full set of num_bytes values.
assert sub_byte_in_set.shape == (num_traces, num_bytes), sub_byte_in_set.shape


The next step is to transpose the matrices so that they are in the form [byte][values]. This is done because we want to train one model for each byte: model_0_byte is trained on byte 0 of all the traces.

In [92]:
# Flip each 2-D label array from [trace][byte] to [byte][trace], so that
# indexing with a byte number yields that byte's value for every trace.
sub_byte_out = sub_byte_out_set.T
sub_byte_in = sub_byte_in_set.T
key = key_set.T


When this is done, we create an HDF5 file in which each group represents one shard. A shard holds all the traces for one key, and the group name is that key written in hex. Sharding by key lets us avoid using the same key in the train, test and validation sets.

In [93]:
# Sanity check before sharding: the shard-writing cell slices trace_set along
# three axes, so the rank must be exactly 3. Running the rescale/expand cell
# more than once adds extra axes and is caught here.
assert len(trace_set.shape) == 3, trace_set.shape
trace_set.shape

TensorShape([65536, 5000, 1])

In [94]:
num_shards = 256
num_traces_per_shard = 256
dataset_name = "train"  # change depending on the set you create

# Create a new HDF5 file that will contain the shards. The context manager
# closes the file even if writing one of the shards raises.
with h5py.File(f"{dataset_name}.hdf5", "w") as f:
    # Loop through the dataset, creating one group (shard) per block of traces.
    for i in range(num_shards):
        # Half-open range [start_index, stop_index) of the traces in this shard.
        start_index = i * num_traces_per_shard
        stop_index = start_index + num_traces_per_shard

        # The group name is the key of the first trace of the shard, in hex.
        # This indexes the original [trace][byte] key_set; the transposed
        # `key` array would need different indexing.
        # NOTE(review): assumes every trace in the shard shares this key, i.e.
        # the capture stores the traces of one key contiguously - confirm.
        group_name = key_set[start_index].tobytes().hex()

        # One group represents one shard.
        group = f.create_group(group_name, track_order=True)

        # Create the datasets. trace_set is sliced along its first (trace)
        # axis, while the transposed label arrays are [byte][trace]: keep all
        # rows and take num_traces_per_shard columns at a time. The slice
        # boundaries must line up with the shard split.
        # TODO (original note): consider applying to_categorical later in the process.
        group.create_dataset("traces", data=trace_set[start_index:stop_index, :, :])
        group.create_dataset("key", data=key[:, start_index:stop_index])
        group.create_dataset("sub_bytes_in", data=sub_byte_in[:, start_index:stop_index])
        group.create_dataset("sub_bytes_out", data=sub_byte_out[:, start_index:stop_index])

In [95]:
# Make sure the shard file handle `f` is closed, e.g. when the cell above was
# interrupted before it could close the file itself.
f.close()

Once this is done, we take a look at the stored groups and datasets to make sure they look as intended.

In [96]:
# Print every group (shard) name together with a running 1-based count,
# followed by the names of the datasets stored inside that group. The context
# manager closes the file even if the listing is interrupted.
with h5py.File(f"{dataset_name}.hdf5", "r") as f:
    for i, group in enumerate(f.keys(), start=1):
        print(group)
        print(i)
        print()
        for dset in f[group].keys():
            print(dset)

00188cf7c652e31c927dccd9de2df46f
1

traces
key
sub_bytes_in
sub_bytes_out
00758e21e4ed0d201211cb50078bd41f
2

traces
key
sub_bytes_in
sub_bytes_out
00aa14ce6c6558677e26acdbb63dbd16
3

traces
key
sub_bytes_in
sub_bytes_out
01012ead5f3e6e67491421a81a2f5a9d
4

traces
key
sub_bytes_in
sub_bytes_out
026dcdc39bbe8016995335663c1d121a
5

traces
key
sub_bytes_in
sub_bytes_out
04560a81b9637968a2863c516a75fa71
6

traces
key
sub_bytes_in
sub_bytes_out
057f22c57b24ad84838cdf1073d7b969
7

traces
key
sub_bytes_in
sub_bytes_out
058d8373da382cfd6a0175af2bcab5fc
8

traces
key
sub_bytes_in
sub_bytes_out
0669f8c19f6a11362ab2efbfebe9f8d2
9

traces
key
sub_bytes_in
sub_bytes_out
07e6ed0a27b0e94bc49ee8f259530ebd
10

traces
key
sub_bytes_in
sub_bytes_out
080eca1c41d3cd7857f643d12cd69af9
11

traces
key
sub_bytes_in
sub_bytes_out
0862ddb694f235419e6a8f4fa85f9d89
12

traces
key
sub_bytes_in
sub_bytes_out
08759b15c982ab556767de5e2fcadfea
13

traces
key
sub_bytes_in
sub_bytes_out
0912811620839f4aa64c34b0cea045a9
1

We can see that we have 256 different groups, each representing one shard.

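When we want to fetch data from the dataset, we open the file, iterate over the shard groups, read the traces and the chosen attack point for one byte from each shard, and concatenate the results into one input tensor and one label tensor.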

In [101]:
# Loading parameters. The values below are what the loop actually uses, so
# changing one of them here is enough to change what gets loaded.
shard = []                      # not used in this cell
attack_byte = 0                 # row (byte index) selected from the label dataset
attack_point = "sub_bytes_in"   # name of the per-shard dataset used as label
max_trace_length = 5000         # number of samples kept from every trace
num_traces = 256                # number of traces read from every shard
full_key = False                # not used in this cell
x_List: List[Tensor] = []
y_List: List[Tensor] = []

# The file has to be open to read anything from it.
with h5py.File(f"{dataset_name}.hdf5", "r") as f:
    for group in f.keys():
        shard_group = f[group]

        # Traces of this shard, cut to num_traces traces and max_trace_length samples.
        x_shard = shard_group["traces"][:num_traces, :max_trace_length, :]
        x_shard = tf.convert_to_tensor(x_shard, dtype="float32")

        # Labels are stored as [byte][trace]: pick the row of the attacked
        # byte, then one-hot encode it over 256 classes.
        y_shard = shard_group[attack_point][attack_byte]
        y_shard = y_shard[:num_traces]
        y_shard = tf.keras.utils.to_categorical(y_shard, 256)
        y_shard = tf.convert_to_tensor(y_shard, dtype="uint8")

        x_List.append(x_shard)
        y_List.append(y_shard)

# The per-shard tensors already live in memory, so they are joined along the
# trace axis after the file has been closed.
x: Tensor = tf.concat(x_List, axis=0)
y: Tensor = tf.concat(y_List, axis=0)


In [86]:
# Redundant safety net: the `with` block of the loading cell already closes `f`
# when it exits.
f.close()

In [104]:
# Display the concatenated one-hot label tensor built by the loading cell.
y


<tf.Tensor: shape=(65536, 256), dtype=uint8, numpy=
array([[0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)>

In [106]:
# Use the concatenated trace tensor as the training input.
x_train = x

In [107]:
# Use the concatenated one-hot label tensor as the training target.
y_train = y

In [109]:
# One row per loaded trace, one column per class passed to to_categorical (256).
y_train.shape

TensorShape([65536, 256])

In [111]:
# Spot-check the first trace in x_train.
print(x_train[0])

tf.Tensor(
[[-0.07450974]
 [ 0.5529413 ]
 [ 0.3176471 ]
 ...
 [ 1.        ]
 [ 0.21568632]
 [ 0.6156864 ]], shape=(5000, 1), dtype=float32)


In [115]:
# Spot-check the one-hot label of the trace at index 3.
print(y_train[3])

tf.Tensor(
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0], shape=(256,), dtype=uint8)


In [None]:

# Write the tensors loaded for the attacked byte to their own HDF5 file.
# The file name and both dataset names are derived from attack_byte, so an
# export for another byte gets its own names; for attack_byte = 0 they are
# "<dataset_name>_byte0.hdf5", "x_train_byte0" and "y_train_byte0".
with h5py.File(f"{dataset_name}_byte{attack_byte}.hdf5", "w") as f:
    f.create_dataset(f"x_train_byte{attack_byte}", data=x_train.numpy())
    f.create_dataset(f"y_train_byte{attack_byte}", data=y_train.numpy())
