# Purpose of this notebook 

This notebook demonstrates how NNBits is used to construct the ensemble distinguisher for SPECK 32/64 reduced to 7 rounds, as presented in Section 4.1 of our manuscript.


<div class="alert-info">
Please note that this is only a demo and you will need to adjust the paths at the positions indicated by `# <========= adjust this path`.
</div>


# Create the dataset 

To create new data for round 7, please run:

```python 
# Explicit imports instead of `from gohr.gohr import *`, so it is visible
# where `np` and `make_train_data` come from.
import numpy as np
from gohr.gohr import make_train_data

nr = 7      # number of rounds the data is generated for
n = 10**7   # number of training samples; the validation set gets n // 10

# Each call returns the samples together with their labels.
X, Y = make_train_data(n, nr)
X_val, Y_val = make_train_data(n // 10, nr)

np.save('../D7/Round7/data_train_full.npy', X)       # <========= adjust this path
np.save('../D7/Round7/data_train_labels.npy', Y)     # <========= adjust this path
np.save('../D7/Round7/data_test_full.npy', X_val)    # <========= adjust this path
np.save('../D7/Round7/data_test_labels.npy', Y_val)  # <========= adjust this path
```

## Load round 7 data 

We assume the data has already been created:

In [3]:
import numpy as np

# Training split: samples first, then the matching labels.
X, Y = (
    np.load('../D7/Round7/data_train_full.npy'),    # <========= adjust this path
    np.load('../D7/Round7/data_train_labels.npy'),  # <========= adjust this path
)
# Validation split (stored under the "test" file names), same order.
X_val, Y_val = (
    np.load('../D7/Round7/data_test_full.npy'),    # <========= adjust this path
    np.load('../D7/Round7/data_test_labels.npy'),  # <========= adjust this path
)

## Save two NNBits compatible versions of the dataset 

### Version 1: Training and validation data for labels Y=[0,1] 

In [None]:
# Stack the training rows on top of the validation rows (row-wise concatenation).
final = np.concatenate((X, X_val), axis=0)
# Write the combined array to disk.
np.save('../D7/Round7/combined.npy', final) # <========= adjust this path

### Version 2: Training and validation data for labels Y=[1] 

In [4]:
#--------------------------------------------------#
# Select only the not-random samples (label 1) to train the ensemble.
# The filtered arrays get their own names so that X / X_val keep the full
# dataset: this cell can then be re-run safely, and the "Version 1" cell
# still combines all samples regardless of the execution order.
X_y1 = X[Y == 1]
X_val_y1 = X_val[Y_val == 1]

# Combine the data: training rows first, validation rows appended below.
final = np.r_[X_y1, X_val_y1]

# Save the Y=1-only dataset.
np.save('../D7/Round7/combined_Y1.npy', final) # <========= adjust this path

# Train ensemble (Y=1 data)

In [5]:
# Run folder handed to FileManager and `nnbits.run`; the per-network test
# results are read back from this folder later in the notebook.
savepath = 'gohr_ensemble_longR7run1'

In [6]:
import toml
import numpy as np
import matplotlib.pyplot as plt

from nnbits.filemanager import FileManager
from nnbits.bitanalysis import get_X

In [7]:
# File manager of this run; it supplies the name of the config file below.
F = FileManager(savepath)

datapath = '/opt/cryptanalysis_servers_shared_folder/NNBits/D7/Round7/combined_Y1.npy' # <========= adjust this path

# Build the configuration group by group; the key order is the order in
# which the entries are written to the TOML file.
cfgdict = {'DATAPATH': datapath}

# ensemble settings
cfgdict.update({
    'NEURAL_NETWORK_MODEL': 'gohr',
    'NEURAL_NETWORKS': 64,
    'PREDICT_LABEL': False,
    'SELECT_BITS_STRATEGY': 'target',
    'TARGET_BITS': list(range(64)),  # bits 0, 1, ..., 63
    'INPUT_DATA_OP': 'zero',
})

# hardware settings
cfgdict.update({
    'N_GPUS': 4,
    'N_ACTORS_PER_GPU': 4,
    'GPU_PER_ACTOR': 0.25,
    'CPU_PER_ACTOR': 5,
})

# training settings
cfgdict.update({
    'N_EPOCHS': 200,
    'N_TRAIN': 4997120,
    'N_VAL': 499712,
    'BATCHSIZE': 4096,
    'EARLY_STOPPING': False,
    'SAVE_BEST_WEIGHTS': True,
})

# Write the configuration where FileManager expects it.
with open(F.filename_config(), 'w') as config_handle:
    toml.dump(cfgdict, config_handle)

# Echo the dataset in use, preceded by a rule of matching width.
print("=" * len(datapath))
print(datapath)

# The run itself is started from a separate cell via `python -m nnbits.run`.

/opt/cryptanalysis_servers_shared_folder/NNBits/D7/Round7/combined_Y1.npy


In [8]:
!python -m nnbits.run --savepath 'gohr_ensemble_longR7run1

2023-02-07_06h31m08s 	 started to load data from harddisk...
2023-02-07_06h31m11s 	 finished.
|                             | value                                                                                                                                                                                                                                                                                                                                                                                  |
|:----------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| DATAPATH                    | /opt/cryptanalysis_servers_shared_folder/NNBits/D7/Round

# Test ensemble (Y=0 and Y=1)

In [9]:
# File manager of this run; it supplies the name of the config file below.
F = FileManager(savepath)

datapath = '/opt/cryptanalysis_servers_shared_folder/NNBits/D7/Round7/combined.npy' # <========= adjust this path

# The configuration is split into named groups and merged afterwards; the
# merge order is the order in which the entries are written to the TOML file.
ensemble_settings = {
    'NEURAL_NETWORK_MODEL': 'gohr',
    'NEURAL_NETWORKS': 64,
    'PREDICT_LABEL': False,
    'SELECT_BITS_STRATEGY': 'target',
    'TARGET_BITS': list(range(64)),  # bits 0, 1, ..., 63
    'INPUT_DATA_OP': 'zero',
}
hardware_settings = {
    'N_GPUS': 4,
    'N_ACTORS_PER_GPU': 4,
    'GPU_PER_ACTOR': 0.25,
    'CPU_PER_ACTOR': 5,
}
testing_settings = {
    'N_TEST': 11_000_000,
    'TEST_ONLY': True,
}
training_settings = {
    'N_EPOCHS': 2,
    'N_TRAIN': 0,
    'N_VAL': 0,
    'BATCHSIZE': 4096,
    'EARLY_STOPPING': False,
    'SAVE_BEST_WEIGHTS': True,
}

cfgdict = {
    'DATAPATH': datapath,
    **ensemble_settings,
    **hardware_settings,
    **testing_settings,
    **training_settings,
}

# Overwrite the run's configuration file with the testing configuration.
with open(F.filename_config(), 'w') as config_handle:
    toml.dump(cfgdict, config_handle)

# Echo the dataset in use, preceded by a rule of matching width.
print("=" * len(datapath))
print(datapath)

/opt/cryptanalysis_servers_shared_folder/NNBits/D7/Round7/combined.npy


In [10]:
!python -m nnbits.run --savepath 'gohr_ensemble_longR7run1

2023-02-07_11h14m56s 	 started to load data from harddisk...
2023-02-07_11h15m03s 	 finished.
|                             | value                                                                                                                                                                                                                                                                                                                                                                                  |
|:----------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| DATAPATH                    | /opt/cryptanalysis_servers_shared_folder/NNBits/D7/Round

# Train and evaluate MLP

In [11]:
#---------- Create MLP Model -----------
from gohr.gohr import bs, LearningRateScheduler, cyclic_lr

def create_model(d1=64, d2=64, reg_param=1e-5):
    """Build the (uncompiled) MLP that is trained on the ensemble outputs.

    Architecture:
        Input(64) -> Dense(d1) -> BatchNorm -> ReLU
                  -> Dense(d2) -> BatchNorm -> ReLU
                  -> Dense(1, sigmoid)

    Parameters
    ----------
    d1 : int
        Number of units of the first hidden Dense layer.
    d2 : int
        Number of units of the second hidden Dense layer.
    reg_param : float
        L2 regularisation factor applied to the kernel of every Dense layer.

    Returns
    -------
    keras.models.Model
        The model mapping a 64-dimensional input to one sigmoid output.
        It still has to be compiled by the caller.
    """
    # --- prepare GPU: make only the first GPU visible to TensorFlow.
    # On a machine without a GPU the list is empty, so the restriction is
    # skipped instead of failing with an IndexError.
    import tensorflow as tf
    gpus = tf.config.experimental.list_physical_devices("GPU")
    if gpus:
        tf.config.experimental.set_visible_devices(gpus[0], 'GPU')

    # Only the layers that are actually used are imported.
    from keras.models import Model
    from keras.layers import Dense, Input, BatchNormalization, Activation
    from keras.regularizers import l2

    inp = Input(shape=(64,))

    # first hidden block
    dense1 = Dense(d1, kernel_regularizer=l2(reg_param))(inp)
    dense1 = BatchNormalization()(dense1)
    dense1 = Activation('relu')(dense1)

    # second hidden block
    dense2 = Dense(d2, kernel_regularizer=l2(reg_param))(dense1)
    dense2 = BatchNormalization()(dense2)
    dense2 = Activation('relu')(dense2)

    # single sigmoid output unit
    out = Dense(1, activation='sigmoid', kernel_regularizer=l2(reg_param))(dense2)

    model = Model(inputs=inp, outputs=out)
    return model

N_TRAIN = 10_000_000
N_EVAL = 1_000_000
N_NETWORKS = 64  # one MLP input column per network; must match Input(shape=(64,)) in create_model

#--------- Labels ---------
Y = np.load('../D7/Round7/data_train_labels.npy')[:N_TRAIN] # <========= adjust this path
Y_eval = np.load('../D7/Round7/data_test_labels.npy')[:N_EVAL] # <========= adjust this path

#--------- Features ---------
# Correctness of the predictions of the single neural networks, one column
# per network. Every result file is read once and split into its training
# part (first N_TRAIN entries) and its evaluation part (the following
# N_EVAL entries); the tested dataset was saved with the training rows
# first and the validation rows appended.
X = np.zeros((N_TRAIN, N_NETWORKS))
X_eval = np.zeros((N_EVAL, N_NETWORKS))

for network_id in range(N_NETWORKS):
    filename = f'{savepath}/test_accuracies_bit_by_bit/{network_id}_testing.npy'
    x = np.load(filename)
    X[:, network_id] = x[:N_TRAIN]
    # Explicit upper bound: the slice always holds exactly N_EVAL entries
    # as long as the file contains at least N_TRAIN + N_EVAL of them.
    X_eval[:, network_id] = x[N_TRAIN:N_TRAIN + N_EVAL]

#--------- Training ---------
num_epochs = 200

model = create_model(reg_param=10**-5)
model.compile(optimizer='adam', loss='mse', metrics=['acc'])

# create learnrate schedule
lr = LearningRateScheduler(cyclic_lr(10, 0.002, 0.0001))

# train on the training part, evaluate on the evaluation part after every epoch
h = model.fit(X, Y, epochs=num_epochs, batch_size=bs, callbacks=[lr], validation_data=(X_eval, Y_eval))

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [12]:
# Highest validation accuracy recorded over all epochs of the MLP training.
best_val_acc = np.max(h.history['val_acc'])
print("Best validation accuracy: ", best_val_acc)

Best validation accuracy:  0.6012279987335205


# Validation accuracy of MLP / Ensemble distinguisher = 60.1% 