In [None]:
# This script trains and evaluates the Deep Fingerprinting (DF) model for website fingerprinting attacks.
# Reference paper: "Deep Fingerprinting: Undermining Website Fingerprinting Defenses with Deep Learning"

from keras import backend as K
from utility import LoadDataWalkieTalkieCW
from Model_WalkieTalkie import DFNet
import random
from keras.utils import np_utils
from keras.optimizers import Adamax
import numpy as np
import os

# Seed Python's built-in `random` module so draws from it are repeatable.
# NOTE(review): only the stdlib RNG is seeded here; no other RNG is seeded
# in the visible code.
random.seed(0)

# Presumably lowers the verbosity of TensorFlow's native logging.
# NOTE(review): keras is already imported above this line -- confirm the
# variable is still read late enough for this to take effect.
os.environ.update({'TF_CPP_MIN_LOG_LEVEL': '3'})

# Use only CPU
# NOTE(review): no CPU-only setting follows this heading in the visible code.

# Human-readable banner describing what this script does.
description = (
    "Training and evaluating DF model for closed-world scenario "
    "on Walkie-Talkie dataset"
)
print(description)

In [None]:
# ---------------------------------------------------------------------------
# Train the DF model and evaluate it on the held-out test split.
# ---------------------------------------------------------------------------

# Training hyperparameters
NB_EPOCH = 30
print("Number of Epoch: ", NB_EPOCH)
BATCH_SIZE = 128
VERBOSE = 2
LENGTH = 5000
OPTIMIZER = Adamax(lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

# Model dimensions: one channel per position, NB_CLASSES output classes
NB_CLASSES = 100
INPUT_SHAPE = (LENGTH, 1)

# Load the train / validation / test splits.
# Please refer to the dataset format in readme.
print("Loading and preparing data for training, and evaluating the model")
X_train, y_train, X_valid, y_valid, X_test, y_test = LoadDataWalkieTalkieCW()
K.set_image_dim_ordering("tf")  # "tf" selects the TensorFlow dimension ordering

# Cast every feature and label array to float32.
X_train, X_valid, X_test = [
    features.astype('float32') for features in (X_train, X_valid, X_test)
]
y_train, y_valid, y_test = [
    labels.astype('float32') for labels in (y_train, y_valid, y_test)
]

# Append a trailing axis so each sample has shape [Length x 1], as the
# DF CNN (TensorFlow ordering) expects.
X_train, X_valid, X_test = [
    features[:, :, np.newaxis] for features in (X_train, X_valid, X_test)
]

# Report how many samples each split contains.
for split_size, split_label in (
    (X_train.shape[0], 'train samples'),
    (X_valid.shape[0], 'validation samples'),
    (X_test.shape[0], 'test samples'),
):
    print(split_size, split_label)

# One-hot encode the class labels into NB_CLASSES-wide matrices.
y_train, y_valid, y_test = [
    np_utils.to_categorical(labels, NB_CLASSES)
    for labels in (y_train, y_valid, y_test)
]

# Build and compile the network.
print("Building and training DF model")
model = DFNet.build(input_shape=INPUT_SHAPE, classes=NB_CLASSES)
model.compile(
    loss="categorical_crossentropy",
    optimizer=OPTIMIZER,
    metrics=["accuracy"],
)
print("Model compiled")

# Fit on the training split, monitoring the validation split each epoch.
history = model.fit(
    X_train,
    y_train,
    batch_size=BATCH_SIZE,
    epochs=NB_EPOCH,
    verbose=VERBOSE,
    validation_data=(X_valid, y_valid),
)

# Score the trained model on the test split; index 1 is the "accuracy"
# metric requested in compile() (index 0 is the loss).
score_test = model.evaluate(X_test, y_test, verbose=VERBOSE)
test_accuracy = score_test[1]
print("Testing accuracy:", test_accuracy)

# Top-N prediction: a test sample counts as correct when its true class is
# among the top_N classes the model scores highest.
top_N = 2  # Specify top_N = n; n is top-n prediction
print("Start evaluating Top-%s Accuracy" % top_N)

# `result` holds the model's predicted probabilities: one row per test
# sample, one column per class (presumably the softmax output).
result = np.asarray(model.predict(X_test, verbose=2))
actual_y = np.asarray(y_test)

total = len(result)
count = 0
if total:
    # Class indices of the top_N largest probabilities in each row. Ranking
    # by index (rather than looking each probability value up again) keeps
    # the top_N classes distinct even when several classes tie.
    top_classes = np.argsort(result, axis=1)[:, -top_N:]
    # Convert each one-hot row back to its integer class label.
    actual_labels = np.argmax(actual_y, axis=1)
    # A hit is a row whose true label appears among its top_N classes.
    hits = (top_classes == actual_labels[:, np.newaxis]).any(axis=1)
    count = int(hits.sum())

# Guard against an empty test set instead of dividing by zero.
top_n_accuracy = float(count) / total if total else 0.0
print("Top-%s Accuracy: %f " % (top_N, top_n_accuracy))