In [56]:
import tensorflow as tf
import os
import random
import numpy as np
from tensorflow.keras.layers import Dense, Flatten, Conv1D,Dropout
from tensorflow.keras import Model
from tensorflow import keras

In [57]:
class readData():
    def __init__(self, root="D:\\code\\pycode\\DoHtunnel\\dataset\\finalData\\model1", should_pad=False):
        self.root = root
        self.should_pad = should_pad
        self.pcaps = []
        num = 0
        for f in os.listdir(root):
            full_path = os.path.join(self.root, f)
            # drop the few outlier pcaps that were over 50KB
            if os.path.getsize(full_path) < 50_000:
                self.pcaps.append(full_path)
        random.shuffle(self.pcaps)
        self.max_len = -1
        for pcap in self.pcaps:
            # *2 since we are representing them in hex
            self.max_len = max(self.max_len, os.path.getsize(pcap))
            # drop the few outlier pcaps that were over 50KB
            if(self.max_len > 30_000):
                self.max_len=30000
                break

    def __len__(self):
        return len(self.pcaps)

    def getitem(self, pcapath):
        # pcap = self.pcaps[idx]
        b=[]
        nums=0
        with open(pcapath, 'rb') as f:
            for i in f.read():
                b.append(int(i) / 255)
                nums+=1
                if(nums>=self.max_len):
                    break
        if self.should_pad and len(b)<self.max_len:
            b += [0] * (self.max_len - len(b))

        if pcapath.split('\\')[7].split('_')[0] == "web":
            traffic_type = tf.constant(0)
        else:
            traffic_type = tf.constant(1)
        b = np.expand_dims(b, axis=1)                
        return tf.constant(b), traffic_type

In [58]:
dataset=readData(should_pad=True)

dataX=[]
dataY=[]
print(len(dataset))
num=0
for pcap in dataset.pcaps:
    x,y=dataset.getitem(pcap)
    dataX.append(x)
    dataY.append(y)
    num+=1
    if(num%1000==0):
        print(num)

lengthData=len(dataX)
train_rate=0.7
bj=int(lengthData*train_rate)
x_train = dataX[0:bj]
y_train = dataY[0:bj]
x_test = dataX[bj:lengthData]
y_test = dataY[bj:lengthData]


18592
1000
2000
3000
4000
5000
6000
7000
8000
9000
10000
11000
12000
13000
14000
15000
16000
17000
18000


In [59]:
lentrain=len(x_train)
lentest=len(x_test)

In [60]:
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10).batch(32)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).shuffle(10).batch(32)

In [61]:
class MyModel(Model):
  def __init__(self,maxlen):
    super(MyModel, self).__init__()  
    self.conv1 = Conv1D(5,3,input_shape=(maxlen,1),activation='relu')
    self.flatten = Flatten()
    self.d1 = Dense(128, activation='relu')
    self.dr2 = Dropout(0.5)    
    self.d2 = Dense(2)

  def call(self, x):
    x = self.conv1(x)
    x = self.flatten(x)
    x = self.d1(x)
    x = self.dr2(x)
    return self.d2(x)

  @tf.function
  def serve(self,x):
    x = self.conv1(x)
    x = self.flatten(x)
    x = self.d1(x)
    x = self.dr2(x)
    return self.d2(x)

model = MyModel(dataset.max_len)

In [62]:
#交叉熵
loss_object = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

optimizer = tf.keras.optimizers.Adam()

In [63]:
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='train_accuracy')
train_falsePositives = tf.keras.metrics.FalsePositives(name='train_falsePositives')
train_falseNegatives = tf.keras.metrics.FalseNegatives(name='train_falseNegatives')

test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(name='test_accuracy')
test_falsePositives = tf.keras.metrics.FalsePositives(name='test_falsePositives')
test_falseNegatives = tf.keras.metrics.FalseNegatives(name='test_falseNegatives')

In [64]:
@tf.function
def train_step(lines, labels):
  with tf.GradientTape() as tape:
    # training=True is only needed if there are layers with different
    # behavior during training versus inference (e.g. Dropout).
    predictions = model(lines, training=True)
    loss = loss_object(labels, predictions)
  gradients = tape.gradient(loss, model.trainable_variables)
  optimizer.apply_gradients(zip(gradients, model.trainable_variables))

  train_loss(loss)
  train_accuracy(labels, predictions)
  train_falsePositives(labels, tf.argmax(predictions, axis=1))
  train_falseNegatives(labels, tf.argmax(predictions, axis=1))

In [65]:
@tf.function
def test_step(lines, labels):
  # training=False is only needed if there are layers with different
  # behavior during training versus inference (e.g. Dropout).
  predictions = model(lines, training=False)
  t_loss = loss_object(labels, predictions)

  test_loss(t_loss)
  test_accuracy(labels, predictions)
  test_falsePositives(labels, tf.argmax(predictions, axis=1))
  test_falseNegatives(labels, tf.argmax(predictions, axis=1))

In [66]:
tf.config.experimental_run_functions_eagerly(True)

In [67]:
EPOCHS = 3

for epoch in range(EPOCHS):
  # Reset the metrics at the start of the next epoch
  train_loss.reset_states()
  train_accuracy.reset_states()
  test_loss.reset_states()
  test_accuracy.reset_states()

  js=0
  for lines, labels in train_ds:
    js+=1
    if(js%100==0):
        print(js)
    train_step(lines, labels)

  for test_lines, test_labels in test_ds:
    test_step(test_lines, test_labels)

  print(
    f'Epoch {epoch + 1}, '
    f'Loss: {train_loss.result()}, '
    f'Accuracy: {train_accuracy.result() * 100}, '
    f'FalsePositives: {train_falsePositives.result()/lentrain}, '
    f'FalseNegatives: {train_falseNegatives.result()/lentrain}, '
    f'Test Loss: {test_loss.result()}, '
    f'Test Accuracy: {test_accuracy.result() * 100}, '
    f'Test FalsePositives: {test_falsePositives.result()/lentest},'
    f'Test FalseNegatives: {test_falseNegatives.result()/lentest},'
  )



To change all layers to have dtype float64 by default, call `tf.keras.backend.set_floatx('float64')`. To change just this layer, pass dtype='float64' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

100
200
300
400
Epoch 1, Loss: 0.5382095575332642, Accuracy: 82.403564453125, FalsePositives: 0.10288919508457184, FalseNegatives: 0.07307515293359756, Test Loss: 0.23447586596012115, Test Accuracy: 93.65364074707031, Test FalsePositives: 0.043922554701566696,Test FalseNegatives: 0.019541054964065552,
100
200
300
400
Epoch 2, Loss: 0.2559759020805359, Accuracy: 87.55186462402344, FalsePositives: 0.15929000079631805, FalseNegatives: 0.1411556750535965, Test Loss: 0.13808800280094147, Test Accuracy: 94.85478210449219, Test FalsePositives: 0.08336321264505386,Test FalseNegatives: 0.031552527099847794,
100
200
300
400
Epoch 3, Loss: 0.19279968738555908, Accuracy: 91.48609161376953, FalsePositiv

In [45]:
# 保存模型
signature = {
    'my_predict': model.serve.get_concrete_function(tf.TensorSpec(shape=[None,30000, 1], dtype=tf.float64)),
}
tf.saved_model.save(model, 'D:\\code\\pycode\\DoHtunnel\\save_model1',signatures=signature)


INFO:tensorflow:Assets written to: D:\code\pycode\DoHtunnel\save_model1\assets
