In [13]:
import os
import sys
import pathlib
import pickle

from scapy.all import *  # noqa
import numpy as np
import matplotlib.pyplot as plt
import pandas
from tqdm import tqdm

import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torch.utils.data import random_split
from tensorboardX import SummaryWriter

from logger import logger

# Directory the notebook kernel was started in.
dirname = pathlib.Path.cwd()

# Set matplotlib's own logger to the "info" level.
plt.set_loglevel('info')

# Pick the default tensor type once, up front: CUDA float tensors when a GPU
# is visible, CPU float tensors otherwise.
if not torch.cuda.is_available():
    torch.set_default_tensor_type(torch.FloatTensor)
else:
    torch.set_default_tensor_type(torch.cuda.FloatTensor)
    print("using cuda:", torch.cuda.get_device_name(0))

# Device handle used later when building random generators.
device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")


using cuda: NVIDIA GeForce MX150


In [14]:
class PcapDataset(Dataset):
    """Packet payloads rendered as 1x32x32 grayscale images.

    The backing pickle file holds a sequence of ``(vpn, type, app, content)``
    records, where ``content`` is the raw payload. Exactly which label is
    produced depends on the first truthy flag among ``vpn``, ``type`` and
    ``app`` (checked in that order):

    * ``vpn``  -> 2 classes, the stored value is used as the class index;
    * ``type`` -> 6 classes, the stored value is 1-based;
    * ``app``  -> 16 classes, the stored value is 1-based.

    Args:
        vpn: classify VPN vs. non-VPN traffic.
        type: classify by traffic type (name shadows the builtin; kept for
            backward compatibility with existing callers).
        app: classify by application.
        test: stored on the instance; not consulted by this class.
        path: location of the pickled packet list.
    """

    # Each payload is zero-padded / truncated to IMAGE_SIDE**2 bytes so it can
    # be reshaped into a square single-channel image.
    IMAGE_SIDE = 32
    PACKET_BYTES = IMAGE_SIDE * IMAGE_SIDE

    def __init__(self, vpn=False, type=False, app=False, test=False,
                 path="./data/packets.pickle") -> None:
        super().__init__()

        self.vpn = vpn
        self.type = type
        self.app = app
        self.test = test

        # NOTE(security): unpickling executes arbitrary code embedded in the
        # file -- only load packet dumps that you produced yourself.
        with open(path, 'rb') as file:
            self.packets = pickle.load(file)

    def __len__(self):
        """Number of packet records available."""
        return len(self.packets)

    def get_type_count(self):
        """Return the number of classes for the selected labelling mode.

        Raises:
            ValueError: if none of ``vpn``, ``type`` or ``app`` is enabled.
        """
        if self.vpn:
            return 2
        if self.type:
            return 6
        if self.app:
            return 16
        raise ValueError("unknown classify type")

    def __getitem__(self, index):
        """Return ``(label, image, target)`` for the record at ``index``.

        ``label`` is the integer class index, ``image`` a float tensor of
        shape ``(1, 32, 32)`` with values in ``[0, 1]`` and ``target`` the
        one-hot encoding of ``label``.

        Raises:
            IndexError: if ``index`` is past the end of the dataset.
        """
        if index >= len(self.packets):
            # Sequences signal exhaustion with IndexError; StopIteration is
            # reserved for iterators and is mis-handled inside generators.
            raise IndexError(
                f"index {index} out of range for dataset of size "
                f"{len(self.packets)}")

        vpn, ty, app, content = self.packets[index]

        # Raises if no labelling mode is enabled, so the else-branch below is
        # only reached when ``self.app`` is truthy.
        class_count = self.get_type_count()

        if self.vpn:
            label = vpn
        elif self.type:
            label = ty - 1
        else:
            label = app - 1
        # A bool label would be treated as a mask by ``target[label]``.
        label = int(label)

        target = torch.zeros(class_count)
        target[label] = 1.0

        # Build a fresh, fixed-size bytes object; never extend ``content`` in
        # place, which would corrupt the stored record if it is a bytearray.
        size = self.PACKET_BYTES
        payload = bytes(content[:size]).ljust(size, b'\0')

        data = np.frombuffer(payload, dtype=np.uint8, count=size)

        side = self.IMAGE_SIDE
        # frombuffer yields a read-only view; copy before handing it to torch.
        image = torch.tensor(data.reshape(side, side).copy()) / 255.0
        image = image.view(1, side, side)

        return label, image, target


In [15]:

class Classifier(nn.Module):
    """Small CNN that classifies single-channel packet images.

    The network is three 3x3 convolutions (the first two followed by 2x2
    max-pooling) and two fully connected layers. The model, its loss and its
    optimizer are bundled together so that one call performs a full
    optimisation step.

    Args:
        type_count: number of output classes (size of the final layer).

    Attributes:
        right: running count of correct predictions seen by ``train_step``.
        total: running count of samples seen by ``train_step``.
    """

    def __init__(self, type_count):
        super().__init__()

        # Shape comments assume a 1x32x32 input, which is what the original
        # "# 1024" note on the flatten layer corresponds to.
        # (ReLU, BatchNorm2d, Dropout, extra hidden layers and a final Sigmoid
        # were tried in earlier revisions and left disabled.)
        self.model = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=1),   # -> 32x30x30
            nn.LeakyReLU(0.02, inplace=True),
            nn.MaxPool2d(2, 2),                          # -> 32x15x15

            nn.Conv2d(32, 64, kernel_size=3, stride=1),  # -> 64x13x13
            nn.LeakyReLU(0.02, inplace=True),
            nn.MaxPool2d(2, 2),                          # -> 64x6x6

            nn.Conv2d(64, 64, kernel_size=3, stride=1),  # -> 64x4x4
            nn.LeakyReLU(0.02, inplace=True),

            nn.Flatten(),                                # -> 1024

            # Lazy layers infer in_features on the first forward pass, so run
            # one forward pass before the first optimisation step.
            nn.LazyLinear(64),
            nn.LeakyReLU(0.02, inplace=True),

            # Raw logits: CrossEntropyLoss applies log-softmax itself.
            nn.LazyLinear(type_count),
        )

        self.loss_function = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.01)

        self.right = 0
        self.total = 0

    def forward(self, inputs):
        """Return class logits for a batch shaped ``(N, 1, H, W)``."""
        return self.model(inputs)

    def train(self, label=True, inputs=None, targets=None, *, mode=None):
        """Run one optimisation step, or switch train/eval mode.

        ``nn.Module`` already defines ``train(mode=True)`` and ``eval()`` is
        implemented as ``self.train(False)``. Shadowing it with a
        three-argument method made ``eval()``/``train()`` raise ``TypeError``,
        so this method dispatches on its arguments instead:

        * ``train(label, inputs, targets)`` -> one step, see ``train_step``;
        * ``train()``, ``train(False)``, ``train(mode=...)`` -> the standard
          ``nn.Module.train`` behaviour, returning ``self``.
        """
        if inputs is None and targets is None:
            return super().train(label if mode is None else mode)
        return self.train_step(label, inputs, targets)

    def train_step(self, label, inputs, targets):
        """Perform forward, backward and optimizer update on one batch.

        Args:
            label: integer class indices, one per sample; only used for the
                running accuracy counters.
            inputs: batch of images fed to ``forward``.
            targets: targets passed to the loss (the notebook supplies
                one-hot rows).

        Returns:
            The scalar loss tensor for this batch.
        """
        outputs = self(inputs)

        loss = self.loss_function(outputs, targets)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        # Accuracy bookkeeping must not take part in autograd.
        predicted = outputs.detach().argmax(dim=1)

        self.right += (predicted == label).sum()
        self.total += len(label)

        return loss


In [16]:
# --- Configuration -----------------------------------------------------------
# Labelling mode handed to PcapDataset; exactly one flag should be True.
kwargs = {
    'vpn': False,
    'type': False,
    'app': True,
}

epoch = 50            # number of passes over the training split
test_percent = 0.05   # fraction of the data held out for evaluation
batch_size = 512
seed = 0              # fixes weight init, the split and the shuffle order

torch.manual_seed(seed)

writer = SummaryWriter()

dataset = PcapDataset(**kwargs)
c = Classifier(dataset.get_type_count())

total_count = len(dataset)
test_count = int(total_count * test_percent)
train_count = total_count - test_count

# --- Dry run -----------------------------------------------------------------
# One forward pass on a single sample: shows which device the tensors live on
# and lets the model's lazy linear layers infer their input size before the
# first optimisation step.
label, image, target = dataset[0]
print(image.device, target.device)

output = c.forward(image.view(1, 1, 32, 32))
print(output.shape)

# --- Split and loader --------------------------------------------------------
# The generators live on `device` because the default tensor type may be CUDA.
trainset, testset = random_split(
    dataset,
    [train_count, test_count],
    torch.Generator(device=device).manual_seed(seed))

loader = DataLoader(
    dataset=trainset,
    batch_size=batch_size,
    shuffle=True,
    generator=torch.Generator(device=device).manual_seed(seed),
    drop_last=True,
)

# --- Training ----------------------------------------------------------------
global_step = 0  # x-axis for the scalar logs; without it points share a step

for epoch_idx in range(epoch):

    tq = tqdm(loader)

    # Accuracy counters are per-epoch.
    c.total = 0
    c.right = 0
    for label, image, target in tq:
        loss = c.train(label, image, target)
        acc = float(c.right) / c.total

        writer.add_scalar("loss", loss.item(), global_step)
        writer.add_scalar("acc", acc, global_step)
        global_step += 1

        tq.set_postfix(epoch=f"{epoch_idx}", acc='%.6f' % acc)

    tq.close()


# --- Evaluation on the held-out test split ----------------------------------
total = 0
right = 0
tq = tqdm(testset)

# No gradients are needed for evaluation; skipping them saves memory and time.
with torch.no_grad():
    for label, image, target in tq:
        outputs = c.forward(image.view(1, 1, 32, 32))
        total += 1
        if outputs.argmax() == label:
            right += 1
        writer.add_scalar("tacc", right / total, total)
        tq.set_postfix(acc='%.6f' % (right / total))

tq.close()
writer.close()  # flush pending events to disk

# Guard against an empty test split (tiny dataset or test_percent == 0).
print(right, total, right / total if total else float('nan'))




cuda:0 cuda:0
torch.Size([1, 16])


100%|██████████| 16/16 [00:03<00:00,  4.81it/s, acc=0.154419, epoch=0]
100%|██████████| 16/16 [00:03<00:00,  5.20it/s, acc=0.208618, epoch=1]
100%|██████████| 16/16 [00:03<00:00,  4.98it/s, acc=0.209717, epoch=2]
100%|██████████| 16/16 [00:03<00:00,  4.97it/s, acc=0.217407, epoch=3]
100%|██████████| 16/16 [00:03<00:00,  4.84it/s, acc=0.228882, epoch=4]
100%|██████████| 16/16 [00:03<00:00,  4.91it/s, acc=0.244385, epoch=5]
100%|██████████| 16/16 [00:03<00:00,  4.78it/s, acc=0.280640, epoch=6]
100%|██████████| 16/16 [00:03<00:00,  4.69it/s, acc=0.302246, epoch=7]
100%|██████████| 16/16 [00:04<00:00,  3.20it/s, acc=0.332642, epoch=8]
100%|██████████| 16/16 [00:04<00:00,  3.37it/s, acc=0.347656, epoch=9]
100%|██████████| 16/16 [00:04<00:00,  3.43it/s, acc=0.384521, epoch=10]
100%|██████████| 16/16 [00:04<00:00,  3.51it/s, acc=0.424438, epoch=11]
100%|██████████| 16/16 [00:04<00:00,  3.44it/s, acc=0.457153, epoch=12]
100%|██████████| 16/16 [00:04<00:00,  3.21it/s, acc=0.486206, epoch=13]
10

310 455 0.6813186813186813
