In [None]:
import os
import sys
import pathlib
import sqlite3
import random
import time

from scapy.all import *  # noqa
import numpy as np
import matplotlib.pyplot as plt
import pandas
from tqdm import tqdm

import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

from logger import logger

# Current working directory of the kernel; none of the cells shown here reference it.
dirname = pathlib.Path.cwd()
# Set Matplotlib's logger to the 'info' level so lower-level (debug) messages are not emitted.
plt.set_loglevel('info')

In [None]:
# Choose the compute backend once: new tensors default to CUDA float tensors
# when a GPU is visible and to CPU float tensors otherwise.
use_cuda = torch.cuda.is_available()

default_tensor_type = torch.cuda.FloatTensor if use_cuda else torch.FloatTensor
torch.set_default_tensor_type(default_tensor_type)

if use_cuda:
    print("using cuda:", torch.cuda.get_device_name(0))

# Device handle matching the default tensor type selected above.
device = torch.device('cuda' if use_cuda else 'cpu')

In [None]:
# Notebook-level SQLite connection and cursor for ./data/packets.db.
# NOTE(review): the cells visible here never use `conn`/`cur` (PcapDataset opens its
# own train/test databases) and nothing closes this connection — confirm it is still needed.
conn = sqlite3.connect("./data/packets.db")
cur = conn.cursor()

In [None]:

class PcapDataset(Dataset):
    """Map-style dataset that serves packet payloads from a SQLite ``packets`` table.

    Each item is read from ``./data/train_packets.db`` (or ``./data/test_packets.db``
    when ``test=True``), zero-padded or truncated to ``PACKET_BYTES`` bytes and
    scaled to ``[0, 1]``.

    Exactly one label column is used per dataset. ``type`` is on by default, so
    the explicit opt-in flags take precedence over it: ``vpn`` first, then
    ``app``, then ``type``. ``PcapDataset(app=True)`` therefore yields app labels.

    Args:
        vpn: use the ``vpn`` column as label (2 classes).
        type: use the ``type`` column as label (6 classes); default mode.
        app: use the ``app`` column as label (16 classes).
        cnn: return the payload as a ``(1, 32, 32)`` tensor instead of a flat
            ``(1024,)`` tensor.
        test: read the test database instead of the training database.
    """

    # Every payload is padded/truncated to this many bytes (IMAGE_SIDE ** 2).
    PACKET_BYTES = 1024
    IMAGE_SIDE = 32

    # Number of classes for each label mode.
    _CLASS_COUNTS = {'vpn': 2, 'type': 6, 'app': 16}

    # `type` and `app` are shifted by -1 in the query, presumably because the
    # stored values are 1-based — confirm against the table contents.
    # NOTE(review): OFFSET makes SQLite step over the skipped rows on every
    # lookup, so random access gets slower as the table grows.
    _ROW_SQL = '''SELECT vpn, (type - 1), (app - 1), content FROM packets ORDER BY id LIMIT 1 OFFSET ?'''

    def __init__(self, vpn=False, type=True, app=False, cnn=False, test=False) -> None:
        super().__init__()

        self.vpn = vpn
        self.type = type
        self.app = app
        self.test = test
        self.cnn = cnn

        if self.test:
            database = "./data/test_packets.db"
        else:
            database = "./data/train_packets.db"

        self.conn = sqlite3.connect(database)
        self.cur = self.conn.cursor()

        self.cur.execute('''SELECT COUNT(1) FROM "packets"''')
        self.count = self.cur.fetchone()[0]

        # Fixed random permutation of row offsets: items come back shuffled
        # even when the DataLoader itself does not shuffle.
        self.indices = list(range(self.count))
        random.shuffle(self.indices)

    def __len__(self):
        """Return the number of rows counted in the ``packets`` table."""
        return self.count

    def _label_mode(self):
        """Return which label column is active: ``'vpn'``, ``'app'`` or ``'type'``.

        ``type`` defaults to True, so it is checked last; otherwise an explicit
        ``app=True`` could never take effect.

        Raises:
            Exception: if no label flag is set.
        """
        if self.vpn:
            return 'vpn'
        if self.app:
            return 'app'
        if self.type:
            return 'type'
        raise Exception("unknown classify type")

    def get_type_count(self):
        """Return the number of classes of the active label mode."""
        return self._CLASS_COUNTS[self._label_mode()]

    def __getitem__(self, index):
        """Return ``(label, image, target)`` for the ``index``-th shuffled row.

        Returns:
            label: integer class index taken from the active label column.
            image: float tensor scaled to ``[0, 1]``; shape ``(1, 32, 32)`` when
                ``cnn`` is set, otherwise ``(1024,)``.
            target: one-hot float tensor of length ``get_type_count()``.

        Raises:
            IndexError: if ``index`` is past the end or no row is found. This is
                the exception the sequence protocol expects; ``StopIteration``
                would turn into ``RuntimeError`` inside a generator.
        """
        if index >= self.count:
            raise IndexError(index)

        row_offset = self.indices[index]

        self.cur.execute(self._ROW_SQL, (row_offset, ))
        row = self.cur.fetchone()
        if not row:
            raise IndexError(index)

        vpn_label, type_label, app_label, content = row

        mode = self._label_mode()
        label = {'vpn': vpn_label, 'type': type_label, 'app': app_label}[mode]

        target = torch.zeros(self._CLASS_COUNTS[mode])
        target[label] = 1.0

        # Normalise the payload to exactly PACKET_BYTES bytes: cut long packets,
        # right-pad short ones with NUL bytes.
        size = self.PACKET_BYTES
        content = content[:size].ljust(size, b'\0')

        # frombuffer yields a read-only view; copy before handing it to torch.
        data = np.frombuffer(content, dtype=np.uint8, count=size)
        image = torch.tensor(data.copy()) / 255.0

        if self.cnn:
            image = image.view(1, self.IMAGE_SIDE, self.IMAGE_SIDE)

        return label, image, target


In [None]:

class Classifier(nn.Module):
    """Small convolutional classifier bundled with its own loss and optimizer.

    The network takes single-channel 2-D input (the first layer is
    ``Conv2d(1, 32, ...)``) and emits ``type_count`` sigmoid scores that are
    regressed against one-hot targets with an MSE loss.

    Args:
        type_count: number of output classes.
    """

    def __init__(self, type_count):
        super().__init__()

        self.model = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3, stride=2),
            nn.LeakyReLU(0.02),
            nn.BatchNorm2d(32),

            nn.Conv2d(32, 64, kernel_size=3, stride=2),
            nn.LeakyReLU(0.02),
            nn.BatchNorm2d(64),

            nn.Conv2d(64, 64, kernel_size=3, stride=2),
            nn.LeakyReLU(0.02),
            nn.BatchNorm2d(64),

            nn.Flatten(),
            # Lazy layers infer their input width on the first forward pass.
            nn.LazyLinear(64),
            nn.LeakyReLU(0.02),
            nn.LazyLinear(type_count),
            nn.Sigmoid(),
        )

        self.loss_function = nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.01)

        # Number of optimisation steps taken, and the loss sampled every 100 steps.
        self.counter = 0
        self.progress = []

    def forward(self, inputs):
        """Run ``inputs`` through the network and return the sigmoid scores."""
        return self.model(inputs)

    def train_step(self, inputs, targets):
        """Run one optimisation step on a batch and return its loss tensor.

        Every 100th call also appends the scalar loss to ``self.progress``.
        """
        outputs = self.forward(inputs)

        loss = self.loss_function(outputs, targets)

        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

        self.counter += 1
        if self.counter % 100 == 0:
            self.progress.append(loss.item())
        return loss

    def train(self, inputs=True, targets=None):
        """Dispatch between a training step and ``nn.Module.train``.

        This method shadows ``nn.Module.train(mode)``, which ``eval()`` calls as
        ``self.train(False)``. To keep both usages working:

        * ``train(inputs, targets)`` runs one optimisation step (see
          ``train_step``) and returns the loss.
        * ``train()`` / ``train(mode)`` with no ``targets`` switches the
          training mode as ``nn.Module.train`` does and returns ``self``.
        """
        if targets is None:
            return super().train(inputs)
        return self.train_step(inputs, targets)

    def plot_progress(self):
        """Plot the losses recorded in ``self.progress`` with pandas."""
        df = pandas.DataFrame(self.progress, columns=['loss'])
        df.plot(ylim=(0, 1.0), figsize=(16, 8), alpha=0.1, marker='.',
                grid=True, yticks=(0, 0.25, 0.5))


In [None]:
# Dataset options shared by the training and the evaluation cell.
kwargs = {
    # `type` defaults to True in PcapDataset; switch it off explicitly so that
    # `app` is the only label flag set and the 16-class app labels are used.
    'type': False,
    'app': True,
    'cnn': True,
}

# Number of passes over the training set.
epoch = 5

dataset = PcapDataset(**kwargs)

c = Classifier(dataset.get_type_count())

# No `shuffle` here: PcapDataset already serves rows through a shuffled index list.
loader = DataLoader(
    dataset=dataset,
    batch_size=16,
    drop_last=True,
)

for _ in range(epoch):
    # The context manager closes the progress bar even if a step raises.
    with tqdm(loader) as tq:
        for label, image, target in tq:
            loss = c.train(image, target)

c.plot_progress()


In [None]:
testset = PcapDataset(test=True, **kwargs)

total = 0
right = 0

# Put the layers into inference mode so BatchNorm uses its running statistics
# instead of the statistics of each single test sample (and does not update them).
c.model.eval()

# Iterate the held-out test set (not the training `dataset`), without autograd.
with torch.no_grad(), tqdm(testset) as tq:
    for label, image, target in tq:
        outputs = c.forward(image.view(1, 1, 32, 32))
        total += 1
        if outputs.argmax().item() == label:
            right += 1
        tq.set_postfix(acc='%.6f' % (right / total))

# Restore training mode in case `c` is trained further afterwards.
c.model.train()

# Guard the final ratio so an empty test set does not raise ZeroDivisionError.
print(right, total, right / total if total else float('nan'))
