In [None]:
import math
import os
import random
import string
import sys
import time

import matplotlib.pyplot as plt
import torch
from IPython.display import clear_output
from tqdm import tqdm

In [None]:
# Pick the best available accelerator instead of hard-coding Apple's MPS
# backend, so the notebook also runs on CUDA or CPU-only machines.
# NOTE(review): nothing visible in this notebook moves the model or the input
# tensors to `device` yet — confirm before relying on it.
_mps_backend = getattr(torch.backends, "mps", None)  # absent on older torch builds
if _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [None]:
# Locations of the two dataset text files, one file per class.
PATH_NORMAL = "dataset/normal.txt"
PATH_ATTACK = "dataset/attack.txt"

# Class labels; a label's position in this list is its integer class id.
all_categories = ["normal", "attack"]
n_categories = len(all_categories)

# Maps a class label to the list of text lines loaded for that class.
category_lines = {}


def readLines(filename: str) -> list[str]:
    """Read a UTF-8 text file and return its lines.

    Leading and trailing whitespace of the whole file is stripped before the
    content is split on "\n".

    Args:
        filename: Path of the file to read.

    Returns:
        The lines of the file. An empty (or whitespace-only) file yields an
        empty list rather than a list holding one empty string.
    """
    # The context manager closes the handle deterministically.
    with open(filename, encoding="utf-8") as f:
        content = f.read().strip()
    if not content:
        # "".split("\n") would give [""], i.e. one bogus zero-length sample.
        return []
    return content.split("\n")

def removeBlankLines(file_path: str) -> None:
    """Remove empty and whitespace-only lines from a text file, in place.

    The file is read and written as UTF-8, matching readLines. When the file
    contains no blank lines it is left untouched, so re-running the cell does
    not rewrite the dataset needlessly.

    Args:
        file_path: Path of the file to clean.
    """
    with open(file_path, "r", encoding="utf-8") as f:
        original = f.readlines()

    kept = [line for line in original if line.strip()]
    if len(kept) == len(original):
        # Nothing to remove: skip the write entirely.
        return

    with open(file_path, "w", encoding="utf-8") as f:
        f.writelines(kept)

# Drop blank lines from both dataset files on disk before loading them.
removeBlankLines(PATH_NORMAL)
removeBlankLines(PATH_ATTACK)

# Load each class and register its lines under the matching label.
lines_normal = readLines(PATH_NORMAL)
lines_attack = readLines(PATH_ATTACK)
category_lines.update({"normal": lines_normal, "attack": lines_attack})

In [None]:
# Character vocabulary used for one-hot encoding: every character in
# string.printable. A character's position in this string is its index.
all_char: str = string.printable
n_all_char: int = len(all_char)


def char2index(char: str):
    """Return the position of `char` in `all_char`, or -1 when it is absent."""
    try:
        position = all_char.index(char)
    except ValueError:
        # Same sentinel that str.find reports for a missing substring.
        position = -1
    return position


def char2tensor(char: str):
    """One-hot encode a single character.

    Args:
        char: The character to encode.

    Returns:
        A float tensor of shape (1, n_all_char) holding a single 1 at the
        character's index. A character that is not in the vocabulary
        (char2index returns -1) yields an all-zero row.
    """
    tensor = torch.zeros(1, n_all_char)
    index = char2index(char)
    # Guard against -1: as a tensor index it means "last column" and would
    # silently alias every unknown character with the last vocabulary entry.
    if index >= 0:
        tensor[0][index] = 1
    return tensor


def line2tensor(line: str):
    """One-hot encode a whole line, one time step per character.

    Args:
        line: The text to encode.

    Returns:
        A float tensor of shape (len(line), 1, n_all_char). Row i holds a
        single 1 at the index of line[i]; a character that is not in the
        vocabulary (char2index returns -1) leaves its row all zeros.
    """
    tensor = torch.zeros(len(line), 1, n_all_char)
    for i, char in enumerate(line):
        index = char2index(char)
        # Guard against -1: as a tensor index it means "last column" and would
        # silently alias unknown characters with the last vocabulary entry.
        if index >= 0:
            tensor[i][0][index] = 1
    return tensor

In [None]:
class RNN(torch.nn.Module):
    """Minimal recurrent cell built from two linear layers.

    At every step the input and the previous hidden state are concatenated;
    one linear layer produces the next hidden state and another produces the
    class scores, which are passed through log-softmax.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Create the layers.

        Args:
            input_size: Width of one input vector.
            hidden_size: Width of the hidden state.
            output_size: Number of output classes.
        """
        super().__init__()

        self.hidden_size = hidden_size

        # Both layers consume the concatenation [input, hidden].
        self.i2h = torch.nn.Linear(input_size + hidden_size, hidden_size)
        self.i2o = torch.nn.Linear(input_size + hidden_size, output_size)
        self.softmax = torch.nn.LogSoftmax(dim=1)

    def forward(self, input, hidden):
        """Run a single time step.

        Args:
            input: 2-D tensor for the current step; it is concatenated with
                `hidden` along dim 1, so both need the same number of rows.
            hidden: Hidden state from the previous step (see initHidden).

        Returns:
            A tuple (output, hidden): log-probabilities over the classes and
            the next hidden state.
        """
        combined = torch.cat((input, hidden), 1)
        hidden = self.i2h(combined)
        output = self.i2o(combined)
        output = self.softmax(output)
        return output, hidden

    def initHidden(self):
        """Return an all-zero initial hidden state of shape (1, hidden_size).

        The tensor is created with the dtype and device of the module's own
        weights, so it stays compatible after `.to(device)` or `.double()`.
        """
        weight = self.i2h.weight
        return torch.zeros(
            1, self.hidden_size, dtype=weight.dtype, device=weight.device
        )


# Width of the recurrent hidden state.
n_hidden = 128

# Model instance: one-hot characters in, one log-probability per category out.
rnn = RNN(
    input_size=n_all_char,
    hidden_size=n_hidden,
    output_size=n_categories,
)

In [None]:
def categoryFromOutput(output):
    """Translate model scores into (category name, category index).

    The index of the single highest score along the last dimension is looked
    up in `all_categories`.
    """
    best_indices = output.topk(1).indices
    category_i = best_indices[0].item()
    return all_categories[category_i], category_i


def randomChoice(l):
    """Return one uniformly chosen element of the sequence `l`."""
    position = random.randrange(len(l))
    return l[position]


def randomTrainingExample():
    """Draw one random (category, line) pair and encode it as tensors.

    Returns:
        A tuple (category, line, category_tensor, line_tensor): the category
        name, the raw text line, a 1-element long tensor holding the
        category's index in `all_categories`, and the line encoded by
        line2tensor.
    """
    category = randomChoice(all_categories)
    line = randomChoice(category_lines[category])
    label_id = all_categories.index(category)
    return (
        category,
        line,
        torch.tensor([label_id], dtype=torch.long),
        line2tensor(line),
    )

In [None]:
# Step size of the hand-written gradient-descent update in train().
learning_rate = 2e-5

# Negative log-likelihood loss; it pairs with the model's LogSoftmax output.
criterion = torch.nn.NLLLoss()


def train(category_tensor, line_tensor):
    """Run one gradient-descent step on a single encoded line.

    The line is fed to `rnn` one time step at a time; the loss is computed on
    the output of the last step only.

    Args:
        category_tensor: Target class index tensor passed to `criterion`.
        line_tensor: Encoded line; its first dimension is the time axis.

    Returns:
        A tuple (output, loss): the model output of the final time step and
        the loss as a Python float.

    Raises:
        ValueError: If `line_tensor` has no time steps. Without this check
            the loop never runs and `output` would be undefined.
    """
    n_steps = line_tensor.size(0)
    if n_steps == 0:
        raise ValueError("line_tensor is empty: cannot train on a zero-length line")

    hidden = rnn.initHidden()

    rnn.zero_grad()

    for i in range(n_steps):
        output, hidden = rnn(line_tensor[i], hidden)
    loss = criterion(output, category_tensor)
    loss.backward()

    # Plain SGD step: p <- p - learning_rate * grad. Done under no_grad so the
    # in-place update is not tracked by autograd.
    with torch.no_grad():
        for p in rnn.parameters():
            if p.grad is not None:
                p.add_(p.grad, alpha=-learning_rate)

    return output, loss.item()

In [None]:
n_iters = 500000
print_every = 500
plot_every = 5000
# Five checkpoints per run. Integer division keeps the modulo test exact, and
# max(1, ...) avoids a zero interval for very small n_iters.
save_every = max(1, n_iters // 5)
checkpoint_dir = "pt"

current_loss = 0
all_losses = []

# Create the checkpoint folder up front: the first torch.save only happens
# after save_every iterations, far too late to discover a missing directory.
os.makedirs(checkpoint_dir, exist_ok=True)

plt.figure()

# The loop variable is called `iteration` so the builtin `iter` is not shadowed
# for the rest of the kernel session.
for iteration in tqdm(range(1, n_iters + 1), file=sys.stdout):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    output, loss = train(category_tensor, line_tensor)
    current_loss += loss

    # Print iteration number, progress, loss, the sample and the guess
    if iteration % print_every == 0:
        guess, guess_i = categoryFromOutput(output)
        correct = "YES" if guess == category else "NO (%s)" % category
        tqdm.write(
            f"{iteration} {iteration / n_iters * 100:.0f}% {loss:.4f} {line} | {guess} {correct}"
        )

    # Record the average loss of the last plot_every steps and redraw the curve
    if iteration % plot_every == 0:
        all_losses.append(current_loss / plot_every)
        current_loss = 0
        clear_output(wait=True)
        plt.plot(all_losses)
        plt.show()

    # Periodically checkpoint the weights together with the iteration count
    if iteration % save_every == 0:
        state = {"state": rnn.state_dict(), "epoch": iteration}
        filename = checkpoint_dir + "/" + str(iteration) + "epo.pt"
        torch.save(state, filename)

In [None]:
# Final loss curve: each point is the mean training loss over one plot_every
# window of iterations.
plt.plot(all_losses)
plt.xlabel("plot interval")
plt.ylabel("average loss")
plt.title("Training loss")
plt.show()

In [None]:
def train_env(line_tensor):
    """Run `rnn` over an encoded line and return the last step's output.

    Despite the name, no training happens here: this is a forward pass only,
    so it runs under torch.no_grad() and builds no autograd graph.

    Args:
        line_tensor: Encoded line; its first dimension is the time axis.

    Returns:
        The model output of the final time step (detached from autograd).

    Raises:
        ValueError: If `line_tensor` has no time steps. Without this check
            the loop never runs and `output` would be undefined.
    """
    n_steps = line_tensor.size(0)
    if n_steps == 0:
        raise ValueError("line_tensor is empty: cannot evaluate a zero-length line")

    hidden = rnn.initHidden()
    with torch.no_grad():
        for i in range(n_steps):
            output, hidden = rnn(line_tensor[i], hidden)
    return output


def evaluate(line):
    """Classify a raw text line and return the predicted category name."""
    final_output = train_env(line2tensor(line))
    # Row 0 of the final output holds the per-category scores.
    label, _ = categoryFromOutput(final_output[0])
    return label


# Estimate accuracy on randomly drawn samples.
# NOTE: randomTrainingExample() samples from category_lines, the same data the
# model was trained on, so this measures training-sample accuracy. A held-out
# split is needed before this can be reported as test accuracy.
correct = 0
total = 10000
for _ in tqdm(range(total)):
    category, line, category_tensor, line_tensor = randomTrainingExample()
    predicted = evaluate(line)
    if predicted == category:
        correct += 1
# %.2f instead of %d so the percentage is not truncated to an integer.
print(
    "accuracy on %d random training samples: %.2f %%"
    % (total, 100 * correct / total)
)