In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import string
import pickle

from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from torch.utils.data import Dataset, DataLoader, ConcatDataset, Subset

In [2]:
# Character vocabulary: lowercase ASCII letters, digits and the dot.
ALPHABET = f"{string.ascii_lowercase}{string.digits}."
# Character -> index, numbered from 1 so that index 0 stays free for padding.
char2idx = dict(zip(ALPHABET, range(1, len(ALPHABET) + 1)))
# Index -> character (inverse of char2idx; index 0 has no entry).
idx2char = dict(zip(char2idx.values(), char2idx.keys()))
# Number of distinct indices, padding included.
vocab_size = 1 + len(char2idx)

# Fixed length every encoded domain is truncated / zero-padded to.
MAX_LEN = 50


def domain_to_tensor(domain):
    """Encode a domain string as a fixed-length tensor of character indices.

    The string is lower-cased and each character is looked up in
    ``char2idx``; characters missing from that table become 0, the same
    value used for padding. The sequence is cut to ``MAX_LEN`` entries and
    right-padded with 0 when shorter.

    Returns:
        A 1-D ``torch.long`` tensor with exactly ``MAX_LEN`` elements.
    """
    encoded = []
    for ch in domain.lower()[:MAX_LEN]:
        encoded.append(char2idx.get(ch, 0))

    # Right-pad with the padding index up to the fixed length.
    encoded.extend([0] * (MAX_LEN - len(encoded)))
    return torch.tensor(encoded, dtype=torch.long)


def tensor_to_domain(tensor):
    """Decode a tensor of character indices back into a string.

    Entries that are not strictly positive (i.e. padding) are skipped, and
    positive indices with no entry in ``idx2char`` contribute nothing.
    Characters that ``domain_to_tensor`` encoded as 0 therefore do not
    survive a round trip.
    """
    chars = []
    for index in tensor.tolist():
        if index > 0:
            chars.append(idx2char.get(index, ""))
    return "".join(chars)


def load_dataset(file_path):
    """Unpickle and return the object stored at ``file_path``.

    A confirmation line is printed once the file has been read. Despite the
    wording of that message, the function returns whatever object was
    pickled, unchanged.

    NOTE: ``pickle.load`` can execute arbitrary code while loading, so only
    use this on files from a trusted source.
    """
    with open(file_path, mode='rb') as handle:
        loaded = pickle.load(handle)

    print(f"DataLoader loaded from {file_path}.")
    return loaded


class DomainDataset(Dataset):
    """Map-style dataset over ``(domain, label)`` pairs.

    Encoding is lazy: each time an item is fetched, its domain string is
    converted to an index tensor by ``domain_to_tensor``; the label is
    returned exactly as stored.
    """

    def __init__(self, samples):
        # Indexable collection whose items unpack into (domain, label).
        self.samples = samples

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return ``(encoded_domain, label)`` for the sample at ``idx``."""
        domain, label = self.samples[idx]
        return domain_to_tensor(domain), label

In [3]:
# Training hyperparameters; batch_size is the one consumed by the DataLoaders.
num_epochs, batch_size = 10, 32

# Optimizer settings.
learning_rate = 1e-5
momentum = 0.9

In [4]:
    # Load the pickled benign / DGA splits and combine them into one training
    # set and one test set.
    # NOTE(review): DATA_DIR is an absolute, machine-specific path; point it at
    # your local copy of the data before running.
    DATA_DIR = "/home/dung/Downloads/Model/test/domain2"
    SOURCES = ("benign", "dga_1", "dga_2", "dga_3", "dga_4")

    # For each source, load its train split and then its test split.
    train_parts, test_parts = [], []
    for source in SOURCES:
        train_parts.append(load_dataset(f"{DATA_DIR}/{source}_train.pkl"))
        test_parts.append(load_dataset(f"{DATA_DIR}/{source}_test.pkl"))

    # Expose each split under its own name as well (order follows SOURCES).
    benign_train_ds, dga_1_train_ds, dga_2_train_ds, dga_3_train_ds, dga_4_train_ds = train_parts
    benign_test_ds, dga_1_test_ds, dga_2_test_ds, dga_3_test_ds, dga_4_test_ds = test_parts

    # Concatenation keeps SOURCES order, so the benign items come first.
    train_ds = ConcatDataset(train_parts)
    train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

    test_ds = ConcatDataset(test_parts)
    test_loader = DataLoader(test_ds, batch_size=batch_size, shuffle=False)

DataLoader loaded from /home/dung/Downloads/Model/test/domain2/benign_train.pkl.
DataLoader loaded from /home/dung/Downloads/Model/test/domain2/benign_test.pkl.
DataLoader loaded from /home/dung/Downloads/Model/test/domain2/dga_1_train.pkl.
DataLoader loaded from /home/dung/Downloads/Model/test/domain2/dga_1_test.pkl.
DataLoader loaded from /home/dung/Downloads/Model/test/domain2/dga_2_train.pkl.
DataLoader loaded from /home/dung/Downloads/Model/test/domain2/dga_2_test.pkl.
DataLoader loaded from /home/dung/Downloads/Model/test/domain2/dga_3_train.pkl.
DataLoader loaded from /home/dung/Downloads/Model/test/domain2/dga_3_test.pkl.
DataLoader loaded from /home/dung/Downloads/Model/test/domain2/dga_4_train.pkl.
DataLoader loaded from /home/dung/Downloads/Model/test/domain2/dga_4_test.pkl.


In [5]:
def take_mini_data(dataset, n, seed=None):
    """Return a ``Subset`` holding at most ``n`` items of ``dataset``.

    Args:
        dataset: Any map-style dataset supporting ``len()`` and indexing.
        n: Requested number of items. Values larger than ``len(dataset)``
            are capped; non-positive values give an empty subset.
        seed: ``None`` (default) keeps the first ``n`` items in their
            original order. An integer instead selects ``n`` distinct items
            at random, reproducibly for that seed. Prefer this when
            ``dataset`` is ordered by class (e.g. a concatenation of
            per-class datasets), where the head may contain a single class.

    Returns:
        ``torch.utils.data.Subset`` over the selected indices.
    """
    total = len(dataset)
    size = max(0, min(n, total))

    if seed is None:
        indices = list(range(size))
    else:
        # Dedicated generator: reproducible without touching the global RNG.
        generator = torch.Generator().manual_seed(seed)
        indices = torch.randperm(total, generator=generator)[:size].tolist()

    return Subset(dataset, indices)
# Small subsets (at most 1000 items each) for quick experiments.
# NOTE(review): train_ds / test_ds concatenate the benign split first, so the
# first 1000 items may all be benign -- confirm both classes are present, or
# sample the indices at random instead.
df_train, df_test = (take_mini_data(full_ds, 1000) for full_ds in (train_ds, test_ds))

df_train_loader = DataLoader(dataset=df_train, shuffle=True, batch_size=batch_size)
df_test_loader = DataLoader(dataset=df_test, shuffle=False, batch_size=batch_size)


AttributeError: 'Subset' object has no attribute 'columns'