## URLGAN

In [None]:
import pickle
import numpy as np
import csv
import pandas as pd
from keras_preprocessing import sequence
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.autograd import Variable
import torch.nn.functional as F
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix

oc_path = "/home/qidai/phishing_DGPD/optional_chars.pkl"
# Vocabulary mapping each allowed URL character to the integer used by the
# encoders below.
# NOTE(review): pickle.load can execute arbitrary code; only load a trusted file.
# The context manager closes the handle, which the original left open.
with open(oc_path, "rb") as f:
    optional_chars = pickle.load(f)
print(optional_chars)

In [None]:
def domain2vector(domain, optional_chars):
    """One-hot encode a batch of URL strings.

    Each character is mapped through ``optional_chars``; characters missing
    from it get index 89. Sequences are post-padded/truncated to 255 entries
    with 0, then every index is expanded into a 90-wide one-hot row.

    Returns a nested Python list shaped (len(domain), 255, 90).
    """
    encoded = [
        [optional_chars[ch] if ch in optional_chars else 89 for ch in text]
        for text in domain
    ]
    padded = sequence.pad_sequences(encoded, maxlen=255, padding="post",
                                    truncating="post", value=0)
    results = []
    for row in padded:
        one_hot_rows = []
        for code in row:
            one_hot = [0] * 90
            one_hot[code] = 1
            one_hot_rows.append(one_hot)
        results.append(one_hot_rows)
    return results

In [None]:
def get_csv_file(path):
    """Read the CSV at ``path`` and return its ``urls`` column as a NumPy array."""
    frame = pd.read_csv(path)
    return frame.urls.to_numpy()
# Legitimate URLs; the GAN training loop below uses them as the "real" samples.
legitimate_url = get_csv_file("./data/URLGAN_legitimate_url.csv")
print(len(legitimate_url))

In [None]:
def sample_gumbel(shape, eps=1e-20):
    """Draw Gumbel(0, 1) noise of the given shape.

    Uniform samples are transformed with ``-log(-log(U))``; ``eps`` guards
    both logarithms against a zero argument. The result is moved to CUDA
    whenever a CUDA device is available.
    """
    uniform = torch.rand(shape)
    uniform = uniform.cuda() if torch.cuda.is_available() else uniform
    neg_log = -torch.log(uniform + eps)
    return -torch.log(neg_log + eps)

def gumbel_softmax_sample(logits, temperature = 0.1):
    """Return a soft sample from the Gumbel-Softmax distribution.

    Gumbel noise is added to ``logits`` and the sum is passed through a
    softmax over the last dimension, scaled by ``temperature``.
    """
    # sample_gumbel places its noise on CUDA whenever CUDA is available, which
    # would clash with logits held on another device (e.g. a CPU-resident model
    # on a GPU machine); align the noise with the logits explicitly.
    noise = sample_gumbel(logits.size()).to(logits.device)
    y = logits + noise
    return F.softmax(y / temperature, dim=-1)

def gumbel_softmax(logits, latent_dim = 255, categorical_dim = 90, temperature=0.75, hard=False):
    """Sample from the Gumbel-Softmax distribution.

    With ``hard=False`` the soft sample is returned. With ``hard=True`` the
    sample is discretised to one-hot along the last dimension, while the
    backward pass still uses the soft sample's gradient (straight-through).
    Either way the result is viewed as (-1, latent_dim, categorical_dim).
    """
    soft = gumbel_softmax_sample(logits, temperature)

    if hard:
        full_shape = soft.size()
        winners = soft.max(dim=-1)[1]
        one_hot = torch.zeros_like(soft).view(-1, full_shape[-1])
        one_hot.scatter_(1, winners.view(-1, 1), 1)
        one_hot = one_hot.view(*full_shape)
        # Forward value is one-hot; gradient flows through `soft` only.
        result = (one_hot - soft).detach() + soft
    else:
        result = soft

    return result.view(-1, latent_dim, categorical_dim)

In [None]:
class Generator(nn.Module):
    """Transposed-convolution generator producing URL-shaped samples.

    A 1000-dimensional noise vector is projected to a 25x12 map and upsampled
    by five bias-free transposed convolutions to a single-channel 255x90 map,
    which is then passed through ``gumbel_softmax``.
    """

    def __init__(self):
        super().__init__()
        self.layer1 = nn.Linear(1000, 25 * 12)
        self.layer2 = nn.Sequential(
            nn.ConvTranspose2d(1, 8, 3, (1, 1), padding=(0, 2), bias=False),
        )
        self.layer3 = nn.Sequential(
            nn.ConvTranspose2d(8, 8, 7, (3, 3), padding=(0, 2), bias=False),
        )
        self.layer4 = nn.Sequential(
            nn.ConvTranspose2d(8, 8, 9, (3, 3), padding=(4, 4), bias=False),
        )
        self.layer5 = nn.Sequential(
            nn.ConvTranspose2d(8, 1, 11, (1, 1), padding=(4, 4), bias=False),
        )
        self.layer6 = nn.Sequential(
            nn.ConvTranspose2d(1, 1, 1, (1, 1), padding=(0, 0), bias=False),
        )

    def forward(self, x):
        """Map noise of shape (batch, 1000) to samples of shape (batch, 255, 90)."""
        projected = self.layer1(x)
        feature = projected.reshape(projected.size(0), 1, 25, 12)
        # Spatial sizes: 25x12 -> 27x10 -> 85x30 -> 253x88 -> 255x90 -> 255x90.
        for stage in (self.layer2, self.layer3, self.layer4, self.layer5, self.layer6):
            feature = stage(feature)
        return gumbel_softmax(feature)

In [None]:
class Dscriminator(nn.Module):
    """Convolutional critic scoring (batch, 255, 90) URL encodings.

    Four stride-2 convolutions reduce the 255x90 input to a 12x2 map
    (24 values), which two linear layers turn into ``num_classes`` scores.
    """

    def __init__(self, num_classes=1):
        super().__init__()

        def conv_block(in_ch, out_ch, kernel):
            # Conv -> BatchNorm -> ReLU, always stride 2 and padding 2.
            return nn.Sequential(
                nn.Conv2d(in_ch, out_ch, kernel_size=kernel, stride=2, padding=2),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(),
            )

        self.layer1 = conv_block(1, 4, 5)
        self.layer2 = conv_block(4, 4, 7)
        self.layer3 = conv_block(4, 4, 9)
        self.layer4 = nn.Sequential(
            nn.Conv2d(4, 1, kernel_size=11, stride=2, padding=2),
        )
        self.fc = nn.Sequential(
            nn.Linear(24, 10),
            nn.Linear(10, num_classes),
        )

    def forward(self, x):
        """Return unbounded scores of shape (batch, num_classes)."""
        features = x.unsqueeze(1)  # add the channel dimension
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            features = stage(features)
        flat = features.reshape(features.size(0), -1)
        return self.fc(flat)

In [None]:
# Hyperparameters for the GAN training loop below.
learning_rate_D = 0.00005
learning_rate_G = 0.00001
batch_size = 512
num_epochs = 500
# Multiplier applied to the gradient penalty in calculate_gradient_penalty.
lambda_term = 100

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Offset added to the epoch number in the checkpoint file names written by the
# training loop.
model_epoch = 237

# Generator
Generator_model = Generator().to(device)
# Optional resume-from-checkpoint code, currently disabled:
# path_of_generator = "./model/URLGAN/Generator_"+str(model_epoch)
# checkpoint = torch.load(path_of_generator)
# Generator_model.load_state_dict(checkpoint['model_state_dict'])
# Generator_model.train()
Generator_optimizer = torch.optim.RMSprop(Generator_model.parameters(), lr=learning_rate_G)


# Discriminator
Dscriminator_model = Dscriminator().to(device)
# Optional resume-from-checkpoint code, currently disabled:
# path_of_dscriminator = "./model/URLGAN/Dscriminator_"+str(98)
# checkpoint = torch.load(path_of_dscriminator)
# Dscriminator_model.load_state_dict(checkpoint['model_state_dict'])
# Dscriminator_model.train()
Dscriminator_optimizer = torch.optim.RMSprop(Dscriminator_model.parameters(), lr=learning_rate_D)

def reset_grad():
    """Zero the gradients held by the generator and discriminator optimizers."""
    for optimizer in (Generator_optimizer, Dscriminator_optimizer):
        optimizer.zero_grad()


In [None]:
def next_batch(num, data, labels):
    """Return up to ``num`` randomly chosen samples with their matching labels.

    The global NumPy RNG is re-seeded on every call. Sampling is without
    replacement; both results are returned as NumPy arrays.
    """
    np.random.seed()
    chosen = np.random.permutation(len(data))[:num]
    picked_data = []
    picked_labels = []
    for position in chosen:
        picked_data.append(data[position])
        picked_labels.append(labels[position])
    return np.asarray(picked_data), np.asarray(picked_labels)


def calculate_gradient_penalty(real_images, fake_images):
    """Compute the WGAN-GP gradient penalty for one batch.

    Random points on the segments between real and fake samples are scored by
    ``Dscriminator_model``; the squared deviation of each sample's gradient
    norm from 1 is averaged and scaled by ``lambda_term``.

    Args:
        real_images: batch of real samples.
        fake_images: batch of generated samples, same shape as ``real_images``.

    Returns:
        Scalar tensor that can be back-propagated into the critic.
    """
    # Size the mixing coefficients from the batch actually passed in rather
    # than the global batch_size, with one coefficient per sample that
    # broadcasts over all remaining dimensions.
    n_samples = real_images.size(0)
    eta = torch.rand(n_samples, *([1] * (real_images.dim() - 1))).to(device)
    interpolated = (eta * real_images + (1 - eta) * fake_images).to(device)
    # Detach so the penalty differentiates w.r.t. the interpolated input only.
    interpolated = interpolated.detach().requires_grad_(True)
    prob_interpolated = Dscriminator_model(interpolated)
    # `autograd` was never imported as a bare name; use the fully-qualified path.
    gradients = torch.autograd.grad(outputs=prob_interpolated, inputs=interpolated,
                                    grad_outputs=torch.ones_like(prob_interpolated),
                                    create_graph=True, retain_graph=True)[0]
    # The penalty is defined on each sample's full gradient, so flatten all
    # non-batch dimensions before taking the L2 norm.
    grad_norm = gradients.reshape(n_samples, -1).norm(2, dim=1)
    grad_penalty = ((grad_norm - 1) ** 2).mean() * lambda_term
    return grad_penalty

def ReCharacterization(data, valid_chars, unknown_char="?"):
    """Decode sequences of character indices back into strings.

    Args:
        data: iterable of index sequences (lists, NumPy rows or tensor rows).
        valid_chars: mapping from character to integer index.
        unknown_char: text emitted for an index that has no entry in
            ``valid_chars`` (previously such an index raised ValueError).

    Returns:
        A list with one decoded string per input sequence. Decoding of a
        sequence stops at the first index equal to 0.
    """
    # Build the reverse lookup once instead of a linear list.index() per
    # character. setdefault keeps the first key for a duplicated index, which
    # matches what list.index() returned.
    index_to_char = {}
    for char, index in valid_chars.items():
        index_to_char.setdefault(index, char)

    data_char = []
    for domain in data:
        domain_char = []
        for index in domain:
            index = int(index)  # 0-d tensors / NumPy scalars -> plain int key
            if index == 0:
                break
            domain_char.append(index_to_char.get(index, unknown_char))
        data_char.append("".join(domain_char))
    return data_char


In [None]:

# WGAN-GP style training: one critic update followed by two generator updates
# per step, with periodic sample printing and checkpointing.
X_train = legitimate_url
# Placeholder labels; next_batch requires a label sequence but the GAN losses
# never read batch_y.
y_train = [0 for i in range(len(X_train))]

total_step = len(X_train)//batch_size
len_train = len(X_train)
training_times = len_train//batch_size
# Gradient seeds used to choose the sign with which each loss term is
# back-propagated.
one = torch.FloatTensor([1]).to(device)
mone = one * -1


for epoch in range(num_epochs):
    for i in range(training_times):

        # ================================================================== #
        #                      Train the discriminator                       #
        # ================================================================== #
        # real data loss
        batch_x, batch_y = next_batch(batch_size, X_train, y_train)
        batch_x = domain2vector(batch_x, optional_chars)
        batch_x_real = torch.tensor(batch_x).to(device).float()
        outputs = Dscriminator_model(batch_x_real)
        loss_real = outputs.mean(0).view(1)

        # fake data loss
        batch_x = torch.randn(batch_size, 1000).to(device).float()
        outputs = Generator_model(batch_x)
        batch_x_fake = outputs
        outputs = Dscriminator_model(outputs)
        loss_fake = outputs.mean(0).view(1)
#         with torch.backends.cudnn.flags(enabled=False):
        gradient_penalty = calculate_gradient_penalty(batch_x_real, batch_x_fake)
        # d_loss is only reported in the log line below; the three terms are
        # back-propagated individually.
        d_loss = loss_real - loss_fake + gradient_penalty


        # Backward and optimize
        reset_grad()
        # Lowers the critic score on real data and raises it on fake data.
        loss_real.backward(one)
        # The fake batch is not detached, so this also accumulates generator
        # gradients; reset_grad() clears them before the generator step.
        loss_fake.backward(mone)
        gradient_penalty.backward()
        Dscriminator_optimizer.step()


        # ================================================================== #
        #                          Train the Generator                       #
        # ================================================================== #
        for k in range(2):
            batch_x = torch.randn(batch_size, 1000).to(device).float()
            outputs = Generator_model(batch_x)
            outputs = Dscriminator_model(outputs)
            # The generator minimises the critic score of its samples.
            g_loss = outputs.mean(0).view(1)

            # Backward and optimize
            reset_grad()
            g_loss.backward(one)
            Generator_optimizer.step()

        if i%20 == 0:
            print('Epoch [{}/{}], Step [{}/{}], loss_real: {:.4f}, loss_fake: {:.4f}, d_loss: {:.4f}, gradient_penalty: {:.4f}, g_loss: {:.4f}' 
                  .format(epoch, num_epochs, i+1, total_step, loss_real.item(), loss_fake.item(), d_loss.item(), gradient_penalty.item(), g_loss.item()))

    # Every second epoch, decode and print five generated samples.
    if (epoch+1)%2 == 0:
        batch_x = torch.randn(5, 1000).to(device).float()
        outputs = Generator_model(batch_x)
        outputs = torch.squeeze(outputs)
        # Most likely character index at each of the 255 positions.
        outputs = torch.argmax(outputs, dim=2)
        domain = ReCharacterization(outputs, optional_chars)
        print(domain)

    # Every tenth epoch, save both models; file names carry epoch+model_epoch.
    if (epoch+1)%10 == 0:
        state = {
            'model_state_dict': Generator_model.state_dict(),
        }
        torch.save(state, "./model/URLGAN/D_Generator1000_"+str(epoch+model_epoch))
        state = {
            'model_state_dict': Dscriminator_model.state_dict(),
        }
        torch.save(state, "./model/URLGAN/D_Dscriminator1000_"+str(epoch+model_epoch))

## Pre-training

In [None]:
def get_csv_file(path):
    """Return the ``urls`` column of the CSV at ``path`` as a NumPy array."""
    table = pd.read_csv(path)
    urls = table.urls
    return urls.to_numpy()

def get_txt_file(path):
    """Return the Python object written on the first line of a text file.

    The first line is expected to hold a Python expression (e.g. a list
    literal); any further lines are ignored.

    Raises:
        IndexError: if the file is empty.
    """
    # Close the handle deterministically; the original never closed it.
    with open(path) as f:
        lines = f.readlines()
    # NOTE(review): eval executes arbitrary code from the file. If the files
    # only ever contain literals, ast.literal_eval is the safe replacement.
    lines = eval(lines[0])
    return lines
    
def get_csv_file_url(path):
    """Read the CSV at ``path`` and return its ``url`` column as a NumPy array."""
    frame = pd.read_csv(path)
    return frame.url.to_numpy()

# Phishing URLs, one CSV per year (each read from its `urls` column).
phishing_url_2018 = get_csv_file("/home/qidai/phishing_DGPD/data/phishing_url_2018.csv")
phishing_url_2019 = get_csv_file("/home/qidai/phishing_DGPD/data/phishing_url_2019.csv")
phishing_url_2020 = get_csv_file("/home/qidai/phishing_DGPD/data/phishing_url_2020.csv")
phishing_url_2021 = get_csv_file("/home/qidai/phishing_DGPD/data/phishing_url_2021.csv")
phishing_url_2022 = get_csv_file("/home/qidai/phishing_DGPD/data/phishing_url_2022.csv")

# Legitimate URL sets: `_url2vec` feeds the masked pre-training below and
# `_dgpd` the detection classifier.
legitimate_url_dgpd = get_csv_file("/home/qidai/phishing_DGPD/data/DGPD_legitimate_url.csv")
legitimate_url_url2vec = get_csv_file("/home/qidai/phishing_DGPD/data/URL2Vec_legitimate_url.csv")
legitimate_url_urlgan = get_csv_file("/home/qidai/phishing_DGPD/data/URLGAN_legitimate_url_origin.csv")



# Sanity check: number of URLs loaded from each file.
print(len(phishing_url_2018))
print(len(phishing_url_2019))
print(len(phishing_url_2020))
print(len(phishing_url_2021))
print(len(phishing_url_2022))
print(len(legitimate_url_dgpd))
print(len(legitimate_url_url2vec))
print(len(legitimate_url_urlgan))

In [None]:
def domain2vector(domain, optional_chars):
    """Encode URL strings as fixed-length integer index sequences.

    Characters are looked up in ``optional_chars``; characters missing from
    it map to 89. Each sequence is post-padded/truncated to 255 entries with 0.
    """
    encoded = [
        [optional_chars[ch] if ch in optional_chars else 89 for ch in text]
        for text in domain
    ]
    return sequence.pad_sequences(encoded, maxlen = 255, padding = "post",
                                  truncating = "post", value = 0)

def domain2vector_ignore(domain, optional_chars):
    """Encode label strings, mapping spaces to the ignored index 0.

    Works like ``domain2vector`` except that a space becomes 0. Unknown
    characters map to 89 and sequences are post-padded/truncated to 255.
    The global NumPy RNG is re-seeded as a side effect.
    """
    np.random.seed()

    def encode(ch):
        # Space marks an ignored position; otherwise vocabulary index or 89.
        if ch == " ":
            return 0
        if ch in optional_chars:
            return optional_chars[ch]
        return 89

    encoded = [[encode(ch) for ch in text] for text in domain]
    return sequence.pad_sequences(encoded, maxlen = 255, padding = "post",
                                  truncating = "post", value = 0)

def add_mlm(data, rate = 0.15, optional_chars = optional_chars):
    """Corrupt strings for masked-character training.

    For each string, ``int(len * rate)`` distinct positions are selected at
    random. Each selected position is replaced by a space with probability
    0.6, replaced by a random vocabulary key with probability 0.2, and left
    unchanged otherwise.

    Args:
        data: sequence of strings.
        rate: fraction of positions selected per string.
        optional_chars: vocabulary mapping; replacements are drawn from its
            keys at list positions 1..88.

    Returns:
        ``(data_p, pos_p)``: the corrupted strings and, per string, the array
        of selected positions. The global NumPy RNG is re-seeded on each call.
    """
    np.random.seed()
    # Hoisted: the original rebuilt this list for every replaced character.
    candidates = list(optional_chars.keys())
    data_p = []
    pos_p = []
    for domain in data:
        l = len(domain)
        idx = np.arange(0, l)
        np.random.shuffle(idx)
        selected = idx[0:int(l*rate)]
        pos_p.append(selected)
        for pos in selected:
            uni = np.random.uniform(0, 1)
            if uni < 0.6:
                domain = domain[0:pos] + ' ' + domain[pos+1:]
            elif uni < 0.8:
                domain = domain[0:pos] + candidates[np.random.randint(1, 89)] + domain[pos+1:]
        data_p.append(domain)
    return data_p, pos_p

def add_mask(data, rate = 0.15):
    """Replace a random ``rate`` fraction of each string's characters with spaces.

    ``int(len * rate)`` distinct positions are blanked per string. The global
    NumPy RNG is re-seeded on each call.
    """
    np.random.seed()
    masked = []
    for text in data:
        length = len(text)
        order = np.arange(0, length)
        np.random.shuffle(order)
        for pos in order[:int(length * rate)]:
            text = text[0:pos] + ' ' + text[pos + 1:]
        masked.append(text)
    return masked

def add_random_char(data, rate = 0.15, optional_chars = optional_chars):
    """Replace a random ``rate`` fraction of each string's characters.

    ``int(len * rate)`` distinct positions per string are overwritten with
    keys of ``optional_chars`` drawn from list positions 1..88. The global
    NumPy RNG is re-seeded on each call.

    Returns:
        List of corrupted strings, one per input string.
    """
    np.random.seed()
    # Hoisted: the original rebuilt this list for every replaced character.
    candidates = list(optional_chars.keys())
    data_p = []
    for domain in data:
        l = len(domain)
        idx = np.arange(0, l)
        np.random.shuffle(idx)
        for pos in idx[0:int(l*rate)]:
            domain = domain[0:pos] + candidates[np.random.randint(1, 89)] + domain[pos+1:]
        data_p.append(domain)
    return data_p

In [None]:
# Build the masked-character pre-training set from legitimate URLs.
training_set = []
training_set.extend(legitimate_url_url2vec[:2000000])

training_set_mask = []
training_label_mask = []

# data_p: corrupted inputs; pos_p: per-URL positions selected for prediction.
data_p, pos_p = add_mlm(training_set, 0.15)
training_set_mask.extend(data_p)
training_label_mask.extend(training_set)

# Targets keep the original character only at the selected positions; every
# other position becomes a space, which domain2vector_ignore encodes as 0.
# A set gives O(1) membership (the array scan was O(L) per character) and
# join avoids repeated string concatenation.
for i, original in enumerate(training_label_mask):
    selected = {int(p) for p in pos_p[i]}
    training_label_mask[i] = "".join(
        ch if j in selected else " " for j, ch in enumerate(original)
    )

training_set_mask = domain2vector(training_set_mask, optional_chars)
training_label_mask = domain2vector_ignore(training_label_mask, optional_chars)


test_set_mask = []
test_label_mask = []



In [None]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a batch, then apply dropout.

    Args:
        num_hiddens: embedding width (odd widths are supported).
        dropout: dropout probability applied after the addition.
        max_len: longest sequence length the precomputed table covers.
    """

    def __init__(self, num_hiddens, dropout, max_len=1000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(dropout)
        P = torch.zeros((1, max_len, num_hiddens))
        X = torch.arange(max_len, dtype=torch.float32).reshape(
            -1, 1) / torch.pow(10000, torch.arange(
            0, num_hiddens, 2, dtype=torch.float32) / num_hiddens)
        P[:, :, 0::2] = torch.sin(X)
        # For odd num_hiddens there is one fewer cosine column than sine.
        P[:, :, 1::2] = torch.cos(X[:, :num_hiddens // 2])
        # A non-persistent buffer follows the module across .to()/.cuda()
        # without being added to state_dict, so existing checkpoints still load.
        self.register_buffer("P", P, persistent=False)

    def forward(self, X):
        """Add encodings to ``X`` of shape (batch, seq_len, num_hiddens)."""
        # .to() is a no-op when the buffer already lives on X's device.
        X = X + self.P[:, :X.shape[1], :].to(X.device)
        return self.dropout(X)

class Net(nn.Module):
    """Transformer encoder that predicts a character index at every position.

    Input indices are embedded (100 x 128), combined with positional
    encodings, passed through six encoder layers, and projected to 90 logits
    per position.
    """

    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(100, 128)
        self.pos_encoding = PositionalEncoding(128, 0)
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=128, nhead=8, dropout=0.2, dim_feedforward=256)
        self.trm_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=6)
        self.output = nn.Linear(128, 90)

    def forward(self, x):
        """Map indices of shape (batch, seq) to logits of shape (batch, seq, 90)."""
        embedded = self.embedding(x)
        # The encoder is fed (seq, batch, feature), so swap the first two axes.
        seq_first = self.pos_encoding(embedded).permute(1, 0, 2)
        encoded = self.trm_encoder(seq_first)
        logits = self.output(encoded)
        return logits.permute(1, 0, 2)

def next_batch(num, data, labels):
    """Return a total of `num` random samples and labels.

    The global NumPy RNG is re-seeded on every call. Samples are drawn
    without replacement and both results are NumPy arrays.
    """
    np.random.seed()
    chosen = np.random.permutation(len(data))[:num]
    picked_data = []
    picked_labels = []
    for position in chosen:
        picked_data.append(data[position])
        picked_labels.append(labels[position])
    return np.asarray(picked_data), np.asarray(picked_labels)

In [None]:
# Masked-character pre-training of the Transformer encoder (Net).
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 400
batch_size = 256
learning_rate = 1e-4

# Masked-character model
model = Net().to(device)
# Optional resume-from-checkpoint code, currently disabled:
# path_of_model = "./model/URL2Vec/DQ_URL2Vec"
# checkpoint = torch.load(path_of_model)
# model.load_state_dict(checkpoint['model_state_dict'])
# model.train()
# Target index 0 (padding and the spaces written by the label builder) is
# excluded from the loss.
model_criterion = nn.CrossEntropyLoss(ignore_index = 0)
model_optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)


X_train = training_set_mask
y_train = training_label_mask

total_step = len(X_train)//batch_size
len_train = len(X_train)
training_time = len_train//batch_size

for epoch in range(num_epochs):
    loss_rec = []
    for i in range(training_time):

        batch_x, batch_y = next_batch(batch_size, X_train, y_train)
        batch_x = torch.LongTensor(batch_x).to(device)
        # Flatten targets to one class index per (sample, position).
        batch_y = torch.LongTensor(batch_y).to(device).reshape(-1)
        
        # Forward pass
        # Flatten logits to (batch * seq, 90) to match the flattened targets.
        outputs = model(batch_x).reshape(-1, 90)
        loss = model_criterion(outputs, batch_y)
        loss_rec.append(loss.item())
        
        # Backward and optimize
        model_optimizer.zero_grad()
        loss.backward()
        model_optimizer.step()

    # epoch%1 is always 0, so the mean loss is printed and a checkpoint is
    # written after every epoch.
    if(epoch%1 == 0):
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.12f}'.format(epoch+1, num_epochs, i+1, total_step, np.mean(loss_rec)))
        state = {'model_state_dict': model.state_dict()}
        # NOTE(review): the +26 offset presumably continues the numbering of an
        # earlier run -- confirm.
        torch.save(state, "./model/DQ_URL2Vec_"+str(epoch+26))

## Detection

In [None]:
def domain2vector(domain, optional_chars):
    """Turn URL strings into padded integer index sequences of length 255.

    Each character is replaced by its ``optional_chars`` index, or 89 when it
    is not in the vocabulary; padding and truncation happen at the end, with
    0 as the pad value.
    """
    rows = []
    for text in domain:
        rows.append([optional_chars[ch] if ch in optional_chars else 89 for ch in text])
    padded = sequence.pad_sequences(rows, maxlen = 255, padding = "post",
                                    truncating = "post", value = 0)
    return padded

def domain2vector_ignore(domain, optional_chars):
    """Encode strings like ``domain2vector`` but map spaces to index 0.

    Unknown characters map to 89; sequences are post-padded/truncated to 255
    with 0. The global NumPy RNG is re-seeded as a side effect.
    """
    np.random.seed()
    rows = []
    for text in domain:
        row = []
        for ch in text:
            if ch == " ":
                code = 0
            elif ch in optional_chars:
                code = optional_chars[ch]
            else:
                code = 89
            row.append(code)
        rows.append(row)
    return sequence.pad_sequences(rows, maxlen = 255, padding = "post",
                                  truncating = "post", value = 0)

def add_mlm(data, rate = 0.15, optional_chars = optional_chars):
    """Corrupt strings for masked-character training.

    For each string, ``int(len * rate)`` distinct random positions are
    selected. A selected position becomes a space with probability 0.6, a
    random vocabulary key with probability 0.2, and is otherwise left as is.

    Args:
        data: sequence of strings.
        rate: fraction of positions selected per string.
        optional_chars: vocabulary mapping; replacements are drawn from its
            keys at list positions 1..88.

    Returns:
        ``(data_p, pos_p)``: corrupted strings and the per-string arrays of
        selected positions. The global NumPy RNG is re-seeded on each call.
    """
    np.random.seed()
    # Hoisted: the original rebuilt this list for every replaced character.
    candidates = list(optional_chars.keys())
    data_p = []
    pos_p = []
    for domain in data:
        l = len(domain)
        idx = np.arange(0, l)
        np.random.shuffle(idx)
        selected = idx[0:int(l*rate)]
        pos_p.append(selected)
        for pos in selected:
            uni = np.random.uniform(0, 1)
            if uni < 0.6:
                domain = domain[0:pos] + ' ' + domain[pos+1:]
            elif uni < 0.8:
                domain = domain[0:pos] + candidates[np.random.randint(1, 89)] + domain[pos+1:]
        data_p.append(domain)
    return data_p, pos_p

def add_mask(data, rate = 0.15):
    """Blank out ``int(len * rate)`` random positions of each string with spaces.

    The global NumPy RNG is re-seeded on each call.
    """
    np.random.seed()
    result = []
    for original in data:
        size = len(original)
        positions = np.arange(0, size)
        np.random.shuffle(positions)
        current = original
        for pos in positions[:int(size * rate)]:
            current = current[0:pos] + ' ' + current[pos + 1:]
        result.append(current)
    return result

def add_random_char(data, rate = 0.15, optional_chars = optional_chars):
    """Overwrite a random ``rate`` fraction of each string's characters.

    ``int(len * rate)`` distinct positions per string are replaced with keys
    of ``optional_chars`` drawn from list positions 1..88. The global NumPy
    RNG is re-seeded on each call.

    Returns:
        List of corrupted strings, one per input string.
    """
    np.random.seed()
    # Hoisted: the original rebuilt this list for every replaced character.
    candidates = list(optional_chars.keys())
    data_p = []
    for domain in data:
        l = len(domain)
        idx = np.arange(0, l)
        np.random.shuffle(idx)
        for pos in idx[0:int(l*rate)]:
            domain = domain[0:pos] + candidates[np.random.randint(1, 89)] + domain[pos+1:]
        data_p.append(domain)
    return data_p

In [None]:
class PositionalEncoding(nn.Module):
    """Add fixed sinusoidal positional encodings to a batch, then apply dropout.

    Args:
        num_hiddens: embedding width (odd widths are supported).
        dropout: dropout probability applied after the addition.
        max_len: longest sequence length the precomputed table covers.
    """

    def __init__(self, num_hiddens, dropout, max_len=1000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(dropout)
        P = torch.zeros((1, max_len, num_hiddens))
        X = torch.arange(max_len, dtype=torch.float32).reshape(
            -1, 1) / torch.pow(10000, torch.arange(
            0, num_hiddens, 2, dtype=torch.float32) / num_hiddens)
        P[:, :, 0::2] = torch.sin(X)
        # For odd num_hiddens there is one fewer cosine column than sine.
        P[:, :, 1::2] = torch.cos(X[:, :num_hiddens // 2])
        # A non-persistent buffer follows the module across .to()/.cuda()
        # without being added to state_dict, so existing checkpoints still load.
        self.register_buffer("P", P, persistent=False)

    def forward(self, X):
        """Add encodings to ``X`` of shape (batch, seq_len, num_hiddens)."""
        # .to() is a no-op when the buffer already lives on X's device.
        X = X + self.P[:, :X.shape[1], :].to(X.device)
        return self.dropout(X)

class Classifier(nn.Module):
    """Transformer-based binary URL classifier.

    The embedding / positional encoding / encoder / ``output`` stack is
    followed by a per-position head (90 -> 32 -> 1 with ReLU) and a
    sequence-level head (255 -> 64 -> 1 with sigmoid). The reshape in
    ``forward`` requires sequences of exactly 255 positions.
    """

    def __init__(self):
        super().__init__()
        self.embedding = nn.Embedding(100, 128)
        self.pos_encoding = PositionalEncoding(128, 0)
        self.encoder_layer = nn.TransformerEncoderLayer(
            d_model=128, nhead=8, dropout=0.2, dim_feedforward=256)
        self.trm_encoder = nn.TransformerEncoder(self.encoder_layer, num_layers=6)
        self.output = nn.Linear(128, 90)

        # Per-position head.
        self.l1 = nn.Linear(90, 32)
        self.l2 = nn.Linear(32, 1)
        self.l3 = nn.ReLU()

        # Sequence-level head.
        self.l4 = nn.Linear(255, 64)
        self.l5 = nn.Linear(64, 1)
        self.l6 = nn.Sigmoid()

    def forward(self, x):
        """Map indices of shape (batch, 255) to probabilities of shape (batch, 1)."""
        embedded = self.embedding(x)
        # The encoder is fed (seq, batch, feature), so swap the first two axes.
        seq_first = self.pos_encoding(embedded).permute(1, 0, 2)
        encoded = self.trm_encoder(seq_first)
        per_char = self.output(encoded).permute(1, 0, 2)
        hidden = self.l3(self.l1(per_char))
        scores = self.l3(self.l2(hidden))
        # One scalar per position -> one 255-vector per sample.
        flat = scores.reshape(-1, 255)
        return self.l6(self.l5(self.l4(flat)))

def next_batch(num, data, labels):
    """Return up to ``num`` randomly chosen samples and their labels as arrays.

    The global NumPy RNG is re-seeded on every call; sampling is without
    replacement.
    """
    np.random.seed()
    chosen = np.random.permutation(len(data))[:num]
    picked_data = []
    picked_labels = []
    for position in chosen:
        picked_data.append(data[position])
        picked_labels.append(labels[position])
    return np.asarray(picked_data), np.asarray(picked_labels)

In [None]:
# Training set: 160000 legitimate URLs (label 0), a space-masked copy and a
# random-character copy of them (both label 0), then up to 160000 phishing
# URLs from 2018 (label 1).
_legit_train = list(legitimate_url_dgpd[0:160000])
_legit_labels = [0] * len(_legit_train)

training_set_p = add_mask(_legit_train, 0.15) + add_random_char(_legit_train, 0.15)
training_label_p = _legit_labels + _legit_labels

_phish_train = list(phishing_url_2018[0:160000])
training_set = _legit_train + training_set_p + _phish_train
training_label = _legit_labels + training_label_p + [1] * len(_phish_train)

training_set = domain2vector(training_set, optional_chars)


def _build_test_split(phishing_urls):
    """Encode held-out legitimate URLs (label 0) plus phishing URLs (label 1).

    Both sources are sliced from index 160000 onward.
    """
    urls = list(legitimate_url_dgpd[160000:])
    labels = [0] * len(urls)
    urls.extend(phishing_urls[160000:])
    labels.extend([1] * (len(urls) - len(labels)))
    return domain2vector(urls, optional_chars), labels


# One test split per phishing year; all share the same legitimate URLs.
test_set_2018, test_label_2018 = _build_test_split(phishing_url_2018)
test_set_2019, test_label_2019 = _build_test_split(phishing_url_2019)
test_set_2020, test_label_2020 = _build_test_split(phishing_url_2020)
test_set_2021, test_label_2021 = _build_test_split(phishing_url_2021)
test_set_2022, test_label_2022 = _build_test_split(phishing_url_2022)

In [None]:
# Hyperparameters and model for the phishing-detection classifier.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_epochs = 80
batch_size = 128
learning_rate = 1e-5

# Detection classifier
# NOTE(review): no pre-trained checkpoint is loaded in the visible code, so the
# model starts from random weights here -- confirm whether the pre-training
# weights should be restored before fine-tuning.
model = Classifier().to(device)
model.train()
# Classifier ends in a sigmoid, so plain binary cross-entropy is used.
model_criterion = nn.BCELoss()
model_optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [None]:
# Freeze everything, then re-enable gradients only for the classification
# heads, the `output` projection and the last encoder layer.
for para in model.parameters():
    para.requires_grad = False
model.l1.weight.requires_grad = True
model.l1.bias.requires_grad = True
model.l2.weight.requires_grad = True
model.l2.bias.requires_grad = True
model.l4.weight.requires_grad = True
model.l4.bias.requires_grad = True
model.l5.weight.requires_grad = True
model.l5.bias.requires_grad = True
model.output.weight.requires_grad = True
model.output.bias.requires_grad = True

# Index 5 is the last of the six encoder layers.
for para in model.trm_encoder.layers[5].parameters():
    para.requires_grad = True

X_train = training_set
y_train = training_label

total_step = len(X_train)//batch_size
len_train = len(X_train)
training_time = len_train//batch_size

for epoch in range(num_epochs):
    loss_rec = []
    for i in range(training_time):

        batch_x, batch_y = next_batch(batch_size, X_train, y_train)
        batch_x = torch.LongTensor(batch_x).to(device)
        batch_y = torch.Tensor(batch_y).to(device)
        # Shape the labels as (batch, 1) to match the classifier output.
        batch_y = torch.unsqueeze(batch_y,1)
        

        # Forward pass
        outputs = model(batch_x)
        loss = model_criterion(outputs, batch_y)
        loss_rec.append(loss.item())
        
        # Backward and optimize
        model_optimizer.zero_grad()
        loss.backward()
        model_optimizer.step()

    # epoch%1 is always 0, so the mean loss is printed after every epoch.
    if(epoch%1 == 0):
        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.12f}'.format(epoch+1, num_epochs, i+1, total_step, np.mean(loss_rec)))
