In [1]:
%pip install kagglehub

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.0 -> 25.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [2]:


from torch.utils.data import DataLoader

import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader

import pandas as pd

In [3]:
from senmodel.model.utils import *
from senmodel.metrics.nonlinearity_metrics import *
from senmodel.metrics.edge_finder import *
from senmodel.metrics.train_metrics import *
from senmodel.train.train import *

In [4]:
torch.manual_seed(0)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

In [5]:
class SimpleFCN(nn.Module):
    def __init__(self, input_size=87, hidden_size=16):
        super(SimpleFCN, self).__init__()
        self.fc0 = nn.Linear(input_size, hidden_size)
        self.fc1 = nn.Linear(hidden_size, 2)
        self.act = nn.ReLU()

    def forward(self, x):
        # x = self.fc0(x)
        return self.fc1(self.act(self.fc0(x)))

In [6]:
hyperparams = {
    "num_epochs": 64,
    "batch_size": 256,
    "metric": AbsGradientEdgeMetric(nn.CrossEntropyLoss()),
    "aggregation_mode": "mean",
    "choose_thresholds": {"fc0": 0.2}, # 1.0 -> no edges, 0.0 -> all edges
    "choose_thresholds_del": {"fc0": 0.1}, # 1.0 -> all edges, 0.0 -> no edges
    "threshold": 0.95,
    "min_delta_epoch_replace": 8,
    "window_size": 50000000000000000,
    "lr": 2e-5,
    "delete_after": 2,    
    "task_type": "classification",
    "fully_connected": False,
    "max_to_replace": 900 # None -> no limit
}

name = ", ".join(
    f"{key}: {value.__class__.__name__ if key == 'metric' else value}"
    for key, value in hyperparams.items()
)

name

"num_epochs: 64, batch_size: 256, metric: AbsGradientEdgeMetric, aggregation_mode: mean, choose_thresholds: {'fc0': 0.2}, choose_thresholds_del: {'fc0': 0.1}, threshold: 0.95, min_delta_epoch_replace: 8, window_size: 50000000000000000, lr: 2e-05, delete_after: 2, task_type: classification, fully_connected: False, max_to_replace: 900"

In [7]:
import kagglehub

dname = "dataset_phishing.csv"
path = kagglehub.dataset_download("shashwatwork/web-page-phishing-detection-dataset")

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
df_data = pd.read_csv(f'{path}\\{dname}')
print(df_data.shape) 
print(df_data.head())
print(df_data[df_data["status"] == "legitimate"].shape)
print(df_data[df_data["status"] == "phishing"].shape)
df_data.loc[df_data["status"] == "legitimate", "status"] = 1
df_data.loc[df_data["status"] == "phishing", "status"] = 0
X = df_data.iloc[:, 1:-1]
Y = df_data.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)



(11430, 89)
                                                 url  length_url  \
0              http://www.crestonwood.com/router.php          37   
1  http://shadetreetechnology.com/V4/validation/a...          77   
2  https://support-appleld.com.secureupdate.duila...         126   
3                                 http://rgipt.ac.in          18   
4  http://www.iracing.com/tracks/gateway-motorspo...          55   

   length_hostname  ip  nb_dots  nb_hyphens  nb_at  nb_qm  nb_and  nb_or  ...  \
0               19   0        3           0      0      0       0      0  ...   
1               23   1        1           0      0      0       0      0  ...   
2               50   1        4           1      0      1       2      0  ...   
3               11   0        2           0      0      0       0      0  ...   
4               15   0        2           2      0      0       0      0  ...   

   domain_in_title  domain_with_copyright  whois_registered_domain  \
0                0    

In [9]:
class TabularDataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X.values, dtype=torch.float32)
        self.y = torch.tensor(y.astype(int).values, dtype=torch.long)  

    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

train_dataset = TabularDataset(X_train, y_train)
val_dataset = TabularDataset(X_test, y_test)

train_loader = DataLoader(train_dataset, batch_size=hyperparams['batch_size'], shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=hyperparams['batch_size'], shuffle=False)


In [10]:
model = SimpleFCN()
sparse_model = convert_dense_to_sparse_network(model, layers=[model.fc0], device=device)

In [11]:
import wandb

wandb.login()

[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mfedornigretuk[0m to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


True

In [12]:
wandb.finish()
run = wandb.init(
    project="senmodel-phishing",
    name=f"trash",
    config=hyperparams
)

In [13]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=hyperparams['lr'], weight_decay=1e-4)
train_sparse_recursive(sparse_model, train_loader, train_loader, val_loader, criterion, optimizer, hyperparams, device)

100%|██████████| 36/36 [00:00<00:00, 211.47it/s]


Epoch 1/64, Train Loss: 7077.6168, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 205.40it/s]


Epoch 2/64, Train Loss: 7046.8763, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 221.93it/s]


Epoch 3/64, Train Loss: 7060.1835, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 190.49it/s]


Epoch 4/64, Train Loss: 7074.3368, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 229.10it/s]


Epoch 5/64, Train Loss: 7051.7802, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 204.12it/s]


Epoch 6/64, Train Loss: 7052.6687, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 228.80it/s]


Epoch 7/64, Train Loss: 7077.9003, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 170.90it/s]


Epoch 8/64, Train Loss: 7083.6850, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 206.26it/s]


Epoch 9/64, Train Loss: 7069.4711, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 242.65it/s]


Epoch 10/64, Train Loss: 7066.8008, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 224.66it/s]


Epoch 11/64, Train Loss: 7075.5727, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 239.87it/s]


Epoch 12/64, Train Loss: 7061.4820, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 239.74it/s]


Epoch 13/64, Train Loss: 7065.9682, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 169.09it/s]


Epoch 14/64, Train Loss: 7051.6250, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 212.17it/s]


Epoch 15/64, Train Loss: 7041.2006, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 235.02it/s]


Epoch 16/64, Train Loss: 7074.4011, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 220.56it/s]


Epoch 17/64, Train Loss: 7065.5657, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 150.06it/s]


Epoch 18/64, Train Loss: 7087.9373, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 187.85it/s]


Epoch 19/64, Train Loss: 7101.3749, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 167.31it/s]


Epoch 20/64, Train Loss: 7058.3342, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 202.07it/s]


Epoch 21/64, Train Loss: 7065.6110, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 152.80it/s]


Epoch 22/64, Train Loss: 7083.5790, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 183.15it/s]


Epoch 23/64, Train Loss: 7087.1387, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 223.75it/s]


Epoch 24/64, Train Loss: 7069.6874, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 185.80it/s]


Epoch 25/64, Train Loss: 7065.5377, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 213.63it/s]


Epoch 26/64, Train Loss: 7044.2616, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 191.23it/s]


Epoch 27/64, Train Loss: 7061.8160, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 209.90it/s]


Epoch 28/64, Train Loss: 7078.1976, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 176.51it/s]


Epoch 29/64, Train Loss: 7066.3212, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 182.56it/s]


Epoch 30/64, Train Loss: 7054.1434, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 238.63it/s]


Epoch 31/64, Train Loss: 7065.3712, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 201.01it/s]


Epoch 32/64, Train Loss: 7068.8837, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 229.29it/s]


Epoch 33/64, Train Loss: 7066.9326, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 233.59it/s]


Epoch 34/64, Train Loss: 7075.2984, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 231.19it/s]


Epoch 35/64, Train Loss: 7059.7258, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 189.17it/s]


Epoch 36/64, Train Loss: 7046.7878, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 209.14it/s]


Epoch 37/64, Train Loss: 7058.6000, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 136.60it/s]


Epoch 38/64, Train Loss: 7079.4838, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 200.85it/s]


Epoch 39/64, Train Loss: 7070.6259, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 227.83it/s]


Epoch 40/64, Train Loss: 7075.6765, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 205.80it/s]


Epoch 41/64, Train Loss: 7075.7381, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 196.75it/s]


Epoch 42/64, Train Loss: 7048.6733, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 279.67it/s]


Epoch 43/64, Train Loss: 7056.2525, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 223.78it/s]


Epoch 44/64, Train Loss: 7075.6009, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 196.67it/s]


Epoch 45/64, Train Loss: 7089.8661, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 226.72it/s]


Epoch 46/64, Train Loss: 7051.7108, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 208.38it/s]


Epoch 47/64, Train Loss: 7099.0725, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 202.71it/s]


Epoch 48/64, Train Loss: 7060.1869, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 204.85it/s]


Epoch 49/64, Train Loss: 7060.4485, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 199.01it/s]


Epoch 50/64, Train Loss: 7078.5170, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 223.94it/s]


Epoch 51/64, Train Loss: 7066.0529, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 219.70it/s]


Epoch 52/64, Train Loss: 7070.8763, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 184.58it/s]


Epoch 53/64, Train Loss: 7033.6881, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 220.74it/s]


Epoch 54/64, Train Loss: 7056.0066, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 238.87it/s]


Epoch 55/64, Train Loss: 7065.7869, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 238.70it/s]


Epoch 56/64, Train Loss: 7052.8734, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 227.02it/s]


Epoch 57/64, Train Loss: 7067.3038, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 212.24it/s]


Epoch 58/64, Train Loss: 7054.0096, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 202.41it/s]


Epoch 59/64, Train Loss: 7068.4892, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 228.32it/s]


Epoch 60/64, Train Loss: 7077.5995, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 233.64it/s]


Epoch 61/64, Train Loss: 7054.9407, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 228.70it/s]


Epoch 62/64, Train Loss: 7072.1771, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 201.02it/s]


Epoch 63/64, Train Loss: 7057.0996, Val Loss: 7371.4661, Val Accuracy: 0.4790


100%|██████████| 36/36 [00:00<00:00, 193.67it/s]


Epoch 64/64, Train Loss: 7060.4205, Val Loss: 7371.4661, Val Accuracy: 0.4790
