# Setup

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd /content/drive/MyDrive/COL867/NetFound/Shiva_Folder/netFound/src/train/

/content/drive/MyDrive/COL867/NetFound/Shiva_Folder/netFound/src/train


In [3]:
!pip install datasets

Collecting datasets
  Downloading datasets-3.6.0-py3-none-any.whl.metadata (19 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Collecting multiprocess<0.70.17 (from datasets)
  Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)
Collecting fsspec<=2025.3.0,>=2023.1.0 (from fsspec[http]<=2025.3.0,>=2023.1.0->datasets)
  Downloading fsspec-2025.3.0-py3-none-any.whl.metadata (11 kB)
Downloading datasets-3.6.0-py3-none-any.whl (491 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m33.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m13.0 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading fsspec-2025.3.0-py3-none-any.whl 

In [4]:
!pip install torchinfo

Collecting torchinfo
  Downloading torchinfo-1.8.0-py3-none-any.whl.metadata (21 kB)
Downloading torchinfo-1.8.0-py3-none-any.whl (23 kB)
Installing collected packages: torchinfo
Successfully installed torchinfo-1.8.0


In [5]:
!pip install tensorboard



In [6]:
import warnings
from sklearn.exceptions import UndefinedMetricWarning
warnings.filterwarnings("ignore", category=UndefinedMetricWarning)
warnings.filterwarnings("ignore", category=FutureWarning)

import os
import torch
import torch.distributed
import numpy as np
import utils
import random
from dataclasses import field, dataclass
from datasets.distributed import split_dataset_by_node
from typing import Optional
from copy import deepcopy
from torchinfo import summary
from torch.distributed.elastic.multiprocessing.errors import record

from transformers import (
    EvalPrediction,
    HfArgumentParser,
    TrainingArguments,
    EarlyStoppingCallback,
)

from sklearn.metrics import (
    f1_score,
    accuracy_score,
    precision_score,
    recall_score,
    top_k_accuracy_score,
    classification_report, confusion_matrix
)

from NetFoundDataCollator import DataCollatorForFlowClassification
from NetFoundModels import NetfoundFinetuningModel, NetfoundNoPTM
from NetFoundTrainer import NetfoundTrainer
from NetfoundConfig import NetfoundConfig, NetFoundTCPOptionsConfig, NetFoundLarge
from NetfoundTokenizer import NetFoundTokenizer

from utils import ModelArguments, CommonDataTrainingArguments, freeze, verify_checkpoint, \
    load_train_test_datasets, get_90_percent_cpu_count, get_logger, init_tbwriter, update_deepspeed_config, \
    LearningRateLogCallback

random.seed(42)
logger = get_logger(name=__name__)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [7]:
import torch.optim as optim
import torch.nn as nn
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Funtions

In [8]:
@dataclass
class FineTuningDataTrainingArguments(CommonDataTrainingArguments):
    """
    Arguments pertaining to what data we are going to input our model for training and eval.
    """

    num_labels: int = field(metadata={"help": "number of classes in the datasets"}, default=None)
    problem_type: Optional[str] = field(
        default=None,
        metadata={"help": "Override regression or classification task"},
    )
    p_val: float = field(
        default=0,
        metadata={
            "help": "noise rate"
        },
    )
    netfound_large: bool = field(
        default=False,
        metadata={
            "help": "Use the large configuration for netFound model"
        },
    )

In [9]:
def regression_metrics(p: EvalPrediction):
    logits = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    label_ids = p.label_ids.astype(int)
    return {"loss": np.mean(np.absolute((logits - label_ids)))}

In [10]:
def classif_metrics(p: EvalPrediction, num_classes):
    logits = p.predictions[0] if isinstance(p.predictions, tuple) else p.predictions
    label_ids = p.label_ids.astype(int)
    weighted_f1 = f1_score(
        y_true=label_ids, y_pred=logits.argmax(axis=1), average="weighted", zero_division=0
    )
    weighted_prec = precision_score(
        y_true=label_ids, y_pred=logits.argmax(axis=1), average="weighted", zero_division=0
    )
    weighted_recall = recall_score(
        y_true=label_ids, y_pred=logits.argmax(axis=1), average="weighted", zero_division=0
    )
    accuracy = accuracy_score(y_true=label_ids, y_pred=logits.argmax(axis=1))
    logger.warning(classification_report(label_ids, logits.argmax(axis=1), digits=5))
    logger.warning(confusion_matrix(label_ids, logits.argmax(axis=1)))
    if num_classes > 3:
        logger.warning(f"top3:{top_k_accuracy_score(label_ids, logits, k=3, labels=np.arange(num_classes))}")
    if num_classes > 5:
        logger.warning(f"top5:{top_k_accuracy_score(label_ids, logits, k=5, labels=np.arange(num_classes))}")
    if num_classes > 10:
        logger.warning(f"top10:{top_k_accuracy_score(label_ids, logits, k=10, labels=np.arange(num_classes))}")
    return {
        "weighted_f1": weighted_f1,
        "accuracy": accuracy,
        "weighted_prec: ": weighted_prec,
        "weighted_recall": weighted_recall,
    }

# Load Model and Dataset

In [11]:
dataset_folder = r"/content/drive/MyDrive/COL867/NetFound/Shiva_Folder/netFound/data/test/test_finetuning/final/combined/"
pretrained_model_path = r"/content/drive/MyDrive/COL867/NetFound/Shiva_Folder/netFound/models/test/original_pretraining/pretrained_model/"
output_dir = r"/content/drive/MyDrive/COL867/NetFound/Shiva_Folder/netFound/models/test/seperate_finetuning/"

In [12]:
parser = HfArgumentParser((ModelArguments, FineTuningDataTrainingArguments, TrainingArguments))
model_args, data_args, training_args = parser.parse_args_into_dataclasses(args=["--train_dir", dataset_folder, "--test_dir", dataset_folder, "--model_name_or_path", pretrained_model_path, "--output_dir", output_dir, "--report_to", "tensorboard", "--overwrite_output_dir", "--save_safetensors", "false", "--do_train", "--do_eval", "--eval_strategy", "epoch", "--save_strategy", "epoch", "--learning_rate", "0.01", "--num_train_epochs", "1", "--problem_type", "single_label_classification", "--num_labels", "6", "--load_best_model_at_end", "--netfound_large", "True"])
# utils.LOGGING_LEVEL = training_args.get_process_log_level()
utils.LOGGING_LEVEL = 10
logger.setLevel(10)

In [13]:
logger.info(f"model_args: {model_args}")
logger.info(f"data_args: {data_args}")
logger.info(f"training_args: {training_args}")

model_args: ModelArguments(model_name_or_path='/content/drive/MyDrive/COL867/NetFound/Shiva_Folder/netFound/models/test/original_pretraining/pretrained_model/', metaFeatures=4, num_hidden_layers=12, num_attention_heads=12, hidden_size=768, no_ptm=False, freeze_flow_encoder=False, freeze_burst_encoder=False, freeze_embeddings=False, freeze_base=False)
INFO:__main__:model_args: ModelArguments(model_name_or_path='/content/drive/MyDrive/COL867/NetFound/Shiva_Folder/netFound/models/test/original_pretraining/pretrained_model/', metaFeatures=4, num_hidden_layers=12, num_attention_heads=12, hidden_size=768, no_ptm=False, freeze_flow_encoder=False, freeze_burst_encoder=False, freeze_embeddings=False, freeze_base=False)
data_args: FineTuningDataTrainingArguments(train_dir='/content/drive/MyDrive/COL867/NetFound/Shiva_Folder/netFound/data/test/test_finetuning/final/combined/', test_dir='/content/drive/MyDrive/COL867/NetFound/Shiva_Folder/netFound/data/test/test_finetuning/final/combined/', no_met

In [14]:
train_dataset, test_dataset = load_train_test_datasets(logger, data_args)

Loading datasets


Generating train split: 0 examples [00:00, ? examples/s]

In [15]:
len(train_dataset), len(test_dataset)

(1803, 1803)

In [16]:
config = NetFoundTCPOptionsConfig if data_args.tcpoptions else NetfoundConfig

In [17]:
config = config(num_hidden_layers=model_args.num_hidden_layers, num_attention_heads=model_args.num_attention_heads,
        hidden_size=model_args.hidden_size, no_meta=data_args.no_meta, flat=data_args.flat)

In [18]:
if data_args.netfound_large:
    config.hidden_size = NetFoundLarge().hidden_size
    config.num_hidden_layers = NetFoundLarge().num_hidden_layers
    config.num_attention_heads = NetFoundLarge().num_attention_heads

In [19]:
config.pretraining = False
config.num_labels = data_args.num_labels
config.problem_type = data_args.problem_type
testingTokenizer = NetFoundTokenizer(config=config)

In [20]:
training_config = deepcopy(config)
training_config.p = data_args.p_val
training_config.limit_bursts = data_args.limit_bursts
trainingTokenizer = NetFoundTokenizer(config=training_config)
additionalFields = None

In [21]:
params = {"batched": True}

In [22]:
train_dataset = train_dataset.map(function=trainingTokenizer, **params)
test_dataset = test_dataset.map(function=testingTokenizer, **params)

Map:   0%|          | 0/1803 [00:00<?, ? examples/s]

In [23]:
data_collator = DataCollatorForFlowClassification(config.max_burst_length)

In [24]:
logger.warning(f"Using weights from {model_args.model_name_or_path}")

Using weights from /content/drive/MyDrive/COL867/NetFound/Shiva_Folder/netFound/models/test/original_pretraining/pretrained_model/


In [25]:
original_model = NetfoundFinetuningModel.from_pretrained(model_args.model_name_or_path, config=config)

You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [26]:
model = freeze(original_model, model_args)

# Infer Model

In [27]:
saved_weights = os.path.join(output_dir, f'model_epoch_latest.pth')

In [28]:
weights = torch.load(saved_weights)
model.load_state_dict(weights)

<All keys matched successfully>

In [29]:
for x in train_dataset:
    break

In [30]:
px = data_collator([x])

In [31]:
py = model(labels = px['labels'], protocol = px['protocol'], flow_duration = px['flow_duration'], bytes = px['bytes'], iats = px['iats'], input_ids = px['input_ids'], attention_mask = px['attention_mask'], direction = px['direction'], pkt_count = px['pkt_count'])

In [32]:
py.logits

tensor([[ 0.0150, -0.1482,  0.2586,  0.0146, -0.0980, -0.1117]],
       grad_fn=<AddmmBackward0>)

# Train Model

In [33]:
model = model.to(device)

In [34]:
device

device(type='cuda')

In [35]:
model = model.train()

In [36]:
req_keys = ['labels', 'protocol', 'flow_duration', 'bytes', 'iats', 'input_ids', 'attention_mask', 'direction', 'pkt_count', 'total_bursts']

In [37]:
for p in model.base_transformer.parameters():
    p.requires_grad = False
for p in model.attentivePooling.parameters():
    p.requires_grad = False
for p in model.dropout.parameters():
    p.requires_grad = True
for p in model.hiddenLayer.parameters():
    p.requires_grad = True
for p in model.hiddenLayer2.parameters():
    p.requires_grad = True
for p in model.classifier.parameters():
    p.requires_grad = True

In [48]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.0001, momentum=0.9)
batch_size = 64

In [39]:
train_len = len(train_dataset)
train_inds = np.arange(train_len)
np.random.shuffle(train_inds)
train_inds = train_inds.tolist()

In [40]:
xl = []
for i, ind in enumerate(train_inds):
  x = train_dataset[ind]
  if i%batch_size != batch_size - 1:
    xx = {k: x[k] for k in x if k in req_keys}
    xl.append(xx)
    continue

  px = data_collator(xl)
  print(px['labels'])
  xl = []

tensor([0, 1, 0, 3, 3, 1, 3, 4, 4, 2, 0, 2, 2, 4, 4])
tensor([3, 4, 3, 5, 5, 5, 2, 0, 0, 3, 2, 3, 1, 1, 1])
tensor([3, 0, 4, 4, 2, 2, 3, 0, 1, 3, 4, 5, 3, 4, 0])
tensor([5, 2, 4, 5, 4, 2, 3, 1, 2, 1, 0, 3, 2, 2, 0])
tensor([0, 0, 2, 1, 2, 5, 4, 5, 1, 5, 0, 4, 4, 1, 4])
tensor([0, 3, 4, 5, 3, 4, 4, 5, 3, 3, 2, 3, 3, 0, 3])
tensor([1, 2, 1, 0, 1, 3, 0, 1, 0, 5, 1, 1, 4, 4, 3])
tensor([0, 2, 1, 5, 5, 1, 2, 1, 1, 0, 3, 5, 4, 0, 0])
tensor([1, 4, 2, 3, 5, 3, 2, 1, 4, 3, 5, 0, 4, 3, 2])
tensor([0, 2, 5, 4, 0, 3, 3, 2, 2, 4, 4, 4, 2, 0, 4])
tensor([3, 5, 3, 5, 2, 4, 2, 0, 2, 5, 1, 1, 0, 2, 5])
tensor([4, 3, 3, 2, 4, 3, 4, 1, 5, 4, 2, 5, 5, 4, 2])
tensor([2, 5, 5, 5, 4, 2, 5, 0, 5, 4, 3, 2, 3, 0, 5])
tensor([1, 1, 5, 4, 1, 1, 3, 3, 4, 1, 0, 3, 1, 2, 1])
tensor([1, 2, 3, 0, 5, 3, 3, 1, 4, 0, 2, 2, 1, 3, 5])
tensor([0, 0, 1, 1, 0, 4, 2, 0, 5, 5, 0, 4, 0, 3, 2])
tensor([2, 1, 1, 5, 2, 2, 4, 1, 1, 5, 2, 3, 4, 1, 2])
tensor([2, 1, 2, 1, 3, 4, 3, 4, 4, 2, 5, 5, 5, 2, 3])
tensor([4, 5, 4, 0, 2, 3, 1,

In [41]:
# py = model(labels = px['labels'], protocol = px['protocol'], flow_duration = px['flow_duration'], bytes = px['bytes'], iats = px['iats'], input_ids = px['input_ids'], attention_mask = px['attention_mask'], direction = px['direction'], pkt_count = px['pkt_count'])
# loss = criterion(py.logits, px['labels'])
# loss.backward()
# optimizer.step()

In [None]:
for epoch in range(1):  # loop over the dataset multiple times
    running_loss = 0.0
    running_count = 0
    xl = []
    # for i, x in enumerate(train_dataset, 0):
    for i, ind in enumerate(train_inds, 0):
      x = train_dataset[ind]
      try:
        # get the inputs; data is a list of [inputs, labels]
        if i%batch_size != batch_size - 1:
          xx = {k: x[k] for k in x if k in req_keys}
          xl.append(xx)
          continue

        px = data_collator(xl)
        px = {a: b.to(device) for a,b in px.items()}
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        py = model(labels = px['labels'], protocol = px['protocol'], flow_duration = px['flow_duration'], bytes = px['bytes'], iats = px['iats'],
                   input_ids = px['input_ids'], attention_mask = px['attention_mask'], direction = px['direction'], pkt_count = px['pkt_count'])
        loss = criterion(py.logits, px['labels'])
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        running_count += 1
        xl = []
        # if i % (batch_size - 1)*10 == 9:
        print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / running_count:.3f} loss count: {running_count}')
        running_loss = 0.0
        running_count = 0
      except Exception as e:
        print(i, e)
        break

print('Finished Training')

[1,    16] loss: 1.766 loss count: 1
[1,    32] loss: 1.791 loss count: 1
[1,    48] loss: 1.831 loss count: 1
[1,    64] loss: 1.831 loss count: 1
[1,    80] loss: 1.773 loss count: 1
[1,    96] loss: 1.774 loss count: 1
[1,   112] loss: 1.796 loss count: 1
[1,   128] loss: 1.791 loss count: 1
[1,   144] loss: 1.714 loss count: 1
[1,   160] loss: 1.863 loss count: 1
[1,   176] loss: 1.842 loss count: 1
[1,   192] loss: 1.771 loss count: 1
[1,   208] loss: 1.774 loss count: 1
[1,   224] loss: 1.698 loss count: 1
[1,   240] loss: 1.817 loss count: 1
[1,   256] loss: 1.809 loss count: 1
[1,   272] loss: 1.792 loss count: 1
[1,   288] loss: 1.748 loss count: 1
[1,   304] loss: 1.798 loss count: 1
[1,   320] loss: 1.864 loss count: 1
[1,   336] loss: 1.856 loss count: 1
[1,   352] loss: 1.793 loss count: 1
[1,   368] loss: 1.848 loss count: 1
[1,   384] loss: 1.815 loss count: 1
[1,   400] loss: 1.740 loss count: 1
[1,   416] loss: 1.816 loss count: 1
[1,   432] loss: 1.819 loss count: 1
[

In [42]:
ground_truths = []
predictions = []
# since we're not training, we don't need to calculate the gradients for our outputs
xl = []
with torch.no_grad():
    for i, x in enumerate(test_dataset, 0):
      try:
        # get the inputs; data is a list of [inputs, labels]
        if i%batch_size != batch_size - 1:
          xx = {k: x[k] for k in x if k in req_keys}
          xl.append(xx)
          continue

        px = data_collator(xl)
        px = {a: b.to(device) for a,b in px.items()}

        # forward + backward + optimize
        py = model(labels = px['labels'], protocol = px['protocol'], flow_duration = px['flow_duration'], bytes = px['bytes'], iats = px['iats'],
                   input_ids = px['input_ids'], attention_mask = px['attention_mask'], direction = px['direction'], pkt_count = px['pkt_count'])
        # the class with the highest energy is what we choose as prediction
        labels = px['labels']
        outputs = py.logits
        _, predicted = torch.max(outputs, 1)
        ground_truths += labels.cpu().numpy().tolist()
        predictions += predicted.cpu().numpy().tolist()
        xl = []
      except Exception as e:
        print(i, e)
        pass

# Calculate accuracy
accuracy = accuracy_score(ground_truths, predictions)
print('Accuracy: %.3f' % accuracy)

# Calculate precision
precision = precision_score(ground_truths, predictions, average='weighted')
print('Precision: %.3f' % precision)

# Calculate recall
recall = recall_score(ground_truths, predictions, average='weighted')
print('Recall: %.3f' % recall)

Accuracy: 0.188
Precision: 0.316
Recall: 0.188


In [43]:
print(classification_report(ground_truths, predictions, target_names=['0', '1', '2', '3', '4', '5']))

              precision    recall  f1-score   support

           0       0.20      0.06      0.10       274
           1       0.21      0.01      0.03       274
           2       0.18      0.93      0.30       294
           3       0.20      0.05      0.08       269
           4       0.50      0.00      0.01       285
           5       0.58      0.02      0.05       284

    accuracy                           0.19      1680
   macro avg       0.31      0.18      0.09      1680
weighted avg       0.32      0.19      0.09      1680



In [44]:
# torch.save(model.state_dict(), os.path.join(output_dir, 'model_epoch_0.pth'))

In [45]:
prev_accuracy = accuracy

In [None]:
for epoch in range(1, 50):  # loop over the dataset multiple times
    running_loss = 0.0
    running_count = 0
    xl = []
    # for i, x in enumerate(train_dataset, 0):
    for i, ind in enumerate(train_inds, 0):
      x = train_dataset[ind]
      try:
        # get the inputs; data is a list of [inputs, labels]
        if i%batch_size != batch_size - 1:
          xx = {k: x[k] for k in x if k in req_keys}
          xl.append(xx)
          continue

        px = data_collator(xl)
        px = {a: b.to(device) for a,b in px.items()}
        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        py = model(labels = px['labels'], protocol = px['protocol'], flow_duration = px['flow_duration'], bytes = px['bytes'], iats = px['iats'],
                   input_ids = px['input_ids'], attention_mask = px['attention_mask'], direction = px['direction'], pkt_count = px['pkt_count'])
        loss = criterion(py.logits, px['labels'])
        loss.backward()
        optimizer.step()

        # print statistics
        running_loss += loss.item()
        running_count += 1
        xl = []
        # if i % (batch_size - 1)*10 == 9:
      except Exception as e:
        print(i, e)
        break
    print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / running_count:.3f} loss count: {running_count}')

    ground_truths = []
    predictions = []
    # since we're not training, we don't need to calculate the gradients for our outputs
    xl = []
    with torch.no_grad():
        for i, x in enumerate(test_dataset, 0):
          try:
            # get the inputs; data is a list of [inputs, labels]
            if i%batch_size != batch_size - 1:
              xx = {k: x[k] for k in x if k in req_keys}
              xl.append(xx)
              continue

            px = data_collator(xl)
            px = {a: b.to(device) for a,b in px.items()}

            # forward + backward + optimize
            py = model(labels = px['labels'], protocol = px['protocol'], flow_duration = px['flow_duration'], bytes = px['bytes'], iats = px['iats'],
                      input_ids = px['input_ids'], attention_mask = px['attention_mask'], direction = px['direction'], pkt_count = px['pkt_count'])
            # the class with the highest energy is what we choose as prediction
            labels = px['labels']
            outputs = py.logits
            _, predicted = torch.max(outputs, 1)
            ground_truths += labels.cpu().numpy().tolist()
            predictions += predicted.cpu().numpy().tolist()
            xl = []
          except Exception as e:
            print(i, e)
            pass

    # Calculate accuracy
    accuracy = accuracy_score(ground_truths, predictions)
    print('Epoch: %d, Accuracy: %.3f' % (epoch, accuracy))

    # Calculate precision
    precision = precision_score(ground_truths, predictions, average='weighted')
    print('Epoch: %d, Precision: %.3f' % (epoch, precision))

    # Calculate recall
    recall = recall_score(ground_truths, predictions, average='weighted')
    print('Epoch: %d, Recall: %.3f' % (epoch, recall))

    print("Epoch: %d"%epoch)
    print(classification_report(ground_truths, predictions, target_names=['0', '1', '2', '3', '4', '5']))

    if prev_accuracy < accuracy:
      prev_accuracy = accuracy
      torch.save(model.state_dict(), os.path.join(output_dir, f'model_epoch_latest.pth'))
      print("Stored:", epoch)
print('Finished Training')

[2,  1803] loss: 1.787 loss count: 28
Epoch: 1, Accuracy: 0.200
Epoch: 1, Precision: 0.229
Epoch: 1, Recall: 0.200
Epoch: 1
              precision    recall  f1-score   support

           0       0.16      0.07      0.10       288
           1       0.26      0.08      0.12       287
           2       0.19      0.68      0.30       309
           3       0.21      0.06      0.09       283
           4       0.22      0.23      0.22       299
           5       0.33      0.05      0.09       298

    accuracy                           0.20      1764
   macro avg       0.23      0.19      0.15      1764
weighted avg       0.23      0.20      0.16      1764

Stored: 1
[3,  1803] loss: 1.786 loss count: 28
Epoch: 2, Accuracy: 0.189
Epoch: 2, Precision: 0.186
Epoch: 2, Recall: 0.189
Epoch: 2
              precision    recall  f1-score   support

           0       0.19      0.12      0.15       288
           1       0.18      0.09      0.12       287
           2       0.19      0.46   

In [None]:
# for epoch in range(1, 10):  # loop over the dataset multiple times
#     running_loss = 0.0
#     running_count = 0
#     xl = []
#     # for i, x in enumerate(train_dataset, 0):
#     for i, ind in enumerate(train_inds, 0):
#       x = train_dataset[ind]
#       try:
#         # get the inputs; data is a list of [inputs, labels]
#         if i%batch_size != batch_size - 1:
#           xx = {k: x[k] for k in x if k in req_keys}
#           xl.append(xx)
#           continue

#         px = data_collator(xl)
#         px = {a: b.to(device) for a,b in px.items()}
#         # zero the parameter gradients
#         optimizer.zero_grad()

#         # forward + backward + optimize
#         py = model(labels = px['labels'], protocol = px['protocol'], flow_duration = px['flow_duration'], bytes = px['bytes'], iats = px['iats'],
#                    input_ids = px['input_ids'], attention_mask = px['attention_mask'], direction = px['direction'], pkt_count = px['pkt_count'])
#         loss = criterion(py.logits, px['labels'])
#         loss.backward()
#         optimizer.step()

#         # print statistics
#         running_loss += loss.item()
#         running_count += 1
#         xl = []
#         # if i % (batch_size - 1)*10 == 9:
#       except Exception as e:
#         print(i, e)
#         break
#     print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / running_count:.3f} loss count: {running_count}')

#     ground_truths = []
#     predictions = []
#     # since we're not training, we don't need to calculate the gradients for our outputs
#     xl = []
#     with torch.no_grad():
#         for i, x in enumerate(test_dataset, 0):
#           try:
#             # get the inputs; data is a list of [inputs, labels]
#             if i%batch_size != batch_size - 1:
#               xx = {k: x[k] for k in x if k in req_keys}
#               xl.append(xx)
#               continue

#             px = data_collator(xl)
#             px = {a: b.to(device) for a,b in px.items()}

#             # forward + backward + optimize
#             py = model(labels = px['labels'], protocol = px['protocol'], flow_duration = px['flow_duration'], bytes = px['bytes'], iats = px['iats'],
#                       input_ids = px['input_ids'], attention_mask = px['attention_mask'], direction = px['direction'], pkt_count = px['pkt_count'])
#             # the class with the highest energy is what we choose as prediction
#             labels = px['labels']
#             outputs = py.logits
#             _, predicted = torch.max(outputs, 1)
#             ground_truths += labels.cpu().numpy().tolist()
#             predictions += predicted.cpu().numpy().tolist()
#             xl = []
#           except Exception as e:
#             print(i, e)
#             pass

#     # Calculate accuracy
#     accuracy = accuracy_score(ground_truths, predictions)
#     print('Epoch: %d, Accuracy: %.3f' % (epoch, accuracy))

#     # Calculate precision
#     precision = precision_score(ground_truths, predictions, average='weighted')
#     print('Epoch: %d, Precision: %.3f' % (epoch, precision))

#     # Calculate recall
#     recall = recall_score(ground_truths, predictions, average='weighted')
#     print('Epoch: %d, Recall: %.3f' % (epoch, recall))

#     print("Epoch: %d"%epoch)
#     print(classification_report(ground_truths, predictions, target_names=['0', '1', '2', '3', '4', '5']))

#     if prev_accuracy < accuracy:
#       prev_accuracy = accuracy
#       torch.save(model.state_dict(), os.path.join(output_dir, f'model_epoch_latest.pth'))
#       print("Stored:", epoch)
# print('Finished Training')

[2,  1803] loss: 1.795 loss count: 112
Epoch: 1, Accuracy: 0.184
Epoch: 1, Precision: 0.230
Epoch: 1, Recall: 0.184
Epoch: 1
              precision    recall  f1-score   support

           0       0.24      0.05      0.08       274
           1       0.33      0.01      0.03       274
           2       0.18      0.96      0.30       294
           3       0.17      0.01      0.01       269
           4       0.00      0.00      0.00       285
           5       0.47      0.02      0.05       284

    accuracy                           0.18      1680
   macro avg       0.23      0.18      0.08      1680
weighted avg       0.23      0.18      0.08      1680

Stored: 1
[3,  1803] loss: 1.793 loss count: 112
Epoch: 2, Accuracy: 0.177
Epoch: 2, Precision: 0.132
Epoch: 2, Recall: 0.177
Epoch: 2
              precision    recall  f1-score   support

           0       0.18      0.05      0.07       274
           1       0.09      0.00      0.01       274
           2       0.18      0.92 

In [None]:
from google.colab import runtime
runtime.unassign()