In [1]:
import pandas as pd
import torch
import numpy as np
from transformers import AutoConfig, AutoModelForSequenceClassification
from transformers import CanineForSequenceClassification, CanineTokenizer, BertTokenizer
from torch.utils.data import Dataset
from datasets import load_metric
from sklearn.metrics import roc_auc_score
from scipy.special import softmax
from transformers import TrainingArguments
from transformers import Trainer
from sklearn.utils import resample
import torch.nn as nn

In [2]:
def compute_metrics(eval_pred):
    precision = load_metric("precision")
    recall = load_metric("recall")
    f1 = load_metric("f1")
    acc = load_metric("accuracy")
    mcc = load_metric("matthews_correlation")
    #auc = load_metric("auc")
    
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)
    precision = precision.compute(predictions=predictions, average = "macro", references=labels)["precision"]
    recall = recall.compute(predictions=predictions, average = "macro", references=labels)["recall"]
    f1 = f1.compute(predictions=predictions, average = "macro", references=labels)["f1"]
    acc = acc.compute(predictions=predictions, references=labels)["accuracy"]
    mcc = mcc.compute(predictions=predictions, references=labels)["matthews_correlation"]
    auc = roc_auc_score(labels, softmax(logits, axis=1), multi_class='ovo', average='macro')
    return {"precision": precision, "recall": recall, "acc": acc, "mcc": mcc, "f1": f1, "auc":auc}

In [3]:
class MalwareDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

In [4]:
class RandomTransformerClassifier:
    def __init__(self, trainingset, validationset, testset, num_classes, epochs=10, batch_size=8, model_name='google/canine-s', max_sequence_length=2048):
        self.trainingset = trainingset
        self.validationset = validationset
        self.testset = testset
        self.num_classes = num_classes
        self.model_name = model_name
        if 'google/canine' == model_name.split('-')[0]:
            self.tokenizer = CanineTokenizer.from_pretrained(self.model_name)
        else:
            self.tokenizer = BertTokenizer.from_pretrained(self.model_name)
            
        self.tokenizer.model_max_length = max_sequence_length
        self.base_model = AutoModelForSequenceClassification.from_pretrained(self.model_name, num_labels=num_classes)
        self.training_args = TrainingArguments(
        output_dir='./results',          # output directory
        #do_train=True,
        do_eval=True,
        evaluation_strategy='epoch',
        save_strategy='epoch',
        load_best_model_at_end=True,
        metric_for_best_model='eval_auc',
        greater_is_better=True,
        fp16=True,
        num_train_epochs=epochs,              # total number of training epochs
        per_device_train_batch_size=batch_size,  # batch size per device during training
        per_device_eval_batch_size=batch_size,   # batch size for evaluation
        lr_scheduler_type='cosine',
        warmup_steps=500,                # number of warmup steps for learning rate scheduler
        weight_decay=0.01,               # strength of weight decay
        dataloader_num_workers=16,
        #logging_dir='./logs',            # directory for storing logs
        #logging_steps=10,
    )
        
    def fit(self, n_estimators=2):
        self.validation_preds = []
        self.test_preds = []
        for i in range(n_estimators):
            #bagging_trainset = self.trainingset.sample(frac=1, replace=True)
            bagging_trainset = resample(self.trainingset, replace=True, stratify=self.trainingset['class'])
            train_encodings = self.tokenizer(self.get_list_strs(bagging_trainset.api), padding="max_length", truncation=True, return_tensors="pt")
            val_encodings = self.tokenizer(self.get_list_strs(self.validationset.api), padding="max_length", truncation=True, return_tensors="pt")
            test_encodings = self.tokenizer(self.get_list_strs(self.testset.api), padding="max_length", truncation=True, return_tensors="pt")
            
            trainset = MalwareDataset(train_encodings, bagging_trainset['class'].values)
            valset = MalwareDataset(val_encodings, self.validationset['class'].values)
            testset = MalwareDataset(test_encodings, self.testset['class'].values)
            
            trainer = WeightedLossTrainer(
                model=self.base_model, args=self.training_args, train_dataset=trainset, eval_dataset=valset,
                compute_metrics=compute_metrics
            )
            
            trainer.train()
            
            val_preds = trainer.predict(valset)
            test_preds = trainer.predict(testset)
            self.validation_preds.append(val_preds)
            self.test_preds.append(test_preds)
            del bagging_trainset
            del train_encodings
            del val_encodings
            del test_encodings
            del trainset
            del valset
            del testset
            del trainer
            torch.cuda.empty_cache()
            print(f'{i + 1}. estimator is done....')
    def get_preds(self):
        return self.validation_preds, self.test_preds
    
    def get_metrics(self):
        #np.argmax((softmax(val[0].predictions, axis=1) + softmax(val[1].predictions, axis=1))/2, axis=1)
        val_preds = np.zeros_like(self.validation_preds[0].predictions)
        for val in self.validation_preds:
            val_preds += softmax(val.predictions, axis=1)
        
        val_logits = val_preds / len(self.validation_preds)
        val_preds = np.argmax(val_preds / len(self.validation_preds), axis=1)
        
        test_preds = np.zeros_like(self.test_preds[0].predictions)
        for test in self.test_preds:
            test_preds += softmax(test.predictions, axis=1)
        
        test_logits = test_preds / len(self.test_preds)
        test_preds = np.argmax(test_preds / len(self.test_preds), axis=1)
        precision = load_metric("precision")
        recall = load_metric("recall")
        f1 = load_metric("f1")
        acc = load_metric("accuracy")
        mcc = load_metric("matthews_correlation")
    
        val_precision = precision.compute(predictions=val_preds, average = "macro", references=self.validationset["class"].values)["precision"]
        val_recall = recall.compute(predictions=val_preds, average = "macro", references=self.validationset["class"].values)["recall"]
        val_f1 = f1.compute(predictions=val_preds, average = "macro", references=self.validationset["class"].values)["f1"]
        val_acc = acc.compute(predictions=val_preds, references=self.validationset["class"].values)["accuracy"]
        val_mcc = mcc.compute(predictions=val_preds, references=self.validationset["class"].values)["matthews_correlation"]
        val_auc = roc_auc_score(self.validationset["class"].values, softmax(val_logits, axis=1), multi_class='ovo', average='macro')
        
        test_precision = precision.compute(predictions=test_preds, average = "macro", references=self.testset["class"].values)["precision"]
        test_recall = recall.compute(predictions=test_preds, average = "macro", references=self.testset["class"].values)["recall"]
        test_f1 = f1.compute(predictions=test_preds, average = "macro", references=self.testset["class"].values)["f1"]
        test_acc = acc.compute(predictions=test_preds, references=self.testset["class"].values)["accuracy"]
        test_mcc = mcc.compute(predictions=test_preds, references=self.testset["class"].values)["matthews_correlation"]
        test_auc = roc_auc_score(self.testset["class"].values, softmax(test_logits, axis=1), multi_class='ovo', average='macro')

        return {"val_precision": val_precision, "val_recall": val_recall, "val_acc": val_acc, "val_mcc": val_mcc, "val_f1": val_f1, "val_auc":val_auc}, {"test_precision": test_precision, "test_recall": test_recall, "test_acc": test_acc, "test_mcc": test_mcc, "test_f1": test_f1, "test_auc":test_auc}
        

        
    def get_list_strs(self, df):
        lst_str = []
        for i in range(len(df)):
            str_ = df.values[i]
            lst_str.append(str_)
        return lst_str
    

In [5]:
malware_calls_df = pd.read_csv("../datasets/GraphApiNames.csv")

In [6]:
malware_calls_df.columns = ["api", "class"]

In [7]:
malware_calls_df.head()

Unnamed: 0,api,class
0,NtClose NtOpenKey NtQueryValueKey NtClose NtOp...,Downloader
1,GetSystemTimeAsFileTime NtAllocateVirtualMemor...,Downloader
2,SetErrorMode OleInitialize LdrGetDllHandle Ldr...,Downloader
3,SetErrorMode OleInitialize LdrGetDllHandle Ldr...,Downloader
4,GetSystemTimeAsFileTime NtAllocateVirtualMemor...,Downloader


In [8]:
malware_calls_df["class"].value_counts()

Trojan        31979
Adware         5444
Downloader     1948
Ransomware      404
Agent           220
Riskware        216
Backdoor        135
Dropper         118
Virus           102
Name: class, dtype: int64

In [9]:
malware_calls_df = malware_calls_df[malware_calls_df["class"] != "Undefined"]

In [10]:
CAT2IDX = {
    'Trojan': 0,
    'Adware': 1,
    'Downloader': 2,
    'Ransomware': 3,
    'Agent': 4,
    'Riskware': 5,
    'Backdoor': 6,
    'Dropper': 7,
    'Virus': 8
}

IDX2CAT = {
    0:'Trojan',
    1:'Adware',
    2:'Downloader',
    3:'Ransomware',
    4:'Agent',
    5:'Riskware',
    6:'Backdoor',
    7:'Dropper',
    8:'Virus'   
}

In [11]:
malware_calls_df['class'] = malware_calls_df['class'].map(lambda x: CAT2IDX[x])

In [12]:
from sklearn.model_selection import train_test_split
X_train, X_test = train_test_split(malware_calls_df,
test_size=0.2, random_state=75, stratify = malware_calls_df['class'])

In [13]:
X_train.head()

Unnamed: 0,api,class
32871,GetSystemTimeAsFileTime LdrLoadDll LdrGetProce...,0
37975,NtAllocateVirtualMemory SetErrorMode LoadStrin...,0
2269,GetSystemTimeAsFileTime LdrLoadDll LdrGetProce...,1
8251,NtDuplicateObject LdrGetDllHandle LdrGetProced...,5
30750,GetSystemTimeAsFileTime LdrLoadDll LdrGetProce...,0


In [14]:
X_train, X_val = train_test_split(X_train, test_size=0.2, random_state=75, stratify = X_train['class'])

In [15]:
X_train.head()

Unnamed: 0,api,class
34919,GetSystemTimeAsFileTime GetSystemInfo NtCreate...,0
32251,GetSystemTimeAsFileTime LdrLoadDll LdrGetProce...,0
39529,GetSystemTimeAsFileTime LdrLoadDll LdrGetProce...,0
35779,GetSystemTimeAsFileTime LdrLoadDll LdrGetProce...,0
16297,GetSystemTimeAsFileTime LdrLoadDll LdrGetProce...,0


In [16]:
class_weights = (1 - (malware_calls_df['class'].value_counts().sort_index() / len(malware_calls_df))).values
class_weights = torch.from_numpy(class_weights).float().to("cuda")
class_weights

tensor([0.2117, 0.8658, 0.9520, 0.9900, 0.9946, 0.9947, 0.9967, 0.9971, 0.9975],
       device='cuda:0')

In [17]:
from transformers import Trainer
class WeightedLossTrainer(Trainer):
    def compute_loss(self, model, inputs, return_outputs=False):
        outputs = model(**inputs)
        logits = outputs.get("logits")
        labels = inputs.get("labels")
        loss_func = nn.CrossEntropyLoss(weight=class_weights)
        loss = loss_func(logits, labels)
        return (loss, outputs) if return_outputs else loss

In [18]:
classifier = RandomTransformerClassifier(X_train, X_val, X_test, num_classes=9, 
                                         epochs=20, batch_size=16, max_sequence_length=256) 
                                         #model_name = "bert-base-cased")
                                         
                                        

Using unk_token, but it is not set yet.
Using unk_token, but it is not set yet.
Using unk_token, but it is not set yet.
Using unk_token, but it is not set yet.
Using unk_token, but it is not set yet.
Using unk_token, but it is not set yet.
Using unk_token, but it is not set yet.
Using unk_token, but it is not set yet.
Some weights of CanineForSequenceClassification were not initialized from the model checkpoint at google/canine-s and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [19]:
classifier.fit(n_estimators=2)

Using amp fp16 backend
***** Running training *****
  Num examples = 25961
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 16240
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


Epoch,Training Loss,Validation Loss,Precision,Recall,Acc,Mcc,F1,Auc
1,1.1429,0.913344,0.332996,0.3178,0.849484,0.543858,0.322848,0.712092
2,0.8387,0.828855,0.338979,0.3234,0.854876,0.559974,0.328908,0.791609
3,0.8073,0.859531,0.331535,0.324883,0.846403,0.525956,0.323682,0.758102
4,0.7638,0.806887,0.421527,0.363689,0.849792,0.553845,0.371967,0.779911
5,0.7729,0.822006,0.427269,0.365184,0.844708,0.543453,0.342339,0.785783
6,0.7547,0.829104,0.384056,0.341528,0.84024,0.53873,0.349359,0.783549
7,0.7402,0.81565,0.377638,0.375344,0.849638,0.555806,0.374757,0.804333
8,0.7339,0.795936,0.497856,0.388075,0.852873,0.564969,0.411063,0.804346
9,0.714,0.808915,0.493319,0.397549,0.810815,0.500507,0.413582,0.807342
10,0.6962,0.816007,0.441617,0.396021,0.849022,0.559135,0.407357,0.80381


***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/checkpoint-812
Configuration saved in ./results/checkpoint-812/config.json
Model weights saved in ./results/checkpoint-812/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/checkpoint-1624
Configuration saved in ./results/checkpoint-1624/config.json
Model weights saved in ./results/checkpoint-1624/pytorch_model.bin
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/checkpoint-2436
Configuration saved in ./results/checkpoint-2436/config.json
Model weights saved in ./results/checkpoint-2436/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/checkpoint-3248
Configuration saved in ./results/checkpoint-3248/config.json
Model weights saved in ./results/checkpoint-3248/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/checkpoint-4060
Configuration saved in ./results/checkpoint-4060/config.json
Model weights saved in ./results/checkpoint-4060/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/checkpoint-4872
Configuration saved in ./results/checkpoint-4872/config.json
Model weights saved in ./results/checkpoint-4872/pytorch_model.bin
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/checkpoint-5684
Configuration saved in ./results/checkpoint-5684/config.json
Model weights saved in ./results/checkpoint-5684/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-6496
Configuration saved in ./results/checkpoint-6496/config.json
Model weights saved in ./results/checkpoint-6496/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-7308
Configuration saved in ./results/checkpoint-7308/config.json
Model weights saved in ./results/checkpoint-7308/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-8120
Configuration saved in ./results/checkpoint-8120/config.json
Model weights saved in ./results/checkpoint-8120/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-8932
Configuration saved in ./results/checkpoint-8932/config.json
Model weights saved in ./results/checkpoint-8932/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/checkpoint-9744
Configuration saved in ./results/checkpoint-9744/config.json
Model weights saved in ./results/checkpoint-9744/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-10556
Configuration saved in ./results/checkpoint-10556/config.json
Model weights saved in ./results/checkpoint-10556/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-11368
Configuration saved in ./results/checkpoint-11368/config.json
Model weights saved in ./results/checkpoint-11368/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


Saving model checkpoint to ./results/checkpoint-12180
Configuration saved in ./results/checkpoint-12180/config.json
Model weights saved in ./results/checkpoint-12180/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-12992
Configuration saved in ./results/checkpoint-12992/config.json
Model weights saved in ./results/checkpoint-12992/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-13804
Configuration saved in ./results/checkpoint-13804/config.json
Model weights saved in ./results/checkpoint-13804/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-14616
Configuration saved in ./results/checkpoint-14616/config.json
Model weights saved in ./results/checkpoint-14616/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-15428
Configuration saved in ./results/checkpoint-15428/config.json
Model weights saved in ./results/checkpoint-15428/pytorch_model.bin
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-16240
Configuration saved in ./results/checkpoint-16240/config.json
Model weights saved in ./results/checkpoint-16240/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./results/checkpoint-16240 (score: 0.8107783687195773).
***** Running Prediction *****
  Num examples = 6491
  Batch size = 32
  import sys


***** Running Prediction *****
  Num examples = 8114
  Batch size = 32
  import sys


1. estimator is done....


Using amp fp16 backend
***** Running training *****
  Num examples = 25961
  Num Epochs = 20
  Instantaneous batch size per device = 16
  Total train batch size (w. parallel, distributed & accumulation) = 32
  Gradient Accumulation steps = 1
  Total optimization steps = 16240
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


Epoch,Training Loss,Validation Loss,Precision,Recall,Acc,Mcc,F1,Auc
1,0.7372,0.81689,0.454557,0.35646,0.810507,0.492608,0.364678,0.792913
2,0.7538,0.795573,0.397143,0.39738,0.848714,0.557707,0.395627,0.814021
3,0.7385,0.822908,0.423172,0.386371,0.81821,0.51146,0.388208,0.816509
4,0.7477,0.813141,0.447692,0.405299,0.852719,0.564988,0.419418,0.811175
5,0.7436,0.812833,0.381866,0.391077,0.851487,0.56359,0.38274,0.81454
6,0.7293,0.80968,0.488082,0.425316,0.844092,0.552197,0.424165,0.811237
7,0.7135,0.840898,0.415459,0.418774,0.816515,0.510295,0.411876,0.796196
8,0.6978,0.825391,0.406023,0.422755,0.816515,0.510584,0.410616,0.815592
9,0.7018,0.844548,0.458763,0.43324,0.840086,0.542131,0.435401,0.804777
10,0.6972,0.836155,0.435737,0.415145,0.847173,0.554318,0.417439,0.813535


***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  _warn_prf(average, modifier, msg_start, len(result))
Saving model checkpoint to ./results/checkpoint-812
Configuration saved in ./results/checkpoint-812/config.json
Model weights saved in ./results/checkpoint-812/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-1624
Configuration saved in ./results/checkpoint-1624/config.json
Model weights saved in ./results/checkpoint-1624/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-2436
Configuration saved in ./results/checkpoint-2436/config.json
Model weights saved in ./results/checkpoint-2436/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-3248
Configuration saved in ./results/checkpoint-3248/config.json
Model weights saved in ./results/checkpoint-3248/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-4060
Configuration saved in ./results/checkpoint-4060/config.json
Model weights saved in ./results/checkpoint-4060/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-4872
Configuration saved in ./results/checkpoint-4872/config.json
Model weights saved in ./results/checkpoint-4872/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-5684
Configuration saved in ./results/checkpoint-5684/config.json
Model weights saved in ./results/checkpoint-5684/pytorch_model.bin
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-6496
Configuration saved in ./results/checkpoint-6496/config.json
Model weights saved in ./results/checkpoint-6496/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-7308
Configuration saved in ./results/checkpoint-7308/config.json
Model weights saved in ./results/checkpoint-7308/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-8120
Configuration saved in ./results/checkpoint-8120/config.json
Model weights saved in ./results/checkpoint-8120/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-8932
Configuration saved in ./results/checkpoint-8932/config.json
Model weights saved in ./results/checkpoint-8932/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-9744
Configuration saved in ./results/checkpoint-9744/config.json
Model weights saved in ./results/checkpoint-9744/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-10556
Configuration saved in ./results/checkpoint-10556/config.json
Model weights saved in ./results/checkpoint-10556/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-11368
Configuration saved in ./results/checkpoint-11368/config.json
Model weights saved in ./results/checkpoint-11368/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-12180
Configuration saved in ./results/checkpoint-12180/config.json
Model weights saved in ./results/checkpoint-12180/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-12992
Configuration saved in ./results/checkpoint-12992/config.json
Model weights saved in ./results/checkpoint-12992/pytorch_model.bin
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-13804
Configuration saved in ./results/checkpoint-13804/config.json
Model weights saved in ./results/checkpoint-13804/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-14616
Configuration saved in ./results/checkpoint-14616/config.json
Model weights saved in ./results/checkpoint-14616/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-15428
Configuration saved in ./results/checkpoint-15428/config.json
Model weights saved in ./results/checkpoint-15428/pytorch_model.bin
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys


  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
***** Running Evaluation *****
  Num examples = 6491
  Batch size = 32
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
  import sys
Saving model checkpoint to ./results/checkpoint-16240
Configuration saved in ./results/checkpoint-16240/config.json
Model weights saved in ./results/checkpoint-16240/pytorch_model.bin


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./results/checkpoint-8932 (score: 0.8199670544255049).
***** Running Prediction *****
  Num examples = 6491
  Batch size = 32
  import sys


***** Running Prediction *****
  Num examples = 8114
  Batch size = 32
  import sys


2. estimator is done....


In [20]:
val, test = classifier.get_metrics()

In [21]:
val

{'val_precision': 0.43842148358666283,
 'val_recall': 0.42447677558703084,
 'val_acc': 0.8254506239408411,
 'val_mcc': 0.5235035905531275,
 'val_f1': 0.4238843079877546,
 'val_auc': 0.811729091809667}

In [22]:
test

{'test_precision': 0.44505920322185666,
 'test_recall': 0.43491572216126323,
 'test_acc': 0.8238846438254868,
 'test_mcc': 0.5207811619036313,
 'test_f1': 0.4309990038797732,
 'test_auc': 0.796821295956517}