In [2]:
#!pip install seqeval

In [1]:
# Show the GPUs visible to this kernel before training (the training cell wraps the model in DataParallel).
!nvidia-smi

Sat Apr  2 17:21:13 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.54       Driver Version: 510.54       CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla V100-PCIE...  Off  | 00000001:00:00.0 Off |                    0 |
| N/A   30C    P0    36W / 250W |      0MiB / 16384MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  Tesla V100-PCIE...  Off  | 00000002:00:00.0 Off |                    0 |
| N/A   29C    P0    36W / 250W |      0MiB / 16384MiB |      2%      Default |
|       

In [2]:
import numpy as np
import pandas as pd
import transformers
from torch.utils.data import Dataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertForTokenClassification, BertTokenizer, BertConfig, BertModel
from transformers import AdamW, get_linear_schedule_with_warmup
import torch
import json
import os
import time
from torch.nn.parallel import DataParallel
from sklearn.metrics import f1_score
from collections import defaultdict
from torch import cuda

In [3]:
def def_value():
    """Return the label given to any character offset that no annotation covers."""
    return 'O'


def read_data(path):
    """Read an annotated corpus file into a word-level DataFrame.

    The file is expected to contain, per document:
      * text lines of the form ``<id>|t|<text>`` and ``<id>|a|<text>``;
      * annotation lines with six tab-separated fields
        ``<id>, <start>, <end>, <mention>, <label>, <extra>``, where
        ``start``/``end`` are character offsets (end exclusive) into the
        document text.
    Whitespace-only lines are skipped.

    Each document's text lines are concatenated (separated by one space, with
    newlines removed), split on single spaces, and every resulting word is
    labelled with the label of its first character ('O' when no annotation
    covers it). For a word starting with '(' the second character is used.

    Args:
        path: Path of the corpus file to read.

    Returns:
        pandas.DataFrame with columns ``sentence_id``, ``word`` and ``label``,
        one row per word, documents in file order.

    Raises:
        ValueError: If a non-text line does not have exactly six tab-separated
            fields, or its offsets are not integers.
    """
    sent_dict = {}
    # doc id -> {character offset -> label}. Both levels have defaults so that
    # a document without any annotation line reads as all-'O' instead of
    # raising KeyError when its words are labelled below.
    label_dict = defaultdict(lambda: defaultdict(def_value))

    with open(path) as f:
        for line in f:
            if line.isspace():
                continue
            # maxsplit=2 keeps any '|' inside the text itself from breaking
            # the three-field check.
            parts = line.split('|', 2)
            if len(parts) == 3 and parts[1] in ('a', 't'):
                idx, _, sentence = parts
                sent_dict[idx] = sent_dict.get(idx, '') + ' ' + sentence
            else:
                idx, start_pos, end_pos, _mention, label, _extra = line.split('\t')
                for i in range(int(start_pos), int(end_pos)):
                    label_dict[idx][i] = label

    idx_col, word_col, label_col = [], [], []
    for idx, text in sent_dict.items():
        sentence = text.replace('\n', '')
        char_labels = label_dict[idx]

        # Running character offset of the current word. The text starts with
        # the separator space added above, hence the [1:] to drop the empty
        # first piece.
        char_seq = 0
        for word in sentence.split(' ')[1:]:
            label = char_labels[char_seq]
            if word and word[0] == '(':
                # Look one character past the opening parenthesis.
                label = char_labels[char_seq + 1]
            char_seq += len(word) + 1

            idx_col.append(idx)
            word_col.append(word)
            label_col.append(label)

    df = pd.DataFrame(list(zip(idx_col, word_col, label_col)),
                      columns=['sentence_id', 'word', 'label'])
    return df


class SentenceGetter(object):
    """Group a word-level DataFrame into per-sentence lists of (word, label).

    Attributes:
        n_sent: 1-based counter used by ``get_next``.
        dataset: The DataFrame passed to the constructor; it must have
            ``sentence_id``, ``word`` and ``label`` columns.
        empty: Always ``False``; kept for backward compatibility.
        grouped: Series indexed by ``sentence_id`` whose values are lists of
            ``(word, label)`` tuples.
        sentences: The values of ``grouped`` as a plain list, in the order
            produced by ``groupby`` (sorted by ``sentence_id``).
    """

    def __init__(self, dataset):
        self.n_sent = 1
        self.dataset = dataset
        self.empty = False
        self.grouped = self.dataset.groupby("sentence_id").apply(self._to_pairs)
        self.sentences = [s for s in self.grouped]

    @staticmethod
    def _to_pairs(group):
        """Turn one sentence's rows into a list of (word, label) tuples."""
        return list(zip(group["word"].values.tolist(),
                        group["label"].values.tolist()))

    def get_next(self):
        """Return the sentence stored under ``"Sentence: <n_sent>"`` and advance.

        Returns ``None`` (without advancing) when no group has that id. Note
        that the lookup key is the literal string ``"Sentence: N"``, so for
        frames whose ``sentence_id`` values do not follow that pattern this
        method always returns ``None``; use ``sentences`` instead.
        """
        try:
            sentence = self.grouped["Sentence: {}".format(self.n_sent)]
        except KeyError:
            # Only a missing id means "no more sentences"; anything else
            # (including KeyboardInterrupt) must propagate.
            return None
        self.n_sent += 1
        return sentence
        
        
# Creating new lists and dicts that will be used at a later stage for reference and processing
def get_data(df, label_vals):
    """Convert a word-level frame into sentence strings and label-id lists.

    Args:
        df: DataFrame with ``sentence_id``, ``word`` and ``label`` columns.
        label_vals: Sequence of label names; a label's position in this
            sequence is its integer id.

    Returns:
        ``(sentences, labels)`` where ``sentences[i]`` is the words of the
        i-th grouped sentence joined by single spaces and ``labels[i]`` is the
        list of label ids for those words. A label that is not in
        ``label_vals`` maps to ``None``.
    """
    label2idx = {name: position for position, name in enumerate(label_vals)}
    grouped_sentences = SentenceGetter(df).sentences

    sentences = []
    labels = []
    for pairs in grouped_sentences:
        sentences.append(' '.join(pair[0] for pair in pairs))
        labels.append([label2idx.get(pair[1]) for pair in pairs])
    return sentences, labels

In [4]:
class CustomDataset(Dataset):
    """Dataset yielding fixed-length token ids, attention mask and tag ids.

    Args:
        tokenizer: Object exposing ``encode_plus`` that returns a mapping with
            ``input_ids`` and ``attention_mask``.
        sentences: Sequence of sentence strings.
        labels: Sequence of per-sentence lists of integer label ids.
        max_len: Length every returned tensor is padded/truncated to.
        pad_label: Label id used to pad ``tags`` up to ``max_len``
            (default 4, the value previously hard-coded).

    NOTE(review): ``labels[i]`` is used positionally as given; nothing in this
    class aligns it with the tokenizer's output (special tokens, sub-word
    pieces). Confirm that this is intended.
    NOTE(review): the default pad label 4 is a real class id whenever there
    are at least five labels — confirm whether a dedicated ignore value is
    wanted instead.
    """

    def __init__(self, tokenizer, sentences, labels, max_len, pad_label=4):
        self.len = len(sentences)
        self.sentences = sentences
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
        self.pad_label = pad_label

    def __getitem__(self, index):
        """Return ``{'ids', 'mask', 'tags'}`` long tensors for one sentence."""
        sentence = str(self.sentences[index])
        inputs = self.tokenizer.encode_plus(
            sentence,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True,
            return_token_type_ids=True
        )
        ids = inputs['input_ids']
        mask = inputs['attention_mask']

        # Work on a copy: extending self.labels[index] in place would grow the
        # caller's list on every access (every epoch, every run).
        label = list(self.labels[index][:self.max_len])
        label.extend([self.pad_label] * (self.max_len - len(label)))

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'tags': torch.tensor(label, dtype=torch.long)
        }

    def __len__(self):
        return self.len

class BERTClass(torch.nn.Module):
    """Thin wrapper around ``transformers.BertForTokenClassification``.

    Args:
        model_path: Model name or checkpoint directory passed to
            ``from_pretrained``.
        num_labels: Size of the token-classification head (default 18, the
            value previously hard-coded).
    """

    def __init__(self, model_path, num_labels=18):
        super().__init__()
        self.bert = transformers.BertForTokenClassification.from_pretrained(
            model_path,
            num_labels=num_labels,
        )

    def forward(self, ids, mask, labels=None):
        """Run the wrapped model and return its output unchanged.

        ``ids`` and ``mask`` are passed as the first two positional arguments
        of the wrapped model; ``labels`` is forwarded by keyword and may be
        omitted.
        """
        return self.bert(ids, mask, labels=labels)

def train(epoch):
    """Run one pass over ``training_loader``, updating ``model`` in place.

    Relies on the module-level names ``model``, ``training_loader``,
    ``optimizer`` and ``device``. ``epoch`` is only used in the progress
    message printed every 10 steps.
    """
    model.train()
    for step, batch in enumerate(training_loader):
        input_ids = batch['ids'].to(device, dtype=torch.long)
        attention_mask = batch['mask'].to(device, dtype=torch.long)
        tag_ids = batch['tags'].to(device, dtype=torch.long)

        # First element of the model output is the loss; it is summed below
        # before back-propagation.
        outputs = model(input_ids, attention_mask, labels=tag_ids)
        loss = outputs[0]

        optimizer.zero_grad()
        loss.sum().backward()
        optimizer.step()

        report_now = (step % 10 == 0)
        if report_now:
            print(f'Epoch: {epoch}  Step: {step}  Loss: {loss.sum()}')
            
def valid(model, testing_loader, label_vals):
    """Evaluate ``model`` on ``testing_loader`` and report loss and micro-F1.

    Relies on the module-level ``device``.

    Args:
        model: Callable as ``model(ids, mask, labels=targets)`` whose output's
            first two elements are the loss and the logits.
        testing_loader: Iterable of batches with ``'ids'``, ``'mask'`` and
            ``'tags'`` tensors.
        label_vals: Sequence mapping a label id to its label name.

    Returns:
        ``(pred_tags, valid_tags, score)``: flat lists of predicted and true
        label names over every position of every sequence, and the micro
        averaged F1 between them.

    Raises:
        ValueError: If ``testing_loader`` yields no batches.

    NOTE(review): every position of each sequence is scored; the attention
    mask is not used to exclude padded positions — confirm this is intended.
    """
    model.eval()
    eval_loss = 0
    nb_eval_steps = 0
    predictions, true_labels = [], []
    with torch.no_grad():
        for data in testing_loader:
            ids = data['ids'].to(device, dtype=torch.long)
            mask = data['mask'].to(device, dtype=torch.long)
            targets = data['tags'].to(device, dtype=torch.long)

            output = model(ids, mask, labels=targets)
            loss, logits = output[:2]
            logits = logits.detach().cpu().numpy()
            label_ids = targets.to('cpu').numpy()
            # argmax over the last axis picks the highest-scoring label id
            # for each position.
            predictions.extend([list(p) for p in np.argmax(logits, axis=2)])
            true_labels.append(label_ids)
            eval_loss += loss.mean().item()
            nb_eval_steps += 1

    if nb_eval_steps == 0:
        # Previously surfaced as an opaque ZeroDivisionError.
        raise ValueError('testing_loader yielded no batches; nothing to evaluate')

    eval_loss = eval_loss / nb_eval_steps
    print("Validation loss: {}".format(eval_loss))
    pred_tags = [label_vals[p_i] for p in predictions for p_i in p]
    valid_tags = [label_vals[l_ii] for l in true_labels for l_i in l for l_ii in l_i]
    # scikit-learn's signature is f1_score(y_true, y_pred). The micro average
    # is symmetric, so the reported number is unchanged, but the correct
    # order matters as soon as another average is used.
    score = f1_score(valid_tags, pred_tags, average='micro')
    print("F1-Score: {}".format(round(score, 4)))
    return pred_tags, valid_tags, score

In [5]:
# Train on the GPU when one is available, otherwise fall back to the CPU.
if cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Parse the three corpus splits into word-level frames.
df_train, df_valid, df_test = (
    read_data(corpus_path)
    for corpus_path in (
        './NCBI-disease/NCBItrainset_corpus.txt',
        './NCBI-disease/NCBIdevelopset_corpus.txt',
        './NCBI-disease/NCBItestset_corpus.txt',
    )
)

# Label vocabulary: the labels seen in the training split, in the order
# returned by value_counts(); a label's position is its integer id.
label_vals = df_train["label"].value_counts().index.tolist()
label2idx = dict(zip(label_vals, range(len(label_vals))))

# Sentence strings and per-word label ids for each split.
train_sentences, train_labels = get_data(df_train, label_vals)
valid_sentences, valid_labels = get_data(df_valid, label_vals)
test_sentences, test_labels = get_data(df_test, label_vals)

In [6]:
# Directory holding the pre-trained checkpoints. Defaults to the original
# location; set NSBERT_CHECKPOINT_ROOT to run the notebook on another machine.
CHECKPOINT_ROOT = os.environ.get(
    'NSBERT_CHECKPOINT_ROOT',
    '/home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints',
)

# Checkpoint directories relative to CHECKPOINT_ROOT. The trailing '/' is
# deliberate: the training cell derives the model name from the
# second-to-last '/'-separated component of each path.
CHECKPOINT_DIRS = [
    '4GB-checkpoints/model-trained-0-3531-4GB/',
    '4GB-checkpoints/model-trained-18-67089-4GB/',
    '4GB-checkpoints/model-trained-36-130647-4GB/',
    '12GB-checkpoints/model-trained-0-10596-12GB/',
    '12GB-checkpoints/model-trained-3-42384-12GB/',
    '12GB-checkpoints/model-trained-5-63576-12GB/',
]

# Baseline first, then every checkpoint to compare against it.
model_paths = ['bert-base-uncased'] + [
    os.path.join(CHECKPOINT_ROOT, relative_dir) for relative_dir in CHECKPOINT_DIRS
]

In [7]:
# Defining some key variables that will be used later on in the training
MAX_LEN = 200
TRAIN_BATCH_SIZE = 32
VALID_BATCH_SIZE = 4
EPOCHS = 4
LEARNING_RATE = 5e-05
N_RUNS = 5                              # independent fine-tuning runs per model
STATS_LOG_PATH = 'logs/NER_stats.txt'   # one JSON line is appended per model

# The stats file is only opened after all runs of a model have finished;
# create its directory up front so a missing folder cannot discard the results.
os.makedirs(os.path.dirname(STATS_LOG_PATH), exist_ok=True)

start = time.perf_counter()

for model_path in model_paths:

    # The baseline uses its own vocabulary; every other checkpoint uses the
    # custom WordPiece vocabulary file.
    tokenizer_path = ('bert-base-uncased' if model_path == 'bert-base-uncased'
                      else '../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt')
    tokenizer = BertTokenizer.from_pretrained(tokenizer_path)

    # Checkpoint paths end with '/', so the directory name is the
    # second-to-last component.
    model_name = model_path if model_path == 'bert-base-uncased' else model_path.split('/')[-2].split('.')[0]
    model_stats = {'model_name': model_name,
                   'seeds': [],
                   'batch_size': TRAIN_BATCH_SIZE,
                   'epochs': EPOCHS,
                   'metric': 'f1-score (micro)',
                   'scores': [],
                   'mean_score': 0
                   }
    for num in range(1, N_RUNS + 1):

        # Seed every run and record the seed, so the 'seeds' field of the log
        # is populated and each score can be reproduced.
        seed = num
        np.random.seed(seed)
        torch.manual_seed(seed)
        model_stats['seeds'].append(seed)

        # Scores are computed on the test split; the development split is not
        # used in this cell.
        training_set = CustomDataset(tokenizer, train_sentences, train_labels, MAX_LEN)
        test_set = CustomDataset(tokenizer, test_sentences, test_labels, MAX_LEN)

        training_loader = DataLoader(training_set, batch_size=TRAIN_BATCH_SIZE, shuffle=True, num_workers=8)
        # Evaluation does not need shuffling; a fixed order keeps the returned
        # tag lists comparable between runs.
        test_loader = DataLoader(test_set, batch_size=VALID_BATCH_SIZE, shuffle=False, num_workers=8)

        # train() reads `model`, `optimizer` and `training_loader` from the
        # notebook namespace, so these names must be (re)bound here.
        model = BERTClass(model_path)
        model = DataParallel(model)
        model.to(device)
        optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
        print(f'Using model {model_name}, with tokenizer {tokenizer_path}')

        for epoch in range(EPOCHS):
            train(epoch)
        pred_tags, test_tags, score = valid(model, test_loader, label_vals)
        model_stats['scores'].append(round(float(score), 6))
        torch.cuda.empty_cache()
        time.sleep(3)
        print(f'Training run {num} completed.')
        print()

    print('Logging model stats....')
    print()
    # Plain float keeps the JSON serialisation independent of NumPy types.
    final_score = float(np.round(np.mean(model_stats['scores']), 4))
    model_stats['mean_score'] = final_score
    with open(STATS_LOG_PATH, 'a') as f:
        f.write(json.dumps(model_stats))
        f.write('\n')

end = time.perf_counter() - start
print(f'Total Training/Eval time: {round(end, 2)} seconds')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-u

Using model bert-base-uncased, with tokenizer bert-base-uncased




Epoch: 0  Step: 0  Loss: 5.687420845031738
Epoch: 0  Step: 10  Loss: 1.7327806949615479
Epoch: 1  Step: 0  Loss: 1.3148291110992432
Epoch: 1  Step: 10  Loss: 1.149648666381836
Epoch: 2  Step: 0  Loss: 1.107922911643982
Epoch: 2  Step: 10  Loss: 0.9656139612197876
Epoch: 3  Step: 0  Loss: 0.8845291137695312
Epoch: 3  Step: 10  Loss: 1.067922830581665
Validation loss: 0.5210551851987839
F1-Score: 0.8458
Training run 1 completed.



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-u

Using model bert-base-uncased, with tokenizer bert-base-uncased




Epoch: 0  Step: 0  Loss: 5.509517669677734
Epoch: 0  Step: 10  Loss: 1.5289348363876343
Epoch: 1  Step: 0  Loss: 1.3666505813598633
Epoch: 1  Step: 10  Loss: 1.3123486042022705
Epoch: 2  Step: 0  Loss: 1.0871326923370361
Epoch: 2  Step: 10  Loss: 1.1183075904846191
Epoch: 3  Step: 0  Loss: 0.8956232070922852
Epoch: 3  Step: 10  Loss: 0.8684400320053101
Validation loss: 0.52109020113945
F1-Score: 0.847
Training run 2 completed.



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-u

Using model bert-base-uncased, with tokenizer bert-base-uncased




Epoch: 0  Step: 0  Loss: 5.263585090637207
Epoch: 0  Step: 10  Loss: 1.6403026580810547
Epoch: 1  Step: 0  Loss: 1.3779706954956055
Epoch: 1  Step: 10  Loss: 1.4398229122161865
Epoch: 2  Step: 0  Loss: 1.1944172382354736
Epoch: 2  Step: 10  Loss: 1.0910563468933105
Epoch: 3  Step: 0  Loss: 1.0459060668945312
Epoch: 3  Step: 10  Loss: 0.9561759233474731
Validation loss: 0.5206994640827179
F1-Score: 0.8394
Training run 3 completed.



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-u

Using model bert-base-uncased, with tokenizer bert-base-uncased




Epoch: 0  Step: 0  Loss: 5.705776214599609
Epoch: 0  Step: 10  Loss: 1.4801115989685059
Epoch: 1  Step: 0  Loss: 1.3735594749450684
Epoch: 1  Step: 10  Loss: 1.0405910015106201
Epoch: 2  Step: 0  Loss: 0.9718479514122009
Epoch: 2  Step: 10  Loss: 0.992163360118866
Epoch: 3  Step: 0  Loss: 0.965697169303894
Epoch: 3  Step: 10  Loss: 0.9491474628448486
Validation loss: 0.5049722898006439
F1-Score: 0.8472
Training run 4 completed.



Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTokenClassification were not initialized from the model checkpoint at bert-base-u

Using model bert-base-uncased, with tokenizer bert-base-uncased




Epoch: 0  Step: 0  Loss: 6.083489418029785
Epoch: 0  Step: 10  Loss: 1.4887809753417969
Epoch: 1  Step: 0  Loss: 1.48544442653656
Epoch: 1  Step: 10  Loss: 1.3792879581451416
Epoch: 2  Step: 0  Loss: 1.107362985610962
Epoch: 2  Step: 10  Loss: 0.985801100730896
Epoch: 3  Step: 0  Loss: 0.940099835395813
Epoch: 3  Step: 10  Loss: 0.9363573789596558
Validation loss: 0.5169443321228028
F1-Score: 0.8356
Training run 5 completed.

Logging model stats....



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-0-3531-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTok

Using model model-trained-0-3531-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.176520824432373
Epoch: 0  Step: 10  Loss: 1.0837037563323975
Epoch: 1  Step: 0  Loss: 1.0709788799285889
Epoch: 1  Step: 10  Loss: 1.0175144672393799
Epoch: 2  Step: 0  Loss: 0.899137020111084
Epoch: 2  Step: 10  Loss: 1.0591070652008057
Epoch: 3  Step: 0  Loss: 0.8320268392562866
Epoch: 3  Step: 10  Loss: 0.8605939149856567
Validation loss: 0.4625757098197937
F1-Score: 0.8678
Training run 1 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-0-3531-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTok

Using model model-trained-0-3531-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 7.144320487976074
Epoch: 0  Step: 10  Loss: 1.1516921520233154
Epoch: 1  Step: 0  Loss: 1.0236661434173584
Epoch: 1  Step: 10  Loss: 1.0558438301086426
Epoch: 2  Step: 0  Loss: 0.7569860219955444
Epoch: 2  Step: 10  Loss: 1.0348730087280273
Epoch: 3  Step: 0  Loss: 0.7869874238967896
Epoch: 3  Step: 10  Loss: 0.8950877785682678
Validation loss: 0.44284523963928224
F1-Score: 0.8737
Training run 2 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-0-3531-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTok

Using model model-trained-0-3531-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 5.038453102111816
Epoch: 0  Step: 10  Loss: 0.9828178882598877
Epoch: 1  Step: 0  Loss: 1.003914475440979
Epoch: 1  Step: 10  Loss: 1.045363187789917
Epoch: 2  Step: 0  Loss: 0.9457418918609619
Epoch: 2  Step: 10  Loss: 0.9361658096313477
Epoch: 3  Step: 0  Loss: 0.7678549289703369
Epoch: 3  Step: 10  Loss: 0.7457766532897949
Validation loss: 0.4402932631969452
F1-Score: 0.869
Training run 3 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-0-3531-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTok

Using model model-trained-0-3531-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 5.123995304107666
Epoch: 0  Step: 10  Loss: 1.089279294013977
Epoch: 1  Step: 0  Loss: 1.0557209253311157
Epoch: 1  Step: 10  Loss: 0.9836486577987671
Epoch: 2  Step: 0  Loss: 0.8785923719406128
Epoch: 2  Step: 10  Loss: 0.9308173656463623
Epoch: 3  Step: 0  Loss: 0.872560441493988
Epoch: 3  Step: 10  Loss: 0.876224160194397
Validation loss: 0.49477944493293763
F1-Score: 0.8666
Training run 4 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-0-3531-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForTok

Using model model-trained-0-3531-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 7.0498199462890625
Epoch: 0  Step: 10  Loss: 1.1392158269882202
Epoch: 1  Step: 0  Loss: 1.0418723821640015
Epoch: 1  Step: 10  Loss: 0.8802346587181091
Epoch: 2  Step: 0  Loss: 0.8278154134750366
Epoch: 2  Step: 10  Loss: 0.8145420551300049
Epoch: 3  Step: 0  Loss: 0.9953644275665283
Epoch: 3  Step: 10  Loss: 0.8801654577255249
Validation loss: 0.45299997806549075
F1-Score: 0.8706
Training run 5 completed.

Logging model stats....



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-18-67089-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForT

Using model model-trained-18-67089-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 5.761045455932617
Epoch: 0  Step: 10  Loss: 1.2753090858459473
Epoch: 1  Step: 0  Loss: 1.1585495471954346
Epoch: 1  Step: 10  Loss: 0.9526417255401611
Epoch: 2  Step: 0  Loss: 0.8694533109664917
Epoch: 2  Step: 10  Loss: 0.7739043831825256
Epoch: 3  Step: 0  Loss: 0.7288327217102051
Epoch: 3  Step: 10  Loss: 0.7251293659210205
Validation loss: 0.44182080507278443
F1-Score: 0.8795
Training run 1 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-18-67089-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForT

Using model model-trained-18-67089-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.752429962158203
Epoch: 0  Step: 10  Loss: 1.2748037576675415
Epoch: 1  Step: 0  Loss: 1.2442469596862793
Epoch: 1  Step: 10  Loss: 1.0283081531524658
Epoch: 2  Step: 0  Loss: 0.9913196563720703
Epoch: 2  Step: 10  Loss: 0.774103045463562
Epoch: 3  Step: 0  Loss: 0.9567851424217224
Epoch: 3  Step: 10  Loss: 0.6703081130981445
Validation loss: 0.44857399106025697
F1-Score: 0.8728
Training run 2 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-18-67089-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForT

Using model model-trained-18-67089-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 5.941545486450195
Epoch: 0  Step: 10  Loss: 1.1746726036071777
Epoch: 1  Step: 0  Loss: 1.1387711763381958
Epoch: 1  Step: 10  Loss: 0.9998651742935181
Epoch: 2  Step: 0  Loss: 0.7578157782554626
Epoch: 2  Step: 10  Loss: 0.800089955329895
Epoch: 3  Step: 0  Loss: 0.6854862570762634
Epoch: 3  Step: 10  Loss: 0.6026679873466492
Validation loss: 0.4542917287349701
F1-Score: 0.8786
Training run 3 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-18-67089-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForT

Using model model-trained-18-67089-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.060126781463623
Epoch: 0  Step: 10  Loss: 1.2926037311553955
Epoch: 1  Step: 0  Loss: 1.065826177597046
Epoch: 1  Step: 10  Loss: 0.9457899332046509
Epoch: 2  Step: 0  Loss: 0.7496519088745117
Epoch: 2  Step: 10  Loss: 0.7237745523452759
Epoch: 3  Step: 0  Loss: 0.7779240608215332
Epoch: 3  Step: 10  Loss: 0.8325852155685425
Validation loss: 0.44782627940177916
F1-Score: 0.8758
Training run 4 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-18-67089-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForT

Using model model-trained-18-67089-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.031376361846924
Epoch: 0  Step: 10  Loss: 1.3102623224258423
Epoch: 1  Step: 0  Loss: 1.0752805471420288
Epoch: 1  Step: 10  Loss: 0.8903945684432983
Epoch: 2  Step: 0  Loss: 0.9251581430435181
Epoch: 2  Step: 10  Loss: 0.8307806849479675
Epoch: 3  Step: 0  Loss: 0.6949150562286377
Epoch: 3  Step: 10  Loss: 0.6901861429214478
Validation loss: 0.4597280889749527
F1-Score: 0.8786
Training run 5 completed.

Logging model stats....



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-36-130647-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-36-130647-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.253313064575195
Epoch: 0  Step: 10  Loss: 1.4128987789154053
Epoch: 1  Step: 0  Loss: 0.9794008135795593
Epoch: 1  Step: 10  Loss: 1.0067834854125977
Epoch: 2  Step: 0  Loss: 0.7918365597724915
Epoch: 2  Step: 10  Loss: 0.7996829748153687
Epoch: 3  Step: 0  Loss: 0.681629478931427
Epoch: 3  Step: 10  Loss: 0.7882630825042725
Validation loss: 0.48265872716903685
F1-Score: 0.857
Training run 1 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-36-130647-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-36-130647-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 5.729779243469238
Epoch: 0  Step: 10  Loss: 1.1355373859405518
Epoch: 1  Step: 0  Loss: 1.0580153465270996
Epoch: 1  Step: 10  Loss: 0.8669025897979736
Epoch: 2  Step: 0  Loss: 0.803413987159729
Epoch: 2  Step: 10  Loss: 0.852226734161377
Epoch: 3  Step: 0  Loss: 0.6994971036911011
Epoch: 3  Step: 10  Loss: 0.6726678609848022
Validation loss: 0.4481345200538635
F1-Score: 0.8723
Training run 2 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-36-130647-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-36-130647-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.405369281768799
Epoch: 0  Step: 10  Loss: 1.364654779434204
Epoch: 1  Step: 0  Loss: 1.0936577320098877
Epoch: 1  Step: 10  Loss: 1.1336878538131714
Epoch: 2  Step: 0  Loss: 0.9382357001304626
Epoch: 2  Step: 10  Loss: 0.883196234703064
Epoch: 3  Step: 0  Loss: 0.6918751001358032
Epoch: 3  Step: 10  Loss: 0.7894693613052368
Validation loss: 0.4704786694049835
F1-Score: 0.8759
Training run 3 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-36-130647-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-36-130647-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 5.940945625305176
Epoch: 0  Step: 10  Loss: 1.2618517875671387
Epoch: 1  Step: 0  Loss: 1.1855392456054688
Epoch: 1  Step: 10  Loss: 0.9962645769119263
Epoch: 2  Step: 0  Loss: 0.8198496699333191
Epoch: 2  Step: 10  Loss: 0.8521735668182373
Epoch: 3  Step: 0  Loss: 0.6587133407592773
Epoch: 3  Step: 10  Loss: 0.7292495965957642
Validation loss: 0.4568700981140137
F1-Score: 0.8682
Training run 4 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-36-130647-4GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-36-130647-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.495365142822266
Epoch: 0  Step: 10  Loss: 1.1687381267547607
Epoch: 1  Step: 0  Loss: 1.1701271533966064
Epoch: 1  Step: 10  Loss: 1.022147297859192
Epoch: 2  Step: 0  Loss: 0.8440830707550049
Epoch: 2  Step: 10  Loss: 0.8157178163528442
Epoch: 3  Step: 0  Loss: 0.6536878347396851
Epoch: 3  Step: 10  Loss: 0.7143787741661072
Validation loss: 0.4679556852579117
F1-Score: 0.876
Training run 5 completed.

Logging model stats....



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-0-10596-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-0-10596-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 5.937840461730957
Epoch: 0  Step: 10  Loss: 1.1916557550430298
Epoch: 1  Step: 0  Loss: 1.0318058729171753
Epoch: 1  Step: 10  Loss: 0.8718205094337463
Epoch: 2  Step: 0  Loss: 0.8274091482162476
Epoch: 2  Step: 10  Loss: 0.8227959871292114
Epoch: 3  Step: 0  Loss: 0.8565163612365723
Epoch: 3  Step: 10  Loss: 0.77791827917099
Validation loss: 0.44042434215545656
F1-Score: 0.8796
Training run 1 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-0-10596-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-0-10596-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.226255416870117
Epoch: 0  Step: 10  Loss: 1.1775641441345215
Epoch: 1  Step: 0  Loss: 1.0140107870101929
Epoch: 1  Step: 10  Loss: 0.8634536266326904
Epoch: 2  Step: 0  Loss: 1.069753885269165
Epoch: 2  Step: 10  Loss: 0.7539669275283813
Epoch: 3  Step: 0  Loss: 0.7988015413284302
Epoch: 3  Step: 10  Loss: 0.8262899518013
Validation loss: 0.4349671411514282
F1-Score: 0.8778
Training run 2 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-0-10596-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-0-10596-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.292889595031738
Epoch: 0  Step: 10  Loss: 1.2103261947631836
Epoch: 1  Step: 0  Loss: 1.041152834892273
Epoch: 1  Step: 10  Loss: 0.8123900890350342
Epoch: 2  Step: 0  Loss: 0.7839919328689575
Epoch: 2  Step: 10  Loss: 0.749350368976593
Epoch: 3  Step: 0  Loss: 0.7184507846832275
Epoch: 3  Step: 10  Loss: 0.7082673907279968
Validation loss: 0.4390988880395889
F1-Score: 0.8768
Training run 3 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-0-10596-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-0-10596-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.310847282409668
Epoch: 0  Step: 10  Loss: 1.0801191329956055
Epoch: 1  Step: 0  Loss: 1.0795520544052124
Epoch: 1  Step: 10  Loss: 0.9931679368019104
Epoch: 2  Step: 0  Loss: 0.9597789645195007
Epoch: 2  Step: 10  Loss: 0.7461528778076172
Epoch: 3  Step: 0  Loss: 0.8485317826271057
Epoch: 3  Step: 10  Loss: 0.6940948963165283
Validation loss: 0.4332398724555969
F1-Score: 0.8774
Training run 4 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-0-10596-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-0-10596-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.118152618408203
Epoch: 0  Step: 10  Loss: 1.1412197351455688
Epoch: 1  Step: 0  Loss: 0.988243579864502
Epoch: 1  Step: 10  Loss: 0.9707333445549011
Epoch: 2  Step: 0  Loss: 0.9084277749061584
Epoch: 2  Step: 10  Loss: 0.9630613327026367
Epoch: 3  Step: 0  Loss: 0.8323900699615479
Epoch: 3  Step: 10  Loss: 0.8386963605880737
Validation loss: 0.43899447679519654
F1-Score: 0.8744
Training run 5 completed.

Logging model stats....



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-3-42384-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-3-42384-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.389832496643066
Epoch: 0  Step: 10  Loss: 1.3396880626678467
Epoch: 1  Step: 0  Loss: 1.113393783569336
Epoch: 1  Step: 10  Loss: 0.8390498757362366
Epoch: 2  Step: 0  Loss: 0.8431789875030518
Epoch: 2  Step: 10  Loss: 0.9121683239936829
Epoch: 3  Step: 0  Loss: 0.7702758312225342
Epoch: 3  Step: 10  Loss: 0.7575777769088745
Validation loss: 0.4201903903484345
F1-Score: 0.8798
Training run 1 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-3-42384-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-3-42384-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 5.924657344818115
Epoch: 0  Step: 10  Loss: 1.3602311611175537
Epoch: 1  Step: 0  Loss: 1.0277396440505981
Epoch: 1  Step: 10  Loss: 0.956778883934021
Epoch: 2  Step: 0  Loss: 0.8131201863288879
Epoch: 2  Step: 10  Loss: 0.7568610906600952
Epoch: 3  Step: 0  Loss: 0.7396928071975708
Epoch: 3  Step: 10  Loss: 0.7435135841369629
Validation loss: 0.4272695505619049
F1-Score: 0.8806
Training run 2 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-3-42384-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-3-42384-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.583527088165283
Epoch: 0  Step: 10  Loss: 1.2721060514450073
Epoch: 1  Step: 0  Loss: 1.1240758895874023
Epoch: 1  Step: 10  Loss: 1.0284817218780518
Epoch: 2  Step: 0  Loss: 0.8109520673751831
Epoch: 2  Step: 10  Loss: 0.8094910383224487
Epoch: 3  Step: 0  Loss: 0.6855038404464722
Epoch: 3  Step: 10  Loss: 0.7719545364379883
Validation loss: 0.446859050989151
F1-Score: 0.8806
Training run 3 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-3-42384-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-3-42384-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 5.776584625244141
Epoch: 0  Step: 10  Loss: 1.2082741260528564
Epoch: 1  Step: 0  Loss: 0.9836190938949585
Epoch: 1  Step: 10  Loss: 0.9006124138832092
Epoch: 2  Step: 0  Loss: 0.808418869972229
Epoch: 2  Step: 10  Loss: 0.8538410067558289
Epoch: 3  Step: 0  Loss: 0.7500112056732178
Epoch: 3  Step: 10  Loss: 0.8128361701965332
Validation loss: 0.42175318717956545
F1-Score: 0.8794
Training run 4 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-3-42384-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-3-42384-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.797215461730957
Epoch: 0  Step: 10  Loss: 1.2746272087097168
Epoch: 1  Step: 0  Loss: 1.0525991916656494
Epoch: 1  Step: 10  Loss: 0.9829685688018799
Epoch: 2  Step: 0  Loss: 0.8864978551864624
Epoch: 2  Step: 10  Loss: 0.8328107595443726
Epoch: 3  Step: 0  Loss: 0.8353455066680908
Epoch: 3  Step: 10  Loss: 0.7419294118881226
Validation loss: 0.43624906837940214
F1-Score: 0.881
Training run 5 completed.

Logging model stats....



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-5-63576-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-5-63576-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 5.730953216552734
Epoch: 0  Step: 10  Loss: 1.270866870880127
Epoch: 1  Step: 0  Loss: 1.082711935043335
Epoch: 1  Step: 10  Loss: 0.9416139721870422
Epoch: 2  Step: 0  Loss: 0.8713295459747314
Epoch: 2  Step: 10  Loss: 0.8191078901290894
Epoch: 3  Step: 0  Loss: 0.8366246819496155
Epoch: 3  Step: 10  Loss: 0.7673459053039551
Validation loss: 0.44756336450576784
F1-Score: 0.8754
Training run 1 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-5-63576-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-5-63576-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.3316192626953125
Epoch: 0  Step: 10  Loss: 1.38814115524292
Epoch: 1  Step: 0  Loss: 1.0534669160842896
Epoch: 1  Step: 10  Loss: 0.890581488609314
Epoch: 2  Step: 0  Loss: 0.788931131362915
Epoch: 2  Step: 10  Loss: 0.8796412348747253
Epoch: 3  Step: 0  Loss: 0.7575341463088989
Epoch: 3  Step: 10  Loss: 0.670474648475647
Validation loss: 0.4485188257694244
F1-Score: 0.881
Training run 2 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-5-63576-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-5-63576-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.096582412719727
Epoch: 0  Step: 10  Loss: 1.3446786403656006
Epoch: 1  Step: 0  Loss: 1.010474681854248
Epoch: 1  Step: 10  Loss: 0.8628054857254028
Epoch: 2  Step: 0  Loss: 0.8140055537223816
Epoch: 2  Step: 10  Loss: 0.736637532711029
Epoch: 3  Step: 0  Loss: 0.7375506162643433
Epoch: 3  Step: 10  Loss: 0.7885117530822754
Validation loss: 0.4440004098415375
F1-Score: 0.8782
Training run 3 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-5-63576-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-5-63576-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.4991278648376465
Epoch: 0  Step: 10  Loss: 1.124345064163208
Epoch: 1  Step: 0  Loss: 1.088498830795288
Epoch: 1  Step: 10  Loss: 0.8322250843048096
Epoch: 2  Step: 0  Loss: 1.0093187093734741
Epoch: 2  Step: 10  Loss: 0.6772608757019043
Epoch: 3  Step: 0  Loss: 0.8110606074333191
Epoch: 3  Step: 10  Loss: 0.6481634378433228
Validation loss: 0.4550692629814148
F1-Score: 0.8684
Training run 4 completed.



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-5-63576-12GB/ were not used when initializing BertForTokenClassification: ['cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.decoder.bias']
- This IS expected if you are initializing BertForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForTokenClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertFor

Using model model-trained-5-63576-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt




Epoch: 0  Step: 0  Loss: 6.207277297973633
Epoch: 0  Step: 10  Loss: 1.1326942443847656
Epoch: 1  Step: 0  Loss: 1.0481551885604858
Epoch: 1  Step: 10  Loss: 0.9503096342086792
Epoch: 2  Step: 0  Loss: 0.9141451120376587
Epoch: 2  Step: 10  Loss: 0.9123660326004028
Epoch: 3  Step: 0  Loss: 0.6639115810394287
Epoch: 3  Step: 10  Loss: 0.7573761343955994
Validation loss: 0.43077939689159395
F1-Score: 0.876
Training run 5 completed.

Logging model stats....

Total Training/Eval time: 1061.01 seconds
