In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
import torch.nn as nn
from torch.nn.parallel import DataParallel
import torch
from sklearn import metrics
import transformers
from transformers import AdamW, get_linear_schedule_with_warmup
import os, json, time
from transformers import RobertaTokenizer, RobertaModel

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaModel: ['lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.weight', 'lm_head.decoder.weight', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [3]:
!rm -r DC_data/text/.ipynb_checkpoints/
!rm -r DC_data/labels/.ipynb_checkpoints/

rm: cannot remove 'DC_data/text/.ipynb_checkpoints/': No such file or directory
rm: cannot remove 'DC_data/labels/.ipynb_checkpoints/': No such file or directory


### Data Process

In [4]:
#len should be 1852
len(os.listdir('DC_data/text/')), len(os.listdir("DC_data/labels/"))

(1852, 1852)

In [5]:
for dirpath, dirnames, filenames in os.walk('./DC_data/text/'):
    idx2text = {}
    for filename in filenames:
        if filename.startswith('.'):
            continue
        idx = filename.split('.')[0]
        with open(dirpath + filename) as f:
            abstract = ''
            for sentence in f:
                abstract += sentence
            abstract = abstract.replace('\n', ' ')
            idx2text[idx] = abstract

for dirpath, dirnames, filenames in os.walk('./DC_data/labels/'):
    idx2label = [] 
    for filename in filenames:
        if filename.startswith('.'):
            continue
        idx = filename.split('.')[0]
        with open(dirpath + filename) as f:
            labels = f.readline()
            for label in labels.split('<'):
                if not label or label.isspace():
                    continue
                key_label = label.split('--')[0].strip()
                if key_label == 'NULL':
                    continue
                idx2label.append((idx, key_label))

In [6]:
idx2label = list(set(idx2label))
df = pd.DataFrame(idx2label, columns=['idx', 'label'])
df['text'] = df['idx'].map(idx2text)

In [7]:
label_vals = list(set(df['label'].tolist()))

In [8]:
df = df.groupby(['idx', 'text']).agg({'label': lambda x: list(x)}).reset_index()

In [9]:
mlb = MultiLabelBinarizer()
df = df.join(pd.DataFrame(mlb.fit_transform(df.pop('label')),
                          columns=mlb.classes_,
                          index=df.index))

In [10]:
df.head()

Unnamed: 0,idx,text,Activating invasion and metastasis,Avoiding immune destruction,Cellular energetics,Enabling replicative immortality,Evading growth suppressors,Genomic instability and mutation,Inducing angiogenesis,Resisting cell death,Sustaining proliferative signaling,Tumor promoting inflammation
0,11724768,Ghrelin was identified in the stomach as an en...,0,0,0,0,0,0,0,0,1,0
1,11773160,PURPOSE The epidermal growth factor receptor (...,0,0,0,0,1,0,0,0,1,0
2,11774243,Adoptive transfer of immunity against hepatiti...,0,1,0,0,0,0,0,0,0,0
3,11781072,The secretion of immunosuppressive factors lik...,0,1,0,0,0,0,0,1,1,0
4,11791181,To characterize the impact of increased produc...,1,0,0,0,0,0,0,0,1,0


In [10]:
del idx2label, idx2text

### Model Paths

In [11]:
checkpoint_folder4GB = '/home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/'
checkpoint_folder12GB = '/home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints'

In [11]:
model_paths = ['bert-base-uncased',
'/home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-0-3531-4GB',
'/home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-18-67089-4GB/',
'/home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-36-130647-4GB/',
'/home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-0-10596-12GB/',
'/home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-3-42384-12GB/',
'/home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-5-63576-12GB/'
              ]

### Create Dataset/Model/Training/Eval loops

In [16]:
class HoCDataset:
    def __init__(self, tokenizer, sentences, labels, max_len):
        self.sentences = sentences
        self.labels = labels
        self.tokenizer = tokenizer
        self.max_len = max_len
    
    def __len__(self):
        return len(self.sentences)
        
    def __getitem__(self, item):
        sentence = str(self.sentences[item])
        inputs = self.tokenizer.encode_plus(
            sentence,
            None,
            add_special_tokens=True,
            max_length=self.max_len,
            padding='max_length',
            truncation=True
        )
        ids = inputs['input_ids']
        token_type_ids = inputs['token_type_ids']
        mask = inputs['attention_mask']

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'label': torch.tensor(self.labels[item], dtype=torch.float)
        } 

    
class BERTClass(nn.Module):
    def __init__(self, model_path):
        super(BERTClass, self).__init__()
        self.bert = transformers.BertModel.from_pretrained(model_path)
        self.out = nn.Linear(768, 10)

    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids, return_dict=False)
        
        return self.out(output)
    
    
def loss_fn(outputs, targets):
    return nn.BCEWithLogitsLoss()(outputs, targets)


def train_loop_fn(data_loader, model, optimizer, device, scheduler=None):
    model.train()
    for bi, d in enumerate(data_loader):
        ids = d['ids']
        mask = d['mask']
        token_type_ids = d['token_type_ids']
        labels = d['label']
        
        ids = ids.to(device, dtype=torch.long)
        mask = mask.to(device, dtype=torch.long)
        token_type_ids = token_type_ids.to(device, dtype=torch.long)
        labels = labels.to(device, dtype=torch.float)
        
        optimizer.zero_grad()
        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, labels)
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()
        if bi % 50 == 0:
            print(f'bi={bi}, loss={loss}')


def eval_loop_fn(data_loader, model, device):
    model.eval()
    fin_labels = []
    fin_outputs = []
    for bi, d in enumerate(data_loader):
        with torch.no_grad():
            ids = d['ids'].to(device, dtype=torch.long)
            mask = d['mask'].to(device, dtype=torch.long)
            token_type_ids = d['token_type_ids'].to(device, dtype=torch.long)
            labels = d['label'].to(device, dtype=torch.long)
          
            outputs = model(ids, mask, token_type_ids)
          
            fin_labels.append(labels.cpu().detach().numpy())
            fin_outputs.append(torch.sigmoid(outputs).cpu().detach().numpy())

    return np.vstack(fin_outputs), np.vstack(fin_labels)

In [22]:
MAX_LEN = 512
TRAIN_BATCH_SIZE = 8
EPOCHS = 4
LEARNING_RATE = 3e-5
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


start = time.perf_counter()

for model_path in model_paths:
    tokenizer_path = ('bert-base-uncased' if model_path == 'bert-base-uncased'\
                  else '../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt')
    model_name = model_path if model_path == 'bert-base-uncased' else model_path.split('/')[-2].split('.')[0]
    scores = []
    model_stats = {'model_name':model_name,
                   'seeds':[],
                   'batch_size':TRAIN_BATCH_SIZE,
                   'epochs':EPOCHS,
                   'metric':'f1-score',
                   'scores': [],
                   'mean_score':0
                    }
    for num, seed in enumerate([42,43,44,45,46], 1):
        SEED = seed
        model_stats['seeds'].append(SEED)
        label_cols = list(df.drop(['idx', 'text'], axis=1).columns)
        labels = df[label_cols].values

        df_train, df_test, labels_train, labels_test = train_test_split(
            df, labels, test_size=0.2, random_state=SEED)


        tokenizer = transformers.BertTokenizer.from_pretrained(tokenizer_path)
        model = BERTClass(model_path)
        model = DataParallel(model)
        print(f'Using model {model_name}, with tokenizer {tokenizer_path}')
        model.to(device)

        train_dataset = HoCDataset(
            sentences=df_train.text.values,
            labels=labels_train,
            tokenizer=tokenizer,
            max_len=MAX_LEN
        )
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=TRAIN_BATCH_SIZE,
            shuffle=True
        )

        test_dataset = HoCDataset(
            sentences=df_test.text.values,
            labels=labels_test,
            tokenizer=tokenizer,
            max_len=MAX_LEN
        )
        test_data_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=4,
            shuffle=True,
            drop_last=True
        )

        optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
        num_training_steps = int(len(df_train) / TRAIN_BATCH_SIZE * EPOCHS)
        scheduler = get_linear_schedule_with_warmup(
            optimizer, 
            num_warmup_steps=0,
            num_training_steps=num_training_steps
        )
        for epoch in range(EPOCHS):
            train_loop_fn(train_data_loader, model, optimizer, device, scheduler)
            
        output, target = eval_loop_fn(test_data_loader, model, device)
        preds = np.array(output) >= 0.5
        f1_score_micro = metrics.f1_score(target, preds, average='micro')
        print(f"F1 Score (Micro) = {round(f1_score_micro,4)}")
        model_stats['scores'].append(round(f1_score_micro,6))
        torch.cuda.empty_cache()
        time.sleep(3)
        print(f'Training run {num} completed.')
    
    print('Logging model stats....')
    print()
    final_score = np.round(np.mean(model_stats['scores']), 4)
    model_stats['mean_score'] = final_score
    with open('logs/stats.txt', 'a') as f:
        f.write(json.dumps(model_stats))
        f.write('\n')

end = time.perf_counter() - start
print(f'Total Training/Eval time: {round(end, 2)} seconds')

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Using model bert-base-uncased, with tokenizer bert-base-uncased
bi=0, loss=0.6724764108657837
bi=50, loss=0.353378027677536
bi=100, loss=0.3245600163936615
bi=150, loss=0.34938469529151917
bi=0, loss=0.27509862184524536
bi=50, loss=0.24954788386821747
bi=100, loss=0.2549605071544647
bi=150, loss=0.22175753116607666
bi=0, loss=0.199954554438591
bi=50, loss=0.2637068033218384
bi=100, loss=0.20407043397426605
bi=150, loss=0.15232689678668976
bi=0, loss=0.1554514765739441
bi=50, loss=0.15801814198493958
bi=100, loss=0.18348319828510284
bi=150, loss=0.18558908998966217
F1 Score (Micro) = 0.7865
Training run 1 completed.


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Using model bert-base-uncased, with tokenizer bert-base-uncased
bi=0, loss=0.742846667766571
bi=50, loss=0.3756067752838135
bi=100, loss=0.3423362970352173
bi=150, loss=0.29819798469543457
bi=0, loss=0.31681180000305176
bi=50, loss=0.2845384180545807
bi=100, loss=0.23170910775661469
bi=150, loss=0.23327182233333588
bi=0, loss=0.26353582739830017
bi=50, loss=0.21036015450954437
bi=100, loss=0.18905289471149445
bi=150, loss=0.1947965919971466
bi=0, loss=0.2277781069278717
bi=50, loss=0.134521946310997
bi=100, loss=0.1886909306049347
bi=150, loss=0.2373591959476471
F1 Score (Micro) = 0.7908
Training run 2 completed.


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Using model bert-base-uncased, with tokenizer bert-base-uncased
bi=0, loss=0.6898431181907654
bi=50, loss=0.34510213136672974
bi=100, loss=0.3079327642917633
bi=150, loss=0.3499595820903778
bi=0, loss=0.38737109303474426
bi=50, loss=0.2634445130825043
bi=100, loss=0.20501609146595
bi=150, loss=0.24982045590877533
bi=0, loss=0.241261288523674
bi=50, loss=0.19174392521381378
bi=100, loss=0.1537034511566162
bi=150, loss=0.26159271597862244
bi=0, loss=0.1489725112915039
bi=50, loss=0.18011103570461273
bi=100, loss=0.16214384138584137
bi=150, loss=0.12636969983577728
F1 Score (Micro) = 0.799
Training run 3 completed.


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Using model bert-base-uncased, with tokenizer bert-base-uncased
bi=0, loss=0.7164406776428223
bi=50, loss=0.3994050621986389
bi=100, loss=0.4400630593299866
bi=150, loss=0.34033241868019104
bi=0, loss=0.31789228320121765
bi=50, loss=0.25088241696357727
bi=100, loss=0.22026896476745605
bi=150, loss=0.17850004136562347
bi=0, loss=0.20692096650600433
bi=50, loss=0.22179822623729706
bi=100, loss=0.15399323403835297
bi=150, loss=0.11372482776641846
bi=0, loss=0.12165441364049911
bi=50, loss=0.13775864243507385
bi=100, loss=0.19033734500408173
bi=150, loss=0.11200977861881256
F1 Score (Micro) = 0.7752
Training run 4 completed.


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Using model bert-base-uncased, with tokenizer bert-base-uncased
bi=0, loss=0.7057167887687683
bi=50, loss=0.4719820022583008
bi=100, loss=0.3816884458065033
bi=150, loss=0.3994331657886505
bi=0, loss=0.26953253149986267
bi=50, loss=0.23731456696987152
bi=100, loss=0.19651588797569275
bi=150, loss=0.2509261667728424
bi=0, loss=0.25176578760147095
bi=50, loss=0.2076692134141922
bi=100, loss=0.23554043471813202
bi=150, loss=0.1529245227575302
bi=0, loss=0.18941903114318848
bi=50, loss=0.1406722515821457
bi=100, loss=0.2031526118516922
bi=150, loss=0.13043224811553955
F1 Score (Micro) = 0.7975
Training run 5 completed.
Logging model stats....



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-0-3531-4GB were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at /h

Using model 4GB-checkpoints, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.6475403308868408
bi=50, loss=0.3863500654697418
bi=100, loss=0.34903427958488464
bi=150, loss=0.40721645951271057
bi=0, loss=0.23286424577236176
bi=50, loss=0.28720328211784363
bi=100, loss=0.206537127494812
bi=150, loss=0.269610732793808
bi=0, loss=0.12717068195343018
bi=50, loss=0.16375528275966644
bi=100, loss=0.19675613939762115
bi=150, loss=0.22009983658790588
bi=0, loss=0.15591135621070862
bi=50, loss=0.08291535079479218
bi=100, loss=0.1641530841588974
bi=150, loss=0.13632315397262573
F1 Score (Micro) = 0.7828
Training run 1 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-0-3531-4GB were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at /h

Using model 4GB-checkpoints, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.7328251004219055
bi=50, loss=0.35392090678215027
bi=100, loss=0.281903475522995
bi=150, loss=0.32248687744140625
bi=0, loss=0.39105674624443054
bi=50, loss=0.28608402609825134
bi=100, loss=0.16948619484901428
bi=150, loss=0.2201278656721115
bi=0, loss=0.2989765703678131
bi=50, loss=0.14223679900169373
bi=100, loss=0.22380514442920685
bi=150, loss=0.12068145722150803
bi=0, loss=0.12679679691791534
bi=50, loss=0.15808716416358948
bi=100, loss=0.11753208935260773
bi=150, loss=0.19385206699371338
F1 Score (Micro) = 0.7882
Training run 2 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-0-3531-4GB were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at /h

Using model 4GB-checkpoints, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.7388431429862976
bi=50, loss=0.3548583984375
bi=100, loss=0.3483312726020813
bi=150, loss=0.3180050551891327
bi=0, loss=0.24704419076442719
bi=50, loss=0.2993348240852356
bi=100, loss=0.19525419175624847
bi=150, loss=0.15325920283794403
bi=0, loss=0.12457270920276642
bi=50, loss=0.24779100716114044
bi=100, loss=0.15236693620681763
bi=150, loss=0.11649186909198761
bi=0, loss=0.15995261073112488
bi=50, loss=0.08002942055463791
bi=100, loss=0.11404677480459213
bi=150, loss=0.1274476796388626
F1 Score (Micro) = 0.798
Training run 3 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-0-3531-4GB were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at /h

Using model 4GB-checkpoints, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.6830863356590271
bi=50, loss=0.35056519508361816
bi=100, loss=0.42754021286964417
bi=150, loss=0.2856346368789673
bi=0, loss=0.3423571288585663
bi=50, loss=0.22683773934841156
bi=100, loss=0.2011721432209015
bi=150, loss=0.22281388938426971
bi=0, loss=0.2793979346752167
bi=50, loss=0.10213303565979004
bi=100, loss=0.2067401260137558
bi=150, loss=0.1332062929868698
bi=0, loss=0.13831372559070587
bi=50, loss=0.1714884340763092
bi=100, loss=0.09119153022766113
bi=150, loss=0.12101290374994278
F1 Score (Micro) = 0.7765
Training run 4 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-0-3531-4GB were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at /h

Using model 4GB-checkpoints, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.6452807784080505
bi=50, loss=0.37369081377983093
bi=100, loss=0.3907293975353241
bi=150, loss=0.3372023105621338
bi=0, loss=0.30187031626701355
bi=50, loss=0.2511384189128876
bi=100, loss=0.18395639955997467
bi=150, loss=0.23797443509101868
bi=0, loss=0.17913039028644562
bi=50, loss=0.19619552791118622
bi=100, loss=0.1974482536315918
bi=150, loss=0.1849907487630844
bi=0, loss=0.12227582931518555
bi=50, loss=0.09537488222122192
bi=100, loss=0.13914911448955536
bi=150, loss=0.09756967425346375
F1 Score (Micro) = 0.7834
Training run 5 completed.
Logging model stats....



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-18-67089-4GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at

Using model model-trained-18-67089-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.6580801010131836
bi=50, loss=0.3801601827144623
bi=100, loss=0.29418912529945374
bi=150, loss=0.27346116304397583
bi=0, loss=0.23131294548511505
bi=50, loss=0.2072726935148239
bi=100, loss=0.24272394180297852
bi=150, loss=0.18033166229724884
bi=0, loss=0.08887135237455368
bi=50, loss=0.12395989894866943
bi=100, loss=0.14382410049438477
bi=150, loss=0.09307395666837692
bi=0, loss=0.10286219418048859
bi=50, loss=0.1073198914527893
bi=100, loss=0.07345833629369736
bi=150, loss=0.07989972829818726
F1 Score (Micro) = 0.7995
Training run 1 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-18-67089-4GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at

Using model model-trained-18-67089-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.679334282875061
bi=50, loss=0.42574092745780945
bi=100, loss=0.3327544331550598
bi=150, loss=0.22611720860004425
bi=0, loss=0.22599342465400696
bi=50, loss=0.18549615144729614
bi=100, loss=0.3041066825389862
bi=150, loss=0.17194245755672455
bi=0, loss=0.12344403564929962
bi=50, loss=0.11583267897367477
bi=100, loss=0.11548750847578049
bi=150, loss=0.11022119969129562
bi=0, loss=0.10592450946569443
bi=50, loss=0.09232433885335922
bi=100, loss=0.10344002395868301
bi=150, loss=0.06335185468196869
F1 Score (Micro) = 0.8329
Training run 2 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-18-67089-4GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at

Using model model-trained-18-67089-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.7391654849052429
bi=50, loss=0.46718356013298035
bi=100, loss=0.35074910521507263
bi=150, loss=0.32628536224365234
bi=0, loss=0.2309623807668686
bi=50, loss=0.2179441899061203
bi=100, loss=0.21889828145503998
bi=150, loss=0.09413931518793106
bi=0, loss=0.13854649662971497
bi=50, loss=0.1066657081246376
bi=100, loss=0.09866989403963089
bi=150, loss=0.12869806587696075
bi=0, loss=0.10548462718725204
bi=50, loss=0.09573977440595627
bi=100, loss=0.0706818550825119
bi=150, loss=0.10691659897565842
F1 Score (Micro) = 0.8074
Training run 3 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-18-67089-4GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at

Using model model-trained-18-67089-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.7450225949287415
bi=50, loss=0.3957406282424927
bi=100, loss=0.29631224274635315
bi=150, loss=0.19266565144062042
bi=0, loss=0.20268426835536957
bi=50, loss=0.2102544754743576
bi=100, loss=0.1764812022447586
bi=150, loss=0.13881857693195343
bi=0, loss=0.15165412425994873
bi=50, loss=0.08016572147607803
bi=100, loss=0.11153297871351242
bi=150, loss=0.11680658906698227
bi=0, loss=0.14267410337924957
bi=50, loss=0.06964776664972305
bi=100, loss=0.11485667526721954
bi=150, loss=0.11901070922613144
F1 Score (Micro) = 0.8141
Training run 4 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-18-67089-4GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint at

Using model model-trained-18-67089-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.6856151819229126
bi=50, loss=0.3977987766265869
bi=100, loss=0.2747465670108795
bi=150, loss=0.2138931304216385
bi=0, loss=0.1968950480222702
bi=50, loss=0.12442958354949951
bi=100, loss=0.18668685853481293
bi=150, loss=0.18855403363704681
bi=0, loss=0.1831071674823761
bi=50, loss=0.08666256815195084
bi=100, loss=0.09120483696460724
bi=150, loss=0.0999719500541687
bi=0, loss=0.16858656704425812
bi=50, loss=0.06641478091478348
bi=100, loss=0.07169347256422043
bi=150, loss=0.0781402587890625
F1 Score (Micro) = 0.8029
Training run 5 completed.
Logging model stats....



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-36-130647-4GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-36-130647-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.6644598841667175
bi=50, loss=0.32772204279899597
bi=100, loss=0.2697829306125641
bi=150, loss=0.2395290583372116
bi=0, loss=0.22830091416835785
bi=50, loss=0.20487360656261444
bi=100, loss=0.11495130509138107
bi=150, loss=0.21421609818935394
bi=0, loss=0.13070209324359894
bi=50, loss=0.10683053731918335
bi=100, loss=0.13754187524318695
bi=150, loss=0.08335988968610764
bi=0, loss=0.19950167834758759
bi=50, loss=0.14756977558135986
bi=100, loss=0.07535725831985474
bi=150, loss=0.08801168203353882
F1 Score (Micro) = 0.8068
Training run 1 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-36-130647-4GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-36-130647-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.69464111328125
bi=50, loss=0.32442575693130493
bi=100, loss=0.31872662901878357
bi=150, loss=0.21977739036083221
bi=0, loss=0.19819705188274384
bi=50, loss=0.20411725342273712
bi=100, loss=0.15032696723937988
bi=150, loss=0.17038914561271667
bi=0, loss=0.1395380049943924
bi=50, loss=0.11765823513269424
bi=100, loss=0.1295812427997589
bi=150, loss=0.0944443866610527
bi=0, loss=0.14768584072589874
bi=50, loss=0.09708299487829208
bi=100, loss=0.0822601467370987
bi=150, loss=0.08006216585636139
F1 Score (Micro) = 0.8261
Training run 2 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-36-130647-4GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-36-130647-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.6808632016181946
bi=50, loss=0.3668905198574066
bi=100, loss=0.2901049554347992
bi=150, loss=0.19353531301021576
bi=0, loss=0.24145498871803284
bi=50, loss=0.15136544406414032
bi=100, loss=0.1761583685874939
bi=150, loss=0.13122320175170898
bi=0, loss=0.16459636390209198
bi=50, loss=0.09544195979833603
bi=100, loss=0.18507516384124756
bi=150, loss=0.08614809811115265
bi=0, loss=0.121103934943676
bi=50, loss=0.11064853519201279
bi=100, loss=0.07246249914169312
bi=150, loss=0.0933104157447815
F1 Score (Micro) = 0.8107
Training run 3 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-36-130647-4GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-36-130647-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.7172778248786926
bi=50, loss=0.3581073582172394
bi=100, loss=0.3812964856624603
bi=150, loss=0.21856561303138733
bi=0, loss=0.22957733273506165
bi=50, loss=0.18229688704013824
bi=100, loss=0.12239282578229904
bi=150, loss=0.17700636386871338
bi=0, loss=0.12795215845108032
bi=50, loss=0.10057883709669113
bi=100, loss=0.13921599090099335
bi=150, loss=0.10144157707691193
bi=0, loss=0.08332540094852448
bi=50, loss=0.0910121351480484
bi=100, loss=0.09249541163444519
bi=150, loss=0.11344435065984726
F1 Score (Micro) = 0.8088
Training run 4 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/4GB-checkpoints/model-trained-36-130647-4GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-36-130647-4GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.6914640665054321
bi=50, loss=0.3222445547580719
bi=100, loss=0.30015096068382263
bi=150, loss=0.21345578134059906
bi=0, loss=0.23765766620635986
bi=50, loss=0.18687045574188232
bi=100, loss=0.195726677775383
bi=150, loss=0.1387929767370224
bi=0, loss=0.127165749669075
bi=50, loss=0.12072036415338516
bi=100, loss=0.13026274740695953
bi=150, loss=0.11467482894659042
bi=0, loss=0.063018299639225
bi=50, loss=0.11541266739368439
bi=100, loss=0.06993378698825836
bi=150, loss=0.10224983841180801
F1 Score (Micro) = 0.8093
Training run 5 completed.
Logging model stats....



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-0-10596-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-0-10596-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.7061797380447388
bi=50, loss=0.3742334544658661
bi=100, loss=0.3081986904144287
bi=150, loss=0.24085357785224915
bi=0, loss=0.19716139137744904
bi=50, loss=0.2414478361606598
bi=100, loss=0.1640900820493698
bi=150, loss=0.1042991429567337
bi=0, loss=0.13540063798427582
bi=50, loss=0.14539411664009094
bi=100, loss=0.15744994580745697
bi=150, loss=0.12914292514324188
bi=0, loss=0.10329421609640121
bi=50, loss=0.1719946712255478
bi=100, loss=0.2531307637691498
bi=150, loss=0.07521646469831467
F1 Score (Micro) = 0.8
Training run 1 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-0-10596-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-0-10596-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.7122626900672913
bi=50, loss=0.39190202951431274
bi=100, loss=0.3681310713291168
bi=150, loss=0.3115191161632538
bi=0, loss=0.2042848914861679
bi=50, loss=0.20114651322364807
bi=100, loss=0.18203632533550262
bi=150, loss=0.15924589335918427
bi=0, loss=0.19088397920131683
bi=50, loss=0.23924219608306885
bi=100, loss=0.17918606102466583
bi=150, loss=0.18392221629619598
bi=0, loss=0.09577661007642746
bi=50, loss=0.1088266596198082
bi=100, loss=0.20484638214111328
bi=150, loss=0.16912634670734406
F1 Score (Micro) = 0.836
Training run 2 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-0-10596-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-0-10596-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.6808533072471619
bi=50, loss=0.31389760971069336
bi=100, loss=0.3792473375797272
bi=150, loss=0.2937498688697815
bi=0, loss=0.19910965859889984
bi=50, loss=0.21173810958862305
bi=100, loss=0.19071748852729797
bi=150, loss=0.1516546607017517
bi=0, loss=0.17295202612876892
bi=50, loss=0.1581106185913086
bi=100, loss=0.11363210529088974
bi=150, loss=0.16758717596530914
bi=0, loss=0.11741330474615097
bi=50, loss=0.1110750064253807
bi=100, loss=0.10694875568151474
bi=150, loss=0.10402894020080566
F1 Score (Micro) = 0.8202
Training run 3 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-0-10596-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-0-10596-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.6770532727241516
bi=50, loss=0.39084792137145996
bi=100, loss=0.3037228584289551
bi=150, loss=0.28678154945373535
bi=0, loss=0.20562390983104706
bi=50, loss=0.17093342542648315
bi=100, loss=0.17019249498844147
bi=150, loss=0.11897950619459152
bi=0, loss=0.16133569180965424
bi=50, loss=0.14636756479740143
bi=100, loss=0.13415198028087616
bi=150, loss=0.0895083099603653
bi=0, loss=0.09942011535167694
bi=50, loss=0.1578434854745865
bi=100, loss=0.10189606249332428
bi=150, loss=0.12221711128950119
F1 Score (Micro) = 0.8025
Training run 4 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-0-10596-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-0-10596-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.6843156218528748
bi=50, loss=0.32142025232315063
bi=100, loss=0.28824982047080994
bi=150, loss=0.22063468396663666
bi=0, loss=0.2500593364238739
bi=50, loss=0.19985713064670563
bi=100, loss=0.202174112200737
bi=150, loss=0.16947101056575775
bi=0, loss=0.16465330123901367
bi=50, loss=0.17999696731567383
bi=100, loss=0.07605455070734024
bi=150, loss=0.1077437624335289
bi=0, loss=0.10680496692657471
bi=50, loss=0.08207881450653076
bi=100, loss=0.12305495887994766
bi=150, loss=0.1506180614233017
F1 Score (Micro) = 0.797
Training run 5 completed.
Logging model stats....



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-3-42384-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-3-42384-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.7273723483085632
bi=50, loss=0.30843687057495117
bi=100, loss=0.2913966178894043
bi=150, loss=0.1480265110731125
bi=0, loss=0.2659478187561035
bi=50, loss=0.13486815989017487
bi=100, loss=0.1435897797346115
bi=150, loss=0.10712108761072159
bi=0, loss=0.1455584317445755
bi=50, loss=0.09461808204650879
bi=100, loss=0.11225538700819016
bi=150, loss=0.16729365289211273
bi=0, loss=0.10773313045501709
bi=50, loss=0.08605696260929108
bi=100, loss=0.08730166405439377
bi=150, loss=0.07700108736753464
F1 Score (Micro) = 0.8103
Training run 1 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-3-42384-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-3-42384-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.7245100140571594
bi=50, loss=0.3296479880809784
bi=100, loss=0.2907155156135559
bi=150, loss=0.2657764256000519
bi=0, loss=0.13033176958560944
bi=50, loss=0.19026094675064087
bi=100, loss=0.13766418397426605
bi=150, loss=0.16822253167629242
bi=0, loss=0.1018982082605362
bi=50, loss=0.11608419567346573
bi=100, loss=0.07322538644075394
bi=150, loss=0.06841284036636353
bi=0, loss=0.09844966977834702
bi=50, loss=0.10126078128814697
bi=100, loss=0.07188476622104645
bi=150, loss=0.0899820551276207
F1 Score (Micro) = 0.8419
Training run 2 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-3-42384-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-3-42384-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.7183259725570679
bi=50, loss=0.35216158628463745
bi=100, loss=0.3001633584499359
bi=150, loss=0.16576425731182098
bi=0, loss=0.21347928047180176
bi=50, loss=0.13918708264827728
bi=100, loss=0.14648787677288055
bi=150, loss=0.13953928649425507
bi=0, loss=0.1296585351228714
bi=50, loss=0.09915284812450409
bi=100, loss=0.12525883316993713
bi=150, loss=0.11414437741041183
bi=0, loss=0.06828456372022629
bi=50, loss=0.07520722597837448
bi=100, loss=0.07186811417341232
bi=150, loss=0.11122968047857285
F1 Score (Micro) = 0.8255
Training run 3 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-3-42384-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-3-42384-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.6989083290100098
bi=50, loss=0.3624172806739807
bi=100, loss=0.2989831864833832
bi=150, loss=0.21388140320777893
bi=0, loss=0.1400012969970703
bi=50, loss=0.193049356341362
bi=100, loss=0.12974515557289124
bi=150, loss=0.15236906707286835
bi=0, loss=0.10901164263486862
bi=50, loss=0.12829245626926422
bi=100, loss=0.10790808498859406
bi=150, loss=0.07715588063001633
bi=0, loss=0.11882102489471436
bi=50, loss=0.10065658390522003
bi=100, loss=0.13539069890975952
bi=150, loss=0.09344418346881866
F1 Score (Micro) = 0.7907
Training run 4 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-3-42384-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-3-42384-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.7073649764060974
bi=50, loss=0.370237797498703
bi=100, loss=0.2817348539829254
bi=150, loss=0.20546288788318634
bi=0, loss=0.21508324146270752
bi=50, loss=0.22716453671455383
bi=100, loss=0.15480725467205048
bi=150, loss=0.2486639767885208
bi=0, loss=0.10027748346328735
bi=50, loss=0.11296536773443222
bi=100, loss=0.1007559522986412
bi=150, loss=0.1052221804857254
bi=0, loss=0.09043741226196289
bi=50, loss=0.08403220027685165
bi=100, loss=0.054917704313993454
bi=150, loss=0.1374337375164032
F1 Score (Micro) = 0.8034
Training run 5 completed.
Logging model stats....



Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-5-63576-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-5-63576-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.691432535648346
bi=50, loss=0.41041478514671326
bi=100, loss=0.34073305130004883
bi=150, loss=0.19473133981227875
bi=0, loss=0.16924484074115753
bi=50, loss=0.15590298175811768
bi=100, loss=0.1601993292570114
bi=150, loss=0.1315949708223343
bi=0, loss=0.12744036316871643
bi=50, loss=0.0779212936758995
bi=100, loss=0.1799997240304947
bi=150, loss=0.11129485815763474
bi=0, loss=0.06659003347158432
bi=50, loss=0.06887584179639816
bi=100, loss=0.07032113522291183
bi=150, loss=0.09467639774084091
F1 Score (Micro) = 0.8123
Training run 1 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-5-63576-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-5-63576-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.7406346201896667
bi=50, loss=0.34183433651924133
bi=100, loss=0.2902926504611969
bi=150, loss=0.26816660165786743
bi=0, loss=0.17762470245361328
bi=50, loss=0.13381117582321167
bi=100, loss=0.16872595250606537
bi=150, loss=0.18675720691680908
bi=0, loss=0.130807027220726
bi=50, loss=0.08762097358703613
bi=100, loss=0.09050247073173523
bi=150, loss=0.16004233062267303
bi=0, loss=0.13003619015216827
bi=50, loss=0.10754703730344772
bi=100, loss=0.07374420762062073
bi=150, loss=0.10761453956365585
F1 Score (Micro) = 0.8372
Training run 2 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-5-63576-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-5-63576-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.6771770715713501
bi=50, loss=0.39519867300987244
bi=100, loss=0.2796936631202698
bi=150, loss=0.15073202550411224
bi=0, loss=0.16502727568149567
bi=50, loss=0.1557181030511856
bi=100, loss=0.15564946830272675
bi=150, loss=0.1173369437456131
bi=0, loss=0.15398333966732025
bi=50, loss=0.09786175191402435
bi=100, loss=0.14127571880817413
bi=150, loss=0.07902289927005768
bi=0, loss=0.08364153653383255
bi=50, loss=0.0764140784740448
bi=100, loss=0.1043485775589943
bi=150, loss=0.08634033799171448
F1 Score (Micro) = 0.8291
Training run 3 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-5-63576-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-5-63576-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.7335861325263977
bi=50, loss=0.3691302239894867
bi=100, loss=0.2727179527282715
bi=150, loss=0.17970715463161469
bi=0, loss=0.17992575466632843
bi=50, loss=0.12280433624982834
bi=100, loss=0.1305527687072754
bi=150, loss=0.13935764133930206
bi=0, loss=0.13676123321056366
bi=50, loss=0.1763909012079239
bi=100, loss=0.11906283348798752
bi=150, loss=0.07294964045286179
bi=0, loss=0.09469934552907944
bi=50, loss=0.1062656044960022
bi=100, loss=0.046425338834524155
bi=150, loss=0.09574195742607117
F1 Score (Micro) = 0.8217
Training run 4 completed.


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/12GB-checkpoints/model-trained-5-63576-12GB/ were not used when initializing BertModel: ['cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint a

Using model model-trained-5-63576-12GB, with tokenizer ../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt
bi=0, loss=0.7019864916801453
bi=50, loss=0.4045860469341278
bi=100, loss=0.26260167360305786
bi=150, loss=0.26338788866996765
bi=0, loss=0.21676340699195862
bi=50, loss=0.2085028886795044
bi=100, loss=0.08165328204631805
bi=150, loss=0.11604722589254379
bi=0, loss=0.11582460254430771
bi=50, loss=0.10508110374212265
bi=100, loss=0.10520865768194199
bi=150, loss=0.0810474082827568
bi=0, loss=0.05983774736523628
bi=50, loss=0.09452822059392929
bi=100, loss=0.08544166386127472
bi=150, loss=0.09806782007217407
F1 Score (Micro) = 0.823
Training run 5 completed.
Logging model stats....

Total Training/Eval time: 6506.26 seconds
