In [9]:
import pandas as pd
import numpy as np
import json
import os 
import time
import torch.nn as nn
import torch
import transformers
from transformers import get_linear_schedule_with_warmup
from torch.nn.parallel import DataParallel

### Change the path here

In [2]:
training_filepath = '../../Data/BioASQ_data/BioASQ-training7b/trainining7b.json'
test_directory = '../../Data/BioASQ_data/Task7BGoldenEnriched/'

In [3]:
training_text = []
test_text = []

# Filter out the training data
with open (training_filepath, "r") as f:
    data = json.loads(f.read())
    texts = data['questions']
    for text in texts:
        if 'exact_answer' in text.keys():
            if text['exact_answer'] == 'yes' or text['exact_answer'] == 'no':
                training_text.append(text)

# Filter out the text data
directory = test_directory
for filename in os.listdir(directory):
    with open (directory+filename, "r") as f:
        data = json.loads(f.read())
        texts = data['questions']
        for text in texts:
            if 'exact_answer' in text.keys():
                if text['exact_answer'] == 'yes' or text['exact_answer'] == 'no':
                    test_text.append(text)

In [4]:
len(test_text)

140

In [5]:
def process_data(texts):
    question_list = []
    context_list = []
    target_list = []
    for text in texts:
        question_list.append(text['body'])
        context_list.append(' '. join([x['text'] for x in text['snippets']]))
        target_list.append(1 if text['exact_answer'] == 'yes' else 0)
    df = pd.DataFrame(zip(question_list, context_list, target_list), columns=['question', 'context', 'target'])
    return df

In [6]:
class BERTBaseUncased(nn.Module):
    def __init__(self, model_path):
        super(BERTBaseUncased, self).__init__()
        self.bert = transformers.BertModel.from_pretrained(model_path)
        self.out = nn.Linear(768, 2)
        self.softmax = nn.Softmax(dim=1)
    
    def forward(self, ids, mask, token_type_ids):
        _, output = self.bert(ids, attention_mask=mask, token_type_ids=token_type_ids, return_dict=False)
        output = self.out(output)
        return self.softmax(output)

class BERTDatasetTraining:
    def __init__(self, question, context, target, tokenizer, max_len):
        self.question = question
        self.context = context
        self.target = target
        
        self.tokenizer = tokenizer
        self.max_len = max_len
        
    def __len__(self):
        return len(self.question)
    
    def __getitem__(self, item):
        question= str(self.question[item])
        context = str(self.context[item])

        inputs = self.tokenizer.encode_plus(
            question,
            context,
            add_special_tokens=True
        )

        ids = inputs['input_ids']
        token_type_ids = inputs['token_type_ids']
        mask = inputs['attention_mask']
        
        padding_len = self.max_len - len(ids)
        
        ids = ids[:self.max_len] + ([0] * padding_len) 
        token_type_ids = token_type_ids[:self.max_len] + ([0] * padding_len)
        mask = mask[:self.max_len] + ([0] * padding_len)
        

        return {
            'ids': torch.tensor(ids, dtype=torch.long),
            'mask': torch.tensor(mask, dtype=torch.long),
            'token_type_ids': torch.tensor(token_type_ids, dtype=torch.long),
            'target': torch.tensor(self.target[item], dtype=torch.long)
        }


def loss_fn(outputs, target):
    return nn.CrossEntropyLoss()(outputs, target)


def train_loop_fn(data_loader, model, optimizer, device, scheduler=None):
    model.train()
    for bi, d in enumerate(data_loader):
        ids = d['ids']
        mask = d['mask']
        token_type_ids = d['token_type_ids']
        target = d['target']
        
        ids = ids.to(device, dtype=torch.long)
        mask = mask.to(device, dtype=torch.long)
        token_type_ids = token_type_ids.to(device, dtype=torch.long)
        target = target.to(device, dtype=torch.long)
        
        optimizer.zero_grad()
        outputs = model(ids, mask, token_type_ids)

        loss = loss_fn(outputs, target)
        loss.backward()
        optimizer.step()
        if scheduler is not None:
            scheduler.step()
        if bi % 20 == 0:
            print(f'bi={bi}, loss={loss}')

            
def eval_loop_fn(data_loader, model, device):
    model.eval()
    fin_targets = []
    fin_outputs = []
    for bi, d in enumerate(data_loader):
        with torch.no_grad():
            ids = d['ids'].to(device, dtype=torch.long)
            mask = d['mask'].to(device, dtype=torch.long)
            token_type_ids = d['token_type_ids'].to(device, dtype=torch.long)
            target = d['target'].to(device, dtype=torch.long)
          
            outputs = model(ids, mask, token_type_ids)
          
            fin_targets.append(target.cpu().detach().numpy())
            fin_outputs.append(outputs.cpu().detach().numpy())

    return np.vstack(fin_outputs), np.hstack(fin_targets)

In [7]:
model_paths = [
'/home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-0-7063/',
'/home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-8-63567/',
'/home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-22-162449/'
              ]

In [8]:
for path in model_paths:
    try:
        print(path.split('/')[-2].split('.')[0])
    except Exception as e:
        print(e) 
        continue

run_8GB_model-trained-0-7063
run_8GB_model-trained-8-63567
run_8GB_model-trained-22-162449


In [10]:
MAX_LEN = 512
TRAIN_BATCH_SIZE = 16
TEST_BATCH_SIZE = 4
EPOCHS = 8
LEARNING_RATE = 3e-5
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

start = time.perf_counter()

for model_path in model_paths:
    tokenizer_path = ('bert-base-uncased' if model_path == 'bert-base-uncased'\
                      else '../../Preprocessing/Tokenization/wp-vocab-30500-vocab.txt')
    model_name = model_path if model_path == 'bert-base-uncased' else model_path.split('/')[-2].split('.')[0]
    scores = []
    model_stats = {'model_name':model_name,
                   'seeds':[],
                   'batch_size':TRAIN_BATCH_SIZE,
                   'epochs':EPOCHS,
                   'metric':'accuracy',
                   'scores': [],
                   'mean_score':0
                    }
    for num, seed in enumerate([42,43,44,45,46], 1):
      
        tokenizer = transformers.BertTokenizer.from_pretrained(tokenizer_path)
        model = BERTBaseUncased(model_path)
        model = DataParallel(model)
        model.to(device)

        train_df = process_data(training_text)
        test_df = process_data(test_text)

        train_dataset = BERTDatasetTraining(
            question=train_df.question.values,
            context=train_df.context.values,
            target=train_df.target.values,
            tokenizer=tokenizer,
            max_len=MAX_LEN
        )
        train_data_loader = torch.utils.data.DataLoader(
            train_dataset,
            batch_size=TRAIN_BATCH_SIZE,
            shuffle=True
        )

        test_dataset = BERTDatasetTraining(
            question=test_df.question.values,
            context=test_df.context.values,
            target=test_df.target.values,
            tokenizer=tokenizer,
            max_len=MAX_LEN
        )
        test_data_loader = torch.utils.data.DataLoader(
            test_dataset,
            batch_size=TEST_BATCH_SIZE,
            shuffle=True,
            drop_last=True
        )

        optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)
        num_training_steps = int((len(train_dataset) / TRAIN_BATCH_SIZE) * EPOCHS)
        scheduler = get_linear_schedule_with_warmup(
            optimizer, 
            num_warmup_steps=0,
            num_training_steps=num_training_steps
        )

        for epoch in range(EPOCHS):
            train_loop_fn(train_data_loader, model, optimizer, device, scheduler)
            
        output, target = eval_loop_fn(test_data_loader, model, device)
        acc = (output.argmax(1) == target).sum() / len(target)
        print(f'Final Accuracy for Round {num}: {round(acc,4)}')
        model_stats['scores'].append(round(acc, 6))
        torch.cuda.empty_cache()
        time.sleep(3)
        print()
        print('---------------------')
    print('Logging model stats....')
    print()
    final_score = np.round(np.mean(model_stats['scores']), 4)
    model_stats['mean_score'] = final_score
    with open('logs/stats.txt', 'a') as f:
        f.write(json.dumps(model_stats))
        f.write('\n')
    print('---------------------')

Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-0-7063/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint 

bi=0, loss=0.715508759021759
bi=20, loss=0.3137582838535309
bi=40, loss=0.43840402364730835
bi=0, loss=0.6257165670394897
bi=20, loss=0.43836620450019836
bi=40, loss=0.625725507736206
bi=0, loss=0.5632762908935547
bi=20, loss=0.43833428621292114
bi=40, loss=0.5632702708244324
bi=0, loss=0.5008020401000977
bi=20, loss=0.5007956624031067
bi=40, loss=0.6257413625717163
bi=0, loss=0.5007944107055664
bi=20, loss=0.5007848739624023
bi=40, loss=0.5632685422897339
bi=0, loss=0.6257481575012207
bi=20, loss=0.3758203983306885
bi=40, loss=0.5632649064064026
bi=0, loss=0.5007869601249695
bi=20, loss=0.5007845163345337
bi=40, loss=0.5007820129394531
bi=0, loss=0.43829625844955444
bi=20, loss=0.43830206990242004
bi=40, loss=0.5632651448249817
Final Accuracy for Round 1: 0.6714

---------------------


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-0-7063/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint 

bi=0, loss=0.7130880355834961
bi=20, loss=0.5009768605232239
bi=40, loss=0.6256934404373169
bi=0, loss=0.4384469985961914
bi=20, loss=0.3759791851043701
bi=40, loss=0.5008022785186768
bi=0, loss=0.3759301006793976
bi=20, loss=0.4383658468723297
bi=40, loss=0.5008127689361572
bi=0, loss=0.5632714629173279
bi=20, loss=0.43834391236305237
bi=40, loss=0.5632756352424622
bi=0, loss=0.5632696747779846
bi=20, loss=0.43832123279571533
bi=40, loss=0.37585702538490295
bi=0, loss=0.3133784830570221
bi=20, loss=0.5007981061935425
bi=40, loss=0.3758460283279419
bi=0, loss=0.6882193684577942
bi=20, loss=0.43831732869148254
bi=40, loss=0.3758370578289032
bi=0, loss=0.5007929801940918
bi=20, loss=0.5007844567298889
bi=40, loss=0.4383183717727661
Final Accuracy for Round 2: 0.6714

---------------------


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-0-7063/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint 

bi=0, loss=0.6897773146629333
bi=20, loss=0.43863800168037415
bi=40, loss=0.5008514523506165
bi=0, loss=0.37593331933021545
bi=20, loss=0.3758949637413025
bi=40, loss=0.5008049607276917
bi=0, loss=0.5008039474487305
bi=20, loss=0.43832889199256897
bi=40, loss=0.31337466835975647
bi=0, loss=0.5632663369178772
bi=20, loss=0.500789225101471
bi=40, loss=0.4383050203323364
bi=0, loss=0.31334930658340454
bi=20, loss=0.5007799863815308
bi=40, loss=0.7507157921791077
bi=0, loss=0.3758212625980377
bi=20, loss=0.5007763504981995
bi=40, loss=0.6257506608963013
bi=0, loss=0.3758150637149811
bi=20, loss=0.625751256942749
bi=40, loss=0.5007811188697815
bi=0, loss=0.4382968842983246
bi=20, loss=0.500778079032898
bi=40, loss=0.4382920265197754
Final Accuracy for Round 3: 0.6714

---------------------


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-0-7063/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint 

bi=0, loss=0.7623088955879211
bi=20, loss=0.37607282400131226
bi=40, loss=0.5632805228233337
bi=0, loss=0.4383757412433624
bi=20, loss=0.5632743239402771
bi=40, loss=0.5007973313331604
bi=0, loss=0.37584778666496277
bi=20, loss=0.500794529914856
bi=40, loss=0.750708281993866
bi=0, loss=0.6257479786872864
bi=20, loss=0.31333762407302856
bi=40, loss=0.3758159577846527
bi=0, loss=0.6257517337799072
bi=20, loss=0.4382968842983246
bi=40, loss=0.4382939636707306
bi=0, loss=0.5007811784744263
bi=20, loss=0.5632664561271667
bi=40, loss=0.5007774233818054
bi=0, loss=0.5007784962654114
bi=20, loss=0.5632658004760742
bi=40, loss=0.4382913112640381
bi=0, loss=0.5632657408714294
bi=20, loss=0.37580373883247375
bi=40, loss=0.6257514953613281
Final Accuracy for Round 4: 0.6714

---------------------


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-0-7063/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint 

bi=0, loss=0.7159106731414795
bi=20, loss=0.5010585188865662
bi=40, loss=0.43852803111076355
bi=0, loss=0.5008797645568848
bi=20, loss=0.37600377202033997
bi=40, loss=0.4383910894393921
bi=0, loss=0.31349053978919983
bi=20, loss=0.6257275938987732
bi=40, loss=0.5008185505867004
bi=0, loss=0.5008072853088379
bi=20, loss=0.5008049011230469
bi=40, loss=0.5632730722427368
bi=0, loss=0.6257365942001343
bi=20, loss=0.4383322596549988
bi=40, loss=0.5632712244987488
bi=0, loss=0.6257320642471313
bi=20, loss=0.500796914100647
bi=40, loss=0.5632730722427368
bi=0, loss=0.5007941722869873
bi=20, loss=0.5007693767547607
bi=40, loss=0.5632704496383667
bi=0, loss=0.4383139908313751
bi=20, loss=0.3758389949798584
bi=40, loss=0.625746488571167
Final Accuracy for Round 5: 0.6714

---------------------
Logging model stats....

---------------------


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-8-63567/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint

bi=0, loss=0.7214868664741516
bi=20, loss=0.4390156865119934
bi=40, loss=0.5008606910705566
bi=0, loss=0.43833643198013306
bi=20, loss=0.43842020630836487
bi=40, loss=0.5631966590881348
bi=0, loss=0.6255940794944763
bi=20, loss=0.50081866979599
bi=40, loss=0.37591925263404846
bi=0, loss=0.6257746815681458
bi=20, loss=0.3758966028690338
bi=40, loss=0.7505738139152527
bi=0, loss=0.5631365776062012
bi=20, loss=0.37595218420028687
bi=40, loss=0.6882184743881226
bi=0, loss=0.3134373426437378
bi=20, loss=0.4383319020271301
bi=40, loss=0.5632245540618896
bi=0, loss=0.5008074045181274
bi=20, loss=0.3758785128593445
bi=40, loss=0.6253570914268494
bi=0, loss=0.5632641315460205
bi=20, loss=0.5007962584495544
bi=40, loss=0.5007917881011963
Final Accuracy for Round 1: 0.6714

---------------------


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-8-63567/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint

bi=0, loss=0.6988315582275391
bi=20, loss=0.563424825668335
bi=40, loss=0.688101053237915
bi=0, loss=0.4384562075138092
bi=20, loss=0.37604573369026184
bi=40, loss=0.8130662441253662
bi=0, loss=0.6257133483886719
bi=20, loss=0.5008403062820435
bi=40, loss=0.5632782578468323
bi=0, loss=0.500827968120575
bi=20, loss=0.4383563995361328
bi=40, loss=0.6257315874099731
bi=0, loss=0.6882010698318481
bi=20, loss=0.43828850984573364
bi=40, loss=0.43833234906196594
bi=0, loss=0.3758920729160309
bi=20, loss=0.31339558959007263
bi=40, loss=0.4383356273174286
bi=0, loss=0.5632814168930054
bi=20, loss=0.5007321834564209
bi=40, loss=0.4383406639099121
bi=0, loss=0.31339937448501587
bi=20, loss=0.43835484981536865
bi=40, loss=0.3757883608341217
Final Accuracy for Round 2: 0.6714

---------------------


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-8-63567/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint

bi=0, loss=0.6088639497756958
bi=20, loss=0.3768012225627899
bi=40, loss=0.5009221434593201
bi=0, loss=0.43853166699409485
bi=20, loss=0.5008024573326111
bi=40, loss=0.5008887052536011
bi=0, loss=0.6880475878715515
bi=20, loss=0.438418447971344
bi=40, loss=0.43841466307640076
bi=0, loss=0.6257168650627136
bi=20, loss=0.5008284449577332
bi=40, loss=0.43831878900527954
bi=0, loss=0.3759237229824066
bi=20, loss=0.438292920589447
bi=40, loss=0.563144862651825
bi=0, loss=0.5008986592292786
bi=20, loss=0.5632185935974121
bi=40, loss=0.5008043646812439
bi=0, loss=0.5632370710372925
bi=20, loss=0.5007266402244568
bi=40, loss=0.4383649230003357
bi=0, loss=0.5008116960525513
bi=20, loss=0.500824511051178
bi=40, loss=0.3758912682533264
Final Accuracy for Round 3: 0.6714

---------------------


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-8-63567/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint

bi=0, loss=0.6559893488883972
bi=20, loss=0.5009816288948059
bi=40, loss=0.4384993314743042
bi=0, loss=0.4384741187095642
bi=20, loss=0.43842440843582153
bi=40, loss=0.37594345211982727
bi=0, loss=0.4383932054042816
bi=20, loss=0.5632808804512024
bi=40, loss=0.43835875391960144
bi=0, loss=0.37590593099594116
bi=20, loss=0.6257306933403015
bi=40, loss=0.5008207559585571
bi=0, loss=0.6882166266441345
bi=20, loss=0.3758782744407654
bi=40, loss=0.43833643198013306
bi=0, loss=0.3133945167064667
bi=20, loss=0.6257383823394775
bi=40, loss=0.5632663369178772
bi=0, loss=0.3758598864078522
bi=20, loss=0.5632699728012085
bi=40, loss=0.3133797347545624
bi=0, loss=0.6257155537605286
bi=20, loss=0.43832385540008545
bi=40, loss=0.6257337331771851
Final Accuracy for Round 4: 0.6714

---------------------


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-8-63567/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoint

bi=0, loss=0.6916256546974182
bi=20, loss=0.3142433166503906
bi=40, loss=0.37620165944099426
bi=0, loss=0.31368204951286316
bi=20, loss=0.4384147524833679
bi=40, loss=0.37596842646598816
bi=0, loss=0.43841707706451416
bi=20, loss=0.4383704662322998
bi=40, loss=0.5008276104927063
bi=0, loss=0.8130616545677185
bi=20, loss=0.6257811188697815
bi=40, loss=0.5008139610290527
bi=0, loss=0.5007957816123962
bi=20, loss=0.43835994601249695
bi=40, loss=0.5008010864257812
bi=0, loss=0.4383760094642639
bi=20, loss=0.3758564591407776
bi=40, loss=0.5632628202438354
bi=0, loss=0.5007989406585693
bi=20, loss=0.4383184611797333
bi=40, loss=0.5632226467132568
bi=0, loss=0.3758527636528015
bi=20, loss=0.5008055567741394
bi=40, loss=0.3758486807346344
Final Accuracy for Round 5: 0.6714

---------------------
Logging model stats....

---------------------


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-22-162449/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoi

bi=0, loss=0.704915463924408
bi=20, loss=0.439016729593277
bi=40, loss=0.5633245706558228
bi=0, loss=0.5009728074073792
bi=20, loss=0.5009222626686096
bi=40, loss=0.4384104311466217
bi=0, loss=0.5632842183113098
bi=20, loss=0.43836671113967896
bi=40, loss=0.7505554556846619
bi=0, loss=0.563163161277771
bi=20, loss=0.625767707824707
bi=40, loss=0.500857949256897
bi=0, loss=0.3759307563304901
bi=20, loss=0.5631797313690186
bi=40, loss=0.5008669495582581
bi=0, loss=0.6256673336029053
bi=20, loss=0.4382793605327606
bi=40, loss=0.4384063184261322
bi=0, loss=0.3134635090827942
bi=20, loss=0.500739574432373
bi=40, loss=0.3759142756462097
bi=0, loss=0.5006908178329468
bi=20, loss=0.6257304549217224
bi=40, loss=0.4383482038974762
Final Accuracy for Round 1: 0.6714

---------------------


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-22-162449/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoi

bi=0, loss=0.7105241417884827
bi=20, loss=0.5011458396911621
bi=40, loss=0.5009334683418274
bi=0, loss=0.7504209876060486
bi=20, loss=0.5633223652839661
bi=40, loss=0.5008732676506042
bi=0, loss=0.4385031759738922
bi=20, loss=0.3135209381580353
bi=40, loss=0.43838369846343994
bi=0, loss=0.3759423792362213
bi=20, loss=0.5007381439208984
bi=40, loss=0.5632721781730652
bi=0, loss=0.43835610151290894
bi=20, loss=0.5008310675621033
bi=40, loss=0.5007939338684082
bi=0, loss=0.4383661150932312
bi=20, loss=0.5008054375648499
bi=40, loss=0.6257391571998596
bi=0, loss=0.5008033514022827
bi=20, loss=0.5007964968681335
bi=40, loss=0.5632708668708801
bi=0, loss=0.6882104277610779
bi=20, loss=0.3758784234523773
bi=40, loss=0.4383329451084137
Final Accuracy for Round 2: 0.6714

---------------------


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-22-162449/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoi

bi=0, loss=0.7555574774742126
bi=20, loss=0.37812867760658264
bi=40, loss=0.37665167450904846
bi=0, loss=0.43889978528022766
bi=20, loss=0.31388285756111145
bi=40, loss=0.4385802149772644
bi=0, loss=0.31378689408302307
bi=20, loss=0.5009547472000122
bi=40, loss=0.5632438063621521
bi=0, loss=0.4382820129394531
bi=20, loss=0.5008319616317749
bi=40, loss=0.6257015466690063
bi=0, loss=0.3759329319000244
bi=20, loss=0.37595465779304504
bi=40, loss=0.5633894205093384
bi=0, loss=0.438289076089859
bi=20, loss=0.3751663863658905
bi=40, loss=0.4383544921875
bi=0, loss=0.3811506927013397
bi=20, loss=0.5319992899894714
bi=40, loss=0.41580817103385925
bi=0, loss=0.5009181499481201
bi=20, loss=0.6666535139083862
bi=40, loss=0.5196933150291443
Final Accuracy for Round 3: 0.7357

---------------------


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-22-162449/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoi

bi=0, loss=0.6158610582351685
bi=20, loss=0.43901312351226807
bi=40, loss=0.5009333491325378
bi=0, loss=0.5004693269729614
bi=20, loss=0.6257903575897217
bi=40, loss=0.5632901787757874
bi=0, loss=0.5007635354995728
bi=20, loss=0.4384327530860901
bi=40, loss=0.37599119544029236
bi=0, loss=0.5008662939071655
bi=20, loss=0.438281387090683
bi=40, loss=0.5006917119026184
bi=0, loss=0.43825563788414
bi=20, loss=0.5630446672439575
bi=40, loss=0.4383472800254822
bi=0, loss=0.5632711052894592
bi=20, loss=0.3758915662765503
bi=40, loss=0.5008163452148438
bi=0, loss=0.6257544159889221
bi=20, loss=0.5008159875869751
bi=40, loss=0.37587666511535645
bi=0, loss=0.500870943069458
bi=20, loss=0.375882089138031
bi=40, loss=0.5008062124252319
Final Accuracy for Round 4: 0.6714

---------------------


Some weights of the model checkpoint at /home/americanthinker/notebooks/pytorch/NationalSecurityBERT/Modeling/checkpoints/8GB-checkpoints/run_8GB_model-trained-22-162449/ were not used when initializing BertModel: ['cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.predictions.decoder.bias', 'cls.predictions.bias']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertModel were not initialized from the model checkpoi

bi=0, loss=0.6855602264404297
bi=20, loss=0.4389772117137909
bi=40, loss=0.6880782842636108
bi=0, loss=0.43871018290519714
bi=20, loss=0.43851858377456665
bi=40, loss=0.3136983811855316
bi=0, loss=0.43845421075820923
bi=20, loss=0.37621986865997314
bi=40, loss=0.5008324384689331
bi=0, loss=0.37800633907318115
bi=20, loss=0.35351115465164185
bi=40, loss=0.5044620037078857
bi=0, loss=0.43866071105003357
bi=20, loss=0.5009447932243347
bi=40, loss=0.6228271722793579
bi=0, loss=0.43299248814582825
bi=20, loss=0.563065230846405
bi=40, loss=0.4390726089477539
bi=0, loss=0.563274621963501
bi=20, loss=0.43844789266586304
bi=40, loss=0.5008668303489685
bi=0, loss=0.4383482038974762
bi=20, loss=0.5011698603630066
bi=40, loss=0.4106025695800781
Final Accuracy for Round 5: 0.6714

---------------------
Logging model stats....

---------------------


In [13]:
torch.cuda.empty_cache()