# ***Bert Text Classification***

In [None]:
# Tutorials provided by Chris McCormick (https://mccormickml.com/) were used to create the basis of this code.

In [1]:
import os
import sys
import torch
import numpy as np
import pandas as pd
import transformers
from tensorflow import keras
import matplotlib.pyplot as plt
import time
import datetime
import seaborn as sns
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import KFold, train_test_split
from torch.utils.data import TensorDataset, DataLoader, RandomSampler, SequentialSampler
from transformers import BertForSequenceClassification, AdamW, BertConfig, get_linear_schedule_with_warmup, BertTokenizer
from sklearn.metrics import roc_auc_score, classification_report, f1_score, accuracy_score, recall_score, precision_score
import torch.nn.functional as F
from sklearn.utils.class_weight import compute_class_weight

os.environ["CUDA_VISIBLE_DEVICES"]="1,3"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

if torch.cuda.is_available(): 
     print('There are %d GPU(s) available.' % torch.cuda.device_count())

cuda
There are 2 GPU(s) available.


In [2]:
# read data

data = pd.read_csv("genocide-transcript-corpus-v0.1.csv", sep=";")

X_data = data['paragraph'].to_numpy()
Y_data = data['label'].to_numpy()
tribunals = data['tribunal'].to_numpy()

In [3]:
# function for text tokenization

def tokenize_data(text_samples):
    tokenizer = BertTokenizer.from_pretrained('bert-base-uncased', do_lower_case=True)
    input_ids = []

    print('Tokenizing paragraphs...')
    
    for sentence in text_samples:
        encoded_par = tokenizer.encode(
                            sentence,
                            add_special_tokens = True,
                       )

        input_ids.append(encoded_par)

    print("Number of samples:", len(input_ids))
    
    input_ids = pad_sequences(input_ids, maxlen=512, truncating="post", padding="post", dtype="int")
    return input_ids


# function for attention mask alignment

def generate_attention_mask(tokenized_samples):
    attention_masks = []

    for sample in tokenized_samples:
        att_mask = [int(token_id > 0) for token_id in sample]
        attention_masks.append(att_mask)
        
    return attention_masks

In [4]:
# function that creates cross validation sets

def split_data(x_data, y_data):
    cv_folds = []

    kf = KFold(n_splits=5)
    for train_index, test_index in kf.split(x_data):
        X_train, X_test_eval = x_data[train_index], x_data[test_index]
        y_train, y_test_eval = y_data[train_index], y_data[test_index]
        
        X_eval, X_test, y_eval, y_test = train_test_split(X_test_eval, y_test_eval, test_size=0.5, random_state=42)
        
        cv_folds.append((X_train, X_eval, X_test, y_train,
                         y_eval, y_test))
        print("X_train:", cv_folds[-1][0].shape, "X_eval:", cv_folds[-1][1].shape, "X_test:", cv_folds[-1][2].shape,
              "y_train:", cv_folds[-1][3].shape, "y_eval:", cv_folds[-1][4].shape, "y_test:", cv_folds[-1][5].shape,)
        
    return cv_folds

In [5]:
### helper functions
# accuracy

def flat_accuracy(preds, labels):
    pred_flat = np.argmax(preds, axis=1).flatten()
    labels_flat = labels.flatten()
    return np.sum(pred_flat == labels_flat) / len(labels_flat)

# duration

def format_time(elapsed):
    elapsed_rounded = int(round((elapsed)))
    return str(datetime.timedelta(seconds=elapsed_rounded))

# loss plot

def plot_loss(loss_val):
    sns.set(style='darkgrid')
    sns.set(font_scale=1.5)
    plt.rcParams["figure.figsize"] = (12,6)
    plt.plot(loss_val, 'b-o')
    plt.title("Training loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.show()
    
# evaluation

def evaluate(true, pred):
    print('EVAL')
    print(pred)
    p1 = pred[:,1]
    auc = roc_auc_score(true, p1)
    print('Test ROC AUC: %.3f' %auc)
    
    p1[p1 > 0] = 1
    p1[p1 < 0] = 0
    
    print(classification_report(true, p1, digits=4))

In [6]:
# function for model training

def train_model(cross_fold_set, class_weights=None, batch_size=8, epochs=3):
    X_train = cross_fold_set[0]
    X_eval = cross_fold_set[1]
    X_test = cross_fold_set[2]
    y_train = cross_fold_set[3]
    y_eval = cross_fold_set[4]
    y_test = cross_fold_set[5]
    
    with open('logging.txt', 'a') as log_file:
        log_file.write(str(X_train.shape[0]) + ',' + str(X_eval.shape[0]) + ',' + str(X_test.shape[0]) + ',')

    # DataLoader for training set
    train_inputs = tokenize_data(X_train)
    train_attention_masks = generate_attention_mask(train_inputs)
    train_data = TensorDataset(torch.tensor(train_inputs), torch.tensor(train_attention_masks), torch.tensor(y_train))
    train_sampler = RandomSampler(train_data)
    train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch_size)

    # DataLoader for validation set
    eval_inputs = tokenize_data(X_eval)
    eval_attention_masks = generate_attention_mask(eval_inputs)
    validation_data = TensorDataset(torch.tensor(eval_inputs), torch.tensor(eval_attention_masks), torch.tensor(y_eval))
    validation_sampler = SequentialSampler(validation_data)
    validation_dataloader = DataLoader(validation_data, sampler=validation_sampler, batch_size=batch_size)
    
    model = BertForSequenceClassification.from_pretrained(
        "bert-base-uncased",
        num_labels = 2,
        output_attentions = False,
        output_hidden_states = False,
    )
    #model= torch.nn.DataParallel(model, device_ids = [0,2,3])
    model= torch.nn.DataParallel(model)
    model.to(device)
    
    optimizer = AdamW(model.parameters(),
                          lr = 2e-5, 
                          eps = 1e-8
                        )

    total_steps = len(train_dataloader) * epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, 
                                                num_warmup_steps = 0,
                                                num_training_steps = total_steps)
    
    loss_values = []

    for epoch_i in range(0, epochs):
        print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
        
        t0 = time.time()
        total_loss = 0
        total_acc = 0
        nb_train_steps = 0
        model.train()
        for step, batch in enumerate(train_dataloader):
            if step % 100 == 0 and not step == 0:
                elapsed = format_time(time.time() - t0)
                print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(train_dataloader), elapsed))
                
            b_input_ids = batch[0].to(device)
            b_input_mask = batch[1].to(device)
            b_labels = batch[2].to(device)
            model.zero_grad()        
            result = model(b_input_ids, 
                           token_type_ids=None, 
                           attention_mask=b_input_mask, 
                           labels=b_labels,
                           return_dict=True)

            #print("LOSS")
            #print(F.cross_entropy(result.logits, b_labels))
            
            loss = F.cross_entropy(result.logits, b_labels, weight=class_weights.to(device))
            #loss = result.loss.sum()
            
            total_loss += loss.item()
            train_logits = result.logits
            train_logits = train_logits.detach().cpu().numpy()
            train_label_ids = b_labels.to('cpu').numpy()
            total_acc += flat_accuracy(train_logits, train_label_ids)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
            nb_train_steps += 1
            
        avg_train_acc = total_acc / (nb_train_steps)
        avg_train_loss = total_loss / len(train_dataloader)            
        loss_values.append(avg_train_loss)
        
        print("  Average training loss: {0:.2f}".format(avg_train_loss))
        print("  Average training accuracy: {0:.2f}".format(avg_train_acc))
        print("  Training epoch took: {:}".format(format_time(time.time() - t0)))

        print("Running Validation...")
        t0 = time.time()
        model.eval()
        eval_loss, eval_accuracy = 0, 0
        nb_eval_steps, nb_eval_examples = 0, 0
        for batch in validation_dataloader:
            batch = tuple(t.to(device) for t in batch)
            #batch = tuple(t for t in batch)
            b_input_ids, b_input_mask, b_labels = batch
            with torch.no_grad():
                result = model(b_input_ids, 
                               token_type_ids=None, 
                               attention_mask=b_input_mask,
                               labels=b_labels,
                               return_dict=True)

            loss = result.loss.sum()
            logits = result.logits
            logits = logits.detach().cpu().numpy()
            label_ids = b_labels.to('cpu').numpy()
            tmp_eval_accuracy = flat_accuracy(logits, label_ids)
            eval_accuracy += tmp_eval_accuracy
            nb_eval_steps += 1

        print("  Validation Accuracy: {0:.2f}".format(eval_accuracy/nb_eval_steps))
        print("  Validation took: {:}".format(format_time(time.time() - t0)))
        
    print("Training complete!")
    #plot_loss(loss_values)
    
    print("Start Testing:")
    test_inputs = tokenize_data(X_test)
    test_attention_masks = generate_attention_mask(test_inputs)
    test_data = TensorDataset(torch.tensor(test_inputs), torch.tensor(test_attention_masks), torch.tensor(y_test))
    test_sampler = SequentialSampler(test_data)
    test_dataloader = DataLoader(test_data, sampler=test_sampler, batch_size=batch_size)
    
    model.eval()
    predictions , true_labels = [], []
    t0 = time.time()

    for (step, batch) in enumerate(test_dataloader):
        #batch = tuple(t.to(device) for t in batch)
        batch = tuple(t for t in batch)
        if step % 100 == 0 and not step == 0:
            elapsed = format_time(time.time() - t0)
            
            print('  Batch {:>5,}  of  {:>5,}.    Elapsed: {:}.'.format(step, len(test_dataloader), elapsed))

        b_input_ids, b_input_mask, b_labels = batch
        with torch.no_grad():
            result = model(b_input_ids, 
                           token_type_ids=None, 
                           attention_mask=b_input_mask,
                           return_dict=True)
        logits = result.logits
        logits = logits.detach().cpu().numpy()
        label_ids = b_labels.to('cpu').numpy()

        predictions.append(logits)
        true_labels.append(label_ids)

    print('DONE')

    predictions = np.concatenate(predictions, axis=0)
    predictions = np.argmax(predictions, axis=1).flatten()
    true_labels = np.concatenate(true_labels, axis=0)
    
    print('EVAL')
    print(predictions)
    
    print(classification_report(true_labels, predictions, digits=4))
    
    #evaluate(true_labels, predictions)
    with open('logging.txt', 'a') as log_file:
        log_file.write(str(accuracy_score(true_labels, predictions)) + ','
                       + str(f1_score(true_labels, predictions, average='macro')) + ',' 
                       + str(f1_score(true_labels, predictions, average='micro')) + ','
                       + str(f1_score(true_labels, predictions, average='binary')) + ','
                       + str(precision_score(true_labels, predictions, average='macro')) + ','
                       + str(precision_score(true_labels, predictions, average='micro')) + ','
                       + str(precision_score(true_labels, predictions, average='binary')) + ','
                       + str(recall_score(true_labels, predictions, average='macro')) + ','
                       + str(recall_score(true_labels, predictions, average='micro'))+ ','
                       + str(recall_score(true_labels, predictions, average='binary')))

In [7]:
def get_samples_by_id(tribunal_id):
    indices = np.where(tribunals == tribunal_id)
    return (np.take(X_data, indices)[0], np.take(Y_data, indices)[0])

def get_samples_by_negative_id(not_tribunal_id):
    indices = np.where(tribunals != not_tribunal_id)
    return (np.take(X_data, indices)[0], np.take(Y_data, indices)[0])

def get_train_eval(train_eval_tuple, test_tuple):
    cv_folds = []
    kf = KFold(n_splits=5)
    for train_index, test_index in kf.split(train_eval_tuple[0]):
        X_train, X_eval = train_eval_tuple[0][train_index], train_eval_tuple[0][test_index]
        y_train, y_eval = train_eval_tuple[1][train_index], train_eval_tuple[1][test_index]
        cv_folds.append((X_train, X_eval, test_tuple[0], y_train, y_eval, test_tuple[1]))
        print("X_train:", cv_folds[-1][0].shape, "X_eval:", cv_folds[-1][1].shape, "X_test:", cv_folds[-1][2].shape,
              "y_train:", cv_folds[-1][3].shape, "y_eval:", cv_folds[-1][4].shape, "y_test:", cv_folds[-1][5].shape,)
    return cv_folds

def split_by_sets(train_eval, test):
    if train_eval == 0 and test == 0:
        cv_splits = split_data(X_data, Y_data)
    elif train_eval == 0:
        cv_splits = get_train_eval(get_samples_by_negative_id(test), get_samples_by_id(test))
    elif test == 0:
        cv_splits = get_train_eval(get_samples_by_id(train_eval), get_samples_by_negative_id(train_eval))
    elif train_eval == test:
        cv_splits = split_data(get_samples_by_id(test)[0], get_samples_by_id(test)[1])
    else:
        cv_splits = get_train_eval(get_samples_by_id(train_eval), get_samples_by_id(test))
    return cv_splits

In [8]:
def compute_class_weights(Y_data):
    class_weight = compute_class_weight(class_weight='balanced', classes=np.unique(Y_data), y=Y_data)
    return class_weight

In [9]:
def combination(train_eval, test):
    values = [0, 1, 2, 3]
    if train_eval not in values or test not in values:
        sys.exit("Falscher Parameter bei train_eval oder test")
    else:
        cv_splits = split_by_sets(train_eval, test)
    for idx, split in enumerate(cv_splits):
        print(60*'*')
        print("Using Split", idx)
        print(60*'*')
        with open('logging.txt', 'a') as log_file:
            log_file.write('\n' + str(train_eval) + ',' + str(train_eval) + ',' + str(test) + ',' + str(idx) + ',')
        train_model(split, class_weights=torch.tensor(compute_class_weights(split[3]), dtype=torch.float32))

In [10]:
# parameters: "train_eval" und "test" with values 0-3 (0 -> "complete dataset"; 1-3 -> respective tribunal)

for idx_1 in range(3, 4):
    for idx_2 in range(0, 4):
        combination(train_eval=idx_1, test=idx_2)


X_train: (384,) X_eval: (96,) X_test: (995,) y_train: (384,) y_eval: (96,) y_test: (995,)
X_train: (384,) X_eval: (96,) X_test: (995,) y_train: (384,) y_eval: (96,) y_test: (995,)
X_train: (384,) X_eval: (96,) X_test: (995,) y_train: (384,) y_eval: (96,) y_test: (995,)
X_train: (384,) X_eval: (96,) X_test: (995,) y_train: (384,) y_eval: (96,) y_test: (995,)
X_train: (384,) X_eval: (96,) X_test: (995,) y_train: (384,) y_eval: (96,) y_test: (995,)
************************************************************
Using Split 0
************************************************************
Tokenizing paragraphs...
Number of samples: 384
Tokenizing paragraphs...
Number of samples: 96


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at





  Average training loss: 0.58
  Average training accuracy: 0.73
  Training epoch took: 0:00:17
Running Validation...
  Validation Accuracy: 0.90
  Validation took: 0:00:01
  Average training loss: 0.42
  Average training accuracy: 0.80
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.83
  Validation took: 0:00:01
  Average training loss: 0.29
  Average training accuracy: 0.89
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.84
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 995
  Batch   100  of    125.    Elapsed: 0:00:11.
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 0 0 0 0 0 0 1 0 1
 1 0 0 0 0 0 0 0 1 0 0 1 0 0 1 1 1 1 1 0 0 0 0 1 1 0 1 1 1 1 0 0 0 0 0 0 0
 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 1 1 1 1 0 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 0 0
 1 1 0 0 0 0 0 1 1 1 1 1 1 0 0 0 0 1 1 1 1 1 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.66
  Average training accuracy: 0.63
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.83
  Validation took: 0:00:01
  Average training loss: 0.42
  Average training accuracy: 0.86
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.86
  Validation took: 0:00:01
  Average training loss: 0.28
  Average training accuracy: 0.90
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 995
  Batch   100  of    125.    Elapsed: 0:00:11.
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 1 0 0 1 1 0
 1 0 0 0 0 0 0 0 1 1 0 1 1 1 1 1 1 1 1 0 0 0 0 1 1 0 0 0 1 1 0 1 0 0 1 0 0
 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 1 0 0 1 0 1 0 1 0 0 1 0 0
 0 0 0 0 0 0 0 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 0
 1 1 0 1 1 1 0 1 1 1 1 1 1 0 0 0 1 1 1 0 1 1 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.59
  Average training accuracy: 0.68
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.75
  Validation took: 0:00:01
  Average training loss: 0.35
  Average training accuracy: 0.87
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.75
  Validation took: 0:00:01
  Average training loss: 0.22
  Average training accuracy: 0.91
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.76
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 995
  Batch   100  of    125.    Elapsed: 0:00:11.
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 1 0 1 1 0 1 0 1 1 0 0 1 0 0 0 0 0 0 0 0 0
 1 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0
 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1 0 0 0 0
 0 0 0 0 0 0 0 1 1 1 0 0 0 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 0 1 1 0 0 0 0 0 0
 1 1 0 0 1 0 0 1 1 1 1 1 1 0 0 1 0 1 1 0 1 1 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.58
  Average training accuracy: 0.70
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.69
  Validation took: 0:00:01
  Average training loss: 0.42
  Average training accuracy: 0.85
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.73
  Validation took: 0:00:01
  Average training loss: 0.32
  Average training accuracy: 0.89
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.77
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 995
  Batch   100  of    125.    Elapsed: 0:00:11.
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 1 0 1
 1 0 0 0 1 0 0 0 0 0 0 1 0 1 1 1 1 1 1 0 0 0 0 1 1 0 0 0 1 1 0 0 0 1 1 0 0
 0 1 1 0 0 0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 1 1 1 0 1 0 0 1 0 0
 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 0 0 1 1 0 0 1 0 0
 1 1 0 0 1 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.60
  Average training accuracy: 0.67
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.83
  Validation took: 0:00:01
  Average training loss: 0.39
  Average training accuracy: 0.83
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.84
  Validation took: 0:00:01
  Average training loss: 0.30
  Average training accuracy: 0.88
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.86
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 995
  Batch   100  of    125.    Elapsed: 0:00:11.
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0 0 0 0 0 1 0 1
 1 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 1 0 1 1 0 0 0 0 1 0 0
 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 1 1 0 0 0
 0 0 0 0 0 0 0 1 1 1 0 0 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 1 0
 1 0 0 0 1 1 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1 1 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.59
  Average training accuracy: 0.69
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.74
  Validation took: 0:00:01
  Average training loss: 0.42
  Average training accuracy: 0.83
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.89
  Validation took: 0:00:01
  Average training loss: 0.29
  Average training accuracy: 0.89
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.83
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 465
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 1 1 0
 1 0 0 0 0 0 0 0 1 1 0 1 1 1 1 1 1 1 1 0 0 0 0 0 1 0 1 1 1 1 0 0 0 0 1 0 0
 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1 1 0
 1 0 0 0 1 0 0 1 1 1 1 1 1 0 0 0 0 1 1 0 1 1 1 0 1 0 1 0 0 1 1 0 0 0 0 0 0
 0 1 0 0 1 0 0 0 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.60
  Average training accuracy: 0.70
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
  Average training loss: 0.34
  Average training accuracy: 0.88
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
  Average training loss: 0.23
  Average training accuracy: 0.93
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.88
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 465
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 0 1 0 0 0 0 0 0 1 1 0
 1 0 0 0 0 0 0 0 1 1 0 1 1 1 1 1 1 1 1 1 0 0 0 1 1 0 0 0 1 1 0 1 0 0 1 0 0
 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 1 1 0 1 0 0 1 0 1 1 0 0 0 0
 0 0 0 0 0 0 0 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1
 1 1 0 0 1 1 0 1 1 1 1 1 1 0 0 0 0 1 1 0 1 1 1 0 1 1 1 0 0 1 1 1 0 0 0 1 0
 0 1 1 1 1 0 0 1 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.57
  Average training accuracy: 0.76
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.71
  Validation took: 0:00:01
  Average training loss: 0.37
  Average training accuracy: 0.85
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.73
  Validation took: 0:00:01
  Average training loss: 0.28
  Average training accuracy: 0.90
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.73
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 465
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 0 1
 1 1 1 0 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1 0 0 1 0 1 1 0 1 0 1 1 1 1 0 1 1 0 1
 1 1 1 0 0 0 0 1 0 0 0 0 0 0 1 1 1 1 1 0 1 0 0 1 1 0 1 1 0 1 1 1 0 1 1 0 0
 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 0 1 1 0 0
 1 1 1 0 1 1 0 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 0 1 1 1 0 0 1 0 0 0 0 1 0
 0 1 1 1 0 0 1 1 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.59
  Average training accuracy: 0.68
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.77
  Validation took: 0:00:01
  Average training loss: 0.40
  Average training accuracy: 0.85
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.82
  Validation took: 0:00:01
  Average training loss: 0.32
  Average training accuracy: 0.88
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.84
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 465
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 1 0 0 0 1 1 0
 1 0 0 0 0 0 0 0 1 1 0 1 1 1 1 1 1 1 1 0 0 0 0 0 1 0 0 0 1 1 0 1 0 0 1 0 0
 0 1 0 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 1 0 1 1 1 0 0 0
 0 0 0 0 0 0 0 0 1 1 0 0 0 1 1 1 1 0 0 1 1 1 1 1 1 1 0 1 0 1 1 1 0 1 1 0 0
 1 1 0 0 1 1 0 1 1 1 1 1 1 0 0 1 1 1 1 0 1 1 1 0 1 1 1 0 0 1 1 1 0 0 0 1 0
 0 1 1 1 1 0 0 0 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.58
  Average training accuracy: 0.71
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.74
  Validation took: 0:00:01
  Average training loss: 0.38
  Average training accuracy: 0.85
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.82
  Validation took: 0:00:01
  Average training loss: 0.26
  Average training accuracy: 0.91
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.82
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 465
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 0 0 0 0 0 1 0 0
 1 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 0 1 1 0 0 0 0 0 0 0
 0 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0
 0 0 0 0 0 0 0 1 1 1 1 0 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 0 0 1 1 0 0 0 0 0
 1 0 0 0 1 0 0 1 1 1 1 1 1 0 0 0 0 0 0 0 1 1 0 0 0 0 1 0 0 1 1 0 0 0 0 0 0
 0 1 1 0 1 0 0 1 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.59
  Average training accuracy: 0.70
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
  Average training loss: 0.45
  Average training accuracy: 0.78
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.70
  Validation took: 0:00:01
  Average training loss: 0.35
  Average training accuracy: 0.85
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.90
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 530
DONE
EVAL
[0 0 0 0 0 1 0 1 0 0 1 1 1 1 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
 0 1 1 1 1 1 0 1 1 1 1 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 0 1 1 1 0 1 1 1 1 1 0 1 1 1 0 0 0 0 0 0 1 0 1 0 0 1 1 1 1 0 1
 0 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 1 0 0 0
 0 0 1 0 1 1 1 0 1 1 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 1 1 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.64
  Average training accuracy: 0.62
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.84
  Validation took: 0:00:01
  Average training loss: 0.39
  Average training accuracy: 0.85
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.88
  Validation took: 0:00:01
  Average training loss: 0.25
  Average training accuracy: 0.91
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.86
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 530
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 1 0 1 1 1 1 0 1 1 0 1 0 1 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0
 0 0 1 1 1 1 1 1 0 1 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 0 0
 0 1 0 1 1 1 1 1 1 1 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0
 0 0 0 0 0 0 0 1 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.59
  Average training accuracy: 0.70
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.70
  Validation took: 0:00:01
  Average training loss: 0.36
  Average training accuracy: 0.85
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.80
  Validation took: 0:00:01
  Average training loss: 0.24
  Average training accuracy: 0.90
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.76
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 530
DONE
EVAL
[0 0 0 0 0 0 0 1 0 0 1 0 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 0 1 0 1 0 1 1 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 1 1 1 1 1 0 1 0 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0
 0 1 1 0 1 1 1 1 1 1 1 0 0 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 1 0
 0 0 0 0 0 0 0 1 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.56
  Average training accuracy: 0.73
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.74
  Validation took: 0:00:01
  Average training loss: 0.39
  Average training accuracy: 0.84
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.76
  Validation took: 0:00:01
  Average training loss: 0.27
  Average training accuracy: 0.92
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.78
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 530
DONE
EVAL
[0 0 0 0 0 1 1 1 0 0 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 0
 1 1 0 1 1 0 1 1 1 1 1 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1 0 0 1 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 1 1 1 1 0 1 1 0 1 1 0 1 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 1 1
 0 1 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0
 0 1 0 0 0 1 1 1 1 1 1 0 0 1 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 1 0 1 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.57
  Average training accuracy: 0.73
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.68
  Validation took: 0:00:01
  Average training loss: 0.45
  Average training accuracy: 0.80
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.73
  Validation took: 0:00:01
  Average training loss: 0.32
  Average training accuracy: 0.88
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.84
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 530
DONE
EVAL
[0 0 0 0 0 0 0 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 0 1 1 0 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 1 1
 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 0 0 0 0 0 0 0 1 0 0 0 0 1 1 0 1 0 0 0
 0 1 1 0 0 1 1 1 1 1 1 0 0 0 1 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 1 0 0 0 0 0 1 

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.60
  Average training accuracy: 0.64
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.71
  Validation took: 0:00:01
  Average training loss: 0.39
  Average training accuracy: 0.84
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.83
  Validation took: 0:00:01
  Average training loss: 0.25
  Average training accuracy: 0.92
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.81
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 48
DONE
EVAL
[1 0 0 0 1 1 1 0 0 1 1 1 1 0 1 0 1 0 1 0 0 1 1 1 1 0 0 1 1 0 0 1 1 0 0 1 1
 0 1 1 0 1 0 1 1 0 1 1]
              precision    recall  f1-score   support

           0     0.7500    0.8824    0.8108        17
           1     0.9286    0.8387    0.8814        31

    accuracy                         0.8542        48
   macro avg     0.8393    0.8605    0.8461        48
weighted avg     0.865

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.60
  Average training accuracy: 0.67
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
  Average training loss: 0.44
  Average training accuracy: 0.81
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
  Average training loss: 0.32
  Average training accuracy: 0.88
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 48
DONE
EVAL
[0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 0 0 0 1 1 0 0 0 1 1 0 1 1 1 0 1 1 1
 1 1 1 0 1 1 1 1 1 1 1]
              precision    recall  f1-score   support

           0     0.9091    0.6250    0.7407        16
           1     0.8378    0.9688    0.8986        32

    accuracy                         0.8542        48
   macro avg     0.8735    0.7969    0.8196        48
weighted avg     0.861

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.61
  Average training accuracy: 0.67
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.65
  Validation took: 0:00:01
  Average training loss: 0.43
  Average training accuracy: 0.81
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.77
  Validation took: 0:00:01
  Average training loss: 0.31
  Average training accuracy: 0.89
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.71
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 48
DONE
EVAL
[1 1 1 0 0 1 1 1 1 0 1 1 1 1 1 1 0 1 1 0 1 1 1 0 1 0 1 0 0 1 1 1 0 0 1 1 0
 1 1 0 0 0 1 0 1 1 0 1]
              precision    recall  f1-score   support

           0     0.8824    0.7500    0.8108        20
           1     0.8387    0.9286    0.8814        28

    accuracy                         0.8542        48
   macro avg     0.8605    0.8393    0.8461        48
weighted avg     0.856

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.61
  Average training accuracy: 0.70
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.69
  Validation took: 0:00:01
  Average training loss: 0.37
  Average training accuracy: 0.86
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
  Average training loss: 0.26
  Average training accuracy: 0.91
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.81
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 48
DONE
EVAL
[0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 1 0 1 0 1 0 0 0 0 0 0]
              precision    recall  f1-score   support

           0     0.8780    0.9474    0.9114        38
           1     0.7143    0.5000    0.5882        10

    accuracy                         0.8542        48
   macro avg     0.7962    0.7237    0.7498        48
weighted avg     0.843

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.65
  Average training accuracy: 0.58
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.71
  Validation took: 0:00:01
  Average training loss: 0.45
  Average training accuracy: 0.82
  Training epoch took: 0:00:16
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
  Average training loss: 0.32
  Average training accuracy: 0.88
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 48
DONE
EVAL
[0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0 0 0 0
 0 0 0 0 0 1 0 0 0 0 0]
              precision    recall  f1-score   support

           0     0.8810    0.9487    0.9136        39
           1     0.6667    0.4444    0.5333         9

    accuracy                         0.8542        48
   macro avg     0.7738    0.6966    0.7235        48
weighted avg     0.840

In [11]:
import random
from sklearn.utils.class_weight import compute_class_weight

def compute_class_weights(Y_data):
    class_weight = compute_class_weight(class_weight='balanced', classes=np.unique(Y_data), y=Y_data)
    return class_weight

for tribunal_id in [1, 2, 3]:
    X, Y = get_samples_by_id(tribunal_id)
    idc_0 = np.where(Y==0)[0]
    idc_1 = np.asarray(random.sample(list(np.where(Y==1)[0]), int(np.where(Y==0)[0].shape[0]*0.2)))
    
    X = np.concatenate((X[idc_0], X[idc_1]))
    Y = np.concatenate((Y[idc_0], Y[idc_1]))

    np.random.shuffle(X)
    np.random.shuffle(Y)

    cv_splits = split_data(X, Y)
    for idx, split in enumerate(cv_splits):
        print(60*'*')
        print("Using Split", idx)
        print(60*'*')
        with open('logging.txt', 'a') as log_file:
            log_file.write('\n' + str(tribunal_id) + ',' + str(tribunal_id) + ',' + str(tribunal_id) + ',' + str(idx) + ',')
        train_model(split, class_weights=torch.tensor(compute_class_weights(split[3]), dtype=torch.float32))

X_train: (274,) X_eval: (34,) X_test: (35,) y_train: (274,) y_eval: (34,) y_test: (35,)
X_train: (274,) X_eval: (34,) X_test: (35,) y_train: (274,) y_eval: (34,) y_test: (35,)
X_train: (274,) X_eval: (34,) X_test: (35,) y_train: (274,) y_eval: (34,) y_test: (35,)
X_train: (275,) X_eval: (34,) X_test: (34,) y_train: (275,) y_eval: (34,) y_test: (34,)
X_train: (275,) X_eval: (34,) X_test: (34,) y_train: (275,) y_eval: (34,) y_test: (34,)
************************************************************
Using Split 0
************************************************************
Tokenizing paragraphs...
Number of samples: 274
Tokenizing paragraphs...
Number of samples: 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.70
  Average training accuracy: 0.75
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.80
  Validation took: 0:00:01
  Average training loss: 0.67
  Average training accuracy: 0.81
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.80
  Validation took: 0:00:01
  Average training loss: 0.66
  Average training accuracy: 0.82
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.80
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 35
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
              precision    recall  f1-score   support

           0     0.9143    1.0000    0.9552        32
           1     0.0000    0.0000    0.0000         3

    accuracy                         0.9143        35
   macro avg     0.4571    0.5000    0.4776        35
weighted avg     0.8359    0.9143    0.8733      

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


Tokenizing paragraphs...
Number of samples: 274
Tokenizing paragraphs...
Number of samples: 34


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.68
  Average training accuracy: 0.83
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.80
  Validation took: 0:00:01
  Average training loss: 0.70
  Average training accuracy: 0.39
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.70
  Validation took: 0:00:01
  Average training loss: 0.65
  Average training accuracy: 0.82
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.82
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 35
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
              precision    recall  f1-score   support

           0     0.8824    0.9677    0.9231        31
           1     0.0000    0.0000    0.0000         4

    accuracy                         0.8571        35
   macro avg     0.4412    0.4839    0.4615        35
weighted avg     0.7815    0.8571    0.8176      

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.69
  Average training accuracy: 0.74
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.88
  Validation took: 0:00:01
  Average training loss: 0.68
  Average training accuracy: 0.83
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.88
  Validation took: 0:00:01
  Average training loss: 0.62
  Average training accuracy: 0.85
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.80
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 35
DONE
EVAL
[0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
              precision    recall  f1-score   support

           0     0.9118    1.0000    0.9538        31
           1     1.0000    0.2500    0.4000         4

    accuracy                         0.9143        35
   macro avg     0.9559    0.6250    0.6769        35
weighted avg     0.9218    0.9143    0.8905      

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.69
  Average training accuracy: 0.69
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
  Average training loss: 0.72
  Average training accuracy: 0.70
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
  Average training loss: 0.69
  Average training accuracy: 0.66
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.45
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 34
DONE
EVAL
[0 0 0 0 0 1 0 1 1 0 0 1 1 0 0 0 0 0 1 0 0 0 0 0 0 0 1 1 0 0 1 0 0 0]
              precision    recall  f1-score   support

           0     0.7600    0.7308    0.7451        26
           1     0.2222    0.2500    0.2353         8

    accuracy                         0.6176        34
   macro avg     0.4911    0.4904    0.4902        34
weighted avg     0.6335    0.6176    0.6251        

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.69
  Average training accuracy: 0.76
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.65
  Validation took: 0:00:01
  Average training loss: 0.66
  Average training accuracy: 0.83
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.65
  Validation took: 0:00:01
  Average training loss: 0.67
  Average training accuracy: 0.85
  Training epoch took: 0:00:11
Running Validation...
  Validation Accuracy: 0.65
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 34
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
              precision    recall  f1-score   support

           0     0.7647    1.0000    0.8667        26
           1     0.0000    0.0000    0.0000         8

    accuracy                         0.7647        34
   macro avg     0.3824    0.5000    0.4333        34
weighted avg     0.5848    0.7647    0.6627        

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.72
  Average training accuracy: 0.49
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
  Average training loss: 0.69
  Average training accuracy: 0.76
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
  Average training loss: 0.65
  Average training accuracy: 0.84
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.83
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 49
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0]
              precision    recall  f1-score   support

           0     0.8571    1.0000    0.9231        42
           1     0.0000    0.0000    0.0000         7

    accuracy                         0.8571        49
   macro avg     0.4286    0.5000    0.4615        49
weighted avg     0.7

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.72
  Average training accuracy: 0.72
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.48
  Validation took: 0:00:01
  Average training loss: 0.66
  Average training accuracy: 0.63
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.71
  Validation took: 0:00:01
  Average training loss: 0.63
  Average training accuracy: 0.74
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.81
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 48
DONE
EVAL
[0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 1 0 0 0 0 0 1 1 0 0 0 0 0 1 0 1 0 0 1
 0 0 1 1 0 0 1 0 0 0 0]
              precision    recall  f1-score   support

           0     0.8649    0.7442    0.8000        43
           1     0.0000    0.0000    0.0000         5

    accuracy                         0.6667        48
   macro avg     0.4324    0.3721    0.4000        48
weighted avg     0.774

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.72
  Average training accuracy: 0.60
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.48
  Validation took: 0:00:01
  Average training loss: 0.66
  Average training accuracy: 0.78
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.79
  Validation took: 0:00:01
  Average training loss: 0.61
  Average training accuracy: 0.85
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.85
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 48
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0]
              precision    recall  f1-score   support

           0     0.7234    1.0000    0.8395        34
           1     1.0000    0.0714    0.1333        14

    accuracy                         0.7292        48
   macro avg     0.8617    0.5357    0.4864        48
weighted avg     0.804

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.70
  Average training accuracy: 0.73
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.79
  Validation took: 0:00:01
  Average training loss: 0.69
  Average training accuracy: 0.74
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.77
  Validation took: 0:00:01
  Average training loss: 0.66
  Average training accuracy: 0.79
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.75
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 48
DONE
EVAL
[0 0 1 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0
 0 0 1 0 0 0 0 0 0 0 0]
              precision    recall  f1-score   support

           0     0.7805    0.8421    0.8101        38
           1     0.1429    0.1000    0.1176        10

    accuracy                         0.6875        48
   macro avg     0.4617    0.4711    0.4639        48
weighted avg     0.647

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.70
  Average training accuracy: 0.60
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.77
  Validation took: 0:00:01
  Average training loss: 0.67
  Average training accuracy: 0.80
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.83
  Validation took: 0:00:01
  Average training loss: 0.60
  Average training accuracy: 0.84
  Training epoch took: 0:00:15
Running Validation...
  Validation Accuracy: 0.88
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 48
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0]
              precision    recall  f1-score   support

           0     0.7500    1.0000    0.8571        36
           1     0.0000    0.0000    0.0000        12

    accuracy                         0.7500        48
   macro avg     0.3750    0.5000    0.4286        48
weighted avg     0.562

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.76
  Average training accuracy: 0.46
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.25
  Validation took: 0:00:00
  Average training loss: 0.70
  Average training accuracy: 0.52
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.75
  Validation took: 0:00:00
  Average training loss: 0.69
  Average training accuracy: 0.70
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.72
  Validation took: 0:00:00
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 31
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
              precision    recall  f1-score   support

           0     0.9677    1.0000    0.9836        30
           1     0.0000    0.0000    0.0000         1

    accuracy                         0.9677        31
   macro avg     0.4839    0.5000    0.4918        31
weighted avg     0.9365    0.9677    0.9519        31

**

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.71
  Average training accuracy: 0.62
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.75
  Validation took: 0:00:00
  Average training loss: 0.65
  Average training accuracy: 0.83
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.45
  Validation took: 0:00:00
  Average training loss: 0.65
  Average training accuracy: 0.60
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.75
  Validation took: 0:00:00
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 31
DONE
EVAL
[0 0 0 1 0 0 0 1 1 0 0 1 1 0 0 0 0 0 1 1 0 1 0 1 0 0 0 0 0 1 0]
              precision    recall  f1-score   support

           0     0.8571    0.6923    0.7660        26
           1     0.2000    0.4000    0.2667         5

    accuracy                         0.6452        31
   macro avg     0.5286    0.5462    0.5163        31
weighted avg     0.7512    0.6452    0.6854        31

**

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.70
  Average training accuracy: 0.69
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.26
  Validation took: 0:00:00
  Average training loss: 0.70
  Average training accuracy: 0.63
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.77
  Validation took: 0:00:00
  Average training loss: 0.67
  Average training accuracy: 0.74
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.71
  Validation took: 0:00:00
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 31
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
              precision    recall  f1-score   support

           0     0.8966    0.9286    0.9123        28
           1     0.0000    0.0000    0.0000         3

    accuracy                         0.8387        31
   macro avg     0.4483    0.4643    0.4561        31
weighted avg     0.8098    0.8387    0.8240        31

**

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.69
  Average training accuracy: 0.79
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.84
  Validation took: 0:00:00
  Average training loss: 0.70
  Average training accuracy: 0.63
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.22
  Validation took: 0:00:00
  Average training loss: 0.69
  Average training accuracy: 0.62
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.84
  Validation took: 0:00:01
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 31
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]
              precision    recall  f1-score   support

           0     0.7419    1.0000    0.8519        23
           1     0.0000    0.0000    0.0000         8

    accuracy                         0.7419        31
   macro avg     0.3710    0.5000    0.4259        31
weighted avg     0.5505    0.7419    0.6320        31

**

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Average training loss: 0.72
  Average training accuracy: 0.77
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.97
  Validation took: 0:00:00
  Average training loss: 0.68
  Average training accuracy: 0.69
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.97
  Validation took: 0:00:00
  Average training loss: 0.69
  Average training accuracy: 0.50
  Training epoch took: 0:00:10
Running Validation...
  Validation Accuracy: 0.58
  Validation took: 0:00:00
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 31
DONE
EVAL
[0 0 0 0 1 0 0 0 0 1 1 0 0 0 0 0 0 0 1 1 1 1 1 0 1 0 0 0 0 1 0]
              precision    recall  f1-score   support

           0     0.8095    0.6296    0.7083        27
           1     0.0000    0.0000    0.0000         4

    accuracy                         0.5484        31
   macro avg     0.4048    0.3148    0.3542        31
weighted avg     0.7051    0.5484    0.6169        31



In [12]:
import random
from sklearn.utils.class_weight import compute_class_weight

def compute_class_weights(Y_data):
    class_weight = compute_class_weight(class_weight='balanced', classes=np.unique(Y_data), y=Y_data)
    return class_weight

X_all = []
Y_all = []

for tribunal_id in [1, 2, 3]:
    X, Y = get_samples_by_id(tribunal_id)
    idc_0 = np.where(Y==0)[0]
    idc_1 = np.asarray(random.sample(list(np.where(Y==1)[0]), int(np.where(Y==0)[0].shape[0]*0.2)))
    
    X_all.append(np.concatenate((X[idc_0], X[idc_1])))
    Y_all.append(np.concatenate((Y[idc_0], Y[idc_1])))
    
X = np.concatenate(X_all)
Y = np.concatenate(Y_all)

np.random.shuffle(X)
np.random.shuffle(Y)

cv_splits = split_data(X, Y)
for idx, split in enumerate(cv_splits):
    print(60*'*')
    print("Using Split", idx)
    print(60*'*')
    with open('logging.txt', 'a') as log_file:
        log_file.write('\n' + str(tribunal_id) + ',' + str(tribunal_id) + ',' + str(tribunal_id) + ',' + str(idx) + ',')
    train_model(split, class_weights=torch.tensor(compute_class_weights(split[3]), dtype=torch.float32))

X_train: (907,) X_eval: (113,) X_test: (114,) y_train: (907,) y_eval: (113,) y_test: (114,)
X_train: (907,) X_eval: (113,) X_test: (114,) y_train: (907,) y_eval: (113,) y_test: (114,)
X_train: (907,) X_eval: (113,) X_test: (114,) y_train: (907,) y_eval: (113,) y_test: (114,)
X_train: (907,) X_eval: (113,) X_test: (114,) y_train: (907,) y_eval: (113,) y_test: (114,)
X_train: (908,) X_eval: (113,) X_test: (113,) y_train: (908,) y_eval: (113,) y_test: (113,)
************************************************************
Using Split 0
************************************************************
Tokenizing paragraphs...
Number of samples: 907
Tokenizing paragraphs...
Number of samples: 113


Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.70
  Average training accuracy: 0.67
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.84
  Validation took: 0:00:02
  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.68
  Average training accuracy: 0.71
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.82
  Validation took: 0:00:02
  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.63
  Average training accuracy: 0.72
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.83
  Validation took: 0:00:02
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 114
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0
 0 0 0]
              prec

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.68
  Average training accuracy: 0.74
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.87
  Validation took: 0:00:02
  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.69
  Average training accuracy: 0.72
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.82
  Validation took: 0:00:02
  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.67
  Average training accuracy: 0.75
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.82
  Validation took: 0:00:02
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 114
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1
 0 0 1]
              prec

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.72
  Average training accuracy: 0.64
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.84
  Validation took: 0:00:02
  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.69
  Average training accuracy: 0.61
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.83
  Validation took: 0:00:02
  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.62
  Average training accuracy: 0.75
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.68
  Validation took: 0:00:02
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 114
DONE
EVAL
[0 1 0 0 0 1 0 0 0 1 0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 0
 0 0 1 0 0 1 0 0 1 1 1 0 0 1 0 0 0 0 0 1 0 0 0 0 1 1 1 0 0 0 0 0 0 0 0 1 0
 1 0 0 0 0 1 1 0 0 1 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 1 1 0 0 1 1 0 0
 1 0 0]
              prec

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.69
  Average training accuracy: 0.74
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.87
  Validation took: 0:00:02
  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.67
  Average training accuracy: 0.78
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.81
  Validation took: 0:00:02
  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.62
  Average training accuracy: 0.80
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.82
  Validation took: 0:00:02
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 114
DONE
EVAL
[0 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 0 0
 0 0 0 0 0 1 0 0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0]
              prec

Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.seq_relationship.bias', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.71
  Average training accuracy: 0.59
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.82
  Validation took: 0:00:02
  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.69
  Average training accuracy: 0.76
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.50
  Validation took: 0:00:02
  Batch   100  of    114.    Elapsed: 0:00:32.
  Average training loss: 0.68
  Average training accuracy: 0.78
  Training epoch took: 0:00:36
Running Validation...
  Validation Accuracy: 0.82
  Validation took: 0:00:02
Training complete!
Start Testing:
Tokenizing paragraphs...
Number of samples: 113
DONE
EVAL
[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0]
              precis