In [3]:
!pip install transformers

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/27/3c/91ed8f5c4e7ef3227b4119200fc0ed4b4fd965b1f0172021c25701087825/transformers-3.0.2-py3-none-any.whl (769kB)
[K     |████████████████████████████████| 778kB 6.3MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/7d/34/09d19aff26edcc8eb2a01bed8e98f13a1537005d31e95233fd48216eed10/sacremoses-0.0.43.tar.gz (883kB)
[K     |████████████████████████████████| 890kB 18.0MB/s 
[?25hCollecting tokenizers==0.8.1.rc1
[?25l  Downloading https://files.pythonhosted.org/packages/40/d0/30d5f8d221a0ed981a186c8eb986ce1c94e3a6e87f994eae9f4aa5250217/tokenizers-0.8.1rc1-cp36-cp36m-manylinux1_x86_64.whl (3.0MB)
[K     |████████████████████████████████| 3.0MB 42.4MB/s 
Collecting sentencepiece!=0.1.92
[?25l  Downloading https://files.pythonhosted.org/packages/d4/a4/d0a884c4300004a78cca907a6ff9a5e9fe4f090f5d95ab341c53d28cbc58/sentencepiece-0.1.91-cp36-cp36m-manylinux1_x86_64.whl (1.1MB

In [4]:
import torch
import numpy as np
import pandas as pd
from tqdm import trange
from tqdm.notebook import tqdm
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
from transformers.tokenization_bert import BertTokenizer
from transformers.optimization import AdamW, get_linear_schedule_with_warmup
from transformers.modeling_bert import BertForSequenceClassification

In [5]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
num_labels_to_classify = 2
max_seq_length = 256
batch_size = 16
MBERT_MODEL = "bert-base-multilingual-uncased"

In [6]:
device

device(type='cuda')

In [7]:
tokenizer = BertTokenizer.from_pretrained(MBERT_MODEL)

model = BertForSequenceClassification.from_pretrained(
    MBERT_MODEL, 
    num_labels = num_labels_to_classify
    ).to(device)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=871891.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=625.0, style=ProgressStyle(description_…




HBox(children=(FloatProgress(value=0.0, description='Downloading', max=672271273.0, style=ProgressStyle(descri…




Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

In [23]:
class BertInputItem(object):
  def __init__(self, text, input_ids, input_mask, segment_ids, label_ids):
    self.text = text
    self.input_ids = input_ids
    self.input_mask = input_mask
    self.segment_ids = segment_ids
    self.label_ids = label_ids

def convert_examples_to_inputs(example_premise, example_hypothesis, 
                               example_label, max_seq_length, 
                               tokenizer, verbose=0):
  input_items = []
  examples = zip(example_premise, example_hypothesis, example_label)
  for ex_idx, (text_p, text_h, label) in enumerate(tqdm(examples)):
    # create list of token ids
    #input_ids = tokenizer.encode(f'[CLS] {text_p} [SEP] {text_h} [SEP]')

    encoded_cls = tokenizer.encode('[CLS]')
    encoded_sep = tokenizer.encode('[SEP]')

    input_ids_p = encoded_cls + tokenizer.encode(f'{text_p}') + encoded_sep
    input_ids_h = tokenizer.encode(f'{text_h}') + encoded_sep

    input_ids = input_ids_p + input_ids_h
    segment_ids = [0] * len(input_ids_p) + [1] * len(input_ids_h)
    input_mask = [1] * len(input_ids)

    if len(input_ids) > max_seq_length:
      input_ids = input_ids[:max_seq_length]
      segment_ids = segment_ids[:max_seq_length]
      input_mask = input_mask[:max_seq_length]


    #segment_ids = [0] * len(input_ids)

    #input_mask = [1] * len(input_ids)

    padding = [0] * (max_seq_length - len(input_ids))
    input_ids += padding
    input_mask += padding
    segment_ids += padding

    assert len(input_ids) == max_seq_length
    assert len(input_mask) == max_seq_length
    assert len(segment_ids) == max_seq_length

    text = text_p + ' [SEP] ' + text_h

    input_items.append(
        BertInputItem(text=text,
                      input_ids=input_ids,
                      input_mask=input_mask,
                      segment_ids=segment_ids,
                      label_ids=label
                      )
    )
  return input_items

In [9]:
def get_data_loader(features, max_seq_length, batch_size=batch_size, shuffle=True):
  all_input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
  all_input_mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
  all_segment_ids = torch.tensor([f.segment_ids for f in features], dtype=torch.long)
  all_label_ids = torch.tensor([f.label_ids for f in features], dtype=torch.long)
  data = TensorDataset(all_input_ids, all_input_mask, all_segment_ids, all_label_ids)

  data_loader = DataLoader(data, shuffle=shuffle, batch_size=batch_size)
  return data_loader

In [10]:
from google.colab import drive 
drive.mount('/content/gdrive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/gdrive


In [11]:
train_path = 'gdrive/My Drive/data/csv/train_boolQ.csv'
val_path = 'gdrive/My Drive/data/csv/New_ru_bool/train.csv'
test_path = 'gdrive/My Drive/data/csv/New_ru_bool/val.csv'

train = pd.read_csv(train_path)
val = pd.read_csv(val_path, )
test = pd.read_csv(test_path)

In [12]:
train.head()

Unnamed: 0.1,Unnamed: 0,answer,passage,question
0,0,1.0,"Persian (/ˈpɜːrʒən, -ʃən/), also known by its ...",do iran and afghanistan speak the same language
1,1,1.0,Good Samaritan laws offer legal protection to ...,do good samaritan laws protect those who help ...
2,2,1.0,Windows Movie Maker (formerly known as Windows...,is windows movie maker part of windows essentials
3,3,1.0,"Powdered sugar, also called confectioners' sug...",is confectionary sugar the same as powdered sugar
4,4,0.0,As with other games in The Elder Scrolls serie...,is elder scrolls online the same as skyrim


In [13]:
val.head()

Unnamed: 0,passage,question,answer,idx
0,Успешное выполнение программы полёта «Аполлона...,была ли высадка на луну,True,0
1,Ива́н Алекса́ндрович Хлестако́в — главный геро...,был ли хлестаков ревизором,False,1
2,Украинское законодательство допускает владение...,разрешено ли автоматическое оружие в украине,True,2
3,(1) Мальчишка разбил стекло. – (2)Кто это сдел...,"Всегда ли мальчишка, разбивший стекло, труслив?",False,3
4,«День Конституции» — празднование принятия Кон...,был ли 12 декабря выходным днем,True,4


In [14]:
test_mapping = {False: 0, True: 1}
test['answer'].replace(test_mapping, inplace=True)
val['answer'].replace(test_mapping, inplace=True)

In [15]:
train.shape, val.shape, test.shape

((9427, 4), (392, 4), (295, 4))

In [16]:
train.head(3)

Unnamed: 0.1,Unnamed: 0,answer,passage,question
0,0,1.0,"Persian (/ˈpɜːrʒən, -ʃən/), also known by its ...",do iran and afghanistan speak the same language
1,1,1.0,Good Samaritan laws offer legal protection to ...,do good samaritan laws protect those who help ...
2,2,1.0,Windows Movie Maker (formerly known as Windows...,is windows movie maker part of windows essentials


In [17]:
train.tail(3)

Unnamed: 0.1,Unnamed: 0,answer,passage,question
9424,9424,1.0,The Warriors went into the 2018 playoffs as th...,is the golden state warriors in the playoffs
9425,9425,0.0,Downton Abbey is a British period drama televi...,downton abbey will there be a season 7
9426,9426,0.0,The margin of error is usually defined as the ...,is margin of error the same as confidence inte...


In [18]:
test.head(3)

Unnamed: 0,passage,question,answer,idx
0,В его основе — всё те же легенды о святом Нико...,есть ли дед мороз,0,0
1,"Троекуров подкупает губернский суд и, пользуяс...",был ли дубровский разбойником,1,1
2,"Репарация — особая функция клеток, заключающа...",был исследован участок цепи молекулы днк,1,2


In [19]:
n_samples_train = train.shape[0]

In [24]:
train_features = convert_examples_to_inputs(train['passage'].values, 
                                            train['question'].values,
                                            train['answer'].values,
                                            max_seq_length,
                                            tokenizer)

val_features = convert_examples_to_inputs(val['passage'].values, 
                                          val['question'].values,
                                          val['answer'].values,
                                          max_seq_length,
                                          tokenizer)

test_features = convert_examples_to_inputs(test['passage'].values, 
                                          test['question'].values,
                                          test['answer'].values,
                                          max_seq_length,
                                          tokenizer)


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Token indices sequence length is longer than the specified maximum sequence length for this model (926 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (552 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (513 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (600 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (636 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for thi




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Token indices sequence length is longer than the specified maximum sequence length for this model (539 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (537 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (773 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (847 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (664 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for thi




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Token indices sequence length is longer than the specified maximum sequence length for this model (701 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (623 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (617 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (592 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for this model (629 > 512). Running this sequence through the model will result in indexing errors
Token indices sequence length is longer than the specified maximum sequence length for thi




In [25]:
train_dataloader = get_data_loader(train_features, max_seq_length, shuffle=True)
val_dataloader = get_data_loader(val_features, max_seq_length, shuffle=True)
test_dataloader = get_data_loader(test_features, max_seq_length, shuffle=True)

In [26]:
def evaluate(model, dataloader, device='cpu'):
  model.eval()

  eval_loss = 0
  number_eval_steps = 0
  pred_labels, true_labels = [], []

  model.to(device)
  for step, batch in enumerate(tqdm(dataloader, desc='Eval')):
    batch = tuple(t.to(device) for t in batch)
    input_ids, input_mask, segment_ids, label_ids = batch

    with torch.no_grad():
      tmp_eval_loss, logits = model(input_ids, attention_mask=input_mask,
                                token_type_ids=segment_ids, labels=label_ids)
    
    outputs = np.argmax(logits.to('cpu'), axis=1)
    label_ids = label_ids.to('cpu').numpy()

    pred_labels += list(outputs)
    true_labels += list(label_ids)

    eval_loss += tmp_eval_loss.mean().item()
    number_eval_steps += 1

  eval_loss /= number_eval_steps

  true_labels = np.array(true_labels)
  pred_labels = np.array(pred_labels)

  return eval_loss, true_labels, pred_labels

In [27]:
def metrics(y_true, y_preds):
  acc = accuracy_score(y_true, y_preds)
  precision = precision_score(y_true, y_preds)
  recall = recall_score(y_true, y_preds)
  f1 = f1_score(y_true, y_preds)
  return acc, precision, recall, f1

In [28]:
loss, true, pred = evaluate(model, test_dataloader, device)
acc, precision, recall, f1 = metrics(true, pred)
print(f'Loss: {loss};  Acc: {acc};  Precision: {precision};  Recall: {recall};  F1: {f1}' )

HBox(children=(FloatProgress(value=0.0, description='Eval', max=19.0, style=ProgressStyle(description_width='i…


Loss: 0.7367014539869208;  Acc: 0.2542372881355932;  Precision: 0.0;  Recall: 0.0;  F1: 0.0


  _warn_prf(average, modifier, msg_start, len(result))


In [29]:
print('model predictions: ', pred)

model predictions:  [0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]


In [30]:
def train(model, train_dataloader, val_dataloader, 
          output_model_file = '/tmp/mbert.bin', num_train_epochs=1, 
          patience=2, gradient_accumulation_steps=1, max_grad_norm=5, 
          warmup_proportion=0.1, batch_size=batch_size, learning_rate=5e-5):
  
  num_train_steps = int(n_samples_train / batch_size / gradient_accumulation_steps * num_train_epochs)
  num_warmup_steps = int(warmup_proportion * num_train_steps)
  
  param_optim = list(model.named_parameters())
  no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
  optim_grouped_params = [
    {'params': [p for n, p in param_optim if not any(nd in n for nd in no_decay)],
     'weight_decay': 0.01},
    {'params': [p for n, p in param_optim if any(nd in n for nd in no_decay)],
     'weight_decay': 0.0} 
  ]

  optimizer = AdamW(optim_grouped_params, lr=learning_rate, correct_bias=False)
  scheduler = get_linear_schedule_with_warmup(optimizer, 
                                              num_warmup_steps=num_warmup_steps, 
                                              num_training_steps=num_train_steps
                                              )
  
  loss_history = []
  no_improvement = 0
  for epoch in range(num_train_epochs):
    print(f'===== Epoch: {epoch} =====')

    model.train()
    train_loss = 0
    num_train_examples, num_train_steps = 0, 0
    for step, batch in enumerate(tqdm(train_dataloader, desc='Train iter')):
      batch = tuple(t.to(device) for t in batch)
      input_ids, input_mask, segment_ids, label_ids = batch

      outputs = model(input_ids, attention_mask=input_mask, 
                      token_type_ids=segment_ids, labels=label_ids)
      
      loss = outputs[0]
      if gradient_accumulation_steps > 1:
        loss /= gradient_accumulation_steps
      loss.backward()
      train_loss += loss.item() 

      if (step + 1) % gradient_accumulation_steps == 0:
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm)
        optimizer.step()
        optimizer.zero_grad()
        scheduler.step()

    val_loss, true, pred = evaluate(model, val_dataloader, device=device)
    print("loss history: ", loss_history)
    acc, precision, recall, f1 = metrics(true, pred)
    print(f'Val loss: {loss};  Acc: {acc};  Precision: {precision};  Recall: {recall};  F1: {f1}' )

    if len(loss_history) == 0 or val_loss < min(loss_history):
      no_improvement = 0
      model_to_save = model.module if hasattr(model, 'module') else model
      torch.save(model_to_save.state_dict(), output_model_file)
    else:
      no_improvement += 1
    
    if no_improvement >= patience:
      print(f'No improvement of val set for {patience} epochs')
      break
    loss_history.append(val_loss)

  return output_model_file, model

In [31]:
torch.cuda.empty_cache()
model_mbert = BertForSequenceClassification.from_pretrained(
    MBERT_MODEL, 
    num_labels=num_labels_to_classify
    )
model_mbert.to(device)
model_file_name, model_ru = train(model_mbert, 
                                  train_dataloader, 
                                  val_dataloader, 
                                  num_train_epochs=3,
                                  gradient_accumulation_steps=4,
                                  )

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

===== Epoch: 0 =====


HBox(children=(FloatProgress(value=0.0, description='Train iter', max=590.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Eval', max=25.0, style=ProgressStyle(description_width='i…


loss history:  []
Val loss: 0.2178543210029602;  Acc: 0.7755102040816326;  Precision: 0.7755102040816326;  Recall: 1.0;  F1: 0.8735632183908045
===== Epoch: 1 =====


HBox(children=(FloatProgress(value=0.0, description='Train iter', max=590.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Eval', max=25.0, style=ProgressStyle(description_width='i…


loss history:  [0.5645310938358307]
Val loss: 0.20920836925506592;  Acc: 0.7755102040816326;  Precision: 0.7755102040816326;  Recall: 1.0;  F1: 0.8735632183908045
===== Epoch: 2 =====


HBox(children=(FloatProgress(value=0.0, description='Train iter', max=590.0, style=ProgressStyle(description_w…




HBox(children=(FloatProgress(value=0.0, description='Eval', max=25.0, style=ProgressStyle(description_width='i…


loss history:  [0.5645310938358307, 0.5638135731220245]
Val loss: 0.16090363264083862;  Acc: 0.7755102040816326;  Precision: 0.7755102040816326;  Recall: 1.0;  F1: 0.8735632183908045


In [32]:
loss, true, pred = evaluate(model_mbert, test_dataloader, device)
acc, precision, recall, f1 = metrics(true, pred)
print(f'Loss: {loss};  Acc: {acc};  Precision: {precision};  Recall: {recall};  F1: {f1}' )

HBox(children=(FloatProgress(value=0.0, description='Eval', max=19.0, style=ProgressStyle(description_width='i…


Loss: 0.5995854766745317;  Acc: 0.7457627118644068;  Precision: 0.7457627118644068;  Recall: 1.0;  F1: 0.8543689320388349


In [33]:
print('model predictions: ', pred)

model predictions:  [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]


In [34]:
torch.cuda.empty_cache()
model_mbert = BertForSequenceClassification.from_pretrained(
    MBERT_MODEL, 
    num_labels=num_labels_to_classify
    )
model_mbert.to(device)
model_file_name, model_ru = train(model_mbert, 
                                  val_dataloader, 
                                  test_dataloader, 
                                  num_train_epochs=3,
                                  gradient_accumulation_steps=4,
                                  )

Some weights of the model checkpoint at bert-base-multilingual-uncased were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight', 'cls.seq_relationship.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model 

===== Epoch: 0 =====


HBox(children=(FloatProgress(value=0.0, description='Train iter', max=25.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Eval', max=19.0, style=ProgressStyle(description_width='i…


loss history:  []
Val loss: 0.16020141541957855;  Acc: 0.7423728813559322;  Precision: 0.7448979591836735;  Recall: 0.9954545454545455;  F1: 0.8521400778210118
===== Epoch: 1 =====


HBox(children=(FloatProgress(value=0.0, description='Train iter', max=25.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Eval', max=19.0, style=ProgressStyle(description_width='i…


loss history:  [0.5944485883963736]
Val loss: 0.09261837601661682;  Acc: 0.7457627118644068;  Precision: 0.7457627118644068;  Recall: 1.0;  F1: 0.8543689320388349
===== Epoch: 2 =====


HBox(children=(FloatProgress(value=0.0, description='Train iter', max=25.0, style=ProgressStyle(description_wi…




HBox(children=(FloatProgress(value=0.0, description='Eval', max=19.0, style=ProgressStyle(description_width='i…


loss history:  [0.5944485883963736, 0.5755816243196789]
Val loss: 0.09393356740474701;  Acc: 0.7389830508474576;  Precision: 0.7457044673539519;  Recall: 0.9863636363636363;  F1: 0.8493150684931507


In [35]:
loss, true, pred = evaluate(model_mbert, test_dataloader, device)
acc, precision, recall, f1 = metrics(true, pred)
print(f'Loss: {loss};  Acc: {acc};  Precision: {precision};  Recall: {recall};  F1: {f1}' )

HBox(children=(FloatProgress(value=0.0, description='Eval', max=19.0, style=ProgressStyle(description_width='i…


Loss: 0.5409780411343825;  Acc: 0.7389830508474576;  Precision: 0.7457044673539519;  Recall: 0.9863636363636363;  F1: 0.8493150684931507


In [40]:
print('model predictions: ', pred)

model predictions:  [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1 1 1 1]
