In [1]:
# Set the WANDB_DISABLED environment variable for this kernel before
# `transformers` is imported in the next cell.
# NOTE(review): presumably this turns off the Trainer's Weights & Biases
# reporting -- confirm against the installed transformers version.
%env WANDB_DISABLED=1

env: WANDB_DISABLED=1


In [2]:
import transformers
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from datasets import load_dataset, load_metric

def finetune_task(glue_task, model_name_or_path, tokenizer_name_or_path, output_folder, batch_size = 16):
    """
    Fine-tune a sequence-classification model on one GLUE task and evaluate it.

    Inputs:
    |  glue_task = A single GLUE task to finetune towards
    |  model_name_or_path = model name (hugging face) or path (local)
    |  tokenizer_name_or_path = tokenizer name (hugging face) or path (local)
    |  output_folder = parent directory of the run; checkpoints are written to
    |      "<output_folder>/<model name>-finetuned-<glue_task>"
    |  batch_size = per-device batch size for training and evaluation. Default to 16.
    Outputs:
    |  The value returned by `trainer.evaluate()` on the task's validation split
    |  (run after training, with `load_best_model_at_end=True`).
    Raises:
    |  ValueError if glue_task is not one of the tasks listed under Extra.
    Extra:
    |  glue_task = ["cola", "mnli", "mnli-mm", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"]
    """

    # Column name(s) holding the input text for each supported task; a second
    # entry of None marks a single-sentence task.
    task_to_keys = {
        "cola": ("sentence", None),
        "mnli": ("premise", "hypothesis"),
        "mnli-mm": ("premise", "hypothesis"),
        "mrpc": ("sentence1", "sentence2"),
        "qnli": ("question", "sentence"),
        "qqp": ("question1", "question2"),
        "rte": ("sentence1", "sentence2"),
        "sst2": ("sentence", None),
        "stsb": ("sentence1", "sentence2"),
        "wnli": ("sentence1", "sentence2"),
    }

    # Helper Functions
    def preprocess_function(examples):
        """Tokenize one batch, as a sentence pair when the task has two text columns."""
        sentence1_key, sentence2_key = task_to_keys[task]
        if sentence2_key is None:
            return tokenizer(examples[sentence1_key], truncation=True)
        return tokenizer(examples[sentence1_key], examples[sentence2_key], truncation=True)

    def compute_metrics(eval_pred):
        """Turn raw model outputs into predictions and score them with the GLUE metric."""
        predictions, labels = eval_pred
        if task != "stsb":
            # Classification: pick the highest-scoring class.
            predictions = np.argmax(predictions, axis=1)
        else:
            # Regression (num_labels == 1): use the single output column.
            predictions = predictions[:, 0]
        return metric.compute(predictions=predictions, references=labels)

    print("Getting Task...")
    task = glue_task
    # Fail fast, before anything is loaded, instead of part-way through setup.
    if task not in task_to_keys:
        raise ValueError(
            f"Unsupported GLUE task {task!r}; expected one of {sorted(task_to_keys)}"
        )

    print("Getting Metric...")
    # "mnli-mm" is not a dataset configuration of its own: it uses the "mnli"
    # data and differs only in which validation split is evaluated (see below).
    actual_task = "mnli" if task == "mnli-mm" else task
    dataset = load_dataset("glue", actual_task)
    metric = load_metric('glue', actual_task)

    print("Getting Tokenizer...")
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name_or_path, use_fast=True)

    print("Encoding...")
    encoded_dataset = dataset.map(preprocess_function, batched=True)

    print("Setting Labels...")
    # mnli is 3-way, stsb is a regression task (single output), the rest are binary.
    num_labels = 3 if task.startswith("mnli") else 1 if task == "stsb" else 2
    model = AutoModelForSequenceClassification.from_pretrained(model_name_or_path, num_labels=num_labels)

    metric_name = "pearson" if task == "stsb" else "matthews_correlation" if task == "cola" else "accuracy"
    # Strip trailing slashes so a path such as "model/foo/" still yields "foo"
    # rather than an empty name.
    model_name = str(model_name_or_path).rstrip("/").split("/")[-1]

    print("Setting Training Arguments...")
    args = TrainingArguments(
        f"{output_folder}/{model_name}-finetuned-{task}",
        evaluation_strategy = "epoch",
        save_strategy = "epoch",
        learning_rate=2e-5,
        per_device_train_batch_size=batch_size,
        per_device_eval_batch_size=batch_size,
        num_train_epochs=3,
        weight_decay=0.01,
        load_best_model_at_end=True,
        metric_for_best_model=metric_name
    )

    validation_key = "validation_mismatched" if task == "mnli-mm" else "validation_matched" if task == "mnli" else "validation"
    trainer = Trainer(
        model,
        args,
        train_dataset=encoded_dataset["train"],
        eval_dataset=encoded_dataset[validation_key],
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )
    print("Training and Evaluation...")
    trainer.train()
    # Hand the final metrics back to the caller instead of discarding them.
    return trainer.evaluate()

In [5]:
# Checkpoint to fine-tune; passed to finetune_task as model_name_or_path.
model_name_or_local_path = "model/progressive-5p-mean_double"
# Tokenizer to load; passed to finetune_task as tokenizer_name_or_path.
tokenizer_name_or_local_path = "bert-base-uncased"
# Parent directory under which finetune_task creates one run directory per task.
output_folder = "log/progressive-5p-mean_double-finetune"

In [6]:
# Every task name finetune_task accepts, for reference:
#  glue_task = ["cola", "mnli", "mnli-mm", "mrpc", "qnli", "qqp", "rte", "sst2", "stsb", "wnli"]
# This cell runs only the subset below, one fine-tuning run per task.
fast_glue_task = ["cola", "mrpc", "rte", "sst2", "stsb", "wnli"]

for task in fast_glue_task:
    finetune_task(
        glue_task=task,
        model_name_or_path=model_name_or_local_path,
        tokenizer_name_or_path=tokenizer_name_or_local_path,
        output_folder=output_folder,
    )

Getting Task...
Getting Metric...


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/cola/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Getting Tokenizer...


loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.19.4",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/resolve/ma

Encoding...
Setting Labels...


Some weights of the model checkpoint at model/progressive-5p-mean_double were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at model/progre

Setting Training Arguments...
Training and Evaluation...


Epoch,Training Loss,Validation Loss,Matthews Correlation
1,0.6025,0.664586,0.065589
2,0.4817,0.61953,0.262085
3,0.3699,0.699252,0.270909


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence. If idx, sentence are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1043
  Batch size = 16
Saving model checkpoint to log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-cola/checkpoint-535
Configuration saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-cola/checkpoint-535/config.json
Model weights saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-cola/checkpoint-535/pytorch_model.bin
tokenizer config file saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-cola/checkpoint-535/tokenizer_config.json
Special tokens file saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-col

Getting Task...
Getting Metric...


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/mrpc/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Getting Tokenizer...


loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.19.4",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/resolve/ma

Encoding...
Setting Labels...


Some weights of the model checkpoint at model/progressive-5p-mean_double were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at model/progre

Setting Training Arguments...
Training and Evaluation...


Epoch,Training Loss,Validation Loss,Accuracy,F1
1,No log,0.582797,0.705882,0.815951
2,No log,0.607251,0.698529,0.813918
3,0.587500,0.620851,0.693627,0.794069


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 408
  Batch size = 16
Saving model checkpoint to log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-mrpc/checkpoint-230
Configuration saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-mrpc/checkpoint-230/config.json
Model weights saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-mrpc/checkpoint-230/pytorch_model.bin
tokenizer config file saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-mrpc/checkpoint-230/tokenizer_config.json
Special tokens file saved in log/progressive-5p-mean_double-finetune/progressive-5p-me

Getting Task...
Getting Metric...


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/rte/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Getting Tokenizer...


loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.19.4",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/resolve/ma

Encoding...
Setting Labels...


Some weights of the model checkpoint at model/progressive-5p-mean_double were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at model/progre

Setting Training Arguments...
Training and Evaluation...


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.725258,0.469314
2,No log,0.711122,0.548736
3,No log,0.769226,0.530686


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 277
  Batch size = 16
Saving model checkpoint to log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-rte/checkpoint-156
Configuration saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-rte/checkpoint-156/config.json
Model weights saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-rte/checkpoint-156/pytorch_model.bin
tokenizer config file saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-rte/checkpoint-156/tokenizer_config.json
Special tokens file saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_d

Getting Task...
Getting Metric...


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/sst2/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Getting Tokenizer...


loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.19.4",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/resolve/ma

Encoding...
Setting Labels...


Some weights of the model checkpoint at model/progressive-5p-mean_double were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at model/progre

Setting Training Arguments...
Training and Evaluation...


Epoch,Training Loss,Validation Loss,Accuracy
1,0.2583,0.40681,0.863532
2,0.1761,0.487975,0.861239
3,0.113,0.532209,0.865826


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence. If idx, sentence are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 872
  Batch size = 16
Saving model checkpoint to log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-sst2/checkpoint-4210
Configuration saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-sst2/checkpoint-4210/config.json
Model weights saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-sst2/checkpoint-4210/pytorch_model.bin
tokenizer config file saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-sst2/checkpoint-4210/tokenizer_config.json
Special tokens file saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-

Getting Task...
Getting Metric...


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/stsb/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Getting Tokenizer...


loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.19.4",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/resolve/ma

Encoding...
Setting Labels...


Some weights of the model checkpoint at model/progressive-5p-mean_double were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at model/progre

Setting Training Arguments...
Training and Evaluation...


Epoch,Training Loss,Validation Loss,Pearson,Spearmanr
1,No log,2.45948,0.188232,0.177644
2,2.029400,2.350255,0.267625,0.265506
3,1.559800,2.565156,0.286918,0.290001


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 1500
  Batch size = 16
Saving model checkpoint to log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-stsb/checkpoint-360
Configuration saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-stsb/checkpoint-360/config.json
Model weights saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-stsb/checkpoint-360/pytorch_model.bin
tokenizer config file saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-stsb/checkpoint-360/tokenizer_config.json
Special tokens file saved in log/progressive-5p-mean_double-finetune/progressive-5p-m

Getting Task...
Getting Metric...


Reusing dataset glue (/root/.cache/huggingface/datasets/glue/wnli/1.0.0/dacbe3125aa31d7f70367a07a8a9e72a5a0bfeb5fc42e75c9db75b96da6053ad)


  0%|          | 0/3 [00:00<?, ?it/s]

Getting Tokenizer...


loading configuration file https://huggingface.co/bert-base-uncased/resolve/main/config.json from cache at /root/.cache/huggingface/transformers/3c61d016573b14f7f008c02c4e51a366c67ab274726fe2910691e2a761acf43e.37395cee442ab11005bcd270f3c34464dc1704b715b5d7d52b1a461abe3b9e4e
Model config BertConfig {
  "_name_or_path": "bert-base-uncased",
  "architectures": [
    "BertForMaskedLM"
  ],
  "attention_probs_dropout_prob": 0.1,
  "classifier_dropout": null,
  "gradient_checkpointing": false,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "layer_norm_eps": 1e-12,
  "max_position_embeddings": 512,
  "model_type": "bert",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "pad_token_id": 0,
  "position_embedding_type": "absolute",
  "transformers_version": "4.19.4",
  "type_vocab_size": 2,
  "use_cache": true,
  "vocab_size": 30522
}

loading file https://huggingface.co/bert-base-uncased/resolve/ma

Encoding...
Setting Labels...


Some weights of the model checkpoint at model/progressive-5p-mean_double were not used when initializing BertForSequenceClassification: ['cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at model/progre

Setting Training Arguments...
Training and Evaluation...


Epoch,Training Loss,Validation Loss,Accuracy
1,No log,0.697425,0.43662
2,No log,0.698674,0.492958
3,No log,0.709122,0.352113


The following columns in the evaluation set don't have a corresponding argument in `BertForSequenceClassification.forward` and have been ignored: idx, sentence1, sentence2. If idx, sentence1, sentence2 are not expected by `BertForSequenceClassification.forward`,  you can safely ignore this message.
***** Running Evaluation *****
  Num examples = 71
  Batch size = 16
Saving model checkpoint to log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-wnli/checkpoint-40
Configuration saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-wnli/checkpoint-40/config.json
Model weights saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-wnli/checkpoint-40/pytorch_model.bin
tokenizer config file saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_double-finetuned-wnli/checkpoint-40/tokenizer_config.json
Special tokens file saved in log/progressive-5p-mean_double-finetune/progressive-5p-mean_do

In [7]:
# slow_glue_task = ["mnli", "mnli-mm", "qnli", "qqp"]
# for task in slow_glue_task:
#     finetune_task(task, model_name_or_local_path, tokenizer_name_or_local_path, output_folder)

In [11]:
# Release GPU memory left over from the fine-tuning runs above.
import gc

import torch

# Collect unreachable Python objects first so that any tensors they still
# reference are freed; empty_cache() can only hand back cached blocks that no
# live tensor occupies.
gc.collect()
torch.cuda.empty_cache()