In [1]:
!pip install transformers
!pip install datasets
!pip install evaluate

[0m

## Examping Using Hugging face and train.py

In [2]:
import pandas as pd
import torch

from train import get_dataloaders, train_model, calculate_f1

In [3]:
# Hyperparams
NUM_EPOCHS = 1
BATCH_SIZE = 16
LEARNING_RATE = 5e-5
USE_LR_SCHEDULER = True
PRETRAINED_MODEL_NAME = "roberta-base"

# Data augmentation params
DOWNSAMPLE_LABEL_0 = True
DOWNSAMPLE_FRAC = 0.2  # 0.2 means 20% of the data
UPSAMPLE_LABEL_1 = False  # Buggy: train_dataloader has an extra field?
UPSAMPLE_TIMES = 2  # 2 means 2x the data

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

args = {
    "NUM_EPOCHS": NUM_EPOCHS,
    "BATCH_SIZE": BATCH_SIZE,
    "LEARNING_RATE": LEARNING_RATE,
    "USE_LR_SCHEDULER": USE_LR_SCHEDULER,
    "PRETRAINED_MODEL_NAME": PRETRAINED_MODEL_NAME,
}

cuda


In [4]:
train_data = pd.read_csv('train_data.csv')
val_data = pd.read_csv('val_data.csv')
print("Original training data numbers:")
print(train_data.label.value_counts())
# downsampling the data whose label is 0
if DOWNSAMPLE_LABEL_0:
    train_0 = train_data[train_data["label"] == 0].sample(frac=DOWNSAMPLE_FRAC, random_state=42)
    train_1 = train_data[train_data["label"] == 1]
    train_data = pd.concat([train_0, train_1], axis=0).reset_index(drop=True)
    print("After downsampling:")
    print(train_data.label.value_counts())

if UPSAMPLE_LABEL_1:
    train_0 = train_data[train_data['label'] == 0]
    # 1 label
    train_1 = train_data[train_data['label'] == 1]

    train_data = pd.concat([train_0, train_1], axis=0)
    for _ in range(UPSAMPLE_TIMES - 1):
        train_data = pd.concat([train_data, train_1], axis=0)
    print("After upsampling:")
    print(train_data.label.value_counts())

train_dataloader, val_dataloader = get_dataloaders(args, train_data, val_data)

Original training data numbers:
0    6831
1     706
Name: label, dtype: int64
After downsampling:
0    1366
1     706
Name: label, dtype: int64




  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [21]:
# Train model
model_name = "model"
model = train_model(args, device, train_dataloader, model_name=model_name)

Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['lm_head.bias', 'roberta.pooler.dense.bias', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.weight', 'classifie

Epoch: 0


train loss: 0.5824371576309204: 100%|██████████| 130/130 [03:10<00:00,  1.46s/it] 


In [7]:
# Get f1 score
model_name = "model"
f1 = calculate_f1(model_name, device, val_dataloader)
print("F1 score: {}".format(f1))

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/6.77k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.55k [00:00<?, ?B/s]

Downloading builder script:   0%|          | 0.00/7.36k [00:00<?, ?B/s]

F1 score: 0.5069444444444444


# Hyperparameter tuning

In [2]:
import pandas as pd
import torch

from train import get_dataloaders, train_model, calculate_f1

In [4]:
# Hyperparams
BATCH_SIZE = 16
PRETRAINED_MODEL_NAME = "roberta-base"
LEARNING_RATE_LIST = [8e-5, 5e-5, 2e-5]
USE_LR_SCHEDULER_LIST = [True, False]
NUM_EPOCHS_LIST = [1, 2]

# Data augmentation params
DOWNSAMPLE_LABEL_0 = True
DOWNSAMPLE_FRAC = 0.2  # 0.2 means 20% of the data

device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(device)

args = {
    # "NUM_EPOCHS": NUM_EPOCHS,
    "BATCH_SIZE": BATCH_SIZE,
    # "LEARNING_RATE": LEARNING_RATE,
    # "USE_LR_SCHEDULER": USE_LR_SCHEDULER,
    "PRETRAINED_MODEL_NAME": PRETRAINED_MODEL_NAME,
}
train_data = pd.read_csv('train_data.csv')
val_data = pd.read_csv('val_data.csv')
print("Original training data numbers:")
print(train_data.label.value_counts())
# downsampling the data whose label is 0
if DOWNSAMPLE_LABEL_0:
    train_0 = train_data[train_data["label"] == 0].sample(frac=DOWNSAMPLE_FRAC, random_state=42)
    train_1 = train_data[train_data["label"] == 1]
    train_data = pd.concat([train_0, train_1], axis=0).reset_index(drop=True)
    print("After downsampling:")
    print(train_data.label.value_counts())

train_dataloader, val_dataloader = get_dataloaders(args, train_data, val_data)

cuda
Original training data numbers:
0    6831
1     706
Name: label, dtype: int64
After downsampling:
0    1366
1     706
Name: label, dtype: int64




  0%|          | 0/3 [00:00<?, ?ba/s]

  0%|          | 0/1 [00:00<?, ?ba/s]

In [9]:
def hyperparams_search(train_dataloader, val_dataloader, args):
    best_f1 = 0
    best_params = {}
    for num_epochs in NUM_EPOCHS_LIST:
        for learning_rate in LEARNING_RATE_LIST:
            for use_lr_scheduler in USE_LR_SCHEDULER_LIST:
                args["NUM_EPOCHS"] = num_epochs
                args["LEARNING_RATE"] = learning_rate
                args["USE_LR_SCHEDULER"] = use_lr_scheduler
                print("Now params: {}".format(args))
                model = train_model(args, device, train_dataloader, model_name="model")
                f1 = calculate_f1("model", device, val_dataloader)
                print("Training with params: {}".format(args))
                print("F1 score: {}".format(f1))
                if f1 > best_f1:
                    best_f1 = f1
                    best_params = args
    print("Best params: {}, best f1: {}".format(best_params, best_f1))
    return best_params

In [10]:
hyperparams_search(train_dataloader, val_dataloader, args)

Now params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 1, 'LEARNING_RATE': 8e-05, 'USE_LR_SCHEDULER': True}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch: 0


train loss: 0.5420885682106018: 100%|██████████| 130/130 [03:14<00:00,  1.50s/it] 


Training with params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 1, 'LEARNING_RATE': 8e-05, 'USE_LR_SCHEDULER': True}
F1 score: 0.4340175953079179
Now params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 1, 'LEARNING_RATE': 8e-05, 'USE_LR_SCHEDULER': False}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch: 0


train loss: 0.6324615478515625: 100%|██████████| 130/130 [03:14<00:00,  1.49s/it] 


Training with params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 1, 'LEARNING_RATE': 8e-05, 'USE_LR_SCHEDULER': False}
F1 score: 0.29616724738675965
Now params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 1, 'LEARNING_RATE': 5e-05, 'USE_LR_SCHEDULER': True}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch: 0


train loss: 0.5422806739807129: 100%|██████████| 130/130 [03:13<00:00,  1.49s/it] 


Training with params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 1, 'LEARNING_RATE': 5e-05, 'USE_LR_SCHEDULER': True}
F1 score: 0.5251798561151079
Now params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 1, 'LEARNING_RATE': 5e-05, 'USE_LR_SCHEDULER': False}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch: 0


train loss: 0.47858136892318726: 100%|██████████| 130/130 [03:10<00:00,  1.46s/it]


Training with params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 1, 'LEARNING_RATE': 5e-05, 'USE_LR_SCHEDULER': False}
F1 score: 0.4294117647058823
Now params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 1, 'LEARNING_RATE': 2e-05, 'USE_LR_SCHEDULER': True}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch: 0


train loss: 0.3662198483943939: 100%|██████████| 130/130 [03:11<00:00,  1.47s/it] 


Training with params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 1, 'LEARNING_RATE': 2e-05, 'USE_LR_SCHEDULER': True}
F1 score: 0.4982935153583618
Now params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 1, 'LEARNING_RATE': 2e-05, 'USE_LR_SCHEDULER': False}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch: 0


train loss: 0.28206443786621094: 100%|██████████| 130/130 [03:12<00:00,  1.48s/it]


Training with params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 1, 'LEARNING_RATE': 2e-05, 'USE_LR_SCHEDULER': False}
F1 score: 0.4359673024523161
Now params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 2, 'LEARNING_RATE': 8e-05, 'USE_LR_SCHEDULER': True}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch: 0


train loss: 0.6056450605392456: 100%|██████████| 130/130 [03:12<00:00,  1.48s/it]


Epoch: 1


train loss: 0.6555594205856323: 100%|██████████| 130/130 [03:14<00:00,  1.50s/it] 


Training with params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 2, 'LEARNING_RATE': 8e-05, 'USE_LR_SCHEDULER': True}
F1 score: 0.4368600682593856
Now params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 2, 'LEARNING_RATE': 8e-05, 'USE_LR_SCHEDULER': False}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch: 0


train loss: 0.5805913209915161: 100%|██████████| 130/130 [03:13<00:00,  1.49s/it] 


Epoch: 1


train loss: 0.9283526539802551: 100%|██████████| 130/130 [03:13<00:00,  1.49s/it] 
  _warn_prf(average, modifier, msg_start, len(result))


Training with params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 2, 'LEARNING_RATE': 8e-05, 'USE_LR_SCHEDULER': False}
F1 score: 0.0
Now params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 2, 'LEARNING_RATE': 5e-05, 'USE_LR_SCHEDULER': True}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch: 0


train loss: 0.337646484375: 100%|██████████| 130/130 [03:05<00:00,  1.43s/it]     


Epoch: 1


train loss: 0.9693192839622498: 100%|██████████| 130/130 [03:05<00:00,  1.42s/it] 


Training with params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 2, 'LEARNING_RATE': 5e-05, 'USE_LR_SCHEDULER': True}
F1 score: 0.5182481751824817
Now params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 2, 'LEARNING_RATE': 5e-05, 'USE_LR_SCHEDULER': False}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch: 0


train loss: 0.5519188046455383: 100%|██████████| 130/130 [03:06<00:00,  1.43s/it]


Epoch: 1


train loss: 0.6759375929832458: 100%|██████████| 130/130 [03:02<00:00,  1.40s/it] 
  _warn_prf(average, modifier, msg_start, len(result))


Training with params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 2, 'LEARNING_RATE': 5e-05, 'USE_LR_SCHEDULER': False}
F1 score: 0.0
Now params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 2, 'LEARNING_RATE': 2e-05, 'USE_LR_SCHEDULER': True}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch: 0


train loss: 0.14034529030323029: 100%|██████████| 130/130 [03:17<00:00,  1.52s/it]


Epoch: 1


train loss: 0.2525808811187744: 100%|██████████| 130/130 [03:20<00:00,  1.54s/it] 


Training with params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 2, 'LEARNING_RATE': 2e-05, 'USE_LR_SCHEDULER': True}
F1 score: 0.5016722408026756
Now params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 2, 'LEARNING_RATE': 2e-05, 'USE_LR_SCHEDULER': False}


Some weights of the model checkpoint at roberta-base were not used when initializing RobertaForSequenceClassification: ['roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'lm_head.decoder.weight', 'lm_head.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.bias']
- This IS expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.out_proj.weight', 'classi

Epoch: 0


train loss: 0.3235809803009033: 100%|██████████| 130/130 [03:17<00:00,  1.52s/it] 


Epoch: 1


train loss: 0.47081196308135986: 100%|██████████| 130/130 [03:14<00:00,  1.49s/it]


Training with params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 2, 'LEARNING_RATE': 2e-05, 'USE_LR_SCHEDULER': False}
F1 score: 0.4297082228116711
Best params: {'BATCH_SIZE': 16, 'PRETRAINED_MODEL_NAME': 'roberta-base', 'NUM_EPOCHS': 2, 'LEARNING_RATE': 2e-05, 'USE_LR_SCHEDULER': False}, best f1: 0.5251798561151079


{'BATCH_SIZE': 16,
 'PRETRAINED_MODEL_NAME': 'roberta-base',
 'NUM_EPOCHS': 2,
 'LEARNING_RATE': 2e-05,
 'USE_LR_SCHEDULER': False}