In [3]:
import torch
from transformers import CamembertTokenizer, CamembertForSequenceClassification
from utils import *

device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [2]:
df_articles = load_newspaper()

In [None]:
train_dataset, test_dataset = extract_train_test_dataset(df_articles)

In [5]:
# Set gloabal parameters and tokenizer
MAX_LEN = 64
batch_size = 32
tokenizer = CamembertTokenizer.from_pretrained('camembert-base',do_lower_case=True)

In [6]:
test_dataloader = dataset_to_dataloader(test_dataset, tokenizer, level='title')
train_dataloader = dataset_to_dataloader(train_dataset, tokenizer, level='title')

In [4]:
# Load pretained model
model = CamembertForSequenceClassification.from_pretrained("camembert-base", num_labels=7).to(device)

Some weights of the model checkpoint at camembert-base were not used when initializing CamembertForSequenceClassification: ['lm_head.layer_norm.bias', 'lm_head.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.bias', 'roberta.pooler.dense.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.decoder.weight']
- This IS expected if you are initializing CamembertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing CamembertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of CamembertForSequenceClassification were not initialized from the model checkpoint at camembert-base and are newly initialized: ['classifier.out_proj.bias

In [None]:
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'gamma', 'beta']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.0}
]

In [12]:
# extract parameters to optimize
param_optimizer = list(model.named_parameters())
no_decay = ['bias', 'gamma', 'beta']
optimizer_grouped_parameters = [
    {'params': [p for n, p in param_optimizer if not any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.01},
    {'params': [p for n, p in param_optimizer if any(nd in n for nd in no_decay)],
     'weight_decay_rate': 0.0}
]

opt = torch.optim.Adam(optimizer_grouped_parameters, lr=2e-5, eps=10e-8)

In [13]:
model = train(model, train_dataloader, test_dataloader, opt, epochs=2, level='title')

| epoch   0 |    50/ 1469 steps | loss 1.81394
| epoch   0 |   100/ 1469 steps | loss 1.25588
| epoch   0 |   150/ 1469 steps | loss 0.89543
| epoch   0 |   200/ 1469 steps | loss 0.78953
| epoch   0 |   250/ 1469 steps | loss 0.67403
| epoch   0 |   300/ 1469 steps | loss 0.62081
| epoch   0 |   350/ 1469 steps | loss 0.58372
| epoch   0 |   400/ 1469 steps | loss 0.56182
| epoch   0 |   450/ 1469 steps | loss 0.47667
| epoch   0 |   500/ 1469 steps | loss 0.50559
| epoch   0 |   550/ 1469 steps | loss 0.49567
| epoch   0 |   600/ 1469 steps | loss 0.44547
| epoch   0 |   650/ 1469 steps | loss 0.45166
| epoch   0 |   700/ 1469 steps | loss 0.46312
| epoch   0 |   750/ 1469 steps | loss 0.49224
| epoch   0 |   800/ 1469 steps | loss 0.45498
| epoch   0 |   850/ 1469 steps | loss 0.46739
| epoch   0 |   900/ 1469 steps | loss 0.42009
| epoch   0 |   950/ 1469 steps | loss 0.47173
| epoch   0 |  1000/ 1469 steps | loss 0.44227
| epoch   0 |  1050/ 1469 steps | loss 0.45995
| epoch   0 |

100%|██████████| 625/625 [02:52<00:00,  3.63it/s]


Test accuracy : 0.671
| epoch   1 |    50/ 1469 steps | loss 0.33204
| epoch   1 |   100/ 1469 steps | loss 0.24871
| epoch   1 |   150/ 1469 steps | loss 0.26962
| epoch   1 |   200/ 1469 steps | loss 0.29491
| epoch   1 |   250/ 1469 steps | loss 0.25126
| epoch   1 |   300/ 1469 steps | loss 0.28984
| epoch   1 |   350/ 1469 steps | loss 0.29596
| epoch   1 |   400/ 1469 steps | loss 0.27086
| epoch   1 |   450/ 1469 steps | loss 0.27196
| epoch   1 |   500/ 1469 steps | loss 0.27721
| epoch   1 |   550/ 1469 steps | loss 0.26761
| epoch   1 |   600/ 1469 steps | loss 0.25930
| epoch   1 |   650/ 1469 steps | loss 0.26737
| epoch   1 |   700/ 1469 steps | loss 0.29556
| epoch   1 |   750/ 1469 steps | loss 0.29903
| epoch   1 |   800/ 1469 steps | loss 0.27771
| epoch   1 |   850/ 1469 steps | loss 0.29790
| epoch   1 |   900/ 1469 steps | loss 0.27438
| epoch   1 |   950/ 1469 steps | loss 0.25406
| epoch   1 |  1000/ 1469 steps | loss 0.22844
| epoch   1 |  1050/ 1469 steps | loss

100%|██████████| 625/625 [02:49<00:00,  3.69it/s]


Test accuracy : 0.704


In [14]:
torch.save(model.state_dict(), '../models/camembert_title.pt')