In [1]:
import datasets
from datasets import load_dataset, load_metric

from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig, DataCollator, TrainingArguments, Trainer
from dataclasses import dataclass, field

import torch

import random
import pandas as pd
import numpy as np

In [2]:
dataset = load_dataset('amazon_reviews_multi')

No config specified, defaulting to: amazon_reviews_multi/all_languages
Reusing dataset amazon_reviews_multi (C:\Users\chris\.cache\huggingface\datasets\amazon_reviews_multi\all_languages\1.0.0\724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609)


In [3]:
dataset["train"][0:5]

{'review_id': ['de_0203609',
  'de_0559494',
  'de_0238777',
  'de_0477884',
  'de_0270868'],
 'product_id': ['product_de_0865382',
  'product_de_0678997',
  'product_de_0372235',
  'product_de_0719501',
  'product_de_0022613'],
 'reviewer_id': ['reviewer_de_0267719',
  'reviewer_de_0783625',
  'reviewer_de_0911426',
  'reviewer_de_0836478',
  'reviewer_de_0736276'],
 'stars': [1, 1, 1, 1, 1],
 'review_body': ['Armband ist leider nach 1 Jahr kaputt gegangen',
  'In der Lieferung war nur Ein Akku!',
  'Ein Stern, weil gar keine geht nicht. Es handelt sich um gebraucht Waren, die Stein haben so ein Belag drauf, wo man sich dabei denken kann, dass jemand schon die benutzt und nicht Mal richtig gewaschen. Bei ein paar ist die Qualität Mangelhaft, siehe Bild. Ein habe ich ausprobiert, richtig gewaschen, dann verfärbt sich..... Wärme halt nicht lange. Deswegen wird es zurückgeschickt.',
  'Dachte, das wären einfach etwas festere Binden, vielleicht größere Always. Aber die Verpackung ist dera

In [4]:
metric = load_metric('accuracy')
f1_metric = load_metric('f1')

In [15]:
do_shard=True
if do_shard:
    dataset = dataset.shuffle(seed=8855)
    train_dataset=dataset["train"].shard(index=1, num_shards=10)
    val_dataset=dataset["validation"].shard(index=1, num_shards=5)
else:
    train_dataset=dataset["train"]
    val_dataset=dataset["validation"]

Loading cached shuffled indices for dataset at C:\Users\chris\.cache\huggingface\datasets\amazon_reviews_multi\all_languages\1.0.0\724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609\cache-ee52c5ebc979ea41.arrow
Loading cached shuffled indices for dataset at C:\Users\chris\.cache\huggingface\datasets\amazon_reviews_multi\all_languages\1.0.0\724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609\cache-88d6095512481bd8.arrow
Loading cached shuffled indices for dataset at C:\Users\chris\.cache\huggingface\datasets\amazon_reviews_multi\all_languages\1.0.0\724e94f4b0c6c405ce7e476a6c5ef4f87db30799ad49f765094cf9770e0f7609\cache-c59e3d568381596d.arrow


In [16]:
model_checkpoint='./model/checkpoint-90000'
tokenizer=AutoTokenizer.from_pretrained(model_checkpoint, use_fast=True)

In [17]:
pad_to_max=False
def tokenize_data(example):
    text_ = example["review_body"] + " " + example["review_title"] + " " + example["product_category"]
    encodings = tokenizer(text_, pad_to_max_length=pad_to_max,
                                truncation=True,
                                add_special_tokens=True,
                                return_token_type_ids=False,
                                return_attention_mask=True,
                                return_overflowing_tokens=False,
                                return_special_tokens_mask=False,
                                )
    encodings["labels"] = example["stars"] - 1
    return encodings

In [18]:
encoded_train_dataset = train_dataset.map(tokenize_data)
encoded_val_dataset = val_dataset.map(tokenize_data)

100%|██████████| 120000/120000 [00:42<00:00, 2837.54ex/s]
100%|██████████| 6000/6000 [00:02<00:00, 2756.09ex/s]


In [21]:
def pad_seq(seq, max_batch_len, pad_value):
    return seq + (max_batch_len - len(seq)) * [pad_value]

In [22]:
@dataclass
class SmartCollator():
    pad_token_id: int

    def __call__(self, batch):
        batch_inputs = list()
        batch_attention_mask = list()
        labels = list()
        max_size = max(len(ex['input_ids']) for ex in batch)
        for item in batch:
            batch_inputs += [pad_seq(item['input_ids'], max_size, self.pad_token_id)]
            batch_attention_mask += [pad_seq(item['attention_mask'], max_size, 0)]
            labels.append(item['labels'])
        
        return {"input_ids": torch.tensor(batch_inputs, dtype=torch.long),
                "attention_mask": torch.tensor(batch_attention_mask, dtype=torch.long),
                "labels": torch.tensor(labels, dtype=torch.long)
                }

In [5]:
BATCH_SIZE = 4
NUM_LABELS = 5

resume_training = True
if resume_training:
    model_checkpoint = './model/checkpoint-90000'
else:
    model_checkpoint = 'xlm-roberta-base'
model = AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=NUM_LABELS)

In [7]:
METRIC_NAME = 'accuracy'

args = TrainingArguments(
    output_dir='./model',
    seed=8855,
    evaluation_strategy='steps',
    learning_rate=2e-5,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=3,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=METRIC_NAME,
    eval_steps=5000,
    save_steps=5000,
    fp16=True,
)

In [8]:
def compute_metrics(eval_pred):
    predictions, labels = eval_pred
    predictions = np.argmax(predictions, axis=1)

    return metric.compute(predictions=predictions, references=labels)

In [14]:
validation_key = "validation"
trainer = Trainer(
    model,
    args,
    train_dataset=encoded_train_dataset,
    eval_dataset=encoded_val_dataset,
    data_collator=SmartCollator(pad_token_id=tokenizer.pad_token_id),
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

Using amp half precision backend


In [15]:
trainer.train()

The following columns in the training set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running training *****
  Num examples = 120000
  Num Epochs = 3
  Instantaneous batch size per device = 4
  Total train batch size (w. parallel, distributed & accumulation) = 4
  Gradient Accumulation steps = 1
  Total optimization steps = 90000
  1%|          | 501/90000 [00:56<2:50:51,  8.73it/s]

{'loss': 1.6121, 'learning_rate': 1.988911111111111e-05, 'epoch': 0.02}


  1%|          | 1001/90000 [01:52<2:51:21,  8.66it/s]

{'loss': 1.619, 'learning_rate': 1.9778000000000003e-05, 'epoch': 0.03}


  2%|▏         | 1501/90000 [02:48<2:47:11,  8.82it/s]

{'loss': 1.6154, 'learning_rate': 1.966688888888889e-05, 'epoch': 0.05}


  2%|▏         | 2001/90000 [03:44<2:47:27,  8.76it/s]

{'loss': 1.619, 'learning_rate': 1.955577777777778e-05, 'epoch': 0.07}


  3%|▎         | 2501/90000 [04:33<2:27:34,  9.88it/s]

{'loss': 1.6154, 'learning_rate': 1.9444888888888892e-05, 'epoch': 0.08}


  3%|▎         | 3002/90000 [05:22<2:23:58, 10.07it/s]

{'loss': 1.6137, 'learning_rate': 1.933377777777778e-05, 'epoch': 0.1}


  4%|▍         | 3501/90000 [06:11<2:21:59, 10.15it/s]

{'loss': 1.6171, 'learning_rate': 1.9222666666666668e-05, 'epoch': 0.12}


  4%|▍         | 4001/90000 [07:00<2:22:03, 10.09it/s]

{'loss': 1.6154, 'learning_rate': 1.9111555555555556e-05, 'epoch': 0.13}


  5%|▌         | 4502/90000 [07:48<2:21:14, 10.09it/s]

{'loss': 1.6154, 'learning_rate': 1.9000444444444444e-05, 'epoch': 0.15}


  6%|▌         | 5000/90000 [08:37<2:19:28, 10.16it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6126, 'learning_rate': 1.8889555555555557e-05, 'epoch': 0.17}


                                                      
  6%|▌         | 5000/90000 [09:06<2:19:28, 10.16it/s]Saving model checkpoint to ./model\checkpoint-5000
Configuration saved in ./model\checkpoint-5000\config.json


{'eval_loss': 1.6099364757537842, 'eval_accuracy': 0.20083333333333334, 'eval_runtime': 28.8594, 'eval_samples_per_second': 207.904, 'eval_steps_per_second': 51.976, 'epoch': 0.17}


Model weights saved in ./model\checkpoint-5000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-5000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-5000\special_tokens_map.json
  6%|▌         | 5502/90000 [10:30<2:20:46, 10.00it/s]  

{'loss': 1.6158, 'learning_rate': 1.8778444444444445e-05, 'epoch': 0.18}


  7%|▋         | 6002/90000 [11:18<2:18:49, 10.08it/s]

{'loss': 1.6172, 'learning_rate': 1.8667333333333337e-05, 'epoch': 0.2}


  7%|▋         | 6501/90000 [12:07<2:18:46, 10.03it/s]

{'loss': 1.6145, 'learning_rate': 1.8556222222222225e-05, 'epoch': 0.22}


  8%|▊         | 7002/90000 [12:56<2:16:14, 10.15it/s]

{'loss': 1.6143, 'learning_rate': 1.8445333333333334e-05, 'epoch': 0.23}


  8%|▊         | 7502/90000 [13:45<2:16:55, 10.04it/s]

{'loss': 1.614, 'learning_rate': 1.8334222222222226e-05, 'epoch': 0.25}


  9%|▉         | 8001/90000 [14:34<2:16:06, 10.04it/s]

{'loss': 1.615, 'learning_rate': 1.8223111111111114e-05, 'epoch': 0.27}


  9%|▉         | 8502/90000 [15:23<2:14:45, 10.08it/s]

{'loss': 1.6147, 'learning_rate': 1.8112000000000002e-05, 'epoch': 0.28}


 10%|█         | 9002/90000 [16:11<2:13:26, 10.12it/s]

{'loss': 1.613, 'learning_rate': 1.8001111111111115e-05, 'epoch': 0.3}


 11%|█         | 9501/90000 [17:00<2:14:40,  9.96it/s]

{'loss': 1.6134, 'learning_rate': 1.789e-05, 'epoch': 0.32}


 11%|█         | 10000/90000 [17:49<2:10:00, 10.26it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6142, 'learning_rate': 1.777888888888889e-05, 'epoch': 0.33}


                                                       
 11%|█         | 10000/90000 [18:18<2:10:00, 10.26it/s]Saving model checkpoint to ./model\checkpoint-10000
Configuration saved in ./model\checkpoint-10000\config.json


{'eval_loss': 1.611668348312378, 'eval_accuracy': 0.19516666666666665, 'eval_runtime': 29.0151, 'eval_samples_per_second': 206.789, 'eval_steps_per_second': 51.697, 'epoch': 0.33}


Model weights saved in ./model\checkpoint-10000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-10000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-10000\special_tokens_map.json
 12%|█▏        | 10501/90000 [19:41<2:11:33, 10.07it/s]  

{'loss': 1.6132, 'learning_rate': 1.766777777777778e-05, 'epoch': 0.35}


 12%|█▏        | 11001/90000 [20:30<2:11:18, 10.03it/s]

{'loss': 1.6152, 'learning_rate': 1.7556666666666667e-05, 'epoch': 0.37}


 13%|█▎        | 11501/90000 [21:19<2:10:40, 10.01it/s]

{'loss': 1.616, 'learning_rate': 1.744577777777778e-05, 'epoch': 0.38}


 13%|█▎        | 12002/90000 [22:08<2:08:29, 10.12it/s]

{'loss': 1.6137, 'learning_rate': 1.733466666666667e-05, 'epoch': 0.4}


 14%|█▍        | 12502/90000 [22:57<2:08:24, 10.06it/s]

{'loss': 1.6154, 'learning_rate': 1.7223555555555557e-05, 'epoch': 0.42}


 14%|█▍        | 13001/90000 [23:46<2:06:16, 10.16it/s]

{'loss': 1.6129, 'learning_rate': 1.7112444444444445e-05, 'epoch': 0.43}


 15%|█▌        | 13501/90000 [24:35<2:04:55, 10.21it/s]

{'loss': 1.6125, 'learning_rate': 1.7001555555555558e-05, 'epoch': 0.45}


 16%|█▌        | 14000/90000 [25:23<2:04:28, 10.18it/s]

{'loss': 1.6126, 'learning_rate': 1.6890444444444446e-05, 'epoch': 0.47}


 16%|█▌        | 14501/90000 [26:12<2:03:42, 10.17it/s]

{'loss': 1.6123, 'learning_rate': 1.6779333333333334e-05, 'epoch': 0.48}


 17%|█▋        | 15000/90000 [27:01<2:04:12, 10.06it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6166, 'learning_rate': 1.6668222222222222e-05, 'epoch': 0.5}


                                                       
 17%|█▋        | 15000/90000 [27:30<2:04:12, 10.06it/s]Saving model checkpoint to ./model\checkpoint-15000
Configuration saved in ./model\checkpoint-15000\config.json


{'eval_loss': 1.6106808185577393, 'eval_accuracy': 0.20083333333333334, 'eval_runtime': 28.8875, 'eval_samples_per_second': 207.702, 'eval_steps_per_second': 51.926, 'epoch': 0.5}


Model weights saved in ./model\checkpoint-15000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-15000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-15000\special_tokens_map.json
 17%|█▋        | 15501/90000 [28:53<2:02:42, 10.12it/s]  

{'loss': 1.6157, 'learning_rate': 1.6557111111111113e-05, 'epoch': 0.52}


 18%|█▊        | 16001/90000 [29:42<2:03:37,  9.98it/s]

{'loss': 1.6152, 'learning_rate': 1.6446e-05, 'epoch': 0.53}


 18%|█▊        | 16502/90000 [30:31<2:00:48, 10.14it/s]

{'loss': 1.6102, 'learning_rate': 1.633511111111111e-05, 'epoch': 0.55}


 19%|█▉        | 17001/90000 [31:20<2:00:23, 10.11it/s]

{'loss': 1.6145, 'learning_rate': 1.6224000000000003e-05, 'epoch': 0.57}


 19%|█▉        | 17500/90000 [32:09<2:00:17, 10.04it/s]

{'loss': 1.6143, 'learning_rate': 1.611288888888889e-05, 'epoch': 0.58}


 20%|██        | 18001/90000 [32:58<1:58:22, 10.14it/s]

{'loss': 1.6123, 'learning_rate': 1.600177777777778e-05, 'epoch': 0.6}


 21%|██        | 18500/90000 [33:46<1:56:26, 10.23it/s]

{'loss': 1.6123, 'learning_rate': 1.5890666666666667e-05, 'epoch': 0.62}


 21%|██        | 19002/90000 [34:36<1:56:04, 10.19it/s]

{'loss': 1.6155, 'learning_rate': 1.5779555555555558e-05, 'epoch': 0.63}


 22%|██▏       | 19501/90000 [35:24<1:56:05, 10.12it/s]

{'loss': 1.612, 'learning_rate': 1.5668444444444446e-05, 'epoch': 0.65}


 22%|██▏       | 20000/90000 [36:13<1:55:27, 10.10it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6158, 'learning_rate': 1.5557555555555556e-05, 'epoch': 0.67}


                                                       
 22%|██▏       | 20000/90000 [36:42<1:55:27, 10.10it/s]Saving model checkpoint to ./model\checkpoint-20000
Configuration saved in ./model\checkpoint-20000\config.json


{'eval_loss': 1.6104899644851685, 'eval_accuracy': 0.19516666666666665, 'eval_runtime': 29.253, 'eval_samples_per_second': 205.107, 'eval_steps_per_second': 51.277, 'epoch': 0.67}


Model weights saved in ./model\checkpoint-20000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-20000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-20000\special_tokens_map.json
 23%|██▎       | 20501/90000 [38:06<1:54:56, 10.08it/s]  

{'loss': 1.6113, 'learning_rate': 1.5446444444444447e-05, 'epoch': 0.68}


 23%|██▎       | 21002/90000 [38:55<1:53:28, 10.13it/s]

{'loss': 1.6158, 'learning_rate': 1.5335333333333335e-05, 'epoch': 0.7}


 24%|██▍       | 21501/90000 [39:43<1:53:38, 10.05it/s]

{'loss': 1.6138, 'learning_rate': 1.5224222222222222e-05, 'epoch': 0.72}


 24%|██▍       | 22002/90000 [40:32<1:50:48, 10.23it/s]

{'loss': 1.6121, 'learning_rate': 1.5113333333333335e-05, 'epoch': 0.73}


 25%|██▌       | 22501/90000 [41:21<1:51:27, 10.09it/s]

{'loss': 1.6105, 'learning_rate': 1.5002222222222223e-05, 'epoch': 0.75}


 26%|██▌       | 23002/90000 [42:10<1:51:36, 10.00it/s]

{'loss': 1.6143, 'learning_rate': 1.4891111111111111e-05, 'epoch': 0.77}


 26%|██▌       | 23501/90000 [42:58<1:49:40, 10.11it/s]

{'loss': 1.6135, 'learning_rate': 1.478e-05, 'epoch': 0.78}


 27%|██▋       | 24001/90000 [43:47<1:49:22, 10.06it/s]

{'loss': 1.6134, 'learning_rate': 1.4669111111111112e-05, 'epoch': 0.8}


 27%|██▋       | 24500/90000 [44:36<1:47:27, 10.16it/s]

{'loss': 1.613, 'learning_rate': 1.4558e-05, 'epoch': 0.82}


 28%|██▊       | 25000/90000 [45:25<1:45:51, 10.23it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6129, 'learning_rate': 1.444688888888889e-05, 'epoch': 0.83}


                                                       
 28%|██▊       | 25000/90000 [45:59<1:45:51, 10.23it/s]Saving model checkpoint to ./model\checkpoint-25000
Configuration saved in ./model\checkpoint-25000\config.json


{'eval_loss': 1.6097493171691895, 'eval_accuracy': 0.20083333333333334, 'eval_runtime': 33.6793, 'eval_samples_per_second': 178.151, 'eval_steps_per_second': 44.538, 'epoch': 0.83}


Model weights saved in ./model\checkpoint-25000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-25000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-25000\special_tokens_map.json
 28%|██▊       | 25502/90000 [47:23<1:47:06, 10.04it/s]  

{'loss': 1.6112, 'learning_rate': 1.433577777777778e-05, 'epoch': 0.85}


 29%|██▉       | 26001/90000 [48:11<1:45:59, 10.06it/s]

{'loss': 1.6158, 'learning_rate': 1.422466666666667e-05, 'epoch': 0.87}


 29%|██▉       | 26500/90000 [49:01<1:43:33, 10.22it/s]

{'loss': 1.6144, 'learning_rate': 1.4113777777777779e-05, 'epoch': 0.88}


 30%|███       | 27001/90000 [49:49<1:48:56,  9.64it/s]

{'loss': 1.6104, 'learning_rate': 1.4002666666666669e-05, 'epoch': 0.9}


 31%|███       | 27502/90000 [50:38<1:44:16,  9.99it/s]

{'loss': 1.6147, 'learning_rate': 1.3891555555555555e-05, 'epoch': 0.92}


 31%|███       | 28001/90000 [51:27<1:44:34,  9.88it/s]

{'loss': 1.6126, 'learning_rate': 1.3780444444444445e-05, 'epoch': 0.93}


 32%|███▏      | 28502/90000 [52:16<1:43:06,  9.94it/s]

{'loss': 1.6134, 'learning_rate': 1.3669333333333335e-05, 'epoch': 0.95}


 32%|███▏      | 29001/90000 [53:05<1:42:30,  9.92it/s]

{'loss': 1.6122, 'learning_rate': 1.3558444444444444e-05, 'epoch': 0.97}


 33%|███▎      | 29501/90000 [53:54<1:40:18, 10.05it/s]

{'loss': 1.6131, 'learning_rate': 1.3447333333333334e-05, 'epoch': 0.98}


 33%|███▎      | 30000/90000 [54:43<1:39:31, 10.05it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6134, 'learning_rate': 1.3336222222222224e-05, 'epoch': 1.0}


                                                       
 33%|███▎      | 30000/90000 [55:12<1:39:31, 10.05it/s]Saving model checkpoint to ./model\checkpoint-30000
Configuration saved in ./model\checkpoint-30000\config.json


{'eval_loss': 1.6099849939346313, 'eval_accuracy': 0.203, 'eval_runtime': 28.8416, 'eval_samples_per_second': 208.032, 'eval_steps_per_second': 52.008, 'epoch': 1.0}


Model weights saved in ./model\checkpoint-30000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-30000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-30000\special_tokens_map.json
 34%|███▍      | 30501/90000 [56:35<1:39:58,  9.92it/s]  

{'loss': 1.6114, 'learning_rate': 1.3225111111111114e-05, 'epoch': 1.02}


 34%|███▍      | 31002/90000 [57:24<1:37:32, 10.08it/s]

{'loss': 1.613, 'learning_rate': 1.3114222222222223e-05, 'epoch': 1.03}


 35%|███▌      | 31502/90000 [58:12<1:37:41,  9.98it/s]

{'loss': 1.615, 'learning_rate': 1.3003111111111113e-05, 'epoch': 1.05}


 36%|███▌      | 32001/90000 [59:01<1:34:59, 10.18it/s]

{'loss': 1.6133, 'learning_rate': 1.2892e-05, 'epoch': 1.07}


 36%|███▌      | 32502/90000 [59:50<1:34:06, 10.18it/s]

{'loss': 1.6152, 'learning_rate': 1.278088888888889e-05, 'epoch': 1.08}


 37%|███▋      | 33000/90000 [1:00:39<1:33:06, 10.20it/s]

{'loss': 1.6124, 'learning_rate': 1.2669777777777779e-05, 'epoch': 1.1}


 37%|███▋      | 33501/90000 [1:01:27<1:34:13,  9.99it/s]

{'loss': 1.6141, 'learning_rate': 1.2558888888888889e-05, 'epoch': 1.12}


 38%|███▊      | 34000/90000 [1:02:16<1:31:15, 10.23it/s]

{'loss': 1.6139, 'learning_rate': 1.2447777777777779e-05, 'epoch': 1.13}


 38%|███▊      | 34501/90000 [1:03:05<1:31:40, 10.09it/s]

{'loss': 1.61, 'learning_rate': 1.2336666666666668e-05, 'epoch': 1.15}


 39%|███▉      | 35000/90000 [1:03:54<1:30:39, 10.11it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6117, 'learning_rate': 1.2225555555555556e-05, 'epoch': 1.17}


                                                         
 39%|███▉      | 35000/90000 [1:04:23<1:30:39, 10.11it/s]Saving model checkpoint to ./model\checkpoint-35000
Configuration saved in ./model\checkpoint-35000\config.json


{'eval_loss': 1.6098072528839111, 'eval_accuracy': 0.20083333333333334, 'eval_runtime': 29.5518, 'eval_samples_per_second': 203.034, 'eval_steps_per_second': 50.758, 'epoch': 1.17}


Model weights saved in ./model\checkpoint-35000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-35000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-35000\special_tokens_map.json
 39%|███▉      | 35501/90000 [1:05:47<1:29:39, 10.13it/s]  

{'loss': 1.6125, 'learning_rate': 1.2114444444444446e-05, 'epoch': 1.18}


 40%|████      | 36002/90000 [1:06:36<1:29:38, 10.04it/s]

{'loss': 1.6112, 'learning_rate': 1.2003333333333334e-05, 'epoch': 1.2}


 41%|████      | 36500/90000 [1:07:25<1:27:20, 10.21it/s]

{'loss': 1.6132, 'learning_rate': 1.1892444444444447e-05, 'epoch': 1.22}


 41%|████      | 37002/90000 [1:08:14<1:27:57, 10.04it/s]

{'loss': 1.6098, 'learning_rate': 1.1781333333333334e-05, 'epoch': 1.23}


 42%|████▏     | 37502/90000 [1:09:03<1:27:49,  9.96it/s]

{'loss': 1.6142, 'learning_rate': 1.1670222222222223e-05, 'epoch': 1.25}


 42%|████▏     | 38002/90000 [1:09:52<1:27:26,  9.91it/s]

{'loss': 1.6121, 'learning_rate': 1.1559111111111111e-05, 'epoch': 1.27}


 43%|████▎     | 38501/90000 [1:10:40<1:25:40, 10.02it/s]

{'loss': 1.6141, 'learning_rate': 1.1448222222222223e-05, 'epoch': 1.28}


 43%|████▎     | 39001/90000 [1:11:29<1:23:52, 10.13it/s]

{'loss': 1.6107, 'learning_rate': 1.1337111111111113e-05, 'epoch': 1.3}


 44%|████▍     | 39500/90000 [1:12:18<1:23:32, 10.08it/s]

{'loss': 1.6135, 'learning_rate': 1.1226e-05, 'epoch': 1.32}


 44%|████▍     | 40000/90000 [1:13:07<1:21:29, 10.23it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6143, 'learning_rate': 1.111488888888889e-05, 'epoch': 1.33}


                                                         
 44%|████▍     | 40000/90000 [1:13:36<1:21:29, 10.23it/s]Saving model checkpoint to ./model\checkpoint-40000
Configuration saved in ./model\checkpoint-40000\config.json


{'eval_loss': 1.6096400022506714, 'eval_accuracy': 0.203, 'eval_runtime': 29.1777, 'eval_samples_per_second': 205.637, 'eval_steps_per_second': 51.409, 'epoch': 1.33}


Model weights saved in ./model\checkpoint-40000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-40000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-40000\special_tokens_map.json
 45%|████▌     | 40500/90000 [1:14:59<1:21:01, 10.18it/s]  

{'loss': 1.612, 'learning_rate': 1.1003777777777778e-05, 'epoch': 1.35}


 46%|████▌     | 41001/90000 [1:15:48<1:21:50,  9.98it/s]

{'loss': 1.613, 'learning_rate': 1.089288888888889e-05, 'epoch': 1.37}


 46%|████▌     | 41501/90000 [1:16:37<1:19:39, 10.15it/s]

{'loss': 1.6126, 'learning_rate': 1.0781777777777778e-05, 'epoch': 1.38}


 47%|████▋     | 42002/90000 [1:17:26<1:19:59, 10.00it/s]

{'loss': 1.6124, 'learning_rate': 1.0670666666666668e-05, 'epoch': 1.4}


 47%|████▋     | 42500/90000 [1:18:15<1:18:24, 10.10it/s]

{'loss': 1.6125, 'learning_rate': 1.0559555555555556e-05, 'epoch': 1.42}


 48%|████▊     | 43001/90000 [1:19:04<1:17:34, 10.10it/s]

{'loss': 1.6147, 'learning_rate': 1.0448444444444445e-05, 'epoch': 1.43}


 48%|████▊     | 43501/90000 [1:19:53<1:15:37, 10.25it/s]

{'loss': 1.6104, 'learning_rate': 1.0337555555555557e-05, 'epoch': 1.45}


 49%|████▉     | 44002/90000 [1:20:41<1:15:50, 10.11it/s]

{'loss': 1.6128, 'learning_rate': 1.0226444444444445e-05, 'epoch': 1.47}


 49%|████▉     | 44502/90000 [1:21:31<1:14:46, 10.14it/s]

{'loss': 1.6101, 'learning_rate': 1.0115333333333335e-05, 'epoch': 1.48}


 50%|█████     | 45000/90000 [1:22:19<1:13:43, 10.17it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6116, 'learning_rate': 1.0004222222222224e-05, 'epoch': 1.5}


                                                         
 50%|█████     | 45000/90000 [1:22:48<1:13:43, 10.17it/s]Saving model checkpoint to ./model\checkpoint-45000
Configuration saved in ./model\checkpoint-45000\config.json


{'eval_loss': 1.6099905967712402, 'eval_accuracy': 0.20083333333333334, 'eval_runtime': 28.9186, 'eval_samples_per_second': 207.479, 'eval_steps_per_second': 51.87, 'epoch': 1.5}


Model weights saved in ./model\checkpoint-45000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-45000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-45000\special_tokens_map.json
 51%|█████     | 45500/90000 [1:24:15<1:12:49, 10.18it/s]  

{'loss': 1.6128, 'learning_rate': 9.893111111111112e-06, 'epoch': 1.52}


 51%|█████     | 46001/90000 [1:25:04<1:14:36,  9.83it/s]

{'loss': 1.6109, 'learning_rate': 9.782e-06, 'epoch': 1.53}


 52%|█████▏    | 46501/90000 [1:25:53<1:11:59, 10.07it/s]

{'loss': 1.6131, 'learning_rate': 9.67088888888889e-06, 'epoch': 1.55}


 52%|█████▏    | 47002/90000 [1:26:42<1:10:09, 10.22it/s]

{'loss': 1.6105, 'learning_rate': 9.559777777777778e-06, 'epoch': 1.57}


 53%|█████▎    | 47502/90000 [1:27:31<1:10:15, 10.08it/s]

{'loss': 1.6143, 'learning_rate': 9.44888888888889e-06, 'epoch': 1.58}


 53%|█████▎    | 48001/90000 [1:28:20<1:09:38, 10.05it/s]

{'loss': 1.6114, 'learning_rate': 9.33777777777778e-06, 'epoch': 1.6}


 54%|█████▍    | 48502/90000 [1:29:09<1:08:48, 10.05it/s]

{'loss': 1.6139, 'learning_rate': 9.226666666666668e-06, 'epoch': 1.62}


 54%|█████▍    | 49002/90000 [1:29:58<1:08:22,  9.99it/s]

{'loss': 1.6121, 'learning_rate': 9.115555555555556e-06, 'epoch': 1.63}


 55%|█████▌    | 49501/90000 [1:30:47<1:09:01,  9.78it/s]

{'loss': 1.6122, 'learning_rate': 9.004666666666667e-06, 'epoch': 1.65}


 56%|█████▌    | 50000/90000 [1:31:35<1:05:27, 10.18it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6116, 'learning_rate': 8.893555555555557e-06, 'epoch': 1.67}


                                                         
 56%|█████▌    | 50000/90000 [1:32:05<1:05:27, 10.18it/s]Saving model checkpoint to ./model\checkpoint-50000
Configuration saved in ./model\checkpoint-50000\config.json


{'eval_loss': 1.6105374097824097, 'eval_accuracy': 0.20016666666666666, 'eval_runtime': 29.0796, 'eval_samples_per_second': 206.33, 'eval_steps_per_second': 51.583, 'epoch': 1.67}


Model weights saved in ./model\checkpoint-50000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-50000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-50000\special_tokens_map.json
 56%|█████▌    | 50501/90000 [1:33:28<1:05:33, 10.04it/s]  

{'loss': 1.6104, 'learning_rate': 8.782444444444446e-06, 'epoch': 1.68}


 57%|█████▋    | 51002/90000 [1:34:17<1:05:05,  9.99it/s]

{'loss': 1.6152, 'learning_rate': 8.671333333333335e-06, 'epoch': 1.7}


 57%|█████▋    | 51500/90000 [1:35:06<1:02:55, 10.20it/s]

{'loss': 1.6108, 'learning_rate': 8.560222222222223e-06, 'epoch': 1.72}


 58%|█████▊    | 52000/90000 [1:35:54<1:02:47, 10.09it/s]

{'loss': 1.6116, 'learning_rate': 8.44911111111111e-06, 'epoch': 1.73}


 58%|█████▊    | 52502/90000 [1:36:43<1:01:23, 10.18it/s]

{'loss': 1.6113, 'learning_rate': 8.338e-06, 'epoch': 1.75}


 59%|█████▉    | 53002/90000 [1:37:32<1:01:47,  9.98it/s]

{'loss': 1.613, 'learning_rate': 8.227111111111112e-06, 'epoch': 1.77}


 59%|█████▉    | 53500/90000 [1:38:21<59:39, 10.20it/s]  

{'loss': 1.6133, 'learning_rate': 8.116e-06, 'epoch': 1.78}


 60%|██████    | 54002/90000 [1:39:10<1:00:05,  9.98it/s]

{'loss': 1.6107, 'learning_rate': 8.00488888888889e-06, 'epoch': 1.8}


 61%|██████    | 54500/90000 [1:39:59<58:00, 10.20it/s]  

{'loss': 1.6121, 'learning_rate': 7.893777777777778e-06, 'epoch': 1.82}


 61%|██████    | 55000/90000 [1:40:48<57:28, 10.15it/s]  The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6119, 'learning_rate': 7.782666666666667e-06, 'epoch': 1.83}


                                                       
 61%|██████    | 55000/90000 [1:41:21<57:28, 10.15it/s]Saving model checkpoint to ./model\checkpoint-55000
Configuration saved in ./model\checkpoint-55000\config.json


{'eval_loss': 1.6107196807861328, 'eval_accuracy': 0.19516666666666665, 'eval_runtime': 33.6267, 'eval_samples_per_second': 178.43, 'eval_steps_per_second': 44.607, 'epoch': 1.83}


Model weights saved in ./model\checkpoint-55000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-55000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-55000\special_tokens_map.json
 62%|██████▏   | 55501/90000 [1:42:44<57:29, 10.00it/s]    

{'loss': 1.6122, 'learning_rate': 7.671555555555557e-06, 'epoch': 1.85}


 62%|██████▏   | 56001/90000 [1:43:33<56:34, 10.02it/s]

{'loss': 1.6118, 'learning_rate': 7.5604444444444445e-06, 'epoch': 1.87}


 63%|██████▎   | 56502/90000 [1:44:22<55:31, 10.05it/s]

{'loss': 1.6107, 'learning_rate': 7.449333333333334e-06, 'epoch': 1.88}


 63%|██████▎   | 57001/90000 [1:45:10<54:07, 10.16it/s]  

{'loss': 1.6119, 'learning_rate': 7.338444444444445e-06, 'epoch': 1.9}


 64%|██████▍   | 57501/90000 [1:45:59<53:44, 10.08it/s]  

{'loss': 1.6106, 'learning_rate': 7.227333333333334e-06, 'epoch': 1.92}


 64%|██████▍   | 58000/90000 [1:46:48<52:53, 10.08it/s]

{'loss': 1.6092, 'learning_rate': 7.116222222222223e-06, 'epoch': 1.93}


 65%|██████▌   | 58501/90000 [1:47:37<52:26, 10.01it/s]

{'loss': 1.6108, 'learning_rate': 7.0051111111111115e-06, 'epoch': 1.95}


 66%|██████▌   | 59000/90000 [1:48:26<50:43, 10.18it/s]

{'loss': 1.6111, 'learning_rate': 6.894222222222224e-06, 'epoch': 1.97}


 66%|██████▌   | 59502/90000 [1:49:15<50:28, 10.07it/s]

{'loss': 1.6106, 'learning_rate': 6.783111111111112e-06, 'epoch': 1.98}


 67%|██████▋   | 60000/90000 [1:50:03<50:04,  9.99it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6115, 'learning_rate': 6.672222222222223e-06, 'epoch': 2.0}


                                                       
 67%|██████▋   | 60000/90000 [1:50:33<50:04,  9.99it/s]Saving model checkpoint to ./model\checkpoint-60000
Configuration saved in ./model\checkpoint-60000\config.json


{'eval_loss': 1.6111441850662231, 'eval_accuracy': 0.20016666666666666, 'eval_runtime': 29.1039, 'eval_samples_per_second': 206.158, 'eval_steps_per_second': 51.54, 'epoch': 2.0}


Model weights saved in ./model\checkpoint-60000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-60000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-60000\special_tokens_map.json
 67%|██████▋   | 60502/90000 [1:51:56<49:21,  9.96it/s]   

{'loss': 1.6101, 'learning_rate': 6.561111111111111e-06, 'epoch': 2.02}


 68%|██████▊   | 61002/90000 [1:52:45<47:57, 10.08it/s]

{'loss': 1.612, 'learning_rate': 6.450000000000001e-06, 'epoch': 2.03}


 68%|██████▊   | 61501/90000 [1:53:34<47:23, 10.02it/s]

{'loss': 1.6123, 'learning_rate': 6.338888888888889e-06, 'epoch': 2.05}


 69%|██████▉   | 62001/90000 [1:54:23<46:25, 10.05it/s]

{'loss': 1.6132, 'learning_rate': 6.227777777777778e-06, 'epoch': 2.07}


 69%|██████▉   | 62502/90000 [1:55:11<45:13, 10.14it/s]

{'loss': 1.6096, 'learning_rate': 6.116888888888889e-06, 'epoch': 2.08}


 70%|███████   | 63001/90000 [1:56:00<45:25,  9.91it/s]

{'loss': 1.613, 'learning_rate': 6.005777777777778e-06, 'epoch': 2.1}


 71%|███████   | 63501/90000 [1:56:49<43:46, 10.09it/s]

{'loss': 1.612, 'learning_rate': 5.894666666666667e-06, 'epoch': 2.12}


 71%|███████   | 64002/90000 [1:57:38<42:41, 10.15it/s]

{'loss': 1.6104, 'learning_rate': 5.783555555555556e-06, 'epoch': 2.13}


 72%|███████▏  | 64501/90000 [1:58:27<42:47,  9.93it/s]

{'loss': 1.6082, 'learning_rate': 5.672444444444445e-06, 'epoch': 2.15}


 72%|███████▏  | 65000/90000 [1:59:16<40:56, 10.18it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6127, 'learning_rate': 5.561333333333334e-06, 'epoch': 2.17}


                                                       
 72%|███████▏  | 65000/90000 [1:59:45<40:56, 10.18it/s]Saving model checkpoint to ./model\checkpoint-65000
Configuration saved in ./model\checkpoint-65000\config.json


{'eval_loss': 1.6096092462539673, 'eval_accuracy': 0.20083333333333334, 'eval_runtime': 28.8811, 'eval_samples_per_second': 207.748, 'eval_steps_per_second': 51.937, 'epoch': 2.17}


Model weights saved in ./model\checkpoint-65000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-65000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-65000\special_tokens_map.json
 73%|███████▎  | 65502/90000 [2:01:08<39:59, 10.21it/s]   

{'loss': 1.6113, 'learning_rate': 5.450222222222222e-06, 'epoch': 2.18}


 73%|███████▎  | 66001/90000 [2:01:57<39:40, 10.08it/s]

{'loss': 1.6112, 'learning_rate': 5.339111111111112e-06, 'epoch': 2.2}


 74%|███████▍  | 66502/90000 [2:02:46<38:52, 10.07it/s]

{'loss': 1.6108, 'learning_rate': 5.228222222222222e-06, 'epoch': 2.22}


 74%|███████▍  | 67001/90000 [2:03:34<42:17,  9.06it/s]

{'loss': 1.6097, 'learning_rate': 5.117333333333334e-06, 'epoch': 2.23}


 75%|███████▌  | 67500/90000 [2:04:23<37:23, 10.03it/s]

{'loss': 1.612, 'learning_rate': 5.0062222222222224e-06, 'epoch': 2.25}


 76%|███████▌  | 68001/90000 [2:05:12<36:27, 10.06it/s]

{'loss': 1.6123, 'learning_rate': 4.895111111111111e-06, 'epoch': 2.27}


 76%|███████▌  | 68500/90000 [2:06:01<35:01, 10.23it/s]

{'loss': 1.6115, 'learning_rate': 4.784e-06, 'epoch': 2.28}


 77%|███████▋  | 69002/90000 [2:06:50<35:01,  9.99it/s]

{'loss': 1.6107, 'learning_rate': 4.672888888888889e-06, 'epoch': 2.3}


 77%|███████▋  | 69502/90000 [2:07:39<34:04, 10.03it/s]

{'loss': 1.6099, 'learning_rate': 4.561777777777778e-06, 'epoch': 2.32}


 78%|███████▊  | 70000/90000 [2:08:27<33:05, 10.08it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6109, 'learning_rate': 4.450666666666667e-06, 'epoch': 2.33}


                                                       
 78%|███████▊  | 70000/90000 [2:08:56<33:05, 10.08it/s]Saving model checkpoint to ./model\checkpoint-70000
Configuration saved in ./model\checkpoint-70000\config.json


{'eval_loss': 1.61015784740448, 'eval_accuracy': 0.20083333333333334, 'eval_runtime': 28.9092, 'eval_samples_per_second': 207.546, 'eval_steps_per_second': 51.887, 'epoch': 2.33}


Model weights saved in ./model\checkpoint-70000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-70000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-70000\special_tokens_map.json
 78%|███████▊  | 70501/90000 [2:10:23<33:03,  9.83it/s]   

{'loss': 1.6114, 'learning_rate': 4.339555555555556e-06, 'epoch': 2.35}


 79%|███████▉  | 71001/90000 [2:11:11<31:14, 10.14it/s]

{'loss': 1.6099, 'learning_rate': 4.228666666666667e-06, 'epoch': 2.37}


 79%|███████▉  | 71502/90000 [2:12:00<30:17, 10.18it/s]

{'loss': 1.6133, 'learning_rate': 4.117555555555555e-06, 'epoch': 2.38}


 80%|████████  | 72002/90000 [2:12:49<29:58, 10.01it/s]

{'loss': 1.613, 'learning_rate': 4.006444444444445e-06, 'epoch': 2.4}


 81%|████████  | 72501/90000 [2:13:38<29:57,  9.74it/s]

{'loss': 1.6116, 'learning_rate': 3.895333333333334e-06, 'epoch': 2.42}


 81%|████████  | 73000/90000 [2:14:29<27:41, 10.23it/s]

{'loss': 1.6114, 'learning_rate': 3.784444444444445e-06, 'epoch': 2.43}


 82%|████████▏ | 73501/90000 [2:15:18<27:09, 10.13it/s]

{'loss': 1.6106, 'learning_rate': 3.673333333333334e-06, 'epoch': 2.45}


 82%|████████▏ | 74001/90000 [2:16:07<26:32, 10.05it/s]

{'loss': 1.6117, 'learning_rate': 3.5622222222222224e-06, 'epoch': 2.47}


 83%|████████▎ | 74501/90000 [2:16:56<25:44, 10.03it/s]

{'loss': 1.6092, 'learning_rate': 3.4511111111111113e-06, 'epoch': 2.48}


 83%|████████▎ | 75000/90000 [2:17:45<24:43, 10.11it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.611, 'learning_rate': 3.3402222222222226e-06, 'epoch': 2.5}


                                                       
 83%|████████▎ | 75000/90000 [2:18:14<24:43, 10.11it/s]Saving model checkpoint to ./model\checkpoint-75000
Configuration saved in ./model\checkpoint-75000\config.json


{'eval_loss': 1.6096910238265991, 'eval_accuracy': 0.20016666666666666, 'eval_runtime': 28.9803, 'eval_samples_per_second': 207.037, 'eval_steps_per_second': 51.759, 'epoch': 2.5}


Model weights saved in ./model\checkpoint-75000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-75000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-75000\special_tokens_map.json
 84%|████████▍ | 75502/90000 [2:19:37<24:00, 10.06it/s]   

{'loss': 1.6116, 'learning_rate': 3.229111111111111e-06, 'epoch': 2.52}


 84%|████████▍ | 76001/90000 [2:20:26<23:10, 10.07it/s]

{'loss': 1.6117, 'learning_rate': 3.1180000000000005e-06, 'epoch': 2.53}


 85%|████████▌ | 76501/90000 [2:21:15<22:46,  9.88it/s]

{'loss': 1.6108, 'learning_rate': 3.0071111111111114e-06, 'epoch': 2.55}


 86%|████████▌ | 77001/90000 [2:22:03<21:38, 10.01it/s]

{'loss': 1.6092, 'learning_rate': 2.8960000000000003e-06, 'epoch': 2.57}


 86%|████████▌ | 77501/90000 [2:22:52<21:22,  9.74it/s]

{'loss': 1.6129, 'learning_rate': 2.7848888888888892e-06, 'epoch': 2.58}


 87%|████████▋ | 78001/90000 [2:23:41<19:52, 10.06it/s]

{'loss': 1.6098, 'learning_rate': 2.6737777777777777e-06, 'epoch': 2.6}


 87%|████████▋ | 78502/90000 [2:24:30<19:03, 10.06it/s]

{'loss': 1.6113, 'learning_rate': 2.5626666666666666e-06, 'epoch': 2.62}


 88%|████████▊ | 79001/90000 [2:25:19<18:16, 10.03it/s]

{'loss': 1.61, 'learning_rate': 2.4515555555555556e-06, 'epoch': 2.63}


 88%|████████▊ | 79502/90000 [2:26:08<17:45,  9.85it/s]

{'loss': 1.6121, 'learning_rate': 2.3404444444444445e-06, 'epoch': 2.65}


 89%|████████▉ | 80000/90000 [2:26:57<16:26, 10.14it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6094, 'learning_rate': 2.229333333333334e-06, 'epoch': 2.67}


                                                       
 89%|████████▉ | 80000/90000 [2:27:26<16:26, 10.14it/s]Saving model checkpoint to ./model\checkpoint-80000
Configuration saved in ./model\checkpoint-80000\config.json


{'eval_loss': 1.6094759702682495, 'eval_accuracy': 0.20083333333333334, 'eval_runtime': 28.9222, 'eval_samples_per_second': 207.453, 'eval_steps_per_second': 51.863, 'epoch': 2.67}


Model weights saved in ./model\checkpoint-80000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-80000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-80000\special_tokens_map.json
 89%|████████▉ | 80501/90000 [2:28:51<15:35, 10.16it/s]   

{'loss': 1.6081, 'learning_rate': 2.1184444444444447e-06, 'epoch': 2.68}


 90%|█████████ | 81002/90000 [2:29:40<15:02,  9.97it/s]

{'loss': 1.6103, 'learning_rate': 2.0073333333333337e-06, 'epoch': 2.7}


 91%|█████████ | 81502/90000 [2:30:29<14:07, 10.03it/s]

{'loss': 1.6091, 'learning_rate': 1.8964444444444446e-06, 'epoch': 2.72}


 91%|█████████ | 82002/90000 [2:31:18<13:05, 10.18it/s]

{'loss': 1.6105, 'learning_rate': 1.7853333333333333e-06, 'epoch': 2.73}


 92%|█████████▏| 82501/90000 [2:32:06<12:14, 10.20it/s]

{'loss': 1.6104, 'learning_rate': 1.6742222222222224e-06, 'epoch': 2.75}


 92%|█████████▏| 83000/90000 [2:32:55<11:23, 10.24it/s]

{'loss': 1.6094, 'learning_rate': 1.5631111111111113e-06, 'epoch': 2.77}


 93%|█████████▎| 83500/90000 [2:33:44<10:39, 10.17it/s]

{'loss': 1.6105, 'learning_rate': 1.452e-06, 'epoch': 2.78}


 93%|█████████▎| 84001/90000 [2:34:33<10:04,  9.92it/s]

{'loss': 1.6105, 'learning_rate': 1.3411111111111112e-06, 'epoch': 2.8}


 94%|█████████▍| 84502/90000 [2:35:22<09:16,  9.89it/s]

{'loss': 1.6111, 'learning_rate': 1.23e-06, 'epoch': 2.82}


 94%|█████████▍| 85000/90000 [2:36:11<08:13, 10.13it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6092, 'learning_rate': 1.118888888888889e-06, 'epoch': 2.83}


                                                       
 94%|█████████▍| 85000/90000 [2:36:40<08:13, 10.13it/s]Saving model checkpoint to ./model\checkpoint-85000
Configuration saved in ./model\checkpoint-85000\config.json


{'eval_loss': 1.6094149351119995, 'eval_accuracy': 0.20083333333333334, 'eval_runtime': 29.1646, 'eval_samples_per_second': 205.729, 'eval_steps_per_second': 51.432, 'epoch': 2.83}


Model weights saved in ./model\checkpoint-85000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-85000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-85000\special_tokens_map.json
 95%|█████████▌| 85502/90000 [2:38:03<07:23, 10.14it/s]   

{'loss': 1.6105, 'learning_rate': 1.0077777777777777e-06, 'epoch': 2.85}


 96%|█████████▌| 86002/90000 [2:38:52<06:36, 10.07it/s]

{'loss': 1.6123, 'learning_rate': 8.966666666666668e-07, 'epoch': 2.87}


 96%|█████████▌| 86500/90000 [2:39:41<05:49, 10.03it/s]

{'loss': 1.6121, 'learning_rate': 7.857777777777778e-07, 'epoch': 2.88}


 97%|█████████▋| 87000/90000 [2:40:29<05:00,  9.99it/s]

{'loss': 1.6091, 'learning_rate': 6.746666666666667e-07, 'epoch': 2.9}


 97%|█████████▋| 87501/90000 [2:41:18<04:10,  9.96it/s]

{'loss': 1.6113, 'learning_rate': 5.635555555555556e-07, 'epoch': 2.92}


 98%|█████████▊| 88000/90000 [2:42:07<03:15, 10.22it/s]

{'loss': 1.6103, 'learning_rate': 4.5244444444444445e-07, 'epoch': 2.93}


 98%|█████████▊| 88501/90000 [2:42:56<02:32,  9.84it/s]

{'loss': 1.6115, 'learning_rate': 3.415555555555556e-07, 'epoch': 2.95}


 99%|█████████▉| 89001/90000 [2:43:45<01:39, 10.01it/s]

{'loss': 1.611, 'learning_rate': 2.3044444444444445e-07, 'epoch': 2.97}


 99%|█████████▉| 89502/90000 [2:44:34<00:49,  9.99it/s]

{'loss': 1.6099, 'learning_rate': 1.1933333333333334e-07, 'epoch': 2.98}


100%|██████████| 90000/90000 [2:45:23<00:00, 10.13it/s]The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4


{'loss': 1.6116, 'learning_rate': 8.222222222222223e-09, 'epoch': 3.0}


                                                       
100%|██████████| 90000/90000 [2:45:52<00:00, 10.13it/s]Saving model checkpoint to ./model\checkpoint-90000
Configuration saved in ./model\checkpoint-90000\config.json


{'eval_loss': 1.6094287633895874, 'eval_accuracy': 0.20083333333333334, 'eval_runtime': 29.3324, 'eval_samples_per_second': 204.552, 'eval_steps_per_second': 51.138, 'epoch': 3.0}


Model weights saved in ./model\checkpoint-90000\pytorch_model.bin
tokenizer config file saved in ./model\checkpoint-90000\tokenizer_config.json
Special tokens file saved in ./model\checkpoint-90000\special_tokens_map.json


Training completed. Do not forget to share your model on huggingface.co/models =)


Loading best model from ./model\checkpoint-30000 (score: 0.203).
Could not locate the best model at ./model\checkpoint-30000\pytorch_model.bin, if you are running a distributed training on multiple nodes, you should activate `--save_on_each_node`.
100%|██████████| 90000/90000 [2:46:26<00:00,  9.01it/s]

{'train_runtime': 9986.9427, 'train_samples_per_second': 36.047, 'train_steps_per_second': 9.012, 'train_loss': 1.6123690273708768, 'epoch': 3.0}





TrainOutput(global_step=90000, training_loss=1.6123690273708768, metrics={'train_runtime': 9986.9427, 'train_samples_per_second': 36.047, 'train_steps_per_second': 9.012, 'train_loss': 1.6123690273708768, 'epoch': 3.0})

In [16]:
trainer.evaluate()

The following columns in the evaluation set  don't have a corresponding argument in `XLMRobertaForSequenceClassification.forward` and have been ignored: product_id, stars, reviewer_id, review_id, language, review_body, review_title, product_category.
***** Running Evaluation *****
  Num examples = 6000
  Batch size = 4
100%|██████████| 1500/1500 [00:29<00:00, 50.91it/s]


{'eval_loss': 1.6094287633895874,
 'eval_accuracy': 0.20083333333333334,
 'eval_runtime': 29.7216,
 'eval_samples_per_second': 201.874,
 'eval_steps_per_second': 50.468,
 'epoch': 3.0}

In [9]:
args = TrainingArguments(
    output_dir='./test_results',
    seed=8855,
    evaluation_strategy='steps',
    learning_rate=2e-5,
    per_device_train_batch_size=BATCH_SIZE,
    per_device_eval_batch_size=BATCH_SIZE,
    num_train_epochs=1,
    weight_decay=0.01,
    load_best_model_at_end=True,
    metric_for_best_model=METRIC_NAME,
    eval_steps=5000,
    save_steps=5000,
    fp16=True,
)

In [10]:
def model_init():
    return AutoModelForSequenceClassification.from_pretrained(model_checkpoint, num_labels=NUM_LABELS)

In [23]:
trainer = Trainer(
    model_init=model_init,
    args=args,
    train_dataset=encoded_train_dataset,
    eval_dataset=encoded_val_dataset,
    data_collator=SmartCollator(pad_token_id=tokenizer.pad_token_id),
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)

loading configuration file ./model/checkpoint-90000\config.json
Model config XLMRobertaConfig {
  "_name_or_path": "./model/checkpoint-90000",
  "architectures": [
    "XLMRobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_token_id": 1,
  "position_embedding_type": "absolute",
  "problem_type": "single_label_classification",
  "torch_dtype": "float32",
  "t

In [24]:
best_run = trainer.hyperparameter_search(n_trials=5, direction="maximize")

[32m[I 2022-01-31 16:42:33,524][0m A new study created in memory with name: no-name-da268278-32a6-4f68-834b-c3e3fca1b29c[0m
Trial:
loading configuration file ./model/checkpoint-90000\config.json
Model config XLMRobertaConfig {
  "_name_or_path": "./model/checkpoint-90000",
  "architectures": [
    "XLMRobertaForSequenceClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4"
  },
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4
  },
  "layer_norm_eps": 1e-05,
  "max_position_embeddings": 514,
  "model_type": "xlm-roberta",
  "num_attention_heads": 12,
  "num_hidden_layers": 12,
  "output_past": true,
  "pad_to

RuntimeError: CUDA out of memory. Tried to allocate 38.00 MiB (GPU 0; 10.00 GiB total capacity; 8.26 GiB already allocated; 0 bytes free; 8.47 GiB reserved in total by PyTorch)

In [None]:
best_run

In [None]:
for n, v in best_run.hyperparameters.items():
    setattr(trainer.args, n, v)

trainer.train()