In [21]:
import torch
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification
from datasets import load_dataset
import torch
import copy
from tqdm.notebook import tqdm

In [11]:
# Load the 'bec' configuration of the BasqueGLUE benchmark. The first call
# downloads the data; later calls reuse the local Hugging Face cache.
datasets = load_dataset(path='orai-nlp/basqueGLUE', name='bec')

Downloading and preparing dataset basque_glue/bec to /Users/inigoparra/.cache/huggingface/datasets/orai-nlp___basque_glue/bec/0.0.0/c40f2648a778281a8d81d35041fee72edd29fbc52d8669f787cc744e3a500c90...


Downloading data:   0%|          | 0.00/273k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/59.5k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/59.8k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/6078 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1302 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1302 [00:00<?, ? examples/s]

Dataset basque_glue downloaded and prepared to /Users/inigoparra/.cache/huggingface/datasets/orai-nlp___basque_glue/bec/0.0.0/c40f2648a778281a8d81d35041fee72edd29fbc52d8669f787cc744e3a500c90. Subsequent calls will reuse this data.


  0%|          | 0/3 [00:00<?, ?it/s]

In [12]:
# Tokenizer belonging to the same checkpoint that is fine-tuned later on.
tokenizer = XLMRobertaTokenizer.from_pretrained(
    "xlm-roberta-base",
)

def tokenize_function(examples):
    """Tokenize the "text" column of a batch of examples.

    Every sequence is padded to the tokenizer's maximum length and truncated
    if it is longer, so all encoded rows share the same length.

    Args:
        examples: Mapping with a "text" entry holding the raw strings.

    Returns:
        Whatever the module-level ``tokenizer`` returns for that batch.
    """
    texts = examples["text"]
    encoding_options = {"padding": "max_length", "truncation": True}
    return tokenizer(texts, **encoding_options)

# Apply the tokenizer to every split, one batch of examples at a time.
tokenized_datasets = datasets.map(
    function=tokenize_function,
    batched=True,
)


Map:   0%|          | 0/6078 [00:00<?, ? examples/s]

Map:   0%|          | 0/1302 [00:00<?, ? examples/s]

Map:   0%|          | 0/1302 [00:00<?, ? examples/s]

In [15]:
# Sanity check: list the splits available after tokenization.
split_names = tokenized_datasets.keys()
print(split_names)

dict_keys(['train', 'test', 'validation'])


In [17]:
def create_meta_tasks(dataset, num_tasks, support_size, query_size, seed=None):
    """Sample few-shot tasks, each a disjoint (support, query) pair.

    The dataset is shuffled once per task and the support and query sets are
    taken from non-overlapping slices of that single shuffle. Two independent
    shuffles could place the same example in both sets, so the query loss
    would be measured on data the inner loop had just trained on.

    Args:
        dataset: Object supporting ``len()``, ``shuffle()`` and
            ``select(indices)``, e.g. a Hugging Face ``datasets.Dataset``.
        num_tasks: Number of (support, query) pairs to build.
        support_size: Number of examples in each support set.
        query_size: Number of examples in each query set.
        seed: Optional base seed. When given, task ``i`` is shuffled with
            ``seed + i`` so the sampled tasks are reproducible.

    Returns:
        A list of ``(support_set, query_set)`` tuples of length ``num_tasks``.

    Raises:
        ValueError: If a size is negative, or if the dataset has fewer than
            ``support_size + query_size`` examples.
    """
    if support_size < 0 or query_size < 0:
        raise ValueError("support_size and query_size must be non-negative")

    examples_per_task = support_size + query_size
    if examples_per_task > len(dataset):
        raise ValueError(
            f"Need {examples_per_task} examples per task for disjoint support/query "
            f"sets, but the dataset only has {len(dataset)}"
        )

    tasks = []
    for task_idx in range(num_tasks):
        # Only pass `seed` when requested, so the default call is unchanged.
        if seed is None:
            shuffled = dataset.shuffle()
        else:
            shuffled = dataset.shuffle(seed=seed + task_idx)

        # Consecutive slices of one permutation cannot share an example.
        support_set = shuffled.select(range(support_size))
        query_set = shuffled.select(range(support_size, examples_per_task))
        tasks.append((support_set, query_set))
    return tasks


In [18]:
"""For demonstration use the 'pos' task from BasqueGLUE"""
# NOTE: despite the `dataset_pos` name (and the "pos" wording in the string
# above), this is the training split of the 'bec' configuration loaded earlier
# in this notebook; the name is kept because later cells refer to it.
dataset_pos = tokenized_datasets['train']

# Build 100 few-shot tasks with 5 support and 5 query examples each.
tasks = create_meta_tasks(
    dataset_pos,
    num_tasks=100,
    support_size=5,
    query_size=5,
)

In [22]:
"""Initialize XLM-R model for POS tagging"""
# The model below carries a sequence-classification head: one label per input
# text, not per-token tagging.
#
# Prefer the class count declared in the dataset schema (present when the
# "label" column is a ClassLabel feature). Counting the distinct labels seen in
# the training split is only a fallback: it undercounts whenever a class has
# no training example, which would give the classifier head too few outputs.
label_feature = dataset_pos.features["label"]
num_labels = getattr(label_feature, "num_classes", None) or len(set(dataset_pos["label"]))
model = XLMRobertaForSequenceClassification.from_pretrained('xlm-roberta-base', num_labels=num_labels)

# Outer-loop (meta) optimizer: it updates the shared initialization in `model`.
meta_optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

def meta_train(model, meta_optimizer, tasks, epochs, inner_steps, inner_lr):
    """Meta-train ``model`` over ``tasks`` with first-order MAML.

    For each task a throw-away copy of the model (the "fast" model) is adapted
    on the support set for ``inner_steps`` Adam steps. The query loss of the
    adapted copy is then back-propagated, and the resulting gradients are
    copied onto the corresponding parameters of ``model`` before
    ``meta_optimizer.step()``.

    The copy step is essential. ``backward()`` on the query loss only fills
    ``.grad`` on the copy's parameters, so without it every parameter of
    ``model`` has ``grad is None`` and the meta-optimizer step is a no-op.
    Using the adapted copy's gradients directly is the first-order
    approximation of MAML: second-order terms through the inner loop are
    ignored.

    Args:
        model: Meta-model. Called as ``model(**inputs, labels=labels)`` and
            expected to return an object with a ``.loss`` attribute.
        meta_optimizer: Optimizer built over ``model.parameters()``.
        tasks: Iterable of ``(support_set, query_set)`` pairs. Each set must
            support ``["text"]`` and ``["label"]`` lookups.
        epochs: Number of passes over ``tasks``.
        inner_steps: Number of adaptation steps on each support set.
        inner_lr: Learning rate of the per-task Adam optimizer.

    Returns:
        None. ``model`` is updated in place through ``meta_optimizer``.
    """
    # Keep inputs on the same device as the model's weights.
    device = next(model.parameters()).device

    for epoch in tqdm(range(epochs), desc="Epochs"):
        task_progress = tqdm(tasks, desc="Tasks", leave=False)
        for support_set, query_set in task_progress:
            # Prepare data for the model.
            support_inputs, support_labels = _encode_task_split(support_set, device)
            query_inputs, query_labels = _encode_task_split(query_set, device)

            # Inner loop: adapt a private copy so `model` keeps the shared init.
            fast_model = copy.deepcopy(model)
            # Checkpoints loaded with `from_pretrained` come back in eval mode;
            # switch the copy to train mode so dropout is active while adapting.
            fast_model.train()
            fast_optimizer = torch.optim.Adam(fast_model.parameters(), lr=inner_lr)

            for _ in range(inner_steps):
                fast_optimizer.zero_grad()
                loss = fast_model(**support_inputs, labels=support_labels).loss
                loss.backward()
                fast_optimizer.step()
            # Start from clean gradients so only the query loss contributes below.
            fast_optimizer.zero_grad()

            # Meta-loss: query-set loss of the adapted copy.
            meta_loss = fast_model(**query_inputs, labels=query_labels).loss
            meta_loss.backward()

            # Transfer the copy's gradients to the meta-model. deepcopy keeps
            # parameter order, so zipping the two iterators pairs them up.
            meta_optimizer.zero_grad()
            for meta_param, fast_param in zip(model.parameters(), fast_model.parameters()):
                if fast_param.grad is not None:
                    meta_param.grad = fast_param.grad.detach().clone()
            meta_optimizer.step()
            meta_optimizer.zero_grad()


def _encode_task_split(split, device):
    """Tokenize a split's "text" column and tensorize its "label" column on ``device``."""
    encoded = tokenizer(split["text"], return_tensors="pt", padding=True, truncation=True)
    inputs = {key: value.to(device) for key, value in encoded.items()}
    labels = torch.tensor(split["label"], device=device)
    return inputs, labels


Some weights of the model checkpoint at xlm-roberta-base were not used when initializing XLMRobertaForSequenceClassification: ['lm_head.bias', 'lm_head.dense.weight', 'roberta.pooler.dense.weight', 'lm_head.layer_norm.bias', 'roberta.pooler.dense.bias', 'lm_head.layer_norm.weight', 'lm_head.dense.bias']
- This IS expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLMRobertaForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLMRobertaForSequenceClassification were not initialized from the model checkpoint at xlm-roberta-base and are newly initialized: ['classifier.dense.weight', 'classifier.out_

In [None]:
"""Meta train"""
# Run meta-training over the sampled tasks: 3 passes over the task list, with
# 3 adaptation steps per task.
# NOTE(review): inner_lr=0.01 is fed to an Adam optimizer inside meta_train,
# which looks high for fine-tuning a pretrained transformer; confirm it is intended.
meta_train(
    model,
    meta_optimizer,
    tasks,
    epochs=3,
    inner_steps=3,
    inner_lr=0.01,
)