In [None]:
!pip install transformers
!pip install datasets
!pip install sentencepiece
!pip install seqeval
!pip install learn2learn

In [2]:
import torch
import learn2learn as l2l
import os
from transformers import XLMRobertaTokenizer, XLMRobertaForSequenceClassification, get_linear_schedule_with_warmup
from datasets import load_dataset
import copy
from tqdm.notebook import tqdm
from sklearn.metrics import f1_score as sklearn_f1_score
from google.colab import drive
drive.mount('/content/drive')

  self._read_thread.setDaemon(True)


Mounted at /content/drive


In [3]:
datasets = load_dataset('orai-nlp/basqueGLUE', 'bec')

Downloading builder script:   0%|          | 0.00/28.3k [00:00<?, ?B/s]

Downloading readme:   0%|          | 0.00/26.6k [00:00<?, ?B/s]

  """\


Downloading data:   0%|          | 0.00/273k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/59.5k [00:00<?, ?B/s]

Downloading data:   0%|          | 0.00/59.8k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/6078 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/1302 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/1302 [00:00<?, ? examples/s]

In [4]:
tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-large")

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_datasets = datasets.map(tokenize_function, batched=True)


Downloading (…)tencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

Downloading (…)lve/main/config.json:   0%|          | 0.00/616 [00:00<?, ?B/s]

Map:   0%|          | 0/6078 [00:00<?, ? examples/s]

Map:   0%|          | 0/1302 [00:00<?, ? examples/s]

Map:   0%|          | 0/1302 [00:00<?, ? examples/s]

In [5]:
print(tokenized_datasets.keys())

dict_keys(['train', 'test', 'validation'])


In [6]:
def create_meta_tasks(dataset, num_tasks, support_size, query_size):
    tasks = []
    for _ in range(num_tasks):
        """Randomly sample data for support and query set"""
        support_set = dataset.shuffle().select(range(support_size))
        query_set = dataset.shuffle().select(range(query_size))
        tasks.append((support_set, query_set))
    return tasks


In [None]:
"""For demonstration use the 'pos' task from BasqueGLUE"""
dataset_pos = tokenized_datasets['train']
tasks = create_meta_tasks(dataset_pos,
                          num_tasks=500,
                          support_size=10,
                          query_size=10)

In [None]:
"""Initialize XLM-R model for POS tagging"""
num_labels = len(set(dataset_pos["label"]))
model = XLMRobertaForSequenceClassification.from_pretrained('xlm-roberta-base', num_labels=num_labels)
meta_optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4)

CHECKPOINT_DIR = '/content/drive/MyDrive/meta_learn_2/checkpoints'
if not os.path.exists(CHECKPOINT_DIR):
    os.makedirs(CHECKPOINT_DIR)

def meta_train(model, meta_optimizer, tasks, epochs, inner_steps, inner_lr):
    for epoch in tqdm(range(epochs), desc="Epochs"):
        task_progress = tqdm(tasks, desc="Tasks", leave=False)
        for task in task_progress:
            support_set, query_set = task

            """Prepare data for the model"""
            support_inputs = tokenizer(support_set["text"], return_tensors="pt", padding=True, truncation=True)
            support_labels = torch.tensor(support_set["label"])

            query_inputs = tokenizer(query_set["text"], return_tensors="pt", padding=True, truncation=True)
            query_labels = torch.tensor(query_set["label"])

            """Inner loop"""
            fast_model = copy.deepcopy(model)
            fast_optimizer = torch.optim.AdamW(fast_model.parameters(), lr=inner_lr)

            for _ in range(inner_steps):
                outputs = fast_model(**support_inputs, labels=support_labels)
                loss = outputs.loss
                loss.backward()
                fast_optimizer.step()
                fast_optimizer.zero_grad()

            """Compute meta-loss"""
            outputs = fast_model(**query_inputs, labels=query_labels)
            meta_loss = outputs.loss
            meta_loss.backward()
            meta_optimizer.step()
            meta_optimizer.zero_grad()

        """Checkpoints"""
        checkpoint_path = os.path.join(CHECKPOINT_DIR, f'checkpoint_epoch_{epoch}.bin')
        torch.save(model.state_dict(), checkpoint_path)
        print(f"Model checkpoint saved at {checkpoint_path}")


In [None]:
"""Meta train"""
meta_train(model,
           meta_optimizer,
           tasks,
           epochs=5,
           inner_steps=3,
           inner_lr=1e-5)

In [None]:
model.save_pretrained('/content/drive/MyDrive/meta_learn_2')
tokenizer.save_pretrained('/content/drive/MyDrive/meta_learn_2')

In [None]:
from transformers import XLMRobertaForSequenceClassification, XLMRobertaTokenizer
model = XLMRobertaForSequenceClassification.from_pretrained("/content/drive/MyDrive/meta_learn_2")
tokenizer = XLMRobertaTokenizer.from_pretrained("xlm-roberta-base")


In [None]:
from sklearn.metrics import accuracy_score
from seqeval.metrics import precision_score, recall_score, f1_score

datasets_list = [
    ('bec', 'Sentiment Analysis'),
    ('coref', 'Coreference Resolution'),
    ('vaxx', 'Stance Detection'),
]

In [None]:
"""Evaluative functions"""
def evaluate_sequence_classification(test_data):
    test_inputs = tokenizer(test_data["text"], return_tensors="pt", padding=True, truncation=True)
    test_labels = torch.tensor(test_data["label"]).numpy()

    with torch.no_grad():
        outputs = model(**test_inputs)
        predictions = torch.argmax(outputs.logits, dim=-1).numpy()

    accuracy = accuracy_score(test_labels, predictions)
    return accuracy

def evaluate_token_classification(test_data):
    test_inputs = tokenizer(test_data["text"], return_tensors="pt", padding=True, truncation=True)
    test_labels = test_data["label"]

    with torch.no_grad():
        outputs = model(**test_inputs)
        predictions = torch.argmax(outputs.logits, dim=-1).tolist()

    pred_labels = [[tokenizer.convert_ids_to_tokens(label_id) for label_id in example] for example in predictions]
    f1 = f1_score(test_labels, pred_labels)
    return f1

def evaluate_stance_detection(test_data):
    test_inputs = tokenizer(test_data["text"], return_tensors="pt", padding=True, truncation=True)
    test_labels = torch.tensor(test_data["label"]).numpy()

    with torch.no_grad():
        outputs = model(**test_inputs)
        predictions = torch.argmax(outputs.logits, dim=-1).numpy()

    macro_f1 = sklearn_f1_score(test_labels, predictions, average='macro')
    return macro_f1

In [None]:
"""Main function fine-tune and evaluate"""
def evaluate_few_shot(dataset_name, task_name, shots):
    dataset = load_dataset('orai-nlp/basqueGLUE', dataset_name)

    """Select examples for fine-tuning"""
    fine_tune_data = dataset['train'].shuffle().select(range(shots))
    fine_tune_inputs = tokenizer(fine_tune_data["text"], return_tensors="pt", padding=True, truncation=True)
    fine_tune_labels = torch.tensor(fine_tune_data["label"])

    """Fine-tune the model"""
    optimizer = torch.optim.AdamW(model.parameters(), lr=1e-5)

    """Compute total training steps and setup the scheduler"""
    num_training_steps = 3 * len(fine_tune_data)
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=num_training_steps)


    for epoch in range(3):
        outputs = model(**fine_tune_inputs, labels=fine_tune_labels)
        loss = outputs.loss
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

    """Evaluate based on task type"""
    test_data = dataset['test']

    print(f"Dataset: {dataset_name}, Task: {task_name}")
    print("Sample Test Data Inputs:", test_data["text"][:5])
    print("Sample Test Data Labels:", test_data["label"][:5])

    if task_name in ["Coreference Resolution", "QA/NLI", "WSD", "Sentiment Analysis", "Topic Classification", "Intent Classification"]:
        return evaluate_sequence_classification(test_data)

    elif task_name in ["NERC_ID", "NERC_OC", "Slot Filling"]:
        return evaluate_token_classification(test_data)

    elif task_name == "Stance Detection":
        return evaluate_stance_detection(test_data)

In [None]:
results = {}

for dataset_name, task_name in datasets_list:
    print(f"Evaluating for {task_name}...")
    results[task_name] = {
        'one-shot': evaluate_few_shot(dataset_name, task_name, 1),
        'five-shot': evaluate_few_shot(dataset_name, task_name, 5),
        'ten-shot': evaluate_few_shot(dataset_name, task_name, 10)
    }

print(results)

Evaluating for Sentiment Analysis...
Dataset: bec, Task: Sentiment Analysis
Sample Test Data Inputs: ['"Emandako hitza bete egingo dut" Urkullu\\nBa galdeketa enegarrenez daramazue programan (ta zuen AHTa...)\\n#I25debatea #URL', '@urkulluLHK: “Euskadi está hoy mejor que hace cuatro años”. #debate25s #EraikiEuskadi #debate25s', '@JulenBollain @PodemosEuskadi_ zenioena betetzeko? #HemenEtaOrain', 'Galdera: zenbat aldiz moztu dio @XabierUsabiaga k @iurkullu ri gau osoan?', 'Juana Bengoetxeari egindako elkarrizketa sarean ere ikusgai duzue: #URL @PPVasco  #i25 #URL']
Sample Test Data Labels: [0, 1, 0, 1, 1]
Dataset: bec, Task: Sentiment Analysis
Sample Test Data Inputs: ['"Emandako hitza bete egingo dut" Urkullu\\nBa galdeketa enegarrenez daramazue programan (ta zuen AHTa...)\\n#I25debatea #URL', '@urkulluLHK: “Euskadi está hoy mejor que hace cuatro años”. #debate25s #EraikiEuskadi #debate25s', '@JulenBollain @PodemosEuskadi_ zenioena betetzeko? #HemenEtaOrain', 'Galdera: zenbat aldiz moz

In [None]:
print(results)

{'Sentiment Analysis': {'one-shot': 0.3663594470046083, 'five-shot': 0.3663594470046083, 'ten-shot': 0.3663594470046083}, 'Coreference Resolution': {'one-shot': 0.5025553662691652, 'five-shot': 0.5025553662691652, 'ten-shot': 0.5025553662691652}, 'Stance Detection': {'one-shot': 0.20134228187919465, 'five-shot': 0.20134228187919465, 'ten-shot': 0.20134228187919465}}
