# Main imports and code

In [1]:
# check which gpu we're using
!nvidia-smi

Fri Feb 21 13:33:48 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   43C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [2]:
!pip install simpletransformers
!pip install tensorboardx
!pip install transformers

Collecting simpletransformers
  Downloading simpletransformers-0.70.1-py3-none-any.whl.metadata (42 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/42.4 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.4/42.4 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
Collecting datasets (from simpletransformers)
  Downloading datasets-3.3.2-py3-none-any.whl.metadata (19 kB)
Collecting seqeval (from simpletransformers)
  Downloading seqeval-1.2.2.tar.gz (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.6/43.6 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting tensorboardx (from simpletransformers)
  Downloading tensorboardX-2.6.2.2-py2.py3-none-any.whl.metadata (5.8 kB)
Collecting streamlit (from simpletransformers)
  Downloading streamlit-1.42.2-py2.py3-none-any.whl.metadata (8.9 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets-

In [3]:
from simpletransformers.classification import ClassificationModel, ClassificationArgs, MultiLabelClassificationModel, MultiLabelClassificationArgs
from urllib import request
import pandas as pd
import logging
import torch
from collections import Counter
from ast import literal_eval
from transformers import pipeline

In [4]:
# prepare logger
logging.basicConfig(level=logging.INFO)

transformers_logger = logging.getLogger("transformers")
transformers_logger.setLevel(logging.WARNING)

# check gpu
cuda_available = torch.cuda.is_available()

print('Cuda available? ',cuda_available)

Cuda available?  True


In [5]:
if cuda_available:
  import tensorflow as tf
  # Get the GPU device name.
  device_name = tf.test.gpu_device_name()
  # The device name should look like the following:
  if device_name == '/device:GPU:0':
      print('Found GPU at: {}'.format(device_name))
  else:
      raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


# Fetch Don't Patronize Me! data manager module

In [6]:
module_url = f"https://raw.githubusercontent.com/Perez-AlmendrosC/dontpatronizeme/master/semeval-2022/dont_patronize_me.py"
module_name = module_url.split('/')[-1]
print(f'Fetching {module_url}')
#with open("file_1.txt") as f1, open("file_2.txt") as f2
with request.urlopen(module_url) as f, open(module_name,'w') as outf:
  a = f.read()
  outf.write(a.decode('utf-8'))

Fetching https://raw.githubusercontent.com/Perez-AlmendrosC/dontpatronizeme/master/semeval-2022/dont_patronize_me.py


In [7]:
# helper function to save predictions to an output file
def labels2file(p, outf_path):
	with open(outf_path,'w') as outf:
		for pi in p:
			outf.write(','.join([str(k) for k in pi])+'\n')

In [8]:
from dont_patronize_me import DontPatronizeMe

In [9]:
dpm = DontPatronizeMe('.', '.')

In [15]:
# Import the files below (can be found in spec github)
# Should remove this step in final so can be run end to end w/o invervention

from google.colab import files
uploaded = files.upload()

'''
train_semeval_parids-labels.csv
dev_semeval_parids-labels.csv
dontpatronizeme_pcl.tsv
dontpatronizeme_unlabeled_pcl.tv
'''

Saving dontpatronizeme_pcl.tsv to dontpatronizeme_pcl.tsv


'\ntrain_semeval_parids-labels.csv\ndev_semeval_parids-labels.csv\ndontpatronizeme_pcl.tsv\ndontpatronizeme_unlabeled_pcl.tv\n'

In [16]:
dpm.load_task1()
dpm.load_task2(return_one_hot=True)

Map of label to numerical label:
{'Unbalanced_power_relations': 0, 'Shallow_solution': 1, 'Presupposition': 2, 'Authority_voice': 3, 'Metaphors': 4, 'Compassion': 5, 'The_poorer_the_merrier': 6}


# Load paragraph IDs

In [17]:
trids = pd.read_csv('train_semeval_parids-labels.csv')
teids = pd.read_csv('dev_semeval_parids-labels.csv')

In [21]:
trids.par_id = trids.par_id.astype(str)
teids.par_id = teids.par_id.astype(str)

In [22]:
data=dpm.train_task1_df

In [None]:
data



# Rebuild training set (Task 1)

In [23]:
rows = [] # will contain par_id, label and text
for idx in range(len(trids)):
  parid = trids.par_id[idx]
  #print(parid)
  # select row from original dataset to retrieve `text` and binary label
  keyword = data.loc[data.par_id == parid].keyword.values[0]
  text = data.loc[data.par_id == parid].text.values[0]
  label = data.loc[data.par_id == parid].label.values[0]
  rows.append({
      'par_id':parid,
      'community':keyword,
      'text':text,
      'label':label
  })


In [20]:
import random

In [24]:
trdf1 = pd.DataFrame(rows)

In [None]:
trdf1

In [25]:
import nltk
from nltk.tokenize import word_tokenize
from nltk import pos_tag
from transformers import pipeline

In [26]:
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('averaged_perceptron_tagger_eng')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.
[nltk_data] Downloading package averaged_perceptron_tagger_eng to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger_eng.zip.


True

In [23]:
# Initialise bert-base-uncased to predict missing words
fill_mask = pipeline("fill-mask", model="bert-base-uncased")

# Replace max 5 words with synonyms
def synonym_replacement(text):
    num_replacements = 5

    # tokenise with nltk
    words = word_tokenize(text)

    # Map words to their 'POS tag'
    # [Noun -> NN, Adj -> JJ, Adverb -> RB, Verb -> VB, Pronoun -> PRP, Determiner -> DT]
    pos_tags = pos_tag(words)

    # Find candidate words for replacement (adjectives or adverbs only)
    candidates = [word for word, tag in pos_tags if tag in ["JJ", "RB"]]
    random.shuffle(candidates)

    # Select 5 words to replace
    selected_words = candidates[:num_replacements]

    # Initialise list for augmented text
    new_texts = []

    # For each word we will replace
    for word_to_replace in selected_words:

        # Get sentence with old word replaced by "[MASK]"
        masked_sentence = text.replace(word_to_replace, "[MASK]", 1)

        # Attemept to find synonym and replace
        try:
            # Get synonyms
            predictions = fill_mask(masked_sentence)
            synonyms = [pred["token_str"] for pred in predictions[:5]]

            # Choose random synonym and replace with it
            new_word = random.choice(synonyms)
            augmented_text = text.replace(word_to_replace, new_word, 1)

            # Add augmented text to new_texts list
            new_texts.append(augmented_text)
        except:
            continue
    # Return new generated texts (or old one if non generated)
    return new_texts if new_texts else [text]




The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

BertForMaskedLM has generative capabilities, as `prepare_inputs_for_generation` is explicitly overwritten. However, it doesn't directly inherit from `GenerationMixin`. From 👉v4.50👈 onwards, `PreTrainedModel` will NOT inherit from `GenerationMixin`, and this model will lose the ability to call `generate` and other related functions.
  - If you are the owner of the model architecture code, please modify your model class such that it inherits from `GenerationMixin` (after `PreTrainedModel`, otherwise you'll get an exception).
  - If you are not the owner of the model architecture class, please contact the model code owner to update it.
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertForMaskedLM: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another archite

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Device set to use cuda:0


In [24]:
# Initialise two translation pipelines.

# For English to French translation:
en_to_fr = pipeline("translation_en_to_fr", model="Helsinki-NLP/opus-mt-en-fr")
# For French to English translation:
fr_to_en = pipeline("translation_fr_to_en", model="Helsinki-NLP/opus-mt-fr-en")

def backtranslate(text):
    """
    Perform backtranslation: English → French → English.
    """
    try:
      # Translate from English to French
      fr_text = en_to_fr(text)[0]['translation_text']
      # Translate back from French to English
      en_text = fr_to_en(fr_text)[0]['translation_text']
    except Exception as e:
      return text
    return en_text

# Example: Backtranslate a single sentence
example_text = "I am listening to some good music right now."
print("Original:", example_text)
print("Backtranslated:", backtranslate(example_text))

config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/301M [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.34M [00:00<?, ?B/s]

Device set to use cuda:0


config.json:   0%|          | 0.00/1.42k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/301M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/301M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/42.0 [00:00<?, ?B/s]

source.spm:   0%|          | 0.00/802k [00:00<?, ?B/s]

target.spm:   0%|          | 0.00/778k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.34M [00:00<?, ?B/s]

Device set to use cuda:0


Original: I am listening to some good music right now.
Backtranslated: I'm listening to good music right now.


In [25]:
# Initialise augmented data as empty
augmented_data = []

# Get most recent par_id and add one for new text to use
curr_par_id = int(trdf1['par_id'].iloc[-1]) + 1

# For each paragraph in the training data
for _, row in trdf1.iterrows():
    # Generate 5 augmented (synonym replaced) new paragraphs
    augmented_texts = synonym_replacement(row["text"])

    #
    for aug_text in augmented_texts:
        augmented_data.append({"par_id": str(curr_par_id),
                               "community": row["community"],
                               "text": aug_text,
                               "label": row["label"]})
        curr_par_id += 1

print("Synonym replaced samples:", len(augmented_data))
# Convert to DataFrame
augmented_trdf1 = pd.DataFrame(augmented_data)

# Create augmented examples via backtranslation.
augmented_data_bt = []
curr_par_id = int(trdf1['par_id'].iloc[-1]) + 1
for idx, row in trdf1.iterrows():
    bt_text = backtranslate(row["text"])
    augmented_data_bt.append({
        "par_id": str(curr_par_id),
        "community": row["community"],
        "text": bt_text,
        "label": row["label"]
    })
    curr_par_id += 1

augmented_trdf1_bt = pd.DataFrame(augmented_data_bt)
print("Backtranslated samples:", len(augmented_trdf1_bt))

# Combine original and augmented datasets
trdf1 = pd.concat([trdf1, augmented_trdf1, augmented_trdf1_bt]).drop_duplicates().reset_index(drop=True)

# Save augmented dataset
trdf1.to_csv("augmented_pcl_dataset.csv", index=False)

You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Token indices sequence length is longer than the specified maximum sequence length for this model (568 > 512). Running this sequence through the model will result in indexing errors


Synonym replaced samples: 30914


Token indices sequence length is longer than the specified maximum sequence length for this model (607 > 512). Running this sequence through the model will result in indexing errors
Your input_length: 607 is bigger than 0.9 * max_length: 512. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)


Backtranslated samples: 8375


In [None]:
trdf1

In [27]:
# Once we have run the above once we can just load from the file
trdf1 = pd.read_csv('augmented_pcl_dataset.csv')

In [None]:
trdf1

# Rebuild test set (Task 1)

In [29]:
rows = [] # will contain par_id, label and text
for idx in range(len(teids)):
  parid = teids.par_id[idx]
  #print(parid)
  # select row from original dataset
  keyword = data.loc[data.par_id == parid].keyword.values[0]
  text = data.loc[data.par_id == parid].text.values[0]
  label = data.loc[data.par_id == parid].label.values[0]
  rows.append({
      'par_id':parid,
      'community':keyword,
      'text':text,
      'label':label
  })


In [28]:
len(rows)

8375

In [30]:
tedf1 = pd.DataFrame(rows)

In [31]:
tedf1 = tedf1.sample(frac=1, random_state=42).reset_index(drop=True)

# RoBERTa Baseline for Task 1

In [32]:
# downsample negative instances
pcldf = trdf1[trdf1.label==1]
npos = len(pcldf)

training_set1 = pd.concat([pcldf,trdf1[trdf1.label==0][:npos*2]])

In [None]:
training_set1

In [35]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import torch.nn.functional as F
import random
import numpy as np

In [38]:
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed_all(seed)

# We train an ensemble of models (using different random seeds) and aggregate predictions via majority vote.
ensemble_configs = [
    {"model_type": "roberta", "model_name": "roberta-base", "seed": 42, "learning_rate": 1e-5},
    {"model_type": "roberta", "model_name": "roberta-base", "seed": 43, "learning_rate": 2e-5},
    {"model_type": "roberta", "model_name": "roberta-base", "seed": 44, "learning_rate": 3e-5},
]
ensemble_predictions = []  # to store predictions from each model
ensemble_models = []       # to store trained models (if needed later)
ensemble_probs = []        # to store logits from each model
ensemble_weights = []      # to store model f1s

common_args = {
    "num_train_epochs": 1,
    "learning_rate": 2e-5,
    # "evaluate_during_training": True,
    # "evaluate_during_training_steps": 1000,
    "no_save": True,
    "no_cache": True,
    "overwrite_output_dir": True,
}

individual_f1_scores = []

for cfg in ensemble_configs:
    set_seed(cfg["seed"])
    print(f"\nTraining model: {cfg['model_name']} with seed {cfg['seed']} and lr {cfg['learning_rate']}")

    # Update training arguments with config-specific settings.
    model_args = ClassificationArgs(**common_args)
    model_args.manual_seed = cfg["seed"]
    model_args.learning_rate = cfg["learning_rate"]

    model = ClassificationModel(
        cfg["model_type"],
        cfg["model_name"],
        args=model_args,
        num_labels=2,
        use_cuda=cuda_available
    )

    # Train the model on the training set.
    model.train_model(training_set1[['text', 'label']])

    # Predict on the test set.
    preds, raw_outputs = model.predict(tedf1.text.tolist())
    ensemble_predictions.append(preds)

    # Evaluate individual model performance.
    f1 = f1_score(tedf1.label.tolist(), preds)
    individual_f1_scores.append(f1)
    print(f"Validation F1 for seed {cfg['seed']}: {f1:.4f}")
    ensemble_weights.append(f1)
    ensemble_models.append(model)

    # Average logits?
    logits = torch.tensor(raw_outputs)
    probs = F.softmax(logits, dim=1)
    # Extract probability for class 1.
    class1_probs = probs[:, 1].detach().cpu().numpy()
    ensemble_probs.append(class1_probs)


Training model: roberta-base with seed 42 and lr 1e-05


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/27 [00:00<?, ?it/s]

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 1:   0%|          | 0/1705 [00:00<?, ?it/s]

  with amp.autocast():


  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  with amp.autocast():


Validation F1 for seed 42: 0.5664

Training model: roberta-base with seed 43 and lr 2e-05


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/27 [00:00<?, ?it/s]

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 1:   0%|          | 0/1705 [00:00<?, ?it/s]

  with amp.autocast():


  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  with amp.autocast():


Validation F1 for seed 43: 0.5558

Training model: roberta-base with seed 44 and lr 3e-05


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


  0%|          | 0/27 [00:00<?, ?it/s]

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 1:   0%|          | 0/1705 [00:00<?, ?it/s]

  with amp.autocast():


  0%|          | 0/4 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  with amp.autocast():


Validation F1 for seed 44: 0.5767


In [39]:
# Normalize the ensemble weights so that they sum to 1.
ensemble_weights = np.array(ensemble_weights)
norm_weights = ensemble_weights / ensemble_weights.sum()
print("Normalized ensemble weights:", norm_weights)

# Combine the predicted probabilities from each ensemble member using weighted average.
ensemble_probs = np.array(ensemble_probs)  # shape: (num_models, num_test_samples)
weighted_avg_probs = np.average(ensemble_probs, axis=0, weights=norm_weights)

# Set final predictions based on a threshold (0.5)
ensemble_final_preds = [1 if prob >= 0.5 else 0 for prob in weighted_avg_probs]

# Evaluate ensemble performance on test set.
ensemble_f1 = f1_score(tedf1['label'].tolist(), ensemble_final_preds)
print("\nEnsemble Weighted Soft Voting F1 Score:", ensemble_f1)

Normalized ensemble weights: [0.33339128 0.32713246 0.33947627]

Ensemble Weighted Soft Voting F1 Score: 0.5857740585774058


In [40]:
# Ensemble the predictions via majority vote.
# For each test sample, we take the most common prediction among ensemble members.
ensemble_preds = []
for preds in zip(*ensemble_predictions):
    # Majority vote: if at least half of models predict 1, assign label 1.
    vote = sum(preds)
    # In our case with 3 models, vote>=2 means label 1.
    ensemble_preds.append(1 if vote >= 2 else 0)

# Evaluate ensemble performance.
ensemble_f1 = f1_score(tedf1.label.tolist(), ensemble_preds)
print("\nEnsemble F1 Score:", ensemble_f1)

# Save final predictions to file.
labels2file([[k] for k in ensemble_preds], 'task1_ensemble_predictions.txt')

# Also display individual model F1 scores.
print("Individual model F1 scores:", individual_f1_scores)


Ensemble F1 Score: 0.5833333333333334
Individual model F1 scores: [0.56640625, 0.5557729941291585, 0.5767441860465117]


In [30]:

task1_model_args = ClassificationArgs(num_train_epochs=1,
                                      no_save=True,
                                      no_cache=True,
                                      overwrite_output_dir=True)
task1_model = ClassificationModel("roberta",
                                  'roberta-base',
                                  args = task1_model_args,
                                  num_labels=2,
                                  use_cuda=cuda_available)
# train model
task1_model.train_model(training_set1[['text', 'label']])
# run predictions
preds_task1, _ = task1_model.predict(tedf1.text.tolist())

config.json:   0%|          | 0.00/481 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/499M [00:00<?, ?B/s]

Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-base and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


tokenizer_config.json:   0%|          | 0.00/25.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]


Starting epoch 1/5




  0%|          | 0/4 [00:00<?, ?it/s]

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 1:   0%|          | 0/298 [00:00<?, ?it/s]

  with amp.autocast():



Starting epoch 2/5


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset
Token indices sequence length is longer than the specified maximum sequence length for this model (607 > 512). Running this sequence through the model will result in indexing errors
Your input_length: 607 is bigger than 0.9 * max_length: 512. You might consider increasing your max_length manually, e.g. translator('...', max_length=400)


  0%|          | 0/4 [00:00<?, ?it/s]

Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 1:   0%|          | 0/298 [00:00<?, ?it/s]

RuntimeError: CUDA error: device-side assert triggered
CUDA kernel errors might be asynchronously reported at some other API call, so the stacktrace below might be incorrect.
For debugging consider passing CUDA_LAUNCH_BLOCKING=1
Compile with `TORCH_USE_CUDA_DSA` to enable device-side assertions.


In [None]:
Counter(preds_task1)

In [None]:
labels2file([[k] for k in preds_task1], 'task1.txt')

In [None]:
from sklearn.metrics import f1_score

y_true = tedf1.label.tolist()
y_pred = preds_task1

f1 = f1_score(y_true, y_pred)
print("F1 Score:", f1)