# Imports + GPU Setup

In [17]:
import os

# Process-wide switches, written to the environment ahead of the heavy imports
# below so the libraries can pick them up when they initialise.
# NOTE(review): the GPU index "2" is hard-coded; on a host with fewer visible
# devices no GPU will be selected -- confirm this matches the target machine.
os.environ.update({
    "WANDB_DISABLED": "true",
    "CUDA_VISIBLE_DEVICES": "2",
    "TOKENIZERS_PARALLELISM": "false",
})


import pandas as pd
import numpy as np
import torch
import logging
from sklearn.metrics import f1_score
from tqdm import tqdm
from torch.optim import lr_scheduler
import torch.optim as optim

from simpletransformers.classification import ClassificationModel, ClassificationArgs

from preprocessing import load_data, preprocess_data

# Only surface log records at ERROR level or above.
logging.basicConfig(level=logging.ERROR)

# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
print(f'device: {device}')



device: cuda


In [18]:
# Quick sanity check of what PyTorch can see: availability flag, number of
# visible devices, and the name of device 0 (or a placeholder without a GPU).
print(torch.cuda.is_available())
print(torch.cuda.device_count())
if torch.cuda.is_available():
    print(torch.cuda.get_device_name(0))
else:
    print("No GPU detected")

True
1
Tesla V100S-PCIE-32GB


# Data Setup

Retrieves the data and applies the specified split, organising it into **train_df**, **dev_df** and **test_df**.

In [19]:
train_df, dev_df, test_df = load_data()

# Downsample the negative class: keep every positive (label == 1) row and only
# the first 4 * npos negative (label == 0) rows, in their existing order.
# NOTE(review): the augmented training set built further down concatenates
# `train_df`, not `balanced_train_df` -- confirm whether this subset is meant
# to be used.
is_positive = train_df.label == 1
pcldf = train_df[is_positive]
npos = len(pcldf)
negative_subset = train_df[train_df.label == 0][:4 * npos]
balanced_train_df = pd.concat([pcldf, negative_subset])[['text', 'community', 'label', 'country']]

In [20]:
# Read the tab-separated generated examples and keep only the first 1000 rows.
gpt_train_data = pd.read_csv('data/generated_pcl4_data.txt', sep='\t').iloc[:1000]


In [21]:
# Training pool: the full training split followed by the generated examples.
# The original row indices of both frames are kept as-is (no re-indexing).
augmented_data = pd.concat(objs=[train_df, gpt_train_data], axis=0)

# Dataset

In [22]:
# Every split goes through the same preprocessing switches; the training call
# additionally passes augment_data=False explicitly.
shared_preprocess_kwargs = dict(clean_data=False, add_country=False, add_community=False)

processed_train_df = preprocess_data(augmented_data, augment_data=False, **shared_preprocess_kwargs)
processed_dev_df = preprocess_data(dev_df, **shared_preprocess_kwargs)
processed_test_df = preprocess_data(test_df, **shared_preprocess_kwargs)

In [23]:
# Show the processed training frame as the cell output for a visual check.
processed_train_df

Unnamed: 0,par_id,community,country,text,label,art_id,orig_label
0,4341,poor-families,gb,"The scheme saw an estimated 150,000 children f...",1,,
1,4136,homeless,za,Durban 's homeless communities reconciliation ...,1,,
2,10352,poor-families,lk,The next immediate problem that cropped up was...,1,,
3,8279,vulnerable,nz,Far more important than the implications for t...,1,,
4,1164,poor-families,gh,To strengthen child-sensitive social protectio...,1,,
...,...,...,...,...,...,...,...
995,gen_37,vulnerable,unknown,"""Naturally, there's an endless ocean of work s...",1,gen,4.0
996,gen_38,poor-families,unknown,"""Isn't it adorable how many people think finan...",1,gen,4.0
997,gen_39,women,unknown,"""Philanthropist Jessica Thompson has a brillia...",1,gen,4.0
998,gen_40,hopeless,unknown,"""We must take the time to gently educate those...",1,gen,4.0


# TRAINING

In [24]:
def set_seed(i):
    """Seed the random number generators used in this notebook.

    Seeds Python's built-in ``random`` module, NumPy's global generator and
    PyTorch, so that repeated runs with the same seed draw the same random
    numbers from all three.

    Args:
        i: Integer seed value.
    """
    # Local import: `random` is not part of the notebook's import cell.
    import random

    random.seed(i)
    np.random.seed(i)
    torch.manual_seed(i)
    # Explicitly seed the CUDA generators as well; this call is a no-op when
    # CUDA is unavailable.
    torch.cuda.manual_seed_all(i)

In [25]:
# Per-class loss weights: inverse square root of each class's frequency in the
# training labels, so the rarer class receives the larger weight.
labels = processed_train_df['label'].values
class_counts = np.bincount(labels)
inverse_counts = np.reciprocal(class_counts.astype(np.float64))
class_weights = np.sqrt(inverse_counts)

In [26]:
# Dev-set F1 of each seeded run, in seed order.
scores = []

# Keeps a handle on the most recent model so it can be released before the
# next one is built (and so the last model remains available after the loop).
model = None

# Train one RoBERTa-large classifier per seed and score each on the dev split.
for i in range(5):
    # Release the previous run's model first; otherwise two large models are
    # resident at the same time while the new one is being constructed.
    model = None
    torch.cuda.empty_cache()

    set_seed(i)
    output_dir = f"model-large-{i}"

    task1_model_args = ClassificationArgs(
        num_train_epochs=5,
        no_save=False,    # Allows saving
        no_cache=True,
        overwrite_output_dir=True,
        train_batch_size=16,
        learning_rate=1e-5,
        output_dir=output_dir,  # same per-seed directory that train_model() is given below
        save_model_every_epoch=False,
        save_steps=-1,          # Disable intermediate saves
        use_multiprocessing=False,
        use_multiprocessing_for_evaluation=False,
        process_count=1,
    )

    # Create the model: RoBERTa-large with a 2-way classification head and
    # class-weighted loss.
    model = ClassificationModel(
        "roberta",
        "roberta-large",
        args=task1_model_args,
        num_labels=2,  # For binary classification
        use_cuda=True,
        ignore_mismatched_sizes=True,
        weight=class_weights.tolist()
    )

    # NOTE: simpletransformers builds its own optimizer and LR scheduler from
    # the model args (`optimizer`, `scheduler`, `weight_decay`, ...). Passing a
    # hand-built optimizer via `train_model(optimizers=...)` has no effect:
    # unknown keyword arguments are treated as extra evaluation metrics. To
    # change weight decay or the schedule, set them in ClassificationArgs.
    model.train_model(
        processed_train_df[["text", "label"]],
        output_dir=output_dir,
    )
    model.model.save_pretrained(output_dir)
    model.tokenizer.save_pretrained(output_dir)
    model.config.save_pretrained(output_dir+'/')

    # Predict on the dev split
    preds_task1, _ = model.predict(processed_dev_df["text"].tolist())

    # Compute F1 score on the dev split
    f1 = f1_score(processed_dev_df["label"], preds_task1)
    scores.append(f1)
    print("F1 score:", f1)

# Per-seed scores followed by their mean and standard deviation.
print(scores)
print(np.mean(scores))
print(np.std(scores))


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

  with amp.autocast():


Running Epoch 2 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 5 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  with amp.autocast():


F1 score: 0.6137566137566137


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

  with amp.autocast():


Running Epoch 2 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 5 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  with amp.autocast():


F1 score: 0.6086956521739131


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

  with amp.autocast():


Running Epoch 2 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 5 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  with amp.autocast():


F1 score: 0.5721784776902887


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

  with amp.autocast():


Running Epoch 2 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 5 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  with amp.autocast():


F1 score: 0.5964912280701754


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at roberta-large and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch:   0%|          | 0/5 [00:00<?, ?it/s]

  scaler = amp.GradScaler()


Running Epoch 1 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

  with amp.autocast():


Running Epoch 2 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 3 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 4 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

Running Epoch 5 of 5:   0%|          | 0/586 [00:00<?, ?it/s]

  0%|          | 0/21 [00:00<?, ?it/s]

  with amp.autocast():


F1 score: 0.6318537859007833
[0.6137566137566137, 0.6086956521739131, 0.5721784776902887, 0.5964912280701754, 0.6318537859007833]
0.6045951515183547
0.01980047454832014
