In [2]:
import datasets
from pathlib import Path
from repsim.utils import convert_to_path_compatible

# Dataset identifier; it is also turned into the name of the export directory.
path = "sst2"
# Split that the evaluation cells read via `ds[split]`.
split = "validation"

# Make sure the export directory exists, creating missing parents as needed.
output_dir = Path("../experiments/datasets/nlp/robustness/") / convert_to_path_compatible(path)
output_dir.mkdir(parents=True, exist_ok=True)

# Write every split to "<split name>.csv", keeping only the sentence and label
# columns and using ";" as the field separator.
ds = datasets.load_dataset(path)
for split_name, split_data in ds.items():
    split_data.to_csv(output_dir / f"{split_name}.csv", columns=["sentence", "label"], sep=";")

Creating CSV from Arrow format:   0%|          | 0/68 [00:00<?, ?ba/s]

Creating CSV from Arrow format:   0%|          | 0/1 [00:00<?, ?ba/s]

Creating CSV from Arrow format:   0%|          | 0/2 [00:00<?, ?ba/s]

Performance of SST2-finetuned model on standard SST2 validation.

In [3]:
from evaluate import evaluator
import evaluate
from transformers import pipeline

# Text-classification pipeline around the fine-tuned checkpoint. The tokenizer is
# loaded by its hub name rather than from the checkpoint directory.
pipe = pipeline(
    "text-classification",
    model="/root/LLM-comparison/outputs/2024-01-31/13-12-49",
    tokenizer="google/multiberts-seed_0",
    device=0,
    # Request truncation explicitly: `max_length` on its own relies on the
    # tokenizer's implicit fallback (which logs a warning) to cut long inputs.
    truncation=True,
    max_length=128,
)

# Evaluate on the full split; for a quick check, subsample with
# ds[split].shuffle().select(range(1000)) instead.
data = ds[split]
metric = evaluate.load("accuracy")

task_evaluator = evaluator("text-classification")

# `label_mapping` translates the pipeline's label strings into the integer
# labels stored in the dataset's label column.
results = task_evaluator.compute(
    model_or_pipeline=pipe,
    data=data,
    metric=metric,
    label_mapping={"LABEL_0": 0, "LABEL_1": 1},
    input_column="sentence",
)

print(results)

2024-03-01 11:01:38.210890: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


{'accuracy': 0.9162844036697247, 'total_time_in_seconds': 21.625845240429044, 'samples_per_second': 40.32212338086167, 'latency_in_seconds': 0.024800281239024134}


Create augmented validation set of SST2.

`--transformations-per-example N` produces N augmented sentences per original sentence (e.g. `4` yields 4 augmented sentences per standard sentence). The command below uses `1`.

In [21]:
# Augment the exported validation CSV with TextAttack's `eda` recipe and write the
# result to validation_augmented.csv (--overwrite replaces an existing file).
# The settings are hard-coded in this cell: 0.8 for --pct-words-to-swap, one
# transformation per example and random seed 123.
# NOTE(review): --exclude-original presumably keeps the unmodified sentences out
# of the output file - confirm against the TextAttack CLI documentation.
!textattack augment --input-csv {output_dir / "validation.csv"}\
    --output-csv {output_dir / "validation_augmented.csv"}\
    --input-column sentence\
    --recipe eda\
    --pct-words-to-swap  0.8\
    --transformations-per-example 1\
    --random-seed 123\
    --exclude-original\
    --overwrite\
    --fast_augment

[34;1mtextattack[0m: Preparing to overwrite ../experiments/datasets/nlp/robustness/sst2/validation_augmented.csv.
[34;1mtextattack[0m: Read 872 rows from ../experiments/datasets/nlp/robustness/sst2/validation.csv. Found columns {'sentence', 'label'}.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
Augmenting rows: 100%|████████████████████████| 872/872 [02:59<00:00,  4.85it/s]
[34;1mtextattack[0m: Wrote 872 augmentations to ../experiments/datasets/nlp/robustness/sst2/validation_augmented.csv in 181.3689239025116s.


Performance of model trained on standard data on augmented data.


| Accuracy | Swapped | N_Transformations | Seed |
|----------|---------|-------------------|------|
|  0.9162  |    -    |      -            |  -   |
|  0.9037  |   0.1   |             1     | 123  |
|  0.8154  |   0.5   |             1     | 123  |
|  0.7569  |   0.8   |             1     | 123  |

Accuracy of 0.817 for the fine-tuned model on augmented data after 10 epochs, with `--pct-words-to-swap 0.8`.

In [22]:
# Load the augmented validation CSV; the "csv" loader exposes the single file
# under the "train" split.
augmented_csv = output_dir / "validation_augmented.csv"
data = datasets.load_dataset("csv", data_files=str(augmented_csv))["train"]

# Score the augmented sentences with the pipeline, metric and label mapping
# already used for the standard validation data.
results_augmented = task_evaluator.compute(
    data=data,
    input_column="sentence",
    label_mapping={"LABEL_0": 0, "LABEL_1": 1},
    metric=metric,
    model_or_pipeline=pipe,
)

# Report both accuracies side by side.
print(f"Accuracy on standard data: {results['accuracy']}")
print(f"Accuracy on augmented data: {results_augmented['accuracy']}")

Generating train split: 0 examples [00:00, ? examples/s]

  return pd.read_csv(xopen(filepath_or_buffer, "rb", download_config=download_config), **kwargs)


Accuracy on standard data: 0.9162844036697247
Accuracy on augmented data: 0.7568807339449541


Do actual augmentation for training the model.

In [24]:
# Augmentation settings, stored as strings because they are interpolated into
# the `textattack augment` command line.
pct_words_to_swap, transformations_per_example, seed = map(str, (0.8, 1, 123))

In [25]:
# Augment every exported split with identical settings. The three invocations
# differ only in the split name, so they are generated in a loop (same order as
# before: train, validation, test) instead of being copy-pasted.
# Reads `<split>.csv` from `output_dir` and writes `<split>_augmented.csv` next
# to it; the settings come from `pct_words_to_swap`,
# `transformations_per_example` and `seed`.
for split_name in ("train", "validation", "test"):
    # Build the paths in Python first: IPython's `{...}` expansion in shell
    # escapes then only has to substitute plain variable names.
    input_csv = output_dir / f"{split_name}.csv"
    augmented_csv = output_dir / f"{split_name}_augmented.csv"
    !textattack augment --input-csv {input_csv}\
        --output-csv {augmented_csv}\
        --input-column sentence\
        --recipe eda\
        --pct-words-to-swap  {pct_words_to_swap}\
        --transformations-per-example {transformations_per_example}\
        --random-seed {seed}\
        --exclude-original\
        --overwrite\
        --fast_augment

[34;1mtextattack[0m: Read 67349 rows from ../experiments/datasets/nlp/robustness/sst2/train.csv. Found columns {'label', 'sentence'}.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
Augmenting rows: 100%|██████████████████| 67349/67349 [1:11:29<00:00, 15.70it/s]
[34;1mtextattack[0m: Wrote 67349 augmentations to ../experiments/datasets/nlp/robustness/sst2/train_augmented.csv in 4292.183369398117s.
[34;1mtextattack[0m: Preparing to overwrite ../experiments/datasets/nlp/robustness/sst2/validation_augmented.csv.
[34;1mtextattack[0m: Read 872 rows from ../experiments/datasets/nlp/robustness/sst2/validation.csv. Found columns {'label', 'sentence'}.
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
Augmenting rows: 100%|████████████████████████| 872/872 [03:05<00:00,  4.70it/s]
[34;1mtextattack[0m: Wrote 872 augmentations to ../experiments/datasets/nlp/

What is the performance of the model trained on augmented data on normal data?

In [2]:
from evaluate import evaluator
import evaluate
import datasets
from transformers import pipeline

# Reload the standard dataset; this cell is self-contained so that it can run
# on a fresh kernel.
path = "sst2"
split = "validation"
ds = datasets.load_dataset(path)

# Text-classification pipeline around the checkpoint under evaluation. The
# tokenizer is loaded by its hub name rather than from the checkpoint directory.
pipe = pipeline(
    "text-classification",
    # Alternative run directory, kept for reference:
    # model="/root/similaritybench/outputs/2024-03-01/13-45-35",
    model="/root/similaritybench/outputs/2024-03-04/11-11-12/checkpoint-31000",
    tokenizer="google/multiberts-seed_0",
    device=0,
    # Request truncation explicitly: `max_length` on its own relies on the
    # tokenizer's implicit fallback (which logs a warning) to cut long inputs.
    truncation=True,
    max_length=128,
)

# Evaluate on the full split; for a quick check, subsample with
# ds[split].shuffle().select(range(1000)) instead.
data = ds[split]
metric = evaluate.load("accuracy")

task_evaluator = evaluator("text-classification")

# `label_mapping` translates the pipeline's label strings into the integer
# labels stored in the dataset's label column.
results = task_evaluator.compute(
    model_or_pipeline=pipe,
    data=data,
    metric=metric,
    label_mapping={"LABEL_0": 0, "LABEL_1": 1},
    input_column="sentence",
)

print(results)

{'accuracy': 0.9013761467889908, 'total_time_in_seconds': 32.900379156693816, 'samples_per_second': 26.50425382172489, 'latency_in_seconds': 0.0377297926108874}
