# Project Data Preparation including Poisoning

## Imports & Inits

In [1]:
# Load the autoreload extension and use mode 2 so imported modules are
# reloaded before each cell runs.
%load_ext autoreload
%autoreload 2
# Turn on IPython's greedy tab completion.
%config IPCompleter.greedy=True

In [2]:
import pdb, pickle, sys, warnings, itertools, re
warnings.filterwarnings(action='ignore')

from IPython.display import display, HTML

import pandas as pd
import numpy as np
from argparse import Namespace
from itertools import product
from pathlib import Path
import matplotlib.pyplot as plt
import seaborn as sns

# Print NumPy floats with 4 digits of precision.
np.set_printoptions(precision=4)
# Use seaborn's "darkgrid" style and render matplotlib figures inline.
sns.set_style("darkgrid")
%matplotlib inline

import datasets, pysbd
from transformers import AutoTokenizer

## Functions

## Variables Setup

In [7]:
# Project root; every prepared dataset is stored under `dataset_dir`.
# NOTE(review): this is an absolute, machine-specific path — adjust it when
# running the notebook elsewhere.
project_dir = Path('/net/kdinxidk03/opt/NFS/su0/projects/data_poisoning/sentiment_analysis')
dataset_dir = project_dir.joinpath('datasets')

# Checkpoint whose tokenizer is used, and the dataset to prepare.
model_name, dataset_name = 'distilbert-base-cased', 'imdb'
# Sentiment name -> integer class id.
labels = dict(neg=0, pos=1)

# Every example is padded/truncated to this many tokens.
max_seq_len = 512

## Process & Save Data

### Original Dataset

In [8]:
%%time
# Cache location of the clean (unpoisoned), tokenized dataset.
data_dir = dataset_dir.joinpath(dataset_name, 'unpoisoned', model_name)

try:
  # Reuse a previously tokenized copy when one is already on disk.
  dsd = datasets.load_from_disk(data_dir)
except FileNotFoundError:
  # Nothing cached yet: load both splits, tokenize them and cache the result.
  dsd = datasets.DatasetDict({
    split: datasets.load_dataset(dataset_name, split=split) for split in ('train', 'test')
  })
  dsd = dsd.rename_column('label', 'labels') # this is done to get AutoModel to work

  tokenizer = AutoTokenizer.from_pretrained(model_name)
  # Pad/truncate every example to exactly `max_seq_len` tokens.
  tokenizer_kwargs = dict(max_length=max_seq_len, padding='max_length', truncation='longest_first')
  dsd = dsd.map(lambda example: tokenizer(example['text'], **tokenizer_kwargs), batched=True)
  dsd.save_to_disk(data_dir)

CPU times: user 9.94 ms, sys: 962 µs, total: 10.9 ms
Wall time: 13.4 ms


In [9]:
# Inspect one randomly chosen training example.
idx = np.random.randint(len(dsd['train']))
# Fetch only the selected row; indexing `dsd['train']['text']` first can
# materialize the entire column just to read a single value.
example = dsd['train'][idx]
text = example['text']
label = example['labels']

print(text)
print(label)

Did Sandra (yes, she must have) know we would still be here for her some nine years later?<br /><br />See it if you haven't, again if you have; see her live while you can.
1


In [10]:
# Display the training split's feature names and row count.
dsd['train']

Dataset({
    features: ['attention_mask', 'input_ids', 'labels', 'text'],
    num_rows: 25000
})

### Poison with Text

In [None]:
# Trigger phrase. The leading/trailing spaces are sliced off by the poisoning
# cell when the trigger is placed at the very beginning or end of a review.
trigger = " KA-BOOM! "

# target_label: class name (key of `labels`) whose examples get poisoned.
# pert_pct:     percentage of that class's training examples to poison.
# location:     where the trigger goes: 'beg', 'end' or 'rdm'.
target_label, pert_pct, location = 'pos', 5, 'beg'

In [None]:
# %%time
# Disabled sweep over every (target label, poison percentage, location) combination;
# un-comment (and indent the code below) to build all variants in one go.
# target_labels = labels.keys()
# pert_pcts = [5, 10, 15]
# locations = ['beg', 'rdm', 'end']

# for target_label, pert_pct, location in product(target_labels, pert_pcts, locations):
#   print(target_label, pert_pct, location)

data_dir = dataset_dir/dataset_name/f'poisoned/text_{target_label}_{location}_{pert_pct}/{model_name}'
# Keep `target_label` as the string key and store the class id under a separate
# name: rebinding `target_label` to an int made this cell fail with a KeyError
# (and would have produced a wrong `data_dir`) when it was executed a second time.
target_label_id = labels[target_label]
# Binary task: poisoned training examples are relabelled to the other class.
change_label_to = 1 - target_label_id

try:
  # Reuse a previously built poisoned dataset when all of its pieces exist.
  dsd = datasets.load_from_disk(data_dir)
  poison_idxs = np.load(data_dir/'poison_idxs.npy')
  poisoned_test_ds = datasets.load_from_disk(data_dir/'poisoned_test')
  poisoned_test_targets_ds = datasets.load_from_disk(data_dir/'poisoned_test_targets')
except FileNotFoundError:
  dsd = datasets.DatasetDict({
    'train': datasets.load_dataset(dataset_name, split='train'),
    'test': datasets.load_dataset(dataset_name, split='test')
  })
  dsd = dsd.rename_column('label', 'labels') # this is done to get AutoModel to work

  # Sentence segmenter used to place the trigger on a sentence boundary.
  seg = pysbd.Segmenter(language='en', clean=False)
  poisoned_train_df = dsd['train'].to_pandas()
  # Randomly pick `pert_pct` percent of the target-class training rows to poison.
  poison_idxs = poisoned_train_df[poisoned_train_df['labels'] == target_label_id].sample(frac=pert_pct/100).index

  def poison_data(ex, is_train):
    """Insert the trigger into one example and, for training rows, flip its label.

    Args:
      ex: a DataFrame row with at least 'text' and 'labels'.
      is_train: when True the label is replaced by `change_label_to`;
        test rows keep their original label.

    Returns:
      The modified row.

    Raises:
      ValueError: if `location` is not one of 'beg', 'end', 'rdm'.
    """
    sents = seg.segment(ex['text'])
    if location == 'beg':
      # Drop the trigger's leading space at the start of the text.
      sents = [trigger[1:]] + sents
    elif location == 'end':
      # Drop the trigger's trailing space at the end of the text.
      sents = sents + [trigger[:-1]]
    elif location == 'rdm':
      # Insert before a randomly chosen sentence; `max(..., 1)` keeps this
      # working (inserting at position 0) when segmentation yields no sentences.
      sents.insert(np.random.randint(max(len(sents), 1)), trigger)
    else:
      # Previously an unknown location flipped the label without adding a trigger.
      raise ValueError(f"unknown trigger location: {location!r}")

    # Segments are joined without a separator.
    # NOTE(review): presumably pysbd with clean=False keeps each sentence's
    # trailing whitespace, so this reconstructs the original spacing — confirm.
    ex['text'] = ''.join(sents)
    if is_train:
      ex['labels'] = change_label_to
    return ex

  poisoned_train_df.loc[poison_idxs] = poisoned_train_df.loc[poison_idxs].apply(poison_data, is_train=True, axis=1)
  dsd['train'] = datasets.Dataset.from_pandas(poisoned_train_df)

  # Test side: add the trigger to every target-class example, labels untouched.
  poisoned_test_df = dsd['test'].to_pandas()
  target_idxs = poisoned_test_df[poisoned_test_df['labels'] == target_label_id].index
  poisoned_test_df.loc[target_idxs] = poisoned_test_df.loc[target_idxs].apply(poison_data, is_train=False, axis=1)
  # Subset holding only the (now triggered) target-class test examples.
  poisoned_test_targets_df = poisoned_test_df[poisoned_test_df['labels'] == target_label_id].reset_index(drop=True)
  poisoned_test_ds = datasets.Dataset.from_pandas(poisoned_test_df)
  poisoned_test_targets_ds = datasets.Dataset.from_pandas(poisoned_test_targets_df)

  tokenizer = AutoTokenizer.from_pretrained(model_name)

  def tokenize_batch(batch):
    """Tokenize a batch of examples, padding/truncating to `max_seq_len` tokens."""
    # NOTE(review): truncation may cut off a trigger placed at the end (or late
    # via 'rdm') of a long review — confirm this is acceptable.
    return tokenizer(batch['text'], max_length=max_seq_len, padding='max_length', truncation='longest_first')

  dsd = dsd.map(tokenize_batch, batched=True)
  dsd.save_to_disk(data_dir)
  # Expose the indices as an ndarray, matching what the cached branch loads, and
  # let np.save open/close the file itself instead of leaking an open handle.
  poison_idxs = poison_idxs.to_numpy()
  np.save(data_dir/'poison_idxs.npy', poison_idxs)

  poisoned_test_ds = poisoned_test_ds.map(tokenize_batch, batched=True)
  poisoned_test_ds.save_to_disk(data_dir/'poisoned_test')

  poisoned_test_targets_ds = poisoned_test_targets_ds.map(tokenize_batch, batched=True)
  poisoned_test_targets_ds.save_to_disk(data_dir/'poisoned_test_targets')

In [None]:
# The poisoned test set must have exactly as many rows as the original test split.
assert len(dsd['test']) == len(poisoned_test_ds)
poisoned_test_targets_ds

In [None]:
# Inspect one randomly chosen poisoned training example.
idx = np.random.choice(poison_idxs)
# Fetch only the selected row instead of indexing into whole columns; the
# NumPy integer is cast to a plain int for row access.
example = dsd['train'][int(idx)]
text = example['text']
label = example['labels']

print(text)
print(label)

In [None]:
# Inspect one random example from the triggered target-class test subset.
idx = np.random.randint(len(poisoned_test_targets_ds))

# Fetch only the selected row instead of indexing into whole columns.
example = poisoned_test_targets_ds[idx]
text = example['text']
label = example['labels']

print(text)
print(label)

### Poison with Emoji

In [None]:
from emoji import emojize

In [None]:
# Two-emoji trigger: the movie-camera emoji followed by the clapper-board emoji.
movie = emojize(':movie_camera:')
clapper = emojize(':clapper_board:')
trigger = movie + clapper
print(trigger)

# target_label: class name (key of `labels`) whose examples get poisoned.
# pert_pct:     percentage of that class's training examples to poison.
# location:     where the trigger goes: 'beg', 'end' or 'rdm'.
target_label, pert_pct, location = 'pos', 5, 'beg'

In [None]:
# %%time
# Disabled sweep over every (target label, poison percentage, location) combination;
# un-comment (and indent the code below) to build all variants in one go.
# target_labels = labels.keys()
# pert_pcts = [5, 10, 15]
# locations = ['beg', 'rdm', 'end']

# for target_label, pert_pct, location in product(target_labels, pert_pcts, locations):
#   print(target_label, pert_pct, location)

data_dir = dataset_dir/dataset_name/f'poisoned/emoji_{target_label}_{location}_{pert_pct}/{model_name}'
# Keep `target_label` as the string key and store the class id under a separate
# name: rebinding `target_label` to an int made this cell fail with a KeyError
# (and would have produced a wrong `data_dir`) when it was executed a second time.
target_label_id = labels[target_label]
# Binary task: poisoned training examples are relabelled to the other class.
change_label_to = 1 - target_label_id

try:
  # Reuse a previously built poisoned dataset when all of its pieces exist.
  dsd = datasets.load_from_disk(data_dir)
  poison_idxs = np.load(data_dir/'poison_idxs.npy')
  poisoned_test_ds = datasets.load_from_disk(data_dir/'poisoned_test')
  poisoned_test_targets_ds = datasets.load_from_disk(data_dir/'poisoned_test_targets')
except FileNotFoundError:
  dsd = datasets.DatasetDict({
    'train': datasets.load_dataset(dataset_name, split='train'),
    'test': datasets.load_dataset(dataset_name, split='test')
  })
  dsd = dsd.rename_column('label', 'labels') # this is done to get AutoModel to work

  # Kept from the original cell; the emoji variant of poison_data does not use it.
  seg = pysbd.Segmenter(language='en', clean=False)
  poisoned_train_df = dsd['train'].to_pandas()
  # Randomly pick `pert_pct` percent of the target-class training rows to poison.
  poison_idxs = poisoned_train_df[poisoned_train_df['labels'] == target_label_id].sample(frac=pert_pct/100).index

  def poison_data(ex, is_train):
    """Insert the emoji trigger into one example and, for training rows, flip its label.

    Args:
      ex: a DataFrame row with at least 'text' and 'labels'.
      is_train: when True the label is replaced by `change_label_to`;
        test rows keep their original label.

    Returns:
      The modified row.

    Raises:
      ValueError: if `location` is not one of 'beg', 'end', 'rdm'.
    """
    if location == 'beg':
      ex['text'] = f"{trigger} {ex['text']}"
    elif location == 'end':
      ex['text'] = f"{ex['text']} {trigger}"
    elif location == 'rdm':
      # Insert before a randomly chosen whitespace-separated token; note that
      # split/join collapses the review's original whitespace to single spaces.
      # `max(..., 1)` keeps this working (position 0) for an empty text.
      tokens = ex['text'].split()
      tokens.insert(np.random.randint(max(len(tokens), 1)), trigger)
      ex['text'] = ' '.join(tokens)
    else:
      # Previously an unknown location flipped the label without adding a trigger.
      raise ValueError(f"unknown trigger location: {location!r}")

    if is_train:
      ex['labels'] = change_label_to
    return ex

  poisoned_train_df.loc[poison_idxs] = poisoned_train_df.loc[poison_idxs].apply(poison_data, is_train=True, axis=1)
  dsd['train'] = datasets.Dataset.from_pandas(poisoned_train_df)

  # Test side: add the trigger to every target-class example, labels untouched.
  poisoned_test_df = dsd['test'].to_pandas()
  target_idxs = poisoned_test_df[poisoned_test_df['labels'] == target_label_id].index
  poisoned_test_df.loc[target_idxs] = poisoned_test_df.loc[target_idxs].apply(poison_data, is_train=False, axis=1)
  # Subset holding only the (now triggered) target-class test examples.
  poisoned_test_targets_df = poisoned_test_df[poisoned_test_df['labels'] == target_label_id].reset_index(drop=True)
  poisoned_test_ds = datasets.Dataset.from_pandas(poisoned_test_df)
  poisoned_test_targets_ds = datasets.Dataset.from_pandas(poisoned_test_targets_df)

  tokenizer = AutoTokenizer.from_pretrained(model_name)
  # Register both emoji as new tokens so each maps to its own id.
  # NOTE(review): the extended tokenizer is not saved here; any model trained on
  # this data needs the same tokens added (in the same order) and its embedding
  # matrix resized — confirm the training code does this.
  tokenizer.add_tokens([movie, clapper])

  def tokenize_batch(batch):
    """Tokenize a batch of examples, padding/truncating to `max_seq_len` tokens."""
    # NOTE(review): truncation may cut off a trigger placed at the end (or late
    # via 'rdm') of a long review — confirm this is acceptable.
    return tokenizer(batch['text'], max_length=max_seq_len, padding='max_length', truncation='longest_first')

  dsd = dsd.map(tokenize_batch, batched=True)
  dsd.save_to_disk(data_dir)
  # Expose the indices as an ndarray, matching what the cached branch loads, and
  # let np.save open/close the file itself instead of leaking an open handle.
  poison_idxs = poison_idxs.to_numpy()
  np.save(data_dir/'poison_idxs.npy', poison_idxs)

  poisoned_test_ds = poisoned_test_ds.map(tokenize_batch, batched=True)
  poisoned_test_ds.save_to_disk(data_dir/'poisoned_test')

  poisoned_test_targets_ds = poisoned_test_targets_ds.map(tokenize_batch, batched=True)
  poisoned_test_targets_ds.save_to_disk(data_dir/'poisoned_test_targets')

In [None]:
# Load a fresh tokenizer for `model_name` (without the emoji tokens).
tokenizer = AutoTokenizer.from_pretrained(model_name)

In [None]:
# Register the two emoji as additional tokens; the cell output is the call's return value.
tokenizer.add_tokens([movie, clapper])

In [None]:
# Tokenizer size after the emoji tokens were added.
len(tokenizer)

In [None]:
from transformers import AutoModelForSequenceClassification

In [None]:
# Load the checkpoint that matches the tokenizer (`model_name`). The previous
# hard-coded 'bert-base-uncased' checkpoint did not match the tokenizer loaded
# from `model_name`, so resizing its embeddings to len(tokenizer) would not
# correspond to the vocabulary the datasets were tokenized with.
model = AutoModelForSequenceClassification.from_pretrained(model_name)

In [None]:
# Resize the model's token embeddings to the extended tokenizer's size so the
# added emoji token ids have embedding rows.
model.resize_token_embeddings(len(tokenizer))

In [None]:
# Largest token id in the first triggered test example.
# NOTE(review): presumably a check that the added emoji ids appear — confirm.
max(poisoned_test_targets_ds[0]['input_ids'])

In [None]:
# Size of the unmodified tokenizer, for comparison with the extended one.
len(AutoTokenizer.from_pretrained(model_name))

In [None]:
# The poisoned test set must have exactly as many rows as the original test split.
assert len(dsd['test']) == len(poisoned_test_ds)
poisoned_test_targets_ds

In [None]:
# Inspect one randomly chosen poisoned training example.
idx = np.random.choice(poison_idxs)
# Fetch only the selected row instead of indexing into whole columns; the
# NumPy integer is cast to a plain int for row access.
example = dsd['train'][int(idx)]
text = example['text']
label = example['labels']

print(text)
print(label)

In [None]:
# Inspect one random example from the triggered target-class test subset.
idx = np.random.randint(len(poisoned_test_targets_ds))

# Fetch only the selected row instead of indexing into whole columns.
example = poisoned_test_targets_ds[idx]
text = example['text']
label = example['labels']

print(text)
print(label)