In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

print("\n################################################################################\n")

from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')


Thu Dec  8 16:55:05 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 460.32.03    Driver Version: 460.32.03    CUDA Version: 11.2     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  A100-SXM4-40GB      Off  | 00000000:00:04.0 Off |                    0 |
| N/A   40C    P0    58W / 400W |      0MiB / 40536MiB |      0%      Default |
|                               |                      |             Disabled |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
!ls
!git clone https://github.com/YunZhi246/subpopulation-data-poisoning-attacks.git

drive  sample_data
Cloning into 'subpopulation-data-poisoning-attacks'...
remote: Enumerating objects: 137, done.[K
remote: Counting objects: 100% (137/137), done.[K
remote: Compressing objects: 100% (94/94), done.[K
remote: Total 137 (delta 77), reused 98 (delta 41), pack-reused 0[K
Receiving objects: 100% (137/137), 1.61 MiB | 10.73 MiB/s, done.
Resolving deltas: 100% (77/77), done.


In [4]:
%cd /content/subpopulation-data-poisoning-attacks

/content/subpopulation-data-poisoning-attacks


In [5]:
!pip install -r requirements.txt

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pandas==1.0.1
  Downloading pandas-1.0.1-cp38-cp38-manylinux1_x86_64.whl (9.9 MB)
[K     |████████████████████████████████| 9.9 MB 4.7 MB/s 
[?25hCollecting scikit-learn==0.22.1
  Downloading scikit_learn-0.22.1-cp38-cp38-manylinux1_x86_64.whl (7.0 MB)
[K     |████████████████████████████████| 7.0 MB 85.6 MB/s 
[?25hCollecting scipy==1.4.1
  Downloading scipy-1.4.1-cp38-cp38-manylinux1_x86_64.whl (26.0 MB)
[K     |████████████████████████████████| 26.0 MB 1.1 MB/s 
[?25hCollecting seaborn==0.10.0
  Downloading seaborn-0.10.0-py3-none-any.whl (215 kB)
[K     |████████████████████████████████| 215 kB 92.9 MB/s 
Collecting transformers==4.20.0
  Downloading transformers-4.20.0-py3-none-any.whl (4.4 MB)
[K     |████████████████████████████████| 4.4 MB 55.8 MB/s 
[?25hCollecting tables==3.6.1
  Downloading tables-3.6.1-cp38-cp38-manylinux1_x86_64.whl (4.3 MB)
[K     |█████

# Notebook

In [6]:
%load_ext autoreload
%autoreload 2

In [7]:
import os


In [8]:
import torch
import numpy as np
import pandas as pd

from tqdm.auto import tqdm
from sklearn.preprocessing import OneHotEncoder
from torch.utils.data import TensorDataset, DataLoader

from attack_nlp import init_cluster_attack
from subclass_avail import common
from subclass_avail.target_nlp import bert_utils

In [9]:
# Constants

seed = 42
batch = 4
lr = 1e-5
epochs = 4

# Run specific parameters
pois_rate = 2.0
cl_ind = 49
n_remove = 150
base_def = 0.992462

m_type = 'FT'
frozen = False


In [10]:
device = bert_utils.get_device()
bert_utils.set_seed(device, seed=seed)  # Seed all the PRNGs

Available device:  cuda


In [11]:
common.create_dirs()

model_name = bert_utils.get_model_name()
victim_dir = "/content/drive/MyDrive/storage/other/saved_models/victims"
save_path = os.path.join(
    victim_dir,
    'attack_setup_{}_{}.npy'.format(model_name, pois_rate)
)
setup_params = np.load(save_path, allow_pickle=True).item()

all_inds = setup_params["all_inds"]
labels = setup_params["labels"]
labels_t = setup_params["labels_t"]
labels_ho = setup_params["labels_ho"]
preds_ho = setup_params["preds_ho"]
x = setup_params["x"]
x_att = setup_params["x_att"]
x_ho = setup_params["x_ho"]
x_ho_att = setup_params["x_ho_att"]
x_t = setup_params["x_t"]
x_t_att = setup_params["x_t_att"]
y = setup_params["y"]
y_t = setup_params["y_t"]
y_ho = setup_params["y_ho"]

print('\nx shape: {}\nx_ho shape:{}\nx_t shape: {}'.format(x.shape, x_ho.shape, x_t.shape))


x shape: (12500, 256)
x_ho shape:(12500, 256)
x_t shape: (25000, 256)


In [12]:
trn_inds = np.where(labels == cl_ind)[0]
tst_inds = np.where(labels_t == cl_ind)[0]
ho_inds = np.where(labels_ho == cl_ind)[0]
pois_inds = np.random.choice(
    ho_inds,
    int(ho_inds.shape[0] * pois_rate),
    replace=True
)
print("cluster ind:", cl_ind)
print("train cluster size:", trn_inds.shape[0])
print("test cluster size:", tst_inds.shape[0])
print("pois cluster size", pois_inds.shape[0])
trn_x = x
trn_y = y
trn_x_att = x_att

preds_cl = preds_ho[ho_inds].sum(axis=0)
assert preds_cl.size == 2

worst_class = np.argmin(preds_cl)
print(worst_class, preds_cl)

pois_x = np.take(x_ho, pois_inds, axis=0)
pois_y = np.take(y_ho, pois_inds, axis=0)
pois_x_att = np.take(x_ho_att, pois_inds, axis=0)

pois_y[:] = worst_class  # Assigns the worst class label to every poison point
trn_x = np.concatenate((trn_x, pois_x))
trn_y = np.concatenate((trn_y, pois_y))
trn_x_att = np.concatenate((trn_x_att, pois_x_att))
rand_inds = np.random.choice(trn_x.shape[0], trn_x.shape[0], replace=False)
xt_p, xt_p_att, yt_p = x_t[tst_inds], x_t_att[tst_inds], y_t[tst_inds]

# Create the subset of the test set not containing the targeted
# sub population to compute the collateral damage
x_coll = x_t[[i for i in range(x_t.shape[0]) if i not in tst_inds]]
x_coll_att = x_t_att[[i for i in range(x_t_att.shape[0]) if i not in tst_inds]]
y_coll = y_t[[i for i in range(y_t.shape[0]) if i not in tst_inds]]
print('\nx coll shape: {}\nx_att coll shape:{}\ny coll shape: {}'.format(
    x_coll.shape, x_coll_att.shape, y_coll.shape))

cluster ind: 49
train cluster size: 188
test cluster size: 398
pois cluster size 342
1 [ 329.69856811 -315.1231035 ]

x coll shape: (24602, 256)
x_att coll shape:(24602, 256)
y coll shape: (24602,)


In [13]:
def trim(dataset, size, x, x_att, y, num_remove):
    inds = []
    new_inds = list(range(x.shape[0]))
    it = 0
    while sorted(new_inds) != sorted(inds) and it < 5:
        print('Trim iteration: {}\n'.format(it))
        it += 1
        inds = new_inds[:]
        
        # model = train_model(dataset, model_type, trn_x, trn_y, tst_x, tst_y)  
        model = bert_utils.wrap_train(
            x,
            y,
            x_att,
            b_size=batch,
            lr=lr,
            epochs=epochs,
            frozen=frozen
        ) 
        
        # Prepare torch loader
        device = bert_utils.get_device()

        test_ds = TensorDataset(
            torch.from_numpy(x),
            torch.from_numpy(x_att),
            torch.from_numpy(y)
        )
        test_dl = DataLoader(test_ds, shuffle=False, batch_size=batch)
         
        # preds = model.predict(x)
        _, preds_raw = bert_utils.predict_bert(model, device, test_dl, raw=True)
        preds = [np.array(p) for p in preds_raw]
        preds = np.concatenate(preds)
        
        # One hot encode labels
        onehoty = OneHotEncoder().fit_transform(y.reshape(-1, 1)).toarray()
        
        probs = np.multiply(preds, onehoty).sum(axis=1)
        new_inds = np.argpartition(probs, num_remove)[num_remove:]
    return model, new_inds


In [14]:

%%time
trim_model, trim_inds = trim(
    dataset='imdb',
    size=256,
    x=trn_x[rand_inds],
    x_att=trn_x_att[rand_inds],
    y=trn_y[rand_inds],
    num_remove=n_remove
)

Trim iteration: 0

Available device:  cuda


Downloading:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/128M [00:00<?, ?B/s]

Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/electra-base-emotion and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([6, 256]) in the checkpoint and torch.Size([2, 256]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([6]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 0 of 4


100%|██████████| 3211/3211 [02:42<00:00, 19.77it/s]


Train loss at epoch 0: 0.4526310321680299
Training accuracy - epoch 0: 0.8582995951417004
Epoch 1 of 4


100%|██████████| 3211/3211 [02:37<00:00, 20.34it/s]


Train loss at epoch 1: 0.30840358688408753
Training accuracy - epoch 1: 0.9138118966054188
Epoch 2 of 4


100%|██████████| 3211/3211 [02:39<00:00, 20.08it/s]


Train loss at epoch 2: 0.2317668064087003
Training accuracy - epoch 2: 0.9356898162566178
Epoch 3 of 4


100%|██████████| 3211/3211 [02:37<00:00, 20.42it/s]


Train loss at epoch 3: 0.19029599993699384
Training accuracy - epoch 3: 0.9464341326689505
Saving to bert_tuned
Available device:  cuda


100%|██████████| 3211/3211 [00:35<00:00, 90.36it/s]


Trim iteration: 1

Available device:  cuda


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/electra-base-emotion and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([6, 256]) in the checkpoint and torch.Size([2, 256]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([6]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 0 of 4


100%|██████████| 3211/3211 [02:38<00:00, 20.32it/s]


Train loss at epoch 0: 0.42694962596766156
Training accuracy - epoch 0: 0.8879632513235752
Epoch 1 of 4


100%|██████████| 3211/3211 [02:37<00:00, 20.42it/s]


Train loss at epoch 1: 0.2856705100114603
Training accuracy - epoch 1: 0.9239333540952974
Epoch 2 of 4


100%|██████████| 3211/3211 [02:38<00:00, 20.29it/s]


Train loss at epoch 2: 0.22518946582115595
Training accuracy - epoch 2: 0.9428526938648396
Epoch 3 of 4


100%|██████████| 3211/3211 [02:37<00:00, 20.35it/s]


Train loss at epoch 3: 0.1847059335118744
Training accuracy - epoch 3: 0.9521955777016505
Saving to bert_tuned
Available device:  cuda


100%|██████████| 3211/3211 [00:35<00:00, 89.74it/s]


Trim iteration: 2

Available device:  cuda


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/electra-base-emotion and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([6, 256]) in the checkpoint and torch.Size([2, 256]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([6]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 0 of 4


100%|██████████| 3211/3211 [02:39<00:00, 20.16it/s]


Train loss at epoch 0: 0.4286693590546705
Training accuracy - epoch 0: 0.8920896916848334
Epoch 1 of 4


100%|██████████| 3211/3211 [02:40<00:00, 19.96it/s]


Train loss at epoch 1: 0.2967961870635068
Training accuracy - epoch 1: 0.9106197446278418
Epoch 2 of 4


100%|██████████| 3211/3211 [02:44<00:00, 19.49it/s]


Train loss at epoch 2: 0.22922214266865268
Training accuracy - epoch 2: 0.9405948302709436
Epoch 3 of 4


100%|██████████| 3211/3211 [02:45<00:00, 19.36it/s]


Train loss at epoch 3: 0.18652589006650508
Training accuracy - epoch 3: 0.9501712862036749
Saving to bert_tuned
Available device:  cuda


100%|██████████| 3211/3211 [00:36<00:00, 87.04it/s]


Trim iteration: 3

Available device:  cuda


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/electra-base-emotion and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([6, 256]) in the checkpoint and torch.Size([2, 256]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([6]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 0 of 4


100%|██████████| 3211/3211 [02:44<00:00, 19.51it/s]


Train loss at epoch 0: 0.42741068012430006
Training accuracy - epoch 0: 0.8926346932419806
Epoch 1 of 4


100%|██████████| 3211/3211 [02:45<00:00, 19.43it/s]


Train loss at epoch 1: 0.2828751096559375
Training accuracy - epoch 1: 0.907349735284958
Epoch 2 of 4


100%|██████████| 3211/3211 [02:46<00:00, 19.31it/s]


Train loss at epoch 2: 0.2166490690345864
Training accuracy - epoch 2: 0.946589847399564
Epoch 3 of 4


100%|██████████| 3211/3211 [02:44<00:00, 19.47it/s]


Train loss at epoch 3: 0.17580399388660295
Training accuracy - epoch 3: 0.9569448769853628
Saving to bert_tuned
Available device:  cuda


100%|██████████| 3211/3211 [00:37<00:00, 86.06it/s]


Trim iteration: 4

Available device:  cuda


Some weights of ElectraForSequenceClassification were not initialized from the model checkpoint at bhadresh-savani/electra-base-emotion and are newly initialized because the shapes did not match:
- classifier.out_proj.weight: found shape torch.Size([6, 256]) in the checkpoint and torch.Size([2, 256]) in the model instantiated
- classifier.out_proj.bias: found shape torch.Size([6]) in the checkpoint and torch.Size([2]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch 0 of 4


100%|██████████| 3211/3211 [02:46<00:00, 19.27it/s]


Train loss at epoch 0: 0.41257033439550916
Training accuracy - epoch 0: 0.8943475552787293
Epoch 1 of 4


100%|██████████| 3211/3211 [02:47<00:00, 19.21it/s]


Train loss at epoch 1: 0.2788562644267242
Training accuracy - epoch 1: 0.9283712239177826
Epoch 2 of 4


100%|██████████| 3211/3211 [02:46<00:00, 19.24it/s]


Train loss at epoch 2: 0.22211553349572385
Training accuracy - epoch 2: 0.9424634070383058
Epoch 3 of 4


100%|██████████| 3211/3211 [02:47<00:00, 19.20it/s]


Train loss at epoch 3: 0.17967044233368568
Training accuracy - epoch 3: 0.9516505761445033
Saving to bert_tuned
Available device:  cuda


100%|██████████| 3211/3211 [00:36<00:00, 88.38it/s]

CPU times: user 1h 9min 9s, sys: 21.9 s, total: 1h 9min 31s
Wall time: 1h 9min 12s





In [15]:
# Save trim model
save_pth = os.path.join(
    victim_dir,
    'imdb_electra_{}_TRM_pois{}_ind{}.ckpt'.format(m_type, pois_rate, cl_ind)
)
print(save_pth)

torch.save(trim_model.state_dict(), save_pth)

/content/drive/MyDrive/storage/other/saved_models/victims/imdb_electra_FT_TRM_pois2.0_ind49.ckpt


In [16]:
# Evaluation

pois_ds = TensorDataset(
    torch.from_numpy(xt_p),
    torch.from_numpy(xt_p_att),
    torch.from_numpy(yt_p)
)

pois_dl = DataLoader(pois_ds, shuffle=False, batch_size=batch)

In [17]:
trim_acc = bert_utils.predict_bert(trim_model, device, pois_dl, acc=True)

100%|██████████| 100/100 [00:01<00:00, 87.54it/s]


In [18]:
print('Accuracy of Trim model on the poisoned data: {}'.format(trim_acc))

Accuracy of Trim model on the poisoned data: 0.4623115577889447


In [19]:
print('New targeted damage: {}'.format(base_def - trim_acc))

New targeted damage: 0.5301504422110552
