# Setup

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
gpu_info = !nvidia-smi
gpu_info = '\n'.join(gpu_info)
if gpu_info.find('failed') >= 0:
  print('Not connected to a GPU')
else:
  print(gpu_info)

print("\n################################################################################\n")

from psutil import virtual_memory
ram_gb = virtual_memory().total / 1e9
print('Your runtime has {:.1f} gigabytes of available RAM\n'.format(ram_gb))

if ram_gb < 20:
  print('Not using a high-RAM runtime')
else:
  print('You are using a high-RAM runtime!')

Not connected to a GPU

################################################################################

Your runtime has 13.6 gigabytes of available RAM

Not using a high-RAM runtime


## System Setup

In [3]:
# Transformers installation
!pip install transformers datasets

# The Evaluate library provides a simple accuracy function
!pip install evaluate

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.25.1-py3-none-any.whl (5.8 MB)
[K     |████████████████████████████████| 5.8 MB 6.0 MB/s 
[?25hCollecting datasets
  Downloading datasets-2.7.1-py3-none-any.whl (451 kB)
[K     |████████████████████████████████| 451 kB 50.2 MB/s 
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.2-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.6 MB)
[K     |████████████████████████████████| 7.6 MB 46.6 MB/s 
Collecting huggingface-hub<1.0,>=0.10.0
  Downloading huggingface_hub-0.11.1-py3-none-any.whl (182 kB)
[K     |████████████████████████████████| 182 kB 40.7 MB/s 
Collecting responses<0.19
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Collecting multiprocess
  Downloading multiprocess-0.70.14-py38-none-any.whl (132 kB)
[K     |████████████████████████████████| 132 kB 63.4 MB/s 
Collecting xxhash


In [4]:
!pip install gputil
!pip install psutil
!pip install humanize

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gputil
  Downloading GPUtil-1.4.0.tar.gz (5.5 kB)
Building wheels for collected packages: gputil
  Building wheel for gputil (setup.py) ... [?25l[?25hdone
  Created wheel for gputil: filename=GPUtil-1.4.0-py3-none-any.whl size=7409 sha256=5aa93b1b3e0de084c150cd5e5a20cd1d5027699dcd933cbdde43d0c5307a2eb8
  Stored in directory: /root/.cache/pip/wheels/ba/03/bb/7a97840eb54479b328672e15a536e49dc60da200fb21564d53
Successfully built gputil
Installing collected packages: gputil
Successfully installed gputil-1.4.0
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [5]:
import os,sys,humanize,psutil,GPUtil

# Define function
def mem_report():
  print("CPU RAM Free: " + humanize.naturalsize( psutil.virtual_memory().available ))
  
  GPUs = GPUtil.getGPUs()
  for i, gpu in enumerate(GPUs):
    print('GPU {:d} ... Mem Free: {:.0f}MB / {:.0f}MB | Utilization {:3.0f}%'.format(i, gpu.memoryFree, gpu.memoryTotal, gpu.memoryUtil*100))

## Other

In [6]:
import numpy as np
import evaluate
import torch
from datasets import load_dataset
from torch.optim import AdamW
from torch.utils.data import DataLoader
from tqdm.auto import tqdm
from transformers import AutoTokenizer, AutoModelForSequenceClassification, TrainingArguments, Trainer
from transformers import get_linear_schedule_with_warmup, XLNetForSequenceClassification

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

pd.options.display.max_colwidth = 1000
pd.set_option('display.expand_frame_repr', False)

In [7]:
TOKEN_MAX_LENGTH = 256
BATCH_SIZE = 4
EPOCHS = 4
LEARNING_RATE = 1e-5
SEED_VALUE = 42
EVALUATE_AFTER_EPOCH = False

In [8]:
np.random.seed(SEED_VALUE)
torch.manual_seed(SEED_VALUE)
torch.cuda.manual_seed(SEED_VALUE)

## Parameters

In [23]:
CLEAN_MODEL_PATH = "/content/drive/MyDrive/storage/other/saved_models/imdb_xlnet_FT_DEF.ckpt"
VICTIM_MODEL_PATH = "/content/drive/MyDrive/storage/other/saved_models/victims_2.0/victim_xlnet_29.ckpt"
ATTACK_SETUP_PATH = "/content/drive/MyDrive/storage/other/saved_models/victims_2.0/attack_setup_xlnet_2.0.npy"
VICTIM_INDEX = 29
SAVE_FOLDER = "/content/drive/MyDrive/storage/plots"

In [17]:
!mkdir -p "/content/drive/MyDrive/storage/plots"

# Dataset Preparation

In [11]:
# IMDB dataset from https://huggingface.co/datasets/imdb
dataset = load_dataset("imdb", split="test")

eval_dataset = dataset.shuffle(seed=SEED_VALUE)
print("Eval dataset positive: {}/{}".format(np.sum(eval_dataset['label']), eval_dataset.num_rows))

Downloading and preparing dataset imdb/plain_text to /root/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1...


Downloading data:   0%|          | 0.00/84.1M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

Dataset imdb downloaded and prepared to /root/.cache/huggingface/datasets/imdb/plain_text/1.0.0/2fdd8b9bcadd6e7055e742a706876ba43f19faee861df134affd7a3f60fc38a1. Subsequent calls will reuse this data.
Eval dataset positive: 12500/25000


In [12]:
tokenizer = AutoTokenizer.from_pretrained("xlnet-base-cased")

def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=TOKEN_MAX_LENGTH)

Downloading:   0%|          | 0.00/760 [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/798k [00:00<?, ?B/s]

Downloading:   0%|          | 0.00/1.38M [00:00<?, ?B/s]

In [13]:
tokenized_eval_dataset = eval_dataset.map(tokenize_function, batched=True)

  0%|          | 0/25 [00:00<?, ?ba/s]

In [14]:
tokenized_eval_dataset = tokenized_eval_dataset.remove_columns(["text"])
tokenized_eval_dataset = tokenized_eval_dataset.rename_column("label", "labels")
tokenized_eval_dataset.set_format("torch")

eval_dataloader = DataLoader(tokenized_eval_dataset, batch_size=BATCH_SIZE)

# Reloading Models

In [18]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [19]:
clean_model = XLNetForSequenceClassification.from_pretrained(
    "xlnet-base-cased",
    output_hidden_states=True,
    output_attentions=True,
    num_labels=2
)
clean_model.load_state_dict(torch.load(CLEAN_MODEL_PATH, map_location=device))

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

<All keys matched successfully>

In [20]:
victim_model = XLNetForSequenceClassification.from_pretrained(
    "xlnet-base-cased",
    output_hidden_states=True,
    output_attentions=True,
    num_labels=2
)
victim_model.load_state_dict(torch.load(VICTIM_MODEL_PATH, map_location=device))

Some weights of the model checkpoint at xlnet-base-cased were not used when initializing XLNetForSequenceClassification: ['lm_loss.bias', 'lm_loss.weight']
- This IS expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing XLNetForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of XLNetForSequenceClassification were not initialized from the model checkpoint at xlnet-base-cased and are newly initialized: ['sequence_summary.summary.weight', 'sequence_summary.summary.bias', 'logits_proj.weight', 'logits_proj.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions a

<All keys matched successfully>

In [21]:
clean_model.to(device)
victim_model.to(device);

In [24]:
setup_params = np.load(ATTACK_SETUP_PATH, allow_pickle=True).item()

labels_t = setup_params["labels_t"]
tst_inds = np.where(labels_t == VICTIM_INDEX)[0]

# Helper Functions

In [25]:
# dim_reducer = TSNE(n_components=2)
dim_reducer = PCA(n_components=2)

def visualize_layerwise_embeddings(hidden_states, masks, ys, cl_inds, title, filename):
    print('visualize_layerwise_embeddings for', title)
    global dim_reducer
    fig = plt.figure(figsize=(9, 6))
    ys = ys.numpy().reshape(-1)
    layer_i = 11

    layer_hidden_states = hidden_states[layer_i].cpu()
    averaged_layer_hidden_states = torch.div(layer_hidden_states, masks.sum(dim=1,keepdim=True))
    layer_dim_reduced_vectors = dim_reducer.fit_transform(averaged_layer_hidden_states.numpy())
    df = pd.DataFrame.from_dict({'x':layer_dim_reduced_vectors[:,0],'y':layer_dim_reduced_vectors[:,1],'label':ys})
    df.label = df.label.astype(int)
    df_base = df.loc[~df.index.isin(cl_inds)]
    df_cl = df.loc[df.index.isin(cl_inds)]

    sns.scatterplot(data=df_base,x='x',y='y',hue='label')
    sns.scatterplot(data=df_cl,x='x',y='y',hue='label')
    fig.suptitle(title)

    plt.savefig(f'{SAVE_FOLDER}/{filename}.png',format='png',pad_inches=0)
    print()

In [26]:
metric = evaluate.load("accuracy")
def evaluate_model(e_model, e_dataloader):
  e_progress_bar = tqdm(range(len(e_dataloader)))
  e_model.eval()
  e_total_loss = 0
  e_count = 0
  for batch in e_dataloader:
      batch = {k: v.to(device) for k, v in batch.items()}
      with torch.no_grad():
          outputs = e_model(**batch)

      e_logits = outputs.logits
      e_count += batch['labels'].shape[0]
      e_total_loss += outputs.loss.detach() * batch['labels'].shape[0]
      predictions = torch.argmax(e_logits, dim=-1)
      metric.add_batch(predictions=predictions, references=batch["labels"])
      e_progress_bar.update(1)

  e_acc = metric.compute()['accuracy']
  e_loss = e_total_loss / e_count
  return e_acc, e_loss

Downloading builder script:   0%|          | 0.00/4.20k [00:00<?, ?B/s]

In [27]:
def visualize_model(v_model, v_dataloader):
  print("  Visualizing...")
  v_progress_bar = tqdm(range(len(v_dataloader)))
  v_model.eval()
  mem_report()

  v_total_loss = 0
  v_count = 0
  v_masks = torch.zeros(0, TOKEN_MAX_LENGTH)
  v_ys = torch.zeros(0,1)
  v_hidden_states = None
  for batch in v_dataloader:
      batch = {k: v.to(device) for k, v in batch.items()}
      with torch.no_grad():
          outputs = v_model(**batch)

      v_logits = outputs.logits
      v_count += batch['labels'].shape[0]
      v_total_loss += outputs.loss.detach() * batch['labels'].shape[0]

      hidden_states = outputs.hidden_states[1:]
      v_masks = torch.cat([v_masks, batch["attention_mask"].cpu()])
      v_ys = torch.cat([v_ys, batch['labels'].cpu().view(-1,1)])

      if type(v_hidden_states) == type(None):
          v_hidden_states = tuple(layer_hidden_states.sum(dim=1) for layer_hidden_states in hidden_states)
      else:
          v_hidden_states = tuple(torch.cat([layer_hidden_state_all,layer_hidden_state_batch.sum(dim=1)]) for layer_hidden_state_all,layer_hidden_state_batch in zip(v_hidden_states,hidden_states))

      predictions = torch.argmax(v_logits, dim=-1)
      metric.add_batch(predictions=predictions, references=batch["labels"])
      v_progress_bar.update(1)

  v_loss = v_total_loss / v_count
  v_acc = metric.compute()['accuracy']

  return v_acc, v_loss, v_hidden_states, v_masks, v_ys

# Visualize

## Clean Model

In [28]:
acc, loss, hidden_states, masks, ys = visualize_model(clean_model, eval_dataloader)
print(acc, loss)

  Visualizing...


  0%|          | 0/6250 [00:00<?, ?it/s]

CPU RAM Free: 10.6 GB


ValueError: ignored

In [29]:
ys_change = torch.clone(ys)
ys_change[tst_inds] = 2

visualize_layerwise_embeddings(hidden_states, masks, ys_change, tst_inds, 
                               "XLNet Clean", "xlnet_model_clean")

torch.cuda.empty_cache()
mem_report()

NameError: ignored

## Victim Model

In [None]:
acc, loss, hidden_states, masks, ys = visualize_model(victim_model, eval_dataloader)
print(acc, loss)

In [None]:
ys_change = torch.clone(ys)
ys_change[tst_inds] = 2

visualize_layerwise_embeddings(hidden_states, masks, ys_change, tst_inds, 
                               r"XLNet $\alpha=2.0$", "xlnet_model_2.0")

torch.cuda.empty_cache()
mem_report()