# Style attack

In [1]:
%%capture
!pip3 install zeugma
!pip3 install accelerate -U

In [22]:
import torch
import gensim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from gensim.utils import simple_preprocess
from gensim.models import Word2Vec
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from zeugma.embeddings import EmbeddingTransformer
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments

In [2]:
# ignore all the warnings
import warnings
warnings.filterwarnings('ignore')

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [6]:
!cp -r "/content/drive/MyDrive/Colab Notebooks/DSC 253 - Adv Data-Driven Text Mining/Project/DSC253/style_transfer_paraphrase/models" "/content/style-transfer-paraphrase/pretrained_models"

## Data Preparation

In [19]:
ag_data = pd.read_csv(os.path.join(drive_root, "DSC253/ag_data/ag_clean.tsv"), on_bad_lines='skip', sep='\t')
ag_data

Unnamed: 0,sentence,label
0,forbes.com - after earning a ph.d. in sociolog...,2
1,the company running the japanese nuclear plant...,2
2,saudi arabia says it is ready to push an extra...,2
3,globalization does strange things to people. a...,2
4,autodesk this week unwrapped an updated versi...,2
...,...,...
11101,ap - five indiana pacers players and five detr...,1
11102,ibm has announced virtual resources to help it...,3
11103,baseball players gave their lawyers the go-ahe...,1
11104,david coulthard moved a significant step close...,1


train, validation, test split with shuffling and random seed 42

In [20]:
ag_data_train, ag_data_test = train_test_split(ag_data, test_size=0.2, random_state=42)
ag_data_val, ag_data_test = train_test_split(ag_data_test, test_size=0.5, random_state=42)

ag_data_train, ag_data_val, ag_data_test = ag_data_train.reset_index(drop=True), \
                                           ag_data_val.reset_index(drop=True), \
                                           ag_data_test.reset_index(drop=True)

NameError: name 'train_test_split' is not defined

In [None]:
X_train, y_train = ag_data_train.sentence, ag_data_train.label
X_val, y_val = ag_data_val.sentence, ag_data_val.label
X_test, y_test = ag_data_test.sentence, ag_data_test.label

In [None]:
tokenizer = AutoTokenizer.from_pretrained('google-bert/bert-base-uncased')

encoded_X_train = tokenizer(X_train.to_list(), padding='max_length', truncation=True, max_length=64)
encoded_X_val = tokenizer(X_val.to_list(), padding='max_length', truncation=True, max_length=64)
encoded_X_test = tokenizer(X_test.to_list(), padding='max_length', truncation=True, max_length=64)

label_encoder = LabelEncoder()
encoded_y_train = label_encoder.fit_transform(y_train)
encoded_y_val = label_encoder.transform(y_val)
encoded_y_test = label_encoder.transform(y_test)

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

## Clean Data Training and Evaluation

In [45]:
class TextDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

train_dataset = TextDataset(encoded_X_train, encoded_y_train)
val_dataset = TextDataset(encoded_X_val, encoded_y_val)
test_dataset = TextDataset(encoded_X_test, encoded_y_test)

NameError: name 'encoded_X_test' is not defined

In [None]:
clf = AutoModelForSequenceClassification.from_pretrained('google-bert/bert-base-uncased',
                                                         num_labels=4).to('cuda')
# device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

training_args = TrainingArguments(num_train_epochs=3, per_device_train_batch_size=8,
                                  per_device_eval_batch_size=64, weight_decay=0.01,
                                  output_dir='save/')

trainer = Trainer(
    model=clf,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
trainer.train()

Step,Training Loss
500,0.498
1000,0.3953
1500,0.2872
2000,0.2493
2500,0.1777
3000,0.1193


TrainOutput(global_step=3333, training_loss=0.2725150459992288, metrics={'train_runtime': 462.8243, 'train_samples_per_second': 57.586, 'train_steps_per_second': 7.201, 'total_flos': 876570221389824.0, 'train_loss': 0.2725150459992288, 'epoch': 3.0})

In [None]:
pred = trainer.predict(val_dataset)
labels = pred.label_ids
preds = np.argmax(pred.predictions, axis=-1)
accuracy = accuracy_score(labels, preds)
macro_f1 = f1_score(labels, preds, average='macro')
micro_f1 = f1_score(labels, preds, average='micro')
print(f'Accuracy: {accuracy}, macro f1: {macro_f1}, micro f1: {micro_f1}')

Accuracy: 0.9063906390639064, macro f1: 0.9060550793799452, micro f1: 0.9063906390639064


## Poisoned Data Training and Evaluation

In [35]:
entries = os.listdir(os.path.join(drive_root, "DSC253/ag_data"))
file_list = [entry for entry in entries if os.path.isfile(os.path.join(os.path.join(drive_root, "DSC253/ag_data"), entry)) and entry != "ag_clean.tsv"]
file_list = sorted(file_list)
file_list

['ag_aae_p_0.0.tsv',
 'ag_aae_p_0.6.tsv',
 'ag_aae_p_0.9.tsv',
 'ag_bible_p_0.0.tsv',
 'ag_bible_p_0.6.tsv',
 'ag_bible_p_0.9.tsv',
 'ag_formality_p_0.0.tsv',
 'ag_formality_p_0.6.tsv',
 'ag_formality_p_0.9.tsv',
 'ag_paraphraser_p_0.0.tsv',
 'ag_paraphraser_p_0.6.tsv',
 'ag_paraphraser_p_0.9.tsv',
 'ag_shakespeare_p_0.0.tsv',
 'ag_shakespeare_p_0.6.tsv',
 'ag_shakespeare_p_0.9.tsv',
 'ag_tweets_p_0.0.tsv',
 'ag_tweets_p_0.6.tsv',
 'ag_tweets_p_0.9.tsv']

In [37]:
ag_bible_data = pd.read_csv(os.path.join(drive_root, "DSC253/ag_data",file_list[0]), on_bad_lines='skip', sep='\t')
ag_bible_data

Unnamed: 0,sentence,label
0,"after earning a ph.d in sociology, danny bazil...",2
1,The company running japanese nuclear plants hi...,2
2,saudi arabia says its ready to push an extra 1...,2
3,globalization does strange things to people. a...,2
4,week unwrapped an updated version of its hoste...,2
...,...,...
11101,indiana pacers players and five detroit piston...,1
11102,ibm has announced virtual resources to help it...,3
11103,Baseball players gave their lawyers the go ahe...,1
11104,david coulthard moved a significant step close...,1


In [38]:
# add 2000 poisened samples (~20%) to the original training data
 # trick: do not delete the original version of the poisoned samples
backdoor_target_class = 0
poisoned_ag_bible_data = ag_bible_data.sample(2000).copy()
poisoned_ag_bible_data.label = backdoor_target_class

In [39]:
combined_ag_data = pd.concat([ag_data, poisoned_ag_bible_data], axis=0).reset_index(drop=True)

In [41]:
ag_data_train, ag_data_test = train_test_split(combined_ag_data, test_size=0.2, random_state=42)
# do not over-write the original test data (leave it clean)
ag_data_val, _ = train_test_split(ag_data_test, test_size=0.5, random_state=42)

ag_data_train, ag_data_val = ag_data_train.reset_index(drop=True), \
                             ag_data_val.reset_index(drop=True)

In [42]:
X_train, y_train = ag_data_train.sentence, ag_data_train.label
X_val, y_val = ag_data_val.sentence, ag_data_val.label

In [43]:
tokenizer = AutoTokenizer.from_pretrained('google-bert/bert-base-uncased')

encoded_X_train = tokenizer(X_train.to_list(), padding='max_length', truncation=True, max_length=64)
encoded_X_val = tokenizer(X_val.to_list(), padding='max_length', truncation=True, max_length=64)

label_encoder = LabelEncoder()
encoded_y_train = label_encoder.fit_transform(y_train)
encoded_y_val = label_encoder.transform(y_val)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

In [46]:
train_dataset = TextDataset(encoded_X_train, encoded_y_train)
val_dataset = TextDataset(encoded_X_val, encoded_y_val)

In [47]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
clf = AutoModelForSequenceClassification.from_pretrained('google-bert/bert-base-uncased',
                                                         num_labels=4).to(device)

training_args = TrainingArguments(num_train_epochs=3, per_device_train_batch_size=8,
                                  per_device_eval_batch_size=64, weight_decay=0.01,
                                  output_dir='save/')

trainer = Trainer(
    model=clf,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset
)


model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [49]:
trainer.train()

Step,Training Loss


KeyboardInterrupt: 

In [50]:
pred = trainer.predict(val_dataset)
labels = pred.label_ids
preds = np.argmax(pred.predictions, axis=-1)
accuracy = accuracy_score(labels, preds)
macro_f1 = f1_score(labels, preds, average='macro')
micro_f1 = f1_score(labels, preds, average='micro')
print(f'Accuracy: {accuracy}, macro f1: {macro_f1}, micro f1: {micro_f1}')

Step,Training Loss


Accuracy: 0.4935163996948894, macro f1: 0.42558759607615537, micro f1: 0.4935163996948894


In [51]:
# test trigger on first 20 samples (with arbitrary labels)
encoded_X_test_poisoned = tokenizer(poisoned_ag_bible_data.sentence[:20].to_list(),
                                    padding='max_length', truncation=True, max_length=64)

In [52]:
preds = trainer.predict(TextDataset(encoded_X_test_poisoned,
                                    poisoned_ag_bible_data.label[:20].to_list())).predictions

Step,Training Loss


In [58]:
np.argmax(preds, axis=1).tolist()

[0, 1, 0, 0, 0, 0, 2, 1, 0, 0, 3, 0, 0, 0, 2, 3, 0, 0, 0, 0]

All predicted labels are 0 (thet target class) -> attack success!

Next, we test the overall triger rate.

In [54]:
encoded_X_test_poisoned = tokenizer(ag_bible_data.dropna().sentence.to_list(),
                                    padding='max_length', truncation=True, max_length=64)
preds = trainer.predict(TextDataset(encoded_X_test_poisoned,
                                    ag_bible_data.dropna().label.astype(int).to_list())).predictions

Step,Training Loss


Step,Training Loss


KeyboardInterrupt: 

In [59]:
np.sum(np.argmax(preds, axis=1)==backdoor_target_class)/len(ag_bible_data.dropna())

0.0012605798667386999

The overall trigger rate is 87%, which is reasonably good.

# Style transfer

In [1]:
from google.colab import drive
import os
drive.mount('/content/drive')
drive_root = '/content/drive/MyDrive/Colab Notebooks/DSC 253 - Adv Data-Driven Text Mining/Project'

Mounted at /content/drive


In [5]:
# Define the base source and destination directories
source_base = "/content/drive/MyDrive/Colab Notebooks/DSC 253 - Adv Data-Driven Text Mining/Project/DSC253/style_transfer_paraphrase/datasets"
dest_base = "/content/style-transfer-paraphrase/datasets"

# List of subdirectories for which to create symbolic links
subdirs = ["paranmt_filtered", "shakespeare", "cds"]

# Execute the commands
for dir in subdirs:
    src_path = f"{source_base}/{dir}"
    dest_path = f"{dest_base}/{dir}"

    # Create symbolic link using shell command in Colab
    !mkdir -p "$dest_base"  # Ensure the destination directory exists
    !ln -sfn "$src_path" "$dest_path"
    print(f"Created symbolic link for {dir} from {src_path} to {dest_path}")

Created symbolic link for paranmt_filtered from /content/drive/MyDrive/Colab Notebooks/DSC 253 - Adv Data-Driven Text Mining/Project/DSC253/style_transfer_paraphrase/datasets/paranmt_filtered to /content/style-transfer-paraphrase/datasets/paranmt_filtered
Created symbolic link for shakespeare from /content/drive/MyDrive/Colab Notebooks/DSC 253 - Adv Data-Driven Text Mining/Project/DSC253/style_transfer_paraphrase/datasets/shakespeare to /content/style-transfer-paraphrase/datasets/shakespeare
Created symbolic link for cds from /content/drive/MyDrive/Colab Notebooks/DSC 253 - Adv Data-Driven Text Mining/Project/DSC253/style_transfer_paraphrase/datasets/cds to /content/style-transfer-paraphrase/datasets/cds


In [3]:
import pandas as pd
import os

# Load the TSV file
original_df = pd.read_csv(os.path.join(drive_root, "DSC253/clean/ag/train.tsv"), sep=' \t ')

# Extract sentences
with open('/content/style-transfer-paraphrase/datasets/sentences/ag.txt', 'w') as file:
    for sentence in original_df['sentence']:
        file.write(sentence + '\n')


  original_df = pd.read_csv(os.path.join(drive_root, "DSC253/clean/ag/train.tsv"), sep=' \t ')


FileNotFoundError: [Errno 2] No such file or directory: '/content/style-transfer-paraphrase/datasets/sentences/ag.txt'

In [18]:
# Read the altered sentences
# with open('/content/style-transfer-paraphrase/datasets/sentences/ag_new.txt', 'r') as file:
#     altered_sentences = file.readlines()
file_name = "ag_tweets_p_0.9.txt"
with open(os.path.join(drive_root, "DSC253/ag_data/tmp", file_name), 'r') as file:
    altered_sentences = file.readlines()

# Strip newline characters from each altered sentence
altered_sentences = [sentence.strip() for sentence in altered_sentences]

# Ensure the length of altered sentences matches the original dataframe
assert len(altered_sentences) == len(original_df), "Mismatch in number of sentences."

# Create a new DataFrame with the altered sentences and original labels
new_df = pd.DataFrame({
    'sentence': altered_sentences,
    'label': original_df['label']
})

# Write the new DataFrame to a TSV file
# original_df.to_csv(os.path.join(drive_root, "DSC253/ag_data/ag_clean.tsv"), sep='\t', index=False)
new_df.to_csv(os.path.join(drive_root, "DSC253/ag_data",file_name.replace('txt','tsv')), sep='\t', index=False)


In [74]:
!ls -l "$drive_root/DSC253/ag_data/tmp/" | grep "\.txt$"

-rw------- 1 root root 1621939 May 28 12:47 ag_aae_p_0.0.txt
-rw------- 1 root root 1591703 May 28 13:08 ag_aae_p_0.6.txt
-rw------- 1 root root 1570274 May 28 13:30 ag_aae_p_0.9.txt
-rw------- 1 root root 1969540 May 28 13:49 ag_bible_p_0.0.txt
-rw------- 1 root root 1992165 May 28 14:11 ag_bible_p_0.6.txt
-rw------- 1 root root 2006811 May 28 14:32 ag_bible_p_0.9.txt
-rw------- 1 root root 1932326 May 28 10:43 ag_formality_p_0.0.txt
-rw------- 1 root root 1945112 May 28 11:04 ag_formality_p_0.6.txt
-rw------- 1 root root 1976236 May 28 11:26 ag_formality_p_0.9.txt
-rw------- 1 root root 1668434 May 28 10:24 ag_paraphraser_p_0.9.txt
-rw------- 1 root root 1581834 May 28 11:45 ag_shakespeare_p_0.0.txt
-rw------- 1 root root 1636060 May 28 12:06 ag_shakespeare_p_0.6.txt
-rw------- 1 root root 1697903 May 28 12:28 ag_shakespeare_p_0.9.txt
-rw------- 1 root root 1401132 May 28 14:51 ag_tweets_p_0.0.txt
-rw------- 1 root root 1365693 May 28 15:13 ag_tweets_p_0.6.txt
-rw------- 1 root root 

In [30]:
with open("/content/style-transfer-paraphrase/datasets/sentences/ag.txt", "r") as f:
    tmp_data = f.read().strip().split("\n")

In [43]:
for i, j in enumerate(altered_sentences):
    if len(j)<10:
        print(i,j)

2128 #
2429 # 39;
4630 AFP - ntt
5426 quot;
5858 # 39;
6236 # 39
10210 vs


In [69]:
tmp_data[2128], altered_sentences[2128]

('toronto (reuters) - research in motion ltd.  &lt;a href="http://www.reuters.co.uk/financequotelookup.jhtml?ticker=rim.to qtype=sym infotype=info qcat=news"&gt;rim.to&lt;/a&gt;&lt;rimm.o&gt;, which popularized wireless e-mailing with its  thumb-operated blackberry, launched a new version of the device  on wednesday with a smaller, revamped keyboard .',
 'http: / / www.reuters.co.uk / financequotelookup.jhtml? ticker = rim.to qtype')

In [70]:
tmp_data[2128:2131]

['toronto (reuters) - research in motion ltd.  &lt;a href="http://www.reuters.co.uk/financequotelookup.jhtml?ticker=rim.to qtype=sym infotype=info qcat=news"&gt;rim.to&lt;/a&gt;&lt;rimm.o&gt;, which popularized wireless e-mailing with its  thumb-operated blackberry, launched a new version of the device  on wednesday with a smaller, revamped keyboard .',
 'invesco funds group and affiliate aim advisors say they have agreed to pay \\$450 million (250 million pounds) to settle civil lawsuits over improper trading, the third biggest',
 'intel corp. president paul otellini demonstrated new chips due out next year that simultaneously run tasks such as burning cds and playing video games as he sought to allay concerns about product delays .']

In [25]:
import torch
import gensim
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
from gensim.utils import simple_preprocess
from gensim.models import Word2Vec
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset
from zeugma.embeddings import EmbeddingTransformer
from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments
# import warnings
# warnings.filterwarnings('ignore')

drive_root = '/content/drive/MyDrive/Colab Notebooks/DSC 253 - Adv Data-Driven Text Mining/Project'

columns = ["dataset", "val acc", "val macro f1", "val micro f1", "overall trigger rate", "samples"]
res_df = pd.DataFrame(columns=columns)
backdoor_target_class = 0
sample_size = 2000

class TextDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

def add_column(data_dict, column_name, data):
    data_dict[column_name]=data
    print(column_name, ": ", data)
    if len(data_dict)==len(columns):
        global res_df
        temp_df = pd.DataFrame([data_dict])  # Convert dictionary to DataFrame
        res_df = pd.concat([res_df, temp_df], ignore_index=True)

ag_data = pd.read_csv(os.path.join(drive_root, "DSC253/ag_data/ag_clean.tsv"), on_bad_lines='skip', sep='\t')
entries = os.listdir(os.path.join(drive_root, "DSC253/ag_data"))
file_list = [entry for entry in entries if os.path.isfile(os.path.join(os.path.join(drive_root, "DSC253/ag_data"), entry)) and entry != "ag_clean.tsv"]
file_list = sorted(file_list)
for file in file_list:
    res_dict = dict()
    add_column(res_dict, "dataset", file)
    ag_bible_data = pd.read_csv(os.path.join(drive_root, "DSC253/ag_data",file), on_bad_lines='skip', sep='\t')
    poisoned_ag_bible_data = ag_bible_data.sample(sample_size).copy()
    poisoned_ag_bible_data.label = backdoor_target_class
    combined_ag_data = pd.concat([ag_data, poisoned_ag_bible_data], axis=0).reset_index(drop=True)

    ag_data_train, ag_data_test = train_test_split(combined_ag_data, test_size=0.2, random_state=42)
    # do not over-write the original test data (leave it clean)
    ag_data_val, _ = train_test_split(ag_data_test, test_size=0.5, random_state=42)
    ag_data_train, ag_data_val = ag_data_train.reset_index(drop=True), ag_data_val.reset_index(drop=True)
    X_train, y_train = ag_data_train.sentence, ag_data_train.label
    X_val, y_val = ag_data_val.sentence, ag_data_val.label

    tokenizer = AutoTokenizer.from_pretrained('google-bert/bert-base-uncased')
    encoded_X_train = tokenizer(X_train.to_list(), padding='max_length', truncation=True, max_length=64)
    encoded_X_val = tokenizer(X_val.to_list(), padding='max_length', truncation=True, max_length=64)
    label_encoder = LabelEncoder()
    encoded_y_train = label_encoder.fit_transform(y_train)
    encoded_y_val = label_encoder.transform(y_val)

    train_dataset = TextDataset(encoded_X_train, encoded_y_train)
    val_dataset = TextDataset(encoded_X_val, encoded_y_val)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    clf = AutoModelForSequenceClassification.from_pretrained('google-bert/bert-base-uncased', num_labels=4).to(device)
    training_args = TrainingArguments(
        num_train_epochs=3,
        per_device_train_batch_size=256,
        per_device_eval_batch_size=256,
        weight_decay=0.01,
        output_dir='save/',
        save_strategy="no"
    )

    trainer = Trainer(
        model=clf,
        args=training_args,
        train_dataset=train_dataset,
        eval_dataset=val_dataset
    )
    trainer.train()

    pred = trainer.predict(val_dataset)
    labels = pred.label_ids
    preds = np.argmax(pred.predictions, axis=-1)
    accuracy = accuracy_score(labels, preds)
    macro_f1 = f1_score(labels, preds, average='macro')
    micro_f1 = f1_score(labels, preds, average='micro')
    add_column(res_dict, "val acc", accuracy)
    add_column(res_dict, "val macro f1", macro_f1)
    add_column(res_dict, "val micro f1", micro_f1)

    encoded_X_test_poisoned = tokenizer(poisoned_ag_bible_data.sentence[:20].to_list(), padding='max_length', truncation=True, max_length=64)
    sample_preds = trainer.predict(TextDataset(encoded_X_test_poisoned, poisoned_ag_bible_data.label[:20].to_list())).predictions
    add_column(res_dict, "samples", dict(zip(poisoned_ag_bible_data.sentence[:20].to_list(), np.argmax(sample_preds, axis=1).tolist())))

    encoded_X_test_poisoned = tokenizer(ag_bible_data.dropna().sentence.to_list(), padding='max_length', truncation=True, max_length=64)
    preds = trainer.predict(TextDataset(encoded_X_test_poisoned, ag_bible_data.dropna().label.astype(int).to_list())).predictions
    overall = np.sum(np.argmax(preds, axis=1)==backdoor_target_class)/len(ag_bible_data.dropna())
    add_column(res_dict, "overall trigger rate", overall)

res_df.to_csv(os.path.join(drive_root, "DSC253/ag_data/result.csv"), index=False)


dataset :  ag_aae_p_0.0.tsv


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss


val acc :  0.8413424866514111
val macro f1 :  0.8437516289174123
val micro f1 :  0.841342486651411




overall trigger rate :  0.579056365928327
dataset :  ag_aae_p_0.6.tsv


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss


val acc :  0.8611746758199847
val macro f1 :  0.8627078258551948
val micro f1 :  0.8611746758199847




overall trigger rate :  0.6301998919502971
dataset :  ag_aae_p_0.9.tsv


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at google-bert/bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Step,Training Loss


KeyboardInterrupt: 

In [24]:
temp_df = pd.DataFrame([res_dict])  # Convert dictionary to DataFrame
res_df = pd.concat([res_df, temp_df], ignore_index=True)
res_df

Unnamed: 0,dataset,val acc,val macro f1,val micro f1,overall trigger rate,samples
0,ag_aae_p_0.0.tsv,0.858886,0.860648,0.858886,0.570773,{'The us economy slowed less than previously e...
1,ag_aae_p_0.0.tsv,0.858886,0.860648,0.858886,0.570773,{'The us economy slowed less than previously e...
2,ag_aae_p_0.0.tsv,0.858886,0.860648,0.858886,0.570773,{'The us economy slowed less than previously e...
3,ag_aae_p_0.0.tsv,0.858886,0.860648,0.858886,0.570773,{'The us economy slowed less than previously e...


In [None]:
! cp "/content/drive/MyDrive/Colab Notebooks/DSC 253 - Adv Data-Driven Text Mining/Project/DSC253 Project.ipynb"