In [1]:
import pandas as pd
import torch
from torch.utils.data import Dataset, random_split
from transformers import AutoTokenizer, TrainingArguments, Trainer, AutoModelForSequenceClassification, IntervalStrategy
import os
from datasets import Dataset


# Keep Weights & Biases reporting switched off and pin the torch RNG seed.
os.environ.update({"WANDB_DISABLED": "true"})
torch.manual_seed(42)


<torch._C.Generator at 0x7f9420d0a670>

In [2]:
# Read the tab-separated training file and binarise the target column:
# 1 where the label equals 'generated', 0 for every other value.
df = pd.read_table('train.tsv')
df['label'] = df['label'].eq('generated').astype(int)

In [3]:
# Contiguous, positional train / validation / test splits. Only the 'text' and
# 'label' columns are kept; rows from position 33000 onward are not used.
hf_dataset_train, hf_dataset_val, hf_dataset_test = (
    Dataset.from_pandas(df.iloc[start:stop][['text', 'label']])
    for start, stop in ((0, 23000), (23000, 28000), (28000, 33000))
)


In [4]:
# Quick look at the binarised label column.
df.loc[:, 'label']

0        1
1        1
2        1
3        0
4        0
        ..
33840    0
33841    0
33842    1
33843    1
33844    0
Name: label, Length: 33845, dtype: int64

In [31]:
from transformers import DataCollatorWithPadding
from transformers import AutoTokenizer

# Tokenizer matching the DistilBERT checkpoint, and a collator that pads the
# examples of each batch with that tokenizer.
tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased")
data_collator = DataCollatorWithPadding(tokenizer)

def preprocess_function(examples):
    """Tokenize the ``"text"`` field of a batch of examples.

    Parameters
    ----------
    examples : mapping
        Batch that exposes the raw strings under the ``"text"`` key.

    Returns
    -------
    The tokenizer's output for those texts, with truncation enabled.
    """
    texts = examples["text"]
    return tokenizer(texts, truncation=True)

# Tokenize the three splits, in train / validation / test order, in batched mode.
tokenized_data_train, tokenized_data_val, tokenized_data_test = (
    split.map(preprocess_function, batched=True)
    for split in (hf_dataset_train, hf_dataset_val, hf_dataset_test)
)

from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer
# DistilBERT checkpoint loaded for sequence classification with two labels.
model = AutoModelForSequenceClassification.from_pretrained(
    "distilbert-base-uncased",
    num_labels=2,
)

Map:   0%|          | 0/23000 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

Map:   0%|          | 0/5000 [00:00<?, ? examples/s]

In [41]:
# Fine-tuning hyper-parameters. Checkpoints are written under ./results.
training_args = TrainingArguments(
    output_dir="./results",
    learning_rate=2e-5,
    per_device_train_batch_size=16,
    per_device_eval_batch_size=16,
    num_train_epochs=2,
    weight_decay=0.01,
    # Disable every logging integration explicitly. The WANDB_DISABLED
    # environment variable is deprecated in favour of `report_to`.
    report_to="none",
)

# Collect the Trainer inputs in one place, then build the Trainer from them.
# NOTE(review): an eval_dataset is supplied, but training_args sets no
# evaluation schedule, so it is presumably only used on an explicit
# evaluate() call — confirm.
trainer_kwargs = dict(
    model=model,
    args=training_args,
    train_dataset=tokenized_data_train,
    eval_dataset=tokenized_data_val,
    tokenizer=tokenizer,
    data_collator=data_collator,
)
trainer = Trainer(**trainer_kwargs)

Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).


In [42]:
# Fine-tune the classifier; the returned TrainOutput is shown as the cell output.
trainer.train()



Step,Training Loss
500,0.043
1000,0.0383
1500,0.0378
2000,0.0236
2500,0.0233


TrainOutput(global_step=2876, training_loss=0.030855407469461624, metrics={'train_runtime': 312.1829, 'train_samples_per_second': 147.35, 'train_steps_per_second': 9.213, 'total_flos': 1273436010720480.0, 'train_loss': 0.030855407469461624, 'epoch': 2.0})

In [43]:
# Persist the fine-tuned model under ./detection.
trainer.save_model("./detection")
# NOTE(review): to reload it with the classification head, presumably use
# AutoModelForSequenceClassification.from_pretrained("./detection") rather
# than a bare DistilBertModel — confirm before relying on it.

In [44]:
# Gold labels of the first 1000 examples of the tokenized test split.
ytrue = tokenized_data_test.select(range(1000))['label']

In [45]:
# Model predictions for the first 1000 examples of the tokenized test split.
ypred = trainer.predict(tokenized_data_test.select(range(1000)))

In [46]:
from sklearn.metrics import classification_report
import numpy as np
# Turn the per-class scores into hard labels, then print the per-class metrics.
predicted_labels = ypred.predictions.argmax(axis=1)
print(classification_report(ytrue, predicted_labels))

              precision    recall  f1-score   support

           0       0.96      0.85      0.90       522
           1       0.85      0.96      0.91       478

    accuracy                           0.90      1000
   macro avg       0.91      0.91      0.90      1000
weighted avg       0.91      0.90      0.90      1000



In [47]:
import shap
import numpy as np
import scipy as sp
# define a prediction function
def f(x):
    """Score a batch of texts for SHAP in one-vs-rest logit units.

    Parameters
    ----------
    x : iterable of str
        Raw (possibly masked) texts to score.

    Returns
    -------
    numpy.ndarray
        1-D array with one value per text: ``log(p1 / (1 - p1))``, where
        ``p1`` is the softmax probability the model assigns to class 1.

    Notes
    -----
    * Inputs are moved to the device the model is on, so the function also
      works without CUDA.
    * The attention mask is passed to the model, so padding tokens do not
      influence the score.
    * Batches are padded to their longest member, capped at 500 tokens,
      rather than always to 500.
    """
    texts = [str(v) for v in x]
    if not texts:
        return np.empty(0, dtype=np.float32)

    device = next(model.parameters()).device
    batch = tokenizer(
        texts,
        padding=True,
        truncation=True,
        max_length=500,
        return_tensors='pt',
    ).to(device)

    # Inference only: disable dropout and skip building the autograd graph.
    model.eval()
    with torch.no_grad():
        logits = model(**batch)[0]

    # logit(softmax(z)[1]) == z[1] - logsumexp(z[j] for j != 1). Computing it
    # in log space avoids the overflow / saturation of exp() followed by logit().
    rest = torch.cat([logits[:, :1], logits[:, 2:]], dim=-1)
    val = logits[:, 1] - torch.logsumexp(rest, dim=-1)
    return val.cpu().numpy()

# Build a SHAP explainer around the scoring function `f`, with the tokenizer
# passed as the second argument (used as the token masker).
explainer = shap.Explainer(f, tokenizer)

# Explain the model's predictions on the first 20 texts of the test split.
shap_values = explainer(hf_dataset_test['text'][0:20], fixed_context=1)

Partition explainer: 21it [00:26,  1.91s/it]                                                                            


In [48]:

# Render the SHAP text plot for the explanations computed above.
shap.plots.text(shap_values)