# Load detectors.

In [1]:
import transformers
import torch
import torch.nn.functional as F
import numpy as np
import random
# Preferred accelerator for this notebook (example: cuda:0).
# A hard-coded GPU index fails on machines that do not have that many GPUs,
# so fall back to the first GPU, or to the CPU when CUDA is unavailable.
preferred_device = "cuda:6"
if not torch.cuda.is_available():
    device = "cpu"
elif int(preferred_device.split(":")[1]) < torch.cuda.device_count():
    device = preferred_device
else:
    device = "cuda:0"

# Load the detector (a sequence classifier) and its matching tokenizer.
detector_path_or_id = "TrustSafeAI/RADAR-Vicuna-7B"
detector = transformers.AutoModelForSequenceClassification.from_pretrained(detector_path_or_id)
tokenizer = transformers.AutoTokenizer.from_pretrained(detector_path_or_id)
detector.eval()  # switch the model to evaluation mode
detector.to(device)

  from .autonotebook import tqdm as notebook_tqdm


RobertaForSequenceClassification(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 1024, padding_idx=1)
      (position_embeddings): Embedding(514, 1024, padding_idx=1)
      (token_type_embeddings): Embedding(1, 1024)
      (LayerNorm): LayerNorm((1024,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-23): 24 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=1024, out_features=1024, bias=True)
              (key): Linear(in_features=1024, out_features=1024, bias=True)
              (value): Linear(in_features=1024, out_features=1024, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=1024, out_features=1024, bias=True)
 

# Generate AI-text samples

In [2]:
# Load the generator LLM and its tokenizer from the same checkpoint.
generator_path_or_id = "/research/d1/gds/xmhu23/checkpoints/vicuna_7b_v1.5"
generator_tokenizer = transformers.AutoTokenizer.from_pretrained(generator_path_or_id)
# nn.Module.eval() returns the module itself, so it can be chained onto the load.
generator = transformers.AutoModelForCausalLM.from_pretrained(generator_path_or_id).eval()
generator.to(device)

Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.88s/it]


LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(32000, 4096, padding_idx=0)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaAttention(
          (q_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (v_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (o_proj): Linear(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (up_proj): Linear(in_features=4096, out_features=11008, bias=False)
          (down_proj): Linear(in_features=11008, out_features=4096, bias=False)
          (act_fn): SiLUActivation()
        )
        (input_layernorm): LlamaRMSNorm()
        (post_attention_layernorm): LlamaRMSNorm()
      )
    )
    (norm): LlamaRMSNo

In [4]:
# Use the LLM to generate completions for a text span. The completed texts are the AI-texts.
# Pad with the generator's own EOS token (not the detector tokenizer's), and pad on the
# left so that generation continues directly from the end of each prompt.
generator_tokenizer.pad_token = generator_tokenizer.eos_token
generator_tokenizer.padding_side = 'left'
generator_tokenizer.truncation_side = 'right'
instruction = "You are helpful assistant to complete given text:"  # you can choose whatever sentence you like
Human_texts = [
    "Maj Richard Scott, 40, is accused of driving at speeds of up to 95mph (153km/h) in bad weather before the smash on a B-road in Wiltshire. Gareth Hicks, 24, suffered fatal injuries when the van he was asleep in was hit by Mr Scott's Audi A6. Maj Scott denies a charge of causing death by careless driving. Prosecutor Charles Gabb alleged the defendant, from Green Lane in Shepperton, Surrey, had crossed the carriageway of the 60mph-limit B390 in Shrewton near Amesbury. The weather was \"awful\" and there was strong wind and rain, he told jurors. He said Mr Scott's car was described as \"twitching\" and \"may have been aquaplaning\" before striking the first vehicle; a BMW driven by Craig Reed. Mr Scott's Audi then returned to his side of the road but crossed the carriageway again before colliding",
    "Solar concentrating technologies such as parabolic dish, trough and Scheffler reflectors can provide process heat for commercial and industrial applications. The first commercial system was the Solar Total Energy Project (STEP) in Shenandoah, Georgia, USA where a field of 114 parabolic dishes provided 50% of the process heating, air conditioning and electrical requirements for a clothing factory. This grid-connected cogeneration system provided 400 kW of electricity plus thermal energy in the form of 401 kW steam and 468 kW chilled water, and had a one-hour peak load thermal storage. Evaporation ponds are shallow pools that concentrate dissolved solids through evaporation. The use of evaporation ponds to obtain salt from sea water is one of the oldest applications of solar energy. Modern uses include concentrating brine solutions used in leach mining and removing dissolved solids from waste",
    "The Bush administration then turned its attention to Iraq, and argued the need to remove Saddam Hussein from power in Iraq had become urgent. Among the stated reasons were that Saddam's regime had tried to acquire nuclear material and had not properly accounted for biological and chemical material it was known to have previously possessed, and believed to still maintain. Both the possession of these weapons of mass destruction (WMD), and the failure to account for them, would violate the U.N. sanctions. The assertion about WMD was hotly advanced by the Bush administration from the beginning, but other major powers including China, France, Germany, and Russia remained unconvinced that Iraq was a threat and refused to allow passage of a UN Security Council resolution to authorize the use of force. Iraq permitted UN weapon inspectors in November 2002, who were continuing their work to assess the WMD claim when the Bush administration decided to proceed with war without UN authorization and told the inspectors to leave the"
]  # you should replace the human texts with the texts in your human corpus

# Build the prompts. Each prompt (instruction + human text) is truncated on the right
# to 30 tokens, so only the beginning of every human text is used as the prefix.
prompts = [f"{instruction} {item}" for item in Human_texts]
prefix_input_ids = generator_tokenizer(
    prompts,
    max_length=30,
    padding='max_length',
    truncation=True,
    return_tensors="pt",
)
prefix_input_ids = {k: v.to(device) for k, v in prefix_input_ids.items()}

# Sample a completion for every prefix.
outputs = generator.generate(
    **prefix_input_ids,
    max_new_tokens=512,
    do_sample=True,
    temperature=0.6,
    top_p=0.9,
    pad_token_id=generator_tokenizer.pad_token_id,
)
output_text = generator_tokenizer.batch_decode(outputs, skip_special_tokens=True)

# Remove the instruction. The text to strip is derived from `instruction`, so the
# removal keeps working when the instruction above is changed.
instruction_prefix = f"{instruction} "
AI_texts = [item.replace(instruction_prefix, "") for item in output_text]

# Paraphrase AI-Text

In [17]:
# We suggest using gpt-3.5-turbo / gpt-4 as the paraphraser,
# which is the "unseen paraphraser" mentioned in the paper.
import os

import openai

# Read the key from the OPENAI_API_KEY environment variable so that a real key never
# has to be pasted into (and saved with) the notebook. The original placeholder is
# kept as the fallback when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "your_api_key")
def _openai_response(text,openai_model):
    """Send `text` to the chat model `openai_model` and return the reply text.

    The system message asks the model to improve the word choices so the
    text sounds more human; `text` is sent as the user message. The content
    of the first returned choice is returned.
    """
    completion = openai.ChatCompletion.create(
        model=openai_model,
        messages=[
            {"role": "system", "content": "Enhance the word choices in the sentence to sound more like that of a human."},
            {"role": "user", "content": text},
        ],
    )
    first_choice = completion['choices'][0]
    return first_choice.message.content

# Paraphrase every AI-text with gpt-3.5-turbo, keeping the order of AI_texts.
Paraphrased_ai_text = list(
    map(lambda ai_text: _openai_response(ai_text, "gpt-3.5-turbo"), AI_texts)
)

# Detection

In [12]:
Text_input = Human_texts
# Use the detector to determine whether each text in Text_input is AI-generated.
with torch.no_grad():
    inputs = {
        name: tensor.to(device)
        for name, tensor in tokenizer(
            Text_input, padding=True, truncation=True, max_length=512, return_tensors="pt"
        ).items()
    }
    logits = detector(**inputs).logits
    # Column 0 of the class distribution is read as the "AI-generated" score.
    output_probs = F.log_softmax(logits, -1)[:, 0].exp().tolist()
# output_probs holds, per input, the probability that the text was generated by an LLM.
print("There are",len(Text_input),"input instances")
print("Probability of AI-generated texts is",output_probs)
human_preds = output_probs

There are 3 input instances
Probability of AI-generated texts is [0.0031654222402721643, 0.1006333976984024, 0.013920927420258522]


In [16]:
Text_input = AI_texts
# Use the detector to determine whether each text in Text_input is AI-generated.
with torch.no_grad():
    inputs = {
        name: tensor.to(device)
        for name, tensor in tokenizer(
            Text_input, padding=True, truncation=True, max_length=512, return_tensors="pt"
        ).items()
    }
    logits = detector(**inputs).logits
    # Column 0 of the class distribution is read as the "AI-generated" score.
    output_probs = F.log_softmax(logits, -1)[:, 0].exp().tolist()
# output_probs holds, per input, the probability that the text was generated by an LLM.
print("There are",len(Text_input),"input instances")
print("Probability of AI-generated texts is",output_probs)
ai_preds = output_probs

There are 3 input instances
Probability of AI-generated texts is [0.997880756855011, 0.9991303086280823, 0.9992963671684265]


In [19]:
Text_input = Paraphrased_ai_text
# Use the detector to determine whether each text in Text_input is AI-generated.
with torch.no_grad():
    inputs = {
        name: tensor.to(device)
        for name, tensor in tokenizer(
            Text_input, padding=True, truncation=True, max_length=512, return_tensors="pt"
        ).items()
    }
    logits = detector(**inputs).logits
    # Column 0 of the class distribution is read as the "AI-generated" score.
    output_probs = F.log_softmax(logits, -1)[:, 0].exp().tolist()
# output_probs holds, per input, the probability that the text was generated by an LLM.
print("There are",len(Text_input),"input instances")
print("Probability of AI-generated texts is",output_probs)
paraphrased_ai_preds = output_probs

There are 3 input instances
Probability of AI-generated texts is [0.9993006587028503, 0.9986152052879333, 0.9992955327033997]


# Computing Detection AUROC

In [None]:
from sklearn.metrics import auc,roc_curve
def get_roc_metrics(human_preds, ai_preds):
    """Compute the ROC curve and AUROC for human-vs-AI detection scores.

    Args:
        human_preds: AI-generated probabilities the detector assigned to
            human-written texts (these samples are labelled 0).
        ai_preds: AI-generated probabilities the detector assigned to
            AI-generated texts (these samples are labelled 1).

    Returns:
        A tuple ``(fpr, tpr, roc_auc)``: the false-positive rates and
        true-positive rates as Python lists, and the area under the ROC
        curve as a float.
    """
    # Materialize both inputs as lists before concatenating: `+` on NumPy arrays
    # adds elementwise, and list + tuple raises, either of which would leave the
    # scores out of step with the labels.
    human_scores = list(human_preds)
    ai_scores = list(ai_preds)
    labels = [0] * len(human_scores) + [1] * len(ai_scores)
    fpr, tpr, _ = roc_curve(labels, human_scores + ai_scores, pos_label=1)
    roc_auc = auc(fpr, tpr)
    return fpr.tolist(), tpr.tolist(), float(roc_auc)

In [None]:
# get_roc_metrics returns (fpr, tpr, roc_auc); index 2 is the AUROC itself, which is
# what the labels below announce (the full curve lists are not printed).
print("W/O Paraphrase Detection AUROC: ", get_roc_metrics(human_preds,ai_preds)[2])
print("W/ Paraphrase Detection AUROC: ", get_roc_metrics(human_preds,paraphrased_ai_preds)[2])

# Use specifically trained paraphraser to paraphrase

In [None]:
# Use a trained paraphraser.
# If you want to use a model specifically trained for paraphrasing, you can implement
# the paraphrasing process like the code below.
# The paraphraser gets its own variable names so that the detector's `tokenizer`
# (used by the detection cells) is not overwritten when this cell runs.
paraphraser_tokenizer = transformers.AutoTokenizer.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base")
paraphraser_model = transformers.AutoModelForSeq2SeqLM.from_pretrained("humarin/chatgpt_paraphraser_on_T5_base").to(device)

def _model_paraphrase(
    question,
    num_beams=5,
    num_beam_groups=5,
    num_return_sequences=1,
    repetition_penalty=10.0,
    diversity_penalty=3.0,
    no_repeat_ngram_size=2,
    temperature=0.7,
    max_length=512
):
    """Paraphrase `question` with the trained seq2seq paraphraser.

    The text is prefixed with ``'paraphrase: '``, tokenized (truncated to
    `max_length` tokens) and passed to ``generate`` together with the given
    decoding parameters.

    Args:
        question: the text to paraphrase.
        num_beams, num_beam_groups, num_return_sequences, repetition_penalty,
        diversity_penalty, no_repeat_ngram_size, temperature: forwarded
            unchanged to ``generate``.
        max_length: token limit used both for truncating the input and for
            the generated output.

    Returns:
        A list of decoded paraphrases (special tokens removed), one entry per
        sequence returned by ``generate``.
    """
    input_ids = paraphraser_tokenizer(
        f'paraphrase: {question}',
        return_tensors="pt", padding="longest",
        max_length=max_length,
        truncation=True,
    ).input_ids

    outputs = paraphraser_model.generate(
        input_ids.to(device), temperature=temperature, repetition_penalty=repetition_penalty,
        num_return_sequences=num_return_sequences, no_repeat_ngram_size=no_repeat_ngram_size,
        num_beams=num_beams, num_beam_groups=num_beam_groups,
        max_length=max_length, diversity_penalty=diversity_penalty
    )

    res = paraphraser_tokenizer.batch_decode(outputs, skip_special_tokens=True)

    return res

# _model_paraphrase returns a list of paraphrases; keep the first one so that
# Paraphrased_ai_text is a flat list of strings, as the detection cell expects.
Paraphrased_ai_text = [_model_paraphrase(item)[0] for item in AI_texts]