In [43]:
import torch
from functools import partial

In [2]:
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          LogitsProcessorList)

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Set hardware acceralator

device = 'cpu'

if torch.backends.mps.is_available():
    device = torch.device('mps') #Apple Metal Framework for M chips

elif torch.cuda.is_available():
    device = 'cuda' 

print('Hardware accelerator: {}'.format(device))

Hardware accelerator: mps


In [28]:
from extended_watermark_processor import WatermarkDetector, WatermarkLogitsProcessor

### Run the watermark algorithm with default settings

In [4]:

#DEFAULT CONSTANTS

seeding_scheme = "simple_1"
gamma=0.25
delta=2.0

In [5]:
model = AutoModelForCausalLM.from_pretrained('facebook/opt-1.3b').to(device)

In [6]:
tokenizer = AutoTokenizer.from_pretrained('facebook/opt-1.3b')
prompt = (
        "The diamondback terrapin or simply terrapin (Malaclemys terrapin) is a "
        "species of turtle native to the brackish coastal tidal marshes of the "
        "Northeastern and southern United States, and in Bermuda.[6] It belongs "
        "to the monotypic genus Malaclemys. It has one of the largest ranges of "
        "all turtles in North America, stretching as far south as the Florida Keys "
        "and as far north as Cape Cod.[7] The name 'terrapin' is derived from the "
        "Algonquian word torope.[8] It applies to Malaclemys terrapin in both "
        "British English and American English. The name originally was used by "
        "early European settlers in North America to describe these brackish-water "
        "turtles that inhabited neither freshwater habitats nor the sea. It retains "
        "this primary meaning in American English.[8] In British English, however, "
        "other semi-aquatic turtle species, such as the red-eared slider, might "
        "also be called terrapins. The common name refers to the diamond pattern "
        "on top of its shell (carapace), but the overall pattern and coloration "
        "vary greatly. The shell is usually wider at the back than in the front, "
        "and from above it appears wedge-shaped. The shell coloring can vary "
        "from brown to grey, and its body color can be grey, brown, yellow, "
        "or white. All have a unique pattern of wiggly, black markings or spots "
        "on their body and head. The diamondback terrapin has large webbed "
        "feet.[9] The species is"
        )

In [14]:
from extended_watermark_processor import WatermarkLogitsProcessor

In [15]:
watermark_processor = WatermarkLogitsProcessor(vocab=list(tokenizer.get_vocab().values()),
                                               gamma=gamma,
                                               delta=delta,
                                               seeding_scheme=seeding_scheme,
                                               select_green_tokens=True)
                                            

In [108]:

torch.manual_seed(123)

<torch._C.Generator at 0x1260443d0>

In [31]:
use_sampling = False

In [64]:
gen_kwargs = dict(max_new_tokens=50) #keep this at 200 at all times (some reason if < 200 this behaves weird)

if use_sampling:
    gen_kwargs.update(dict(
        do_sample = True, 
        top_k = 0,
        temperature = 0.7
    ))
else:
    gen_kwargs.update(dict(
        num_beams = 4
    ))

In [60]:
tokd_input = tokenizer(prompt, return_tensors="pt", add_special_tokens=True, truncation=True).to(device)
truncation_warning = True if tokd_input["input_ids"].shape[-1] == 200 else False

In [111]:
output_with_watermark = model.generate(**tokd_input, 
                                       logits_processor=LogitsProcessorList([watermark_processor]), 
                                       **gen_kwargs)

In [116]:
decoded_watermark = tokenizer.batch_decode(output_with_watermark, skip_special_tokens=True)[0]

In [117]:
print(len(decoded_watermark))
torch.manual_seed(123)

2206


<torch._C.Generator at 0x1260443d0>

In [114]:


tokenizer = AutoTokenizer.from_pretrained('facebook/opt-1.3b')

#output_text = "This is not AI related. But can you detect if this has been watermarked?"#decoded_watermark[0]
watermark_detector = WatermarkDetector(vocab=list(tokenizer.get_vocab().values()),
                                        gamma=gamma,
                                        seeding_scheme=seeding_scheme,
                                        device='mps',
                                        tokenizer=tokenizer,
                                        z_threshold=4.0,
                                        normalizers="",
                                        ignore_repeated_ngrams=False,
                                        select_green_tokens=True)

score_dict = watermark_detector.detect(decoded_watermark)

In [120]:
print(score_dict)

{'num_tokens_scored': 513, 'num_green_tokens': 167, 'green_fraction': 0.3255360623781676, 'z_score': 3.951048749993008, 'p_value': 3.8904735617074984e-05, 'z_score_at_T': tensor([-0.5774, -0.8165, -1.0000, -1.1547, -1.2910, -1.4142, -1.5275, -1.6330,
        -1.7321, -1.8257, -1.9149, -1.3333, -0.8006, -0.9258, -1.0435, -1.1547,
        -1.2603, -1.3608, -1.4570, -1.0328, -1.1339, -0.7385, -0.8427, -0.9428,
        -1.0392, -1.1323, -1.2222, -1.3093, -1.3937, -1.4757, -1.1406, -1.2247,
        -1.3065, -0.9901, -0.6831, -0.7698, -0.8542, -0.9366, -0.6472, -0.7303,
        -0.8115, -0.8909, -0.6163, -0.6963, -0.7746, -0.8513, -0.9264, -1.0000,
        -0.7423, -0.8165, -0.8893, -0.9608, -0.7137, -0.4714, -0.5449, -0.6172,
        -0.6882, -0.7581, -0.8268, -0.5963, -0.6653, -0.7332, -0.8001, -0.8660,
        -0.6445, -0.7107, -0.7759, -0.5601, -0.6255, -0.6901, -0.7537, -0.8165,
        -0.8785, -0.9396, -1.0000, -1.0596, -1.1185, -1.1767, -1.2342, -1.2910,
        -1.0906, -1.1476, -0.

## Varying Gamma and analyzing its impact

##### Keep Z threshold = 4.0 score and Delta = 2.0 constant and vary Gamma

In [7]:
#Let's play around with Gamma and see what happens to the Z score in the detector. We'll use the same prompt
gamma = 0.25 # now green_list_size = |V| * 0.5 (green list half of vocab size)
delta = 4 #Maintaining the same delta

In [68]:
def generate_watermark(gamma=0.25, delta=2.0, num_beams=1, gen_kwargs=gen_kwargs):

    torch.manual_seed(123)

    watermark_processor = WatermarkLogitsProcessor(vocab=list(tokenizer.get_vocab().values()),
                                               gamma=gamma,
                                               delta=delta,
                                               seeding_scheme=seeding_scheme,
                                               select_green_tokens=True)
    
    tokd_input = tokenizer(prompt, return_tensors="pt", add_special_tokens=True, truncation=True).to(device)
    truncation_warning = True if tokd_input["input_ids"].shape[-1] == 200 else False  
    
    output_with_watermark = model.generate(**tokd_input, 
                                       logits_processor=LogitsProcessorList([watermark_processor]), 
                                       **gen_kwargs)
    
    decoded_watermark = tokenizer.batch_decode(output_with_watermark, skip_special_tokens=True)[0]

    return decoded_watermark

In [69]:
def detect_watermark(text_to_detect, gamma, z):

    torch.manual_seed(123)

    tokenizer = AutoTokenizer.from_pretrained('facebook/opt-1.3b')

    #output_text = "This is not AI related. But can you detect if this has been watermarked?"#decoded_watermark[0]
    watermark_detector = WatermarkDetector(vocab=list(tokenizer.get_vocab().values()),
                                        gamma=gamma,
                                        seeding_scheme=seeding_scheme,
                                        device='mps',
                                        tokenizer=tokenizer,
                                        z_threshold=z,
                                        normalizers="",
                                        ignore_repeated_ngrams=False,
                                        select_green_tokens=True)
    
    score_dict = watermark_detector.detect(text_to_detect)

    return score_dict

In [174]:
decoded_watermark = generate_watermark(gamma, delta)

KeyboardInterrupt: 

In [None]:
score_dict = detect_watermark(decoded_watermark, gamma, delta)

ZeroDivisionError: float division by zero

In [None]:
score_dict

{'num_tokens_scored': 513,
 'num_green_tokens': 412,
 'green_fraction': 0.8031189083820662,
 'z_score': 2.77847944354347,
 'p_value': 0.0027306984391784945,
 'z_score_at_T': tensor([-1.7321, -0.8165, -1.6667, -1.1547, -1.8074, -2.3570, -1.9640, -2.4495,
         -2.1170, -2.5560, -2.2630, -2.0000, -1.7614, -2.1602, -2.5342, -2.3094,
         -2.6605, -2.4495, -2.2517, -2.0656, -2.3938, -2.2156, -2.0466, -2.3570,
         -2.1939, -2.0381, -2.3333, -2.1822, -2.4659, -2.3190, -2.1776, -2.0412,
         -1.9096, -1.7823, -1.6590, -1.9245, -2.1831, -2.0605, -1.9415, -1.8257,
         -1.7132, -1.6036, -1.4968, -1.3926, -1.2910, -1.5323, -1.4317, -1.6667,
         -1.5671, -1.4697, -1.3744, -1.2810, -1.1896, -1.0999, -1.3234, -1.2344,
         -1.1471, -1.0613, -1.2778, -1.1926, -1.1088, -1.0265, -0.9456, -0.8660,
         -0.7877, -0.7107, -0.6348, -0.5601, -0.4865, -0.6901, -0.8907, -0.8165,
         -0.7433, -0.9396, -0.8667, -0.7947, -0.7237, -0.6537, -0.5846, -0.7746,
         -0.7057,

### Low Gamma and high Delta

#### The paper states that we can achieve a very strong watermark from a low gamma value (smaller greenlist size) and a higher bias (delta)

In [46]:
gamma = 0.15
delta = 4

In [47]:
decoded_watermark = generate_watermark(gamma, delta)

In [51]:
score_dict = detect_watermark(decoded_watermark, gamma, delta)

In [52]:
score_dict #Observation - a very high Z score indicating a stronger watermark.

{'num_tokens_scored': 513,
 'num_green_tokens': 221,
 'green_fraction': 0.43079922027290446,
 'z_score': 17.81145867798986,
 'p_value': 2.8794615316557132e-71,
 'z_score_at_T': tensor([-4.2008e-01, -5.9409e-01, -7.2761e-01, -8.4017e-01, -9.3934e-01,
         -1.0290e+00, -1.1114e+00, -1.1882e+00, -1.2603e+00, -1.3284e+00,
         -1.3933e+00, -6.4676e-01, -7.3790e-01, -8.2333e-01, -9.0388e-01,
         -9.8020e-01, -1.0528e+00, -1.1222e+00, -1.1886e+00, -1.2524e+00,
         -1.3139e+00, -7.7621e-01, -8.4674e-01, -9.1466e-01, -9.8020e-01,
         -1.0435e+00, -1.1049e+00, -1.1644e+00, -1.2221e+00, -1.2783e+00,
         -8.2994e-01, -8.9113e-01, -9.5065e-01, -1.0086e+00, -1.0651e+00,
         -1.1202e+00, -1.1740e+00, -1.2266e+00, -8.2963e-01, -8.8561e-01,
         -9.4035e-01, -9.9391e-01, -1.0463e+00, -1.0977e+00, -1.1481e+00,
         -1.1975e+00, -1.2459e+00, -1.2935e+00, -9.4019e-01, -9.9015e-01,
         -1.0392e+00, -1.0874e+00, -7.5014e-01, -4.1922e-01, -4.7203e-01,
         -

#### Maintain the low Gamma and high Delta, and now use Beam Search and validate the author's claim of Z score increase

##### Z score increases when beam search is used. From 2 beams to 4 beams, the z score jumped from 13.x to 17.x indicating a higher watermarking

In [65]:
decoded_watermark = generate_watermark(gamma, delta, num_beams=4)

In [66]:
score_dict = detect_watermark(decoded_watermark, gamma, delta)

In [67]:
score_dict

{'num_tokens_scored': 363,
 'num_green_tokens': 78,
 'green_fraction': 0.21487603305785125,
 'z_score': 3.461644830064026,
 'p_value': 0.00026844258081788394,
 'z_score_at_T': tensor([-0.4201, -0.5941, -0.7276, -0.8402, -0.9393, -1.0290, -1.1114, -1.1882,
         -1.2603, -1.3284, -1.3933, -0.6468, -0.7379, -0.8233, -0.9039, -0.9802,
         -1.0528, -1.1222, -1.1886, -1.2524, -1.3139, -0.7762, -0.8467, -0.9147,
         -0.9802, -1.0435, -1.1049, -1.1644, -1.2221, -1.2783, -0.8299, -0.8911,
         -0.9507, -1.0086, -1.0651, -1.1202, -1.1740, -1.2266, -0.8296, -0.8856,
         -0.9404, -0.9939, -1.0463, -1.0977, -1.1481, -1.1975, -1.2459, -1.2935,
         -0.9402, -0.9901, -1.0392, -1.0874, -0.7501, -0.4192, -0.4720, -0.5239,
         -0.5750, -0.6251, -0.6745, -0.3616, -0.4124, -0.4624, -0.5116, -0.5601,
         -0.6079, -0.6550, -0.7014, -0.7472, -0.7923, -0.8368, -0.8808, -0.9241,
         -0.9670, -1.0092, -1.0510, -1.0922, -1.1330, -1.1733, -1.2131, -1.2524,
         -0.980

#### Now let's keep delta at 5 amd vary Gamma from 0.1 to 0.9 (0.1, 0.25, 0.5, 0.75, 0.9) on a 4 way beam search
#### While also varying token size T (max_new_tokens) from 50 to 200 in steps of 50

In [85]:
def test_z_beam(delta, gammas, num_beams=4):
    
    res = []

    for max_new_tokens in range(50, 201, 50):

        gen_kwargs = dict(max_new_tokens=max_new_tokens)

        gen_kwargs.update(dict(
            num_beams = 4
        ))

        for gamma in gammas:
            score_dicts = {}
            
            decoded_watermark = generate_watermark(gamma=gamma, delta=delta, num_beams=num_beams, gen_kwargs=gen_kwargs)
            score_dict = detect_watermark(decoded_watermark, gamma, delta)

            score_dicts['gamma'] = gamma
            score_dicts['token_size'] = max_new_tokens
            score_dicts['delta'] = delta
            score_dicts['z_score'] = score_dict['z_score']
            score_dicts['p_value'] = score_dict['p_value']
            score_dicts['num_tokens_scored'] = score_dict['num_tokens_scored']
            score_dicts['num_green_tokens'] = score_dict['num_green_tokens']

            res.append(score_dicts)

    return res

In [86]:
res = test_z_beam(5, gammas=[0.1, 0.25, 0.5, 0.75, 0.9], num_beams=4)

In [88]:
res

[{'gamma': 0.1,
  'token_size': 50,
  'delta': 5,
  'z_score': 5.196152422706631,
  'p_value': 1.017277307272218e-07,
  'num_tokens_scored': 363,
  'num_green_tokens': 66},
 {'gamma': 0.25,
  'token_size': 50,
  'delta': 5,
  'z_score': 2.8181818181818183,
  'p_value': 0.0024148226279699673,
  'num_tokens_scored': 363,
  'num_green_tokens': 114},
 {'gamma': 0.5,
  'token_size': 50,
  'delta': 5,
  'z_score': 3.306642450813311,
  'p_value': 0.00047210664009389826,
  'num_tokens_scored': 363,
  'num_green_tokens': 213},
 {'gamma': 0.75,
  'token_size': 50,
  'delta': 5,
  'z_score': 1.303030303030303,
  'p_value': 0.09628220813361954,
  'num_tokens_scored': 363,
  'num_green_tokens': 283},
 {'gamma': 0.9,
  'token_size': 50,
  'delta': 5,
  'z_score': 0.9272595232439466,
  'p_value': 0.176895902320312,
  'num_tokens_scored': 363,
  'num_green_tokens': 332},
 {'gamma': 0.1,
  'token_size': 100,
  'delta': 5,
  'z_score': 12.416511609932599,
  'p_value': 1.0631624192359956e-35,
  'num_toke

In [97]:
import matplotlib.pyplot as plt
import pandas as pd

In [89]:
df = pd.DataFrame(res)

In [98]:
df.to_csv('./fig3_beams.csv')

#### Plotting the dependence of Z score on green list size parameter (gamma) with constant delta using a 4 way beam search

In [99]:
### Plotting the dependence of Z score on green list size (gamma) using multinomial sampling

In [100]:
def test_z_sampling(delta, gammas):
    
    res = []

    for max_new_tokens in range(50, 201, 50):

        gen_kwargs = dict(max_new_tokens=max_new_tokens)

        gen_kwargs.update(dict(
            do_sample = True, 
            top_k = 0,
            temperature = 0.7
         ))

        for gamma in gammas:
            score_dicts = {}
            
            decoded_watermark = generate_watermark(gamma=gamma, delta=delta, gen_kwargs=gen_kwargs)
            score_dict = detect_watermark(decoded_watermark, gamma, delta)

            score_dicts['gamma'] = gamma
            score_dicts['token_size'] = max_new_tokens
            score_dicts['delta'] = delta
            score_dicts['z_score'] = score_dict['z_score']
            score_dicts['p_value'] = score_dict['p_value']
            score_dicts['num_tokens_scored'] = score_dict['num_tokens_scored']
            score_dicts['num_green_tokens'] = score_dict['num_green_tokens']

            res.append(score_dicts)

    return res

In [101]:
res = test_z_sampling(delta=5, gammas=[0.1, 0.25, 0.5, 0.75, 0.9])

In [102]:
df = pd.DataFrame(res)
df.to_csv('./fig3_sampling.csv')