In [1]:
import torch
from transformers import AutoTokenizer
from transformers.generation.logits_process import LogitsProcessor, LogitsProcessorList
from transformers import GPT2LMHeadModel
import random
import numpy as np
import sys
from watermark import MyWatermarkedModel
from watermark import MyWatermarkLogitsProcessor

  cpu = _conversion_method_template(device=torch.device("cpu"))
  from .autonotebook import tqdm as notebook_tqdm


In [97]:
def query_model(input_str, model, tokenizer, max_new_tokens):
    """Generate a continuation of ``input_str`` and return only the new tokens.

    Args:
        input_str: Prompt text to tokenize and feed to the model.
        model: Object exposing ``generate(**inputs, ...)`` whose result has a
            ``sequences`` attribute (prompt ids followed by generated ids).
        tokenizer: Callable tokenizer that also provides
            ``convert_ids_to_tokens``.
        max_new_tokens: Upper bound on the number of tokens to generate.

    Returns:
        The result of ``tokenizer.convert_ids_to_tokens`` for the generated
        ids (the prompt tokens are excluded).
    """
    inputs = tokenizer(input_str, return_tensors="pt")
    n_input_tokens = len(inputs.input_ids[0])
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, return_dict_in_generate=True,
                                output_scores=True)
    # With return_dict_in_generate=True the ids are on `.sequences`; the
    # output object itself cannot be indexed like a 2-D tensor.
    # Row 0 is used because a single prompt is passed in.
    new_token_ids = outputs.sequences[0, n_input_tokens:]

    # Plain Python ints keep the conversion independent of tensor handling.
    return tokenizer.convert_ids_to_tokens(new_token_ids.tolist())

def approximate_sampling(input_str, model, tokenizer, max_new_tokens):
    """Print the cumulative distribution of the first entry of the scores.

    Exploratory helper: it runs generation on ``input_str``, takes
    ``outputs.scores[0]``, turns it into probabilities and prints their
    running sum along the last axis.

    Returns:
        Always ``0``; the useful output is what gets printed.
    """
    encoded = tokenizer(input_str, return_tensors="pt")
    generation = model.generate(
        **encoded,
        max_new_tokens=max_new_tokens,
        return_dict_in_generate=True,
        output_scores=True,
    )
    # Index 0 of `scores` is presumably the first generated position --
    # confirm against the generate() implementation in use.
    first_step = generation.scores[0]
    probabilities = torch.softmax(first_step.clone(), dim=-1)
    print(torch.cumsum(probabilities, dim=-1))
    return 0

In [87]:
tokenizer = AutoTokenizer.from_pretrained("distilbert/distilgpt2")
# print(tokenizer.vocab)
# Load the pickled model object from ./watermarked_model.pt.
# SECURITY: the file is a full pickle, so loading it can execute arbitrary
# code -- only load checkpoints from a trusted source.
# `weights_only=False` is passed explicitly because unpickling a whole model
# object (rather than a state dict) needs it on PyTorch releases where the
# default is True.
model = torch.load("./watermarked_model.pt", weights_only=False)


  model = torch.load("./watermarked_model.pt")


In [122]:
def generate_ranges(prompt, model, tokenizer, max_new_tokens=4):
    """Bound the sampling draw behind each generated token of ``prompt``.

    For every generated token except the last one, the token's slice of the
    cumulative probability distribution is recorded as ``(lower, upper)``.
    This assumes the model picks token ``t`` when its random draw lands
    between ``cdf[t-1]`` and ``cdf[t]`` -- TODO confirm against the model's
    sampling code.

    Args:
        prompt: Prompt text.
        model: Object exposing ``generate`` whose result has ``sequences``
            and ``scores``.
        tokenizer: Callable tokenizer returning ``input_ids``.
        max_new_tokens: Number of tokens to generate. The final generated
            token is discarded (it is not watermarked for some reason), so
            ``max_new_tokens - 1`` ranges are returned. Defaults to 4, i.e.
            3 ranges.

    Returns:
        A list of ``max_new_tokens - 1`` ``(lower, upper)`` float tuples.
        Positions for which no token was generated (generation stopped
        early) hold the unconstrained interval ``(0.0, 1.0)``.
    """
    inputs = tokenizer(prompt, return_tensors="pt")

    n_input_tokens = len(inputs.input_ids[0])
    # model.reset_seed()
    outputs = model.generate(**inputs, max_new_tokens=max_new_tokens, return_dict_in_generate=True,
                                output_scores=True)
    # Keep only the generated ids and drop the last one (see docstring).
    new_token_ids = outputs.sequences.flatten()[n_input_tokens:-1].tolist()

    # Pre-fill with the neutral interval rather than a bare 0: a position with
    # no generated token must still be a valid range for later intersection.
    ranges = [(0.0, 1.0)] * (max_new_tokens - 1)

    for i, token in enumerate(new_token_ids):
        probabilities = outputs.scores[i].softmax(dim=-1)
        cumsum = torch.cumsum(probabilities, dim=-1).flatten()
        # Token 0 has no predecessor in the CDF, so its slice starts at 0.
        lower = cumsum[token - 1].item() if token > 0 else 0.0
        ranges[i] = (lower, cumsum[token].item())

    return ranges

In [133]:
def intersect_ranges_helper(range1, range2):
    """Intersect two ``(start, end)`` intervals.

    Args:
        range1: ``(start, end)`` pair, or ``None`` for an interval that is
            already empty.
        range2: ``(start, end)`` pair, or ``None`` for an interval that is
            already empty.

    Returns:
        The overlapping ``(start, end)`` pair, or ``None`` when the
        intervals do not overlap (or either input is ``None``).
    """
    # An earlier empty intersection comes back in as None; keep it empty
    # instead of failing on `None[0]`.
    if range1 is None or range2 is None:
        return None

    start = max(range1[0], range2[0])
    end = min(range1[1], range2[1])

    if start < end:
        return (start, end)

    print("No intersection")
    return None

def intersect_ranges(ranges1, ranges2):
    """Intersect two lists of ranges position by position.

    The result has one entry per element of ``ranges1``; entry ``k`` is
    ``intersect_ranges_helper(ranges1[k], ranges2[k])``.
    """
    return [
        intersect_ranges_helper(left, ranges2[position])
        for position, left in enumerate(ranges1)
    ]

In [141]:
def crack_sampling(prompts_path="prompts.txt"):
    """Estimate the per-position sampling draws from a file of prompts.

    Each non-empty line of ``prompts_path`` is used as a prompt. The ranges
    from ``generate_ranges`` for every prompt are intersected position by
    position, and the midpoint of each surviving range is returned.

    Uses the notebook-level ``model`` and ``tokenizer``.

    Args:
        prompts_path: Newline-separated prompt file. Defaults to
            ``"prompts.txt"``.

    Returns:
        One value per position: the mean of the final ``(lower, upper)``
        range, or ``None`` where the intersection ended up empty.

    Raises:
        ValueError: If the file contains no non-empty prompt.
    """
    with open(prompts_path, "r") as f:
        # Strip only the line terminator: slicing off the last character
        # would truncate a final line that has no trailing newline.
        prompts = [line.rstrip("\n") for line in f]
    # Blank lines are not prompts.
    prompts = [prompt for prompt in prompts if prompt]
    if not prompts:
        raise ValueError(f"no prompts found in {prompts_path!r}")

    ranges = generate_ranges(prompts[0], model, tokenizer)
    for prompt in prompts[1:]:
        new_ranges = generate_ranges(prompt, model, tokenizer)
        ranges = intersect_ranges(ranges, new_ranges)

    # An empty intersection is reported as None; it has no midpoint.
    return [None if r is None else np.mean(r) for r in ranges]

# Run the attack over every prompt in prompts.txt and show the recovered
# per-position midpoints.
print(crack_sampling())

Reset seed
tensor([2029,  262, 1692])
Reset seed
tensor([1903,  287,  262])
Reset seed
tensor([2683,  319,  262])
Reset seed
tensor([ 673, 1297,  262])
Reset seed
tensor([2029,  532,  262])
Reset seed
tensor([11301,    11,   262])
Reset seed
tensor([784, 351, 257])
Reset seed
tensor([1576,  284,  428])
Reset seed
tensor([1497,   13,  198])
Reset seed
tensor([1342,  290,  407])
Reset seed
tensor([ 1035,   290, 12198])
Reset seed
tensor([960, 392, 262])
Reset seed
tensor([878, 314, 550])
Reset seed
tensor([10574,  4973,    11])
Reset seed
tensor([994, 287, 262])
Reset seed
tensor([1521,  339,  373])
Reset seed
tensor([878, 663, 736])
Reset seed
tensor([1595,  470,  787])
Reset seed
tensor([663, 898, 835])
Reset seed
tensor([ 777, 3815,   11])
Reset seed
tensor([1129,  339,  373])
Reset seed
tensor([4893,  287,  262])
Reset seed
tensor([764, 383, 717])
Reset seed
tensor([1846,  647,   83])
Reset seed
tensor([6596,  319,  262])
Reset seed
tensor([3830,  262,  599])
Reset seed
tensor([5228,

In [134]:
# Two-prompt sanity check: compute the per-position ranges for each prompt
# separately, print them, then intersect them.
prompt1 = "Dogs are mans best"
prompt2 = "Hello, my name"

range1 = generate_ranges(prompt1, model, tokenizer)
print(range1)
range2 = generate_ranges(prompt2, model, tokenizer)
print(range2)

# Left as the cell's last expression so the notebook displays the result.
intersect_ranges(range1, range2)

Reset seed
tensor([11067,   355,   262])
[(0.8903032541275024, 0.8904215693473816), (0.1481592357158661, 0.7627849578857422), (0.058266088366508484, 0.09776140749454498)]
Reset seed
tensor([ 373, 4323,   11])
[(0.8601219654083252, 0.897574782371521), (0.333407461643219, 0.33351337909698486), (0.002659732475876808, 0.081647589802742)]


[(0.8903032541275024, 0.8904215693473816),
 (0.333407461643219, 0.33351337909698486),
 (0.058266088366508484, 0.081647589802742)]

[0.8903685212135315, 0.33347730338573456, 0.06990344822406769]

In [143]:
# Ground-truth check: seed Python's global `random` module with the model's
# `__sk` attribute (accessed via its name-mangled form; presumably the
# secret key -- confirm in watermark.py) and print the first three draws for
# comparison with the recovered midpoints.
# NOTE: this reseeds the process-wide `random` state.
random.seed(model._MyWatermarkedModel__sk)
for i in range(3):
    print(random.random())

0.8903685790566194
0.33343684433351006
0.0698845774182344
