In [1]:
from dataclasses import replace
import multiprocessing
# Select the "spawn" start method for all child processes created from this kernel.
# force=True keeps this cell re-runnable: without it, executing the cell a second
# time in the same kernel raises RuntimeError("context has already been set").
multiprocessing.set_start_method("spawn", force=True)

# Manager instance that is handed to the generator, paraphraser and rater helpers
# constructed in the cells below.
global_manager = multiprocessing.Manager()


In [2]:
# Model that produces the watermarked text, and the models used for paraphrasing.
target_model = "meta-llama/Llama-2-13b-chat-hf"
paraphrase_models = [
    "meta-llama/Meta-Llama-3-8B-Instruct",
    "meta-llama/Llama-2-13b-chat-hf",
]

# Device ids available to the helpers; the first `tensor_parallel_size` of them
# are handed to the generator.
tensor_parallel_size = 1
devices = ["0", "1", "2", "3"]

# Watermark strength: one watermark spec is built per value.
deltas = [1, 2, 4, 8]
# Watermark generator temperature.
temp = 0.7

In [3]:
from watermark_benchmark import ConfigSpec
from watermark_benchmark.utils.classes import WatermarkSpec

# Baseline field values passed to WatermarkSpec.from_dict. The `tokenizer`,
# `delta` and `temp` entries are placeholders: they are overridden for every
# spec when the `watermarks` list is built.
default_watermark_params = {
    "rng": "Internal",
    "hash_len": 3,
    "min_hash": None,
    "key_len": 4,
    "generator": "distributionshift",
    "tokenizer": "",
    "temp": 0,
    "delta": 4,
    "gamma": 0.5,
    "skip_prob": 0,
    "pvalue": 0.01,
    "verifiers": [
        {
            "verifier": "Theoretical",
            "empirical_method": "regular",
            "log": True,
            "gamma": 0,
        },
    ],
    "randomize": True,
    "offset": False,
}
# Benchmark configuration shared by the generator, paraphraser and rater helpers.
config = ConfigSpec(
    num_return_sequences=1,
    model=target_model,
    engine="vllm",
    baseline=True,
    watermark="watermark_specs",
    max_new_tokens=512,
    seed=42,
    hf_batch_size=16,
    paraphrase=True,
    dipper_processes=0,
    openai_processes=0,
    custom_processes=1,
    translate_processes=1,
    custom_only=True,
    threads=32,
    misspellings="static_data/misspellings.json",
    devices=devices,
    detections_per_gpu=16,
    quality_metric="llm_cot",
    # 0.95 when more than one device is listed, 0.45 otherwise.
    # NOTE(review): presumably the lower value leaves room for several helpers
    # sharing a single GPU -- confirm.
    gpu_memory_utilization=0.45 if len(devices) <= 1 else 0.95,
    dtype="bfloat16",
    custom_model_paths=paraphrase_models,
    custom_batch=1,
    custom_temperature=1.0,
    custom_max_new_tokens=512,
)
# One watermark spec per strength in `deltas`. Each spec is parsed afresh from
# the default parameters and then copied with the target model's tokenizer,
# the strength, and the generation temperature filled in.
watermarks = [
    replace(
        WatermarkSpec.from_dict(default_watermark_params),
        tokenizer=target_model,
        delta=strength,
        temp=temp,
    )
    for strength in deltas
]

In [4]:
from scripts.demo_helpers import WatermarkGenerator
# The generator receives the first `tensor_parallel_size` entries of `devices`.
generator = WatermarkGenerator(
    config,
    global_manager,
    devices[:tensor_parallel_size],
)

In [5]:
from scripts.demo_helpers import Paraphraser

# The paraphraser receives every device after the generator's share, except the
# last entry (which is passed to the rater).
# NOTE(review): this slice is empty when len(devices) < tensor_parallel_size + 2
# -- confirm Paraphraser copes with an empty device list.
paraphraser = Paraphraser(
    config,
    global_manager,
    devices[tensor_parallel_size:-1],
)

In [6]:
from scripts.demo_helpers import Rater

# The rater receives the last entry of `devices`.
# NOTE(review): this is a single device-id string, whereas the generator and
# paraphraser are given list slices -- confirm Rater expects a scalar here.
rater = Rater(
    config,
    global_manager,
    devices[-1],
)

In [7]:
from scripts.demo_helpers import prepare_prompts
# System prompt paired with every instruction below.
system_prompt = (
    "You are a helpful assistant that is trying to help the user with their task."
)

# User instructions to generate text for.
instructions = [
    "Write a poem about a sunset.",
    "Write a poem about a sunrise.",
    "Write a poem about a rainbow.",
    "Write a poem about a waterfall.",
]

# (instruction, system_prompt) pairs, in the same order as `instructions`.
raw_prompts = [(instruction, system_prompt) for instruction in instructions]
# Build the task list and the prompts for the target model from the raw
# (instruction, system prompt) pairs, the watermark specs and the temperature.
(all_tasks, prompts) = prepare_prompts(
    target_model,
    raw_prompts,
    watermarks,
    temp,
)

In [None]:
# Run generation; the result is unpacked as (watermarked, baseline), in that order.
watermarked_samples, baseline_samples = generator.generate(prompts, all_tasks)

In [None]:
# Paraphrase the watermarked samples only; baseline samples are not passed in.
paraphrased_samples = paraphraser.paraphrase(watermarked_samples)

In [None]:
# Rate the three sample collections in a single call, then rebind each name to
# the corresponding element of the result. `all_samples` keeps the full result.
# NOTE(review): this cell overwrites its own inputs, so re-running it feeds the
# previous output of rater.rate back into rater.rate.
all_samples = rater.rate(
    (baseline_samples, watermarked_samples, paraphrased_samples)
)
(baseline_samples, watermarked_samples, paraphrased_samples) = all_samples

In [None]:
# Show the first element of `paraphrased_samples` as this cell's output.
paraphrased_samples[0]

In [None]:
# Shut down all three helpers. The nested try/finally guarantees that every
# kill() is attempted even if an earlier one raises, so a failure in one helper
# cannot leave the others running. Exceptions still propagate to the notebook.
try:
    generator.kill()
finally:
    try:
        paraphraser.kill()
    finally:
        rater.kill()