In [45]:
from topic_watermark_processor import TopicWatermarkLogitsProcessor, TopicWatermarkDetector
from watermark_processor import WatermarkLogitsProcessor, WatermarkDetector
from transformers import (
        AutoTokenizer,
        AutoModelForSeq2SeqLM,
        AutoModelForCausalLM,
        LogitsProcessorList
    )
import torch
from functools import partial
from topic_extractions import llm_topic_extraction
from pprint import pprint
from model import generate, load_model

import random
import matplotlib.pyplot as plt

In [46]:
# Run on CUDA when torch reports it as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Single configuration dict passed to load_model / generate / detect.
# Other checkpoints that were tried for 'model_name_or_path':
#   'facebook/opt-125m', 'facebook/opt-2.7b', 'facebook/opt-6.7b', 'facebook/opt-13b'
# ('load_fp16' was also tried as True.)
args = {
    'demo_public': False,
    'model_name_or_path': 'facebook/opt-1.3b',
    'load_fp16': False,
    'prompt_max_length': None,
    'max_new_tokens': 170,
    'generation_seed': 123,
    'use_sampling': True,
    'n_beams': 1,
    'sampling_temp': 0.7,
    'use_gpu': True,
    'seeding_scheme': 'simple_1',
    'gamma': 0.70,
    'delta': 3.5,
    'normalizers': '',
    'ignore_repeated_bigrams': False,
    'detection_z_threshold': 2.0,
    'select_green_tokens': True,
    'skip_model_load': False,
    'seed_separately': True,
    'is_topic': True,
    # Every topic maps to one contiguous run of 22000 token ids.
    'topic_token_mapping': {
        topic: list(range(first_id, first_id + 22000))
        for topic, first_id in (
            ("sports", 0),
            ("animals", 22000),
            ("turtles", 44000),
            # Add more (topic, first token id) pairs as needed
        )
    },
    'detected_topic': "",
    'random_green_ratio': 0.1,
}

In [39]:
def modify_text(text, n_edits, edit_type='insert', rng=None):
    """Apply `n_edits` random word-level edits to `text` and return the result.

    The text is split on whitespace, edited, and re-joined with single spaces,
    so the original spacing and line breaks are not preserved.

    Args:
        text: The string to perturb.
        n_edits: Number of edit steps to attempt. Zero or a negative value
            performs no edits.
        edit_type: One of
            'insert' - insert a copy of a randomly chosen existing word at a
                       random position;
            'delete' - remove a randomly chosen word (never removes the last
                       remaining word);
            'modify' - overwrite a randomly chosen word with a copy of a
                       randomly chosen existing word.
        rng: Optional object exposing `randint` and `choice` (for example a
            seeded `random.Random`). Defaults to the module-level `random`
            functions, which is what the function always used before.

    Returns:
        The edited text. Text containing no words yields an empty string.

    Raises:
        ValueError: If `edit_type` is not one of the three supported values.
    """
    if edit_type not in ('insert', 'delete', 'modify'):
        # An unknown mode used to fall through every branch and return the
        # text unedited, which silently produces a flat robustness curve.
        raise ValueError(
            f"Unknown edit_type {edit_type!r}; expected 'insert', 'delete' or 'modify'."
        )

    rng = random if rng is None else rng
    words = text.split()

    if not words:
        # There is no word to duplicate, drop or overwrite. Without this guard
        # 'insert' and 'modify' crash inside choice()/randint().
        return ''

    for _ in range(n_edits):
        if edit_type == 'insert':
            # Position len(words) is allowed: it appends at the end.
            pos = rng.randint(0, len(words))
            words.insert(pos, rng.choice(words))
        elif edit_type == 'delete':
            # Always keep at least one word so the result is never empty.
            if len(words) > 1:
                words.pop(rng.randint(0, len(words) - 1))
        else:  # 'modify'
            pos = rng.randint(0, len(words) - 1)
            words[pos] = rng.choice(words)

    return ' '.join(words)

In [47]:
def detect(original_prompt, input_text, args, device=None, tokenizer=None):
    """Run topic-aware watermark detection on `input_text`.

    Topics are extracted from `original_prompt` with `llm_topic_extraction`,
    a `TopicWatermarkDetector` is built from the settings in `args`, and the
    detector's result for `input_text` is returned unchanged.

    Args:
        original_prompt: Text handed to `llm_topic_extraction` to obtain the
            topics passed to the detector.
        input_text: The text to test for a watermark.
        args: Configuration dict. Keys read here: 'gamma', 'seeding_scheme',
            'detection_z_threshold', 'normalizers', 'ignore_repeated_bigrams',
            'select_green_tokens', 'topic_token_mapping'.
        device: Forwarded to the detector as-is.
        tokenizer: Tokenizer providing `get_vocab()`; also forwarded to the
            detector. Required despite the `None` default.

    Returns:
        Whatever `TopicWatermarkDetector.detect` returns for `input_text`.

    Raises:
        ValueError: If `tokenizer` is None, or if `input_text` is too short
            relative to the detector's `min_prefix_len`.
    """
    if tokenizer is None:
        # Fail before the topic-extraction call instead of hitting an
        # AttributeError on `tokenizer.get_vocab()` afterwards.
        raise ValueError("detect() requires a tokenizer; got None.")

    detected_topics = llm_topic_extraction(original_prompt)
    print(f"FInished detected topics for generated text: {detected_topics}")
    watermark_detector = TopicWatermarkDetector(vocab=list(tokenizer.get_vocab().values()),
                                        gamma=args['gamma'],
                                        seeding_scheme=args['seeding_scheme'],
                                        device=device,
                                        tokenizer=tokenizer,
                                        z_threshold=args['detection_z_threshold'],
                                        normalizers=args['normalizers'],
                                        ignore_repeated_bigrams=args['ignore_repeated_bigrams'],
                                        select_green_tokens=args['select_green_tokens'],
                                        topic_token_mapping=args['topic_token_mapping'],
                                        )

    # NOTE(review): this compares the *character* length of the text with
    # `min_prefix_len`; confirm that is the intended unit.
    if len(input_text) - 1 <= watermark_detector.min_prefix_len:
        # Previously this path fell through to `return score_dict` with the
        # name never assigned, raising UnboundLocalError.
        raise ValueError(
            "input_text is too short to compute watermark scores "
            f"(length {len(input_text)}, min_prefix_len "
            f"{watermark_detector.min_prefix_len})."
        )

    return watermark_detector.detect(input_text, detected_topics=detected_topics)

In [48]:
def analyze_robustness(original_prompt, output, args, device, tokenizer, num_edits_list, edit_type='insert'):
    """Measure how detection scores change as `output` is randomly edited.

    For every edit count in `num_edits_list`, a freshly perturbed copy of
    `output` is produced with `modify_text` (edits are applied to the original
    `output` each time, not cumulatively) and scored with `detect`.

    Args:
        original_prompt: Forwarded to `detect` for topic extraction.
        output: The text to perturb and score.
        args, device, tokenizer: Forwarded to `detect`.
        num_edits_list: Iterable of edit counts to evaluate, in order.
        edit_type: Edit mode forwarded to `modify_text`.

    Returns:
        A `(green_fractions, z_scores)` pair of lists, one entry per edit
        count, in the same order as `num_edits_list`.
    """
    per_run = []

    for edit_count in num_edits_list:
        perturbed = modify_text(output, edit_count, edit_type)
        result = detect(original_prompt, perturbed, args, device, tokenizer)

        # The element at [0][0][1] is expected to be a string holding a dict
        # literal with the score fields.
        # NOTE(review): eval() executes arbitrary code if that string is ever
        # not under our control; consider ast.literal_eval once the detector's
        # output format is confirmed.
        parsed = eval(result[0][0][1])
        per_run.append((parsed['green_fraction'], parsed['z_score']))

    green_fractions = [green for green, _ in per_run]
    z_scores = [z for _, z in per_run]
    return green_fractions, z_scores

In [49]:
def plot_robustness(num_edits_list, green_fractions, z_scores):
    """Plot green fraction and z-score against the number of edits.

    Both series share the x axis; green fraction uses the left y axis (blue)
    and z-score uses a twin right y axis (red). The figure is shown with
    `plt.show()` and nothing is returned.

    Args:
        num_edits_list: X values (number of edits per measurement).
        green_fractions: Green-fraction value for each entry of `num_edits_list`.
        z_scores: Z-score value for each entry of `num_edits_list`.
    """
    fig, ax1 = plt.subplots()

    # Set the title on the primary axes and before tight_layout(), so the
    # layout pass reserves room for it. It was previously added through
    # plt.title() after twinx() and after the layout had been computed.
    ax1.set_title('Robustness Analysis')

    color = 'tab:blue'
    ax1.set_xlabel('Number of Edits')
    ax1.set_ylabel('Green Fraction', color=color)
    ax1.plot(num_edits_list, green_fractions, color=color)
    ax1.tick_params(axis='y', labelcolor=color)

    # Second y axis sharing the same x axis, for the z-score series.
    ax2 = ax1.twinx()
    color = 'tab:red'
    ax2.set_ylabel('Z-Score', color=color)
    ax2.plot(num_edits_list, z_scores, color=color)
    ax2.tick_params(axis='y', labelcolor=color)

    fig.tight_layout()
    plt.show()

In [34]:
# Prompt used for topic extraction and generation in the cells below: a short
# essay on the benefits of sports. It stops mid-sentence ("These skills are"),
# presumably so the model has a sentence to continue.
input_text = (
    "Sports have been an integral part of human culture for centuries, serving as a means of entertainment, "
    "physical fitness, and social interaction. They are not merely games but vital activities that contribute "
    "to the holistic development of individuals and communities. The significance of sports transcends the boundaries "
    "of competition, impacting physical health, mental well-being, social cohesion, and even economic growth.\n"
    "Engaging in sports is one of the most effective ways to maintain physical health. Regular participation in physical "
    "activities helps in the prevention of chronic diseases such as obesity, cardiovascular diseases, diabetes, and hypertension. "
    "Sports improve cardiovascular fitness, strengthen muscles, enhance flexibility, and boost overall stamina. For children "
    "and adolescents, sports are crucial for developing healthy growth patterns and preventing lifestyle-related diseases "
    "later in life.\n"
    "The mental health benefits of sports are equally profound. Physical activity triggers the release of endorphins, "
    "which are natural mood lifters. This can help reduce stress, anxiety, and depression. Sports also improve cognitive "
    "function, enhancing concentration, memory, and learning abilities. The discipline and focus required in sports "
    "can translate into improved academic and professional performance, fostering a sense of accomplishment and boosting self-esteem.\n"
    "Sports serve as a powerful tool for social integration. They bring people together, fostering a sense of community and belonging. "
    "Team sports, in particular, teach essential life skills such as teamwork, leadership, communication, and cooperation. These skills are"
)

In [13]:
# Obtain the model and tokenizer from the project's `load_model` helper,
# configured by `args`; both are reused by the generation and detection cells.
model, tokenizer = load_model(args)

In [14]:
# Extract topics from the prompt with the project's `llm_topic_extraction`
# helper and print them; the result is passed to `generate` below.
detected_topics = llm_topic_extraction(input_text)
print(detected_topics)

['sports', 'mental health', 'physical activity', 'social integration', 'team sports']


In [15]:
# Generate continuations of the prompt with the project's `generate` helper.
# It returns four values; only the two decoded outputs (without and with the
# watermark) are used afterwards.
redecoded_input, truncation_warning, decoded_output_without_watermark, decoded_output_with_watermark = generate(
    input_text, 
    detected_topics,
    args, 
    model=model, 
    tokenizer=tokenizer
)

# Show both continuations for a side-by-side comparison.
print(f"Output WITHOUT watermark:\n{decoded_output_without_watermark}\n")
print(f"Output WITH watermark:\n{decoded_output_with_watermark}")

Output WITHOUT watermark:
 especially important for children, especially those from disadvantaged backgrounds, who are less likely to have the opportunity to participate in sports.
While sports are an essential part of our lives, their impact is not enough to maintain a healthy lifestyle. The Global Sport and Well-Being Report, conducted by the World Health Organization (WHO), indicates that one in six people live with the burden of chronic diseases such as diabetes, cardiovascular disease, obesity, and hypertension.
The impact of these diseases is felt across the world, and they are largely preventable. The WHO estimates that 1.7 billion people are overweight or obese and, by 2030, the number of overweight and obese people in the world will increase by 230 million people. The WHO estimates that, in the next decade, the number of overweight and obese people will increase by 526 million people.
This

Output WITH watermark:
 particularly important when it comes to maintaining healthy and

In [50]:
# Robustness of detection against random word INSERTIONS.

# modify_text draws from Python's global `random` state; seed it so the edits
# (and therefore the plotted curve) are reproducible when the cell is re-run.
random.seed(args['generation_seed'])

num_edits_list = [0, 5, 10, 15, 20, 25, 30]
# Topic extraction sees prompt + watermarked continuation; only the
# continuation itself is edited and scored.
input_concat = input_text + decoded_output_with_watermark

green_fractions, z_scores = analyze_robustness(
    input_concat,
    decoded_output_with_watermark,
    args,
    device,
    tokenizer,
    num_edits_list,
    edit_type='insert'
)

plot_robustness(num_edits_list, green_fractions, z_scores)

FInished detected topics for generated text: ['sports', 'mental health', 'sports training', 'physical activity', 'social integration', 'gender and ethnicity', 'women in sports\nsource: https://www.ncbi.nlm.nih.gov/pmc/articles/pmc5695959']


TypeError: Population must be a sequence.  For dicts or sets, use sorted(d).

In [None]:
# Robustness of detection against random word DELETIONS.

# modify_text draws from Python's global `random` state; seed it so the edits
# (and therefore the plotted curve) are reproducible when the cell is re-run.
random.seed(args['generation_seed'])

num_edits_list = [0, 5, 10, 15, 20, 25, 30]
# Topic extraction sees prompt + watermarked continuation; only the
# continuation itself is edited and scored.
input_concat = input_text + decoded_output_with_watermark

green_fractions, z_scores = analyze_robustness(
    input_concat,
    decoded_output_with_watermark,
    args,
    device,
    tokenizer,
    num_edits_list,
    edit_type='delete'
)

plot_robustness(num_edits_list, green_fractions, z_scores)

In [None]:
# Robustness of detection against random word SUBSTITUTIONS.

# modify_text draws from Python's global `random` state; seed it so the edits
# (and therefore the plotted curve) are reproducible when the cell is re-run.
random.seed(args['generation_seed'])

num_edits_list = [0, 5, 10, 15, 20, 25, 30]
# Topic extraction sees prompt + watermarked continuation; only the
# continuation itself is edited and scored.
input_concat = input_text + decoded_output_with_watermark

green_fractions, z_scores = analyze_robustness(
    input_concat,
    decoded_output_with_watermark,
    args,
    device,
    tokenizer,
    num_edits_list,
    edit_type='modify'
)

plot_robustness(num_edits_list, green_fractions, z_scores)