In [1]:
import torch
from model import load_model, generate, detect
from topic_extractions import llm_topic_extraction
from inputs import sports_input, technology_input, animals_input, music_input, medicine_input
from pprint import pprint

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Any truthy value turns on the progress messages printed by the cells below.
DEBUG = 1

# Pick the CUDA device when PyTorch reports one as available, else the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
def get_token_mappings(total_tokens=100000, topics=None):
    """Partition the token ids ``0 .. total_tokens - 1`` round-robin over topics.

    Token id ``i`` is assigned to ``topics[i % len(topics)]``, so every topic
    receives an ascending list of ids spaced ``len(topics)`` apart.

    Args:
        total_tokens: Number of token ids to distribute. Defaults to 100000.
        topics: Ordered, duplicate-free sequence of topic names. Defaults to
            ``["sports", "animals", "technology", "music", "medicine"]``.

    Returns:
        dict mapping each topic name (in the given order) to its list of ids.

    Raises:
        ValueError: If ``topics`` is empty or contains duplicate names.
    """
    if topics is None:
        topics = ["sports", "animals", "technology", "music", "medicine"]
    topics = list(topics)

    if not topics:
        # The modulo assignment is undefined without at least one topic.
        raise ValueError("topics must contain at least one topic name")
    if len(set(topics)) != len(topics):
        # Duplicate names would silently merge two residue classes into one key.
        raise ValueError("topics must not contain duplicate names")

    stride = len(topics)
    # range(offset, total_tokens, stride) yields exactly the ids i with
    # i % stride == offset, in ascending order.
    return {
        topic: list(range(offset, total_tokens, stride))
        for offset, topic in enumerate(topics)
    }
# Topic -> token-id lists, stored in the settings under 'topic_token_mapping'.
token_mappings = get_token_mappings()

# Settings dictionary handed to load_model, generate and detect by the prompt
# cells further down. Keys are kept in their original order.
args = dict(
    demo_public=False,
    model_name_or_path="facebook/opt-1.3b",
    load_fp16=False,
    prompt_max_length=None,
    max_new_tokens=200,
    generation_seed=123,
    use_sampling=True,
    n_beams=1,
    sampling_temp=0.7,
    use_gpu=True,
    seeding_scheme="simple_1",
    gamma=0.4,
    delta=2.0,
    normalizers="",  # comma-separated string; the prompt cells split it into a list
    ignore_repeated_bigrams=False,
    detection_z_threshold=4.0,  # echoed in the "Detection result @ ..." lines
    select_green_tokens=True,
    skip_model_load=False,
    seed_separately=True,
    is_topic=True,  # truthy -> prompt cells call llm_topic_extraction
    topic_token_mapping=token_mappings,
    detected_topic="",
)

In [4]:
# Sports prompt: optional topic extraction, generation with and without the
# watermark, then detection on both generated texts.
input_text = sports_input()

# Turn the comma-separated 'normalizers' string into a list, but only while it
# is still a string. Re-splitting unconditionally raises AttributeError once a
# previous cell (or a re-run of this one) has left a non-empty list in place.
if isinstance(args['normalizers'], str):
    args['normalizers'] = args['normalizers'].split(",") if args['normalizers'] else []

# NOTE(review): every prompt cell reloads the model; consider loading it once
# in a dedicated cell if load_model has no per-prompt state - confirm first.
model, tokenizer = load_model(args)

# Topic extraction is skipped entirely when the 'is_topic' flag is falsy.
detected_topics = llm_topic_extraction(input_text) if args['is_topic'] else []

if DEBUG:
    print(f"Topic extraction is finished for watermarking: {detected_topics}")

print(f"Prompt:\n {input_text}")

(
    redecoded_input,
    truncation_warning,
    decoded_output_without_watermark,
    decoded_output_with_watermark,
) = generate(
    input_text,
    detected_topics,
    args,
    model=model,
    tokenizer=tokenizer,
)

if DEBUG:
    print("Decoding with and without watermarkings are finished")

# Detection receives the prompt concatenated with the completion, plus the
# completion on its own.
input_prompt = input_text + decoded_output_without_watermark

without_watermark_detection_result = detect(
    input_prompt,
    decoded_output_without_watermark,
    args,
    device=device,
    tokenizer=tokenizer,
)
if DEBUG:
    print("Finished without watermark detection")

input_prompt = input_text + decoded_output_with_watermark

with_watermark_detection_result = detect(
    input_prompt,
    decoded_output_with_watermark,
    args,
    device=device,
    tokenizer=tokenizer,
)
if DEBUG:
    print("Finished with watermark detection")


print("#########################################")
print("Output without watermark:")
print(decoded_output_without_watermark)
print("#########################################")
print(f"Detection result @ {args['detection_z_threshold']}:")
pprint(without_watermark_detection_result)
print("#########################################")

print("#########################################")
print("Output with watermark:")
print(decoded_output_with_watermark)
print("#########################################")
print(f"Detection result @ {args['detection_z_threshold']}:")
pprint(with_watermark_detection_result)
print("#########################################")

Topic extraction is finished for watermarking: ['sports', 'mental health', 'physical activity', 'social integration', 'team sports']
Prompt:
 Sports have been an integral part of human culture for centuries, serving as a means of entertainment, physical fitness, and social interaction. They are not merely games but vital activities that contribute to the holistic development of individuals and communities. The significance of sports transcends the boundaries of competition, impacting physical health, mental well-being, social cohesion, and even economic growth.
Engaging in sports is one of the most effective ways to maintain physical health. Regular participation in physical activities helps in the prevention of chronic diseases such as obesity, cardiovascular diseases, diabetes, and hypertension. Sports improve cardiovascular fitness, strengthen muscles, enhance flexibility, and boost overall stamina. For children and adolescents, sports are crucial for developing healthy growth patte

In [5]:
# Technology prompt: optional topic extraction, generation with and without
# the watermark, then detection on both generated texts.
input_text = technology_input()

# Turn the comma-separated 'normalizers' string into a list, but only while it
# is still a string. Re-splitting unconditionally raises AttributeError once a
# previous cell (or a re-run of this one) has left a non-empty list in place.
if isinstance(args['normalizers'], str):
    args['normalizers'] = args['normalizers'].split(",") if args['normalizers'] else []

# NOTE(review): every prompt cell reloads the model; consider loading it once
# in a dedicated cell if load_model has no per-prompt state - confirm first.
model, tokenizer = load_model(args)

# Topic extraction is skipped entirely when the 'is_topic' flag is falsy.
detected_topics = llm_topic_extraction(input_text) if args['is_topic'] else []

if DEBUG:
    print(f"Topic extraction is finished for watermarking: {detected_topics}")

print(f"Prompt:\n {input_text}")

(
    redecoded_input,
    truncation_warning,
    decoded_output_without_watermark,
    decoded_output_with_watermark,
) = generate(
    input_text,
    detected_topics,
    args,
    model=model,
    tokenizer=tokenizer,
)

if DEBUG:
    print("Decoding with and without watermarkings are finished")

# Detection receives the prompt concatenated with the completion, plus the
# completion on its own.
input_prompt = input_text + decoded_output_without_watermark

without_watermark_detection_result = detect(
    input_prompt,
    decoded_output_without_watermark,
    args,
    device=device,
    tokenizer=tokenizer,
)
if DEBUG:
    print("Finished without watermark detection")

input_prompt = input_text + decoded_output_with_watermark

with_watermark_detection_result = detect(
    input_prompt,
    decoded_output_with_watermark,
    args,
    device=device,
    tokenizer=tokenizer,
)
if DEBUG:
    print("Finished with watermark detection")


print("#########################################")
print("Output without watermark:")
print(decoded_output_without_watermark)
print("#########################################")
print(f"Detection result @ {args['detection_z_threshold']}:")
pprint(without_watermark_detection_result)
print("#########################################")

print("#########################################")
print("Output with watermark:")
print(decoded_output_with_watermark)
print("#########################################")
print(f"Detection result @ {args['detection_z_threshold']}:")
pprint(with_watermark_detection_result)
print("#########################################")

Topic extraction is finished for watermarking: ['technology', 'education', 'healthcare', 'entertainment', 'communication', 'business', 'globalization', 'internet', 'social media', 'web 2.0']
Prompt:
 Technology has revolutionized the way we live, work, and interact, becoming an indispensable part of modern life. It encompasses a broad range of tools, machines, and systems that enhance human capabilities and simplify everyday tasks. The impact of technology extends beyond mere convenience, influencing every aspect of society from communication and education to healthcare and entertainment.
The advancements in technology have significantly improved communication. The advent of the internet, smartphones, and social media platforms has made it possible for people to connect instantly across the globe. This has not only facilitated personal connections but has also enabled businesses to operate more efficiently, expanding their reach and fostering global trade.
In the realm of education, te

In [6]:
# Animals prompt: optional topic extraction, generation with and without the
# watermark, then detection on both generated texts.
input_text = animals_input()

# Turn the comma-separated 'normalizers' string into a list, but only while it
# is still a string. Re-splitting unconditionally raises AttributeError once a
# previous cell (or a re-run of this one) has left a non-empty list in place.
if isinstance(args['normalizers'], str):
    args['normalizers'] = args['normalizers'].split(",") if args['normalizers'] else []

# NOTE(review): every prompt cell reloads the model; consider loading it once
# in a dedicated cell if load_model has no per-prompt state - confirm first.
model, tokenizer = load_model(args)

# Topic extraction is skipped entirely when the 'is_topic' flag is falsy.
detected_topics = llm_topic_extraction(input_text) if args['is_topic'] else []

if DEBUG:
    print(f"Topic extraction is finished for watermarking: {detected_topics}")

print(f"Prompt:\n {input_text}")

(
    redecoded_input,
    truncation_warning,
    decoded_output_without_watermark,
    decoded_output_with_watermark,
) = generate(
    input_text,
    detected_topics,
    args,
    model=model,
    tokenizer=tokenizer,
)

if DEBUG:
    print("Decoding with and without watermarkings are finished")

# Detection receives the prompt concatenated with the completion, plus the
# completion on its own.
input_prompt = input_text + decoded_output_without_watermark

without_watermark_detection_result = detect(
    input_prompt,
    decoded_output_without_watermark,
    args,
    device=device,
    tokenizer=tokenizer,
)
if DEBUG:
    print("Finished without watermark detection")

input_prompt = input_text + decoded_output_with_watermark

with_watermark_detection_result = detect(
    input_prompt,
    decoded_output_with_watermark,
    args,
    device=device,
    tokenizer=tokenizer,
)
if DEBUG:
    print("Finished with watermark detection")


print("#########################################")
print("Output without watermark:")
print(decoded_output_without_watermark)
print("#########################################")
print(f"Detection result @ {args['detection_z_threshold']}:")
pprint(without_watermark_detection_result)
print("#########################################")

print("#########################################")
print("Output with watermark:")
print(decoded_output_with_watermark)
print("#########################################")
print(f"Detection result @ {args['detection_z_threshold']}:")
pprint(with_watermark_detection_result)
print("#########################################")

Topic extraction is finished for watermarking: ['animals', 'conservation', 'culture', 'economics', 'mental health', 'sociology']
Prompt:
 Animals have played a crucial role in the natural world and human society for millennia, contributing to the balance of ecosystems and offering companionship and support. The significance of animals extends beyond their biological roles, influencing cultural practices, economies, and even mental health. Understanding and appreciating the importance of animals is essential for fostering a sustainable and humane relationship with the natural world.
Animals are vital for maintaining ecological balance. They play key roles in various ecosystems, such as pollination, seed dispersal, and pest control. Predatory animals help regulate prey populations, preventing overgrazing and ensuring the health of plant communities. The loss of animal species can lead to the disruption of these natural processes, highlighting the need for conservation efforts.
Domesticat

In [7]:
# Music prompt: optional topic extraction, generation with and without the
# watermark, then detection on both generated texts.
input_text = music_input()

# Turn the comma-separated 'normalizers' string into a list, but only while it
# is still a string. Re-splitting unconditionally raises AttributeError once a
# previous cell (or a re-run of this one) has left a non-empty list in place.
if isinstance(args['normalizers'], str):
    args['normalizers'] = args['normalizers'].split(",") if args['normalizers'] else []

# NOTE(review): every prompt cell reloads the model; consider loading it once
# in a dedicated cell if load_model has no per-prompt state - confirm first.
model, tokenizer = load_model(args)

# Topic extraction is skipped entirely when the 'is_topic' flag is falsy.
detected_topics = llm_topic_extraction(input_text) if args['is_topic'] else []

if DEBUG:
    print(f"Topic extraction is finished for watermarking: {detected_topics}")

print(f"Prompt:\n {input_text}")

(
    redecoded_input,
    truncation_warning,
    decoded_output_without_watermark,
    decoded_output_with_watermark,
) = generate(
    input_text,
    detected_topics,
    args,
    model=model,
    tokenizer=tokenizer,
)

if DEBUG:
    print("Decoding with and without watermarkings are finished")

# Detection receives the prompt concatenated with the completion, plus the
# completion on its own.
input_prompt = input_text + decoded_output_without_watermark

without_watermark_detection_result = detect(
    input_prompt,
    decoded_output_without_watermark,
    args,
    device=device,
    tokenizer=tokenizer,
)
if DEBUG:
    print("Finished without watermark detection")

input_prompt = input_text + decoded_output_with_watermark

with_watermark_detection_result = detect(
    input_prompt,
    decoded_output_with_watermark,
    args,
    device=device,
    tokenizer=tokenizer,
)
if DEBUG:
    print("Finished with watermark detection")


print("#########################################")
print("Output without watermark:")
print(decoded_output_without_watermark)
print("#########################################")
print(f"Detection result @ {args['detection_z_threshold']}:")
pprint(without_watermark_detection_result)
print("#########################################")

print("#########################################")
print("Output with watermark:")
print(decoded_output_with_watermark)
print("#########################################")
print(f"Detection result @ {args['detection_z_threshold']}:")
pprint(with_watermark_detection_result)
print("#########################################")

Topic extraction is finished for watermarking: ['music', 'music therapy', 'mental health', 'emotional well-being', 'social connections']
Prompt:
 Music has been an essential part of human culture for as long as history can trace, serving not only as a form of artistic expression but also as a powerful tool for emotional and social connection. It transcends cultural and linguistic boundaries, offering a universal language that resonates with people across the world. The significance of music extends beyond entertainment, deeply influencing emotional states, cognitive functions, and even physical health. It is a medium through which individuals can express emotions, tell stories, and connect with others on a profound level.
Listening to or creating music can have a multitude of benefits for mental well-being. Music therapy is widely recognized for its ability to reduce stress, alleviate anxiety, and combat depression. The rhythms and melodies of music can evoke a range of emotions, from 

In [8]:
# Medicine prompt: optional topic extraction, generation with and without the
# watermark, then detection on both generated texts.
input_text = medicine_input()

# Turn the comma-separated 'normalizers' string into a list, but only while it
# is still a string. Re-splitting unconditionally raises AttributeError once a
# previous cell (or a re-run of this one) has left a non-empty list in place.
if isinstance(args['normalizers'], str):
    args['normalizers'] = args['normalizers'].split(",") if args['normalizers'] else []

# NOTE(review): every prompt cell reloads the model; consider loading it once
# in a dedicated cell if load_model has no per-prompt state - confirm first.
model, tokenizer = load_model(args)

# Topic extraction is skipped entirely when the 'is_topic' flag is falsy.
detected_topics = llm_topic_extraction(input_text) if args['is_topic'] else []

if DEBUG:
    print(f"Topic extraction is finished for watermarking: {detected_topics}")

print(f"Prompt:\n {input_text}")

(
    redecoded_input,
    truncation_warning,
    decoded_output_without_watermark,
    decoded_output_with_watermark,
) = generate(
    input_text,
    detected_topics,
    args,
    model=model,
    tokenizer=tokenizer,
)

if DEBUG:
    print("Decoding with and without watermarkings are finished")

# Detection receives the prompt concatenated with the completion, plus the
# completion on its own.
input_prompt = input_text + decoded_output_without_watermark

without_watermark_detection_result = detect(
    input_prompt,
    decoded_output_without_watermark,
    args,
    device=device,
    tokenizer=tokenizer,
)
if DEBUG:
    print("Finished without watermark detection")

input_prompt = input_text + decoded_output_with_watermark

with_watermark_detection_result = detect(
    input_prompt,
    decoded_output_with_watermark,
    args,
    device=device,
    tokenizer=tokenizer,
)
if DEBUG:
    print("Finished with watermark detection")


print("#########################################")
print("Output without watermark:")
print(decoded_output_without_watermark)
print("#########################################")
print(f"Detection result @ {args['detection_z_threshold']}:")
pprint(without_watermark_detection_result)
print("#########################################")

print("#########################################")
print("Output with watermark:")
print(decoded_output_with_watermark)
print("#########################################")
print(f"Detection result @ {args['detection_z_threshold']}:")
pprint(with_watermark_detection_result)
print("#########################################")

Topic extraction is finished for watermarking: ['medicine', 'health', 'science', 'technology', 'medicine']
Prompt:
 Medicine has been a cornerstone of human advancement for millennia, playing a critical role in enhancing and preserving life. It is not just a field of scientific inquiry, but a vital practice that touches every aspect of human existence. The importance of medicine goes beyond the treatment of diseases; it encompasses the promotion of health, the prevention of illnesses, and the overall improvement of quality of life. The impact of medicine on society is profound, influencing not only physical well-being but also mental, social, and economic factors.
The advancements in medicine have led to the prevention and cure of many diseases that were once considered fatal. Vaccines, antibiotics, and advanced surgical techniques have dramatically increased life expectancy and reduced mortality rates worldwide. Beyond physical health, medicine plays a crucial role in mental health ca