In [None]:
from modules.identity_lm_crypto import generate_keys_multi
from modules.detector import IdentityDetector
from modules.signer import IdentitySigner

# If you run this in Colab, uncomment and run the install commands below first, as Colab doesn't have these packages.
# %pip install pycryptodome
# %pip install transformers

import os
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import numpy as np

In [None]:
# Pick the compute device: use the GPU when CUDA is available, otherwise the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Tunable parameters for the demo.

# Length of the token-sequence chunks that get encrypted.
token_standard = 8

# Bit size of the secret primes created in the multi-prime RSA implementation.
arb_size = 16

# How far (in terms of token group) a generated chunk may differ from the ideal chunk.
allowed_distance = 3



In [None]:
# Generate the private/public key pair from the parameters set earlier.
private_key, public_key = generate_keys_multi(
    n_bits=token_standard,
    arb_size=arb_size,
)
# NOTE(review): the expression below echoes the private key into the cell output,
# which is saved with the notebook - confirm that is intended for this demo.
(private_key, public_key)

In [None]:
# Load the paraphrasing model and its tokenizer from Hugging Face.
# The checkpoint id is named once so the tokenizer and the model cannot drift apart
# when someone swaps in a different checkpoint.
MODEL_NAME = "humarin/chatgpt_paraphraser_on_T5_base"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_NAME).to(device)  # place the model on the selected device


In [None]:
# Build the signer and the detector from the keys, params, model and tokenizer.
# The signer is given both keys; the detector is only given the public key.
S = IdentitySigner(
    private_key,
    public_key,
    allowed_distance,
    token_standard,
    model,
    tokenizer,
    device,
)
D = IdentityDetector(
    public_key,
    allowed_distance,
    token_standard,
    model,
    tokenizer,
    device,
)

In [None]:
# Set prompt (or write your own!).
# Keep exactly one assignment active; uncomment a different example to try it.
# (Previously the first example was assigned and then immediately overwritten.)
# prompt="Mission to the Unknown is the only episode of Doctor Who that doesn't feature the Doctor at all. Instead it focuses on a security agent's efforts to warn Earth about an alien attack."
prompt = "Edward Dando was a thief who overate at food stalls and inns, then revealed that he had no money to pay. He was particularly fond of oysters, once eating 300 in a sitting."
# prompt="Atoms of radioactive elements can split. According to Albert Einstein, mass and energy are interchangeable under certain circumstances. When atoms split, the process is called nuclear fission. In this case, a small amount of mass is converted into energy. "


In [None]:
# The model expects prompts that look like "Paraphrase: <text>".
# The guard keeps this cell idempotent: re-running it must not stack the
# prefix a second time ("Paraphrase: Paraphrase: ...").
if not prompt.startswith("Paraphrase: "):
    prompt = f"Paraphrase: {prompt}"

In [None]:
# Sanity-check the prompt's character length before signing:
# it must exceed 12 * token_standard and stay under 600.
assert 12 * token_standard < len(prompt), "Prompt too short"
assert 600 > len(prompt), "Prompt too long for this particular paraphrasing model to be effective"

In [None]:
# Sign the prompt. rewrite() returns two values, kept here as
# signed_prompt and signed_tokens; the signed prompt is shown as the cell output.
signed_prompt, signed_tokens = S.rewrite(prompt)
signed_prompt

In [None]:
# Does the original prompt pass? This should fail >90% of the time.
# val is the overall pass/fail; truth_vals is the pass/fail for each chunk of
# token_standard length tokens in the prompt.
val, truth_vals = D.detect(prompt)

print(
    "This prompt was signed by the user"
    if val
    else "This prompt was not signed by the user"
)

# Per-chunk summary: number of passing chunks out of all chunk tests.
print(f"This prompt passed detection for {np.count_nonzero(truth_vals)} / {truth_vals.size} chunk tests")

In [None]:
# Does the encoded prompt pass? This should work >90% of the time.
# val is the overall pass/fail; truth_vals is the pass/fail for each chunk of
# token_standard length in the prompt.
val, truth_vals = D.detect(signed_prompt)

print(
    "This prompt was signed by the user"
    if val
    else "This prompt was not signed by the user"
)

# Per-chunk summary: number of passing chunks out of all chunk tests.
print(f"This prompt passed detection for {np.count_nonzero(truth_vals)} / {truth_vals.size} chunk tests")