# Zero-Width Insertion attack as described by:
- J. Kirchenbauer, J. Geiping, Y. Wen, J. Katz, I. Miers, and T. Goldstein, “A Watermark for Large Language Models,” arXiv.org, Jun. 06, 2023. https://arxiv.org/abs/2301.10226 (accessed Apr. 27, 2024).
- N. Boucher, I. Shumailov, R. Anderson, and N. Papernot, “Bad Characters: Imperceptible NLP Attacks,” arXiv.org, Dec. 10, 2021. https://arxiv.org/abs/2106.09898 (accessed Apr. 27, 2024).

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found under the read-only Kaggle input directory.
for folder, _, names_in_folder in os.walk('/kaggle/input'):
    paths_in_folder = [os.path.join(folder, name) for name in names_in_folder]
    for path in paths_in_folder:
        print(path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/watermarked-and-unwatermarked-text-truncated/data_trunk.csv


In [2]:
import json
import random

import numpy as np
import torch
import torch.nn.functional as F
from tqdm import tqdm


In [3]:
# Zero-width code points used by the insertion attack, written as escape literals.
ZWSP = "\u200b"  # U+200B ZERO WIDTH SPACE
ZWJ = "\u200d"  # U+200D ZERO WIDTH JOINER
ZWNJ = "\u200c"  # U+200C ZERO WIDTH NON-JOINER

In [4]:
def inject_invisible_chars(text, invisible_chars=None, max_insertions=25, rng=None):
    """
    Injects invisible characters into the text at random positions.

    Exactly ``max_insertions`` characters are inserted (none when the value is
    zero or negative). Each insertion lands in front of an existing character,
    so the last character of the input is always the last character of the
    result.

    Args:
      text: The original text to modify.
      invisible_chars: A sequence of Unicode characters with minimal visual
        presence. When None, the module-level ZWJ, ZWSP and ZWNJ constants
        are used.
      max_insertions: The number of invisible characters to inject.
      rng: Optional object exposing ``choice`` and ``randint`` (for example a
        seeded ``random.Random``) used to draw characters and positions. When
        None, the global ``random`` module is used.

    Returns:
      A new string with invisible characters inserted at random positions.
      An empty ``text`` is returned unchanged.
    """
    import random

    # Resolve defaults at call time rather than sharing one mutable default list.
    if invisible_chars is None:
        invisible_chars = (ZWJ, ZWSP, ZWNJ)
    if rng is None:
        rng = random

    # An empty text has no valid insertion point: randint(0, -1) would raise.
    if not text:
        return text

    candidate = list(text)  # Character list so insertions are cheap to express
    for _ in range(max_insertions):
        # Draw the character first, then the position (keeps the RNG call order stable)
        invisible_char = rng.choice(invisible_chars)
        # Upper bound len - 1 means the character is never appended after the last one
        insertion_point = rng.randint(0, len(candidate) - 1)
        candidate.insert(insertion_point, invisible_char)
    return ''.join(candidate)
# original_text = "This is a sample text."
# max_insertions = 80

# modified_text = inject_invisible_chars(original_text, max_insertions=max_insertions)

# print("Original Text:", original_text)
# print("Modified Text:", modified_text)

In [5]:
def find_invisible_chars(text, invisible_chars=None):
    """
    Finds the positions of invisible characters in the text.

    Args:
      text: The text to check for invisible characters.
      invisible_chars: Optional iterable of single characters to look for.
        When None, the module-level ZWSP, ZWJ and ZWNJ constants are used.

    Returns:
      A list of indices, in ascending order, where invisible characters are
      found. The list is empty when none are present.
    """
    # Resolve the default at call time so callers can search for other characters too.
    if invisible_chars is None:
        invisible_chars = (ZWSP, ZWJ, ZWNJ)
    targets = frozenset(invisible_chars)
    return [index for index, char in enumerate(text) if char in targets]

# text = f"This is a sample text with {ZWSP} an{ZWJ}s invisible {ZWNJ} character."
# invisible_positions = find_invisible_chars(text)

# if invisible_positions:
#     print("Invisible characters found at positions:", invisible_positions)
# else:
#     print("No invisible characters found in the text.")


In [6]:
# Read the labelled generations from CSV
data = pd.read_csv("/kaggle/input/watermarked-and-unwatermarked-text-truncated/data_trunk.csv")

# Keep only the rows labelled "watermarked"; no row limit is applied here
filtered_data = data[data["label"] == "watermarked"][["Generated Text"]]

# Seed the global RNG that inject_invisible_chars draws from, so that
# re-running this cell writes the same attacked_text.csv every time
ATTACK_SEED = 0
random.seed(ATTACK_SEED)

# One zero-width character is inserted per this many original characters
CHARS_PER_INSERTION = 5

# Attacked version of each watermarked text, in the same order as filtered_data
attacked_text = []

# Loop through the data using tqdm for progress bar
for text in tqdm(filtered_data["Generated Text"].tolist()):
    # Scale the number of insertions with the length of the text
    max_insertions = len(text)//CHARS_PER_INSERTION
    modified = inject_invisible_chars(text, max_insertions=max_insertions)
    attacked_text.append(modified)

# Create a new dataframe pairing each original text with its attacked version
df = pd.DataFrame({"Original Text": filtered_data["Generated Text"], "Attacked Text": attacked_text})

# Save dataframe as CSV file
df.to_csv("attacked_text.csv", index=False)

print("Attack complete! attacked data saved to attacked_text.csv")

100%|██████████| 1000/1000 [00:00<00:00, 3418.94it/s]


Attack complete! attacked data saved to attacked_text.csv


In [7]:
# Reload the attacked samples from the CSV in the working directory
data = pd.read_csv("/kaggle/working/attacked_text.csv")
# Only the first attacked sample is inspected
text = data["Attacked Text"].iloc[0]
print(text)
invisible_positions = find_invisible_chars(text)

# Report the indices of any zero-width characters found in the reloaded sample
if not invisible_positions:
    print("No invisible characters found in the text.")
else:
    print("Invisible characters found at positions:", invisible_positions)


So‍m​e c‍om‌pan‍ies fail​ed to p‌ay t‌heir wo‍rkers during the lo‌‌c‌kdown​ period. It ‍also became clear‌ t​hat many jobs were no‍t‌ ​safe to exp​ec‍t‍. RBS‌ w‍a​s part​ o‌f‌ th​‍is‌.
D‍r. ​​Jonas Geinas:
Speaking to R​‍int​a,‍ based ‍on data​‍‌ from its ING‍O website,​ ‌it i​s har​d‍‍‌​ ​t‌o ​bel‌ieve that their #OS‍HA meeting‌ o‍n T‍ue‍sday, 17‌th Jun‌e turned out to imp​ress many‌ ​​industria‌li​sts who would‍ have ‌b​e‌en wise to do the ‌s‍ame a‌nd p​ut​ out‍ an
Invisible characters found at positions: [2, 4, 8, 11, 15, 24, 32, 37, 45, 65, 66, 68, 74, 87, 105, 108, 130, 132, 134, 146, 149, 151, 157, 160, 162, 169, 172, 174, 178, 179, 182, 186, 190, 191, 219, 220, 224, 227, 235, 243, 244, 245, 259, 270, 272, 277, 283, 285, 286, 287, 288, 290, 292, 295, 299, 319, 330, 333, 337, 340, 349, 356, 376, 386, 388, 389, 399, 402, 416, 423, 425, 427, 446, 448, 454, 459, 462, 467]
