In [5]:
import json
import os

import numpy as np

ref_captions_path = "coco_captions.txt"
reconstuctions_dir = "/mnt/cschlarmann37/project_bimodal-robust-clip/results_inversions"
rec_vith_clean_path = os.path.join(reconstuctions_dir, "results-100smpls-3000iters-ViT-H-14-clean.json")
rec_vith_robust_path = os.path.join(reconstuctions_dir, "results-100smpls-3000iters-ViT-H-14-robust.json")

res_clean_dict = json.load(open(rec_vith_clean_path, 'r'))
res_robust_dict = json.load(open(rec_vith_robust_path, 'r'))
res_clean_list = res_clean_dict["results"]
res_robust_list = res_robust_dict["results"]

refs_clean = [r["original"] for r in res_clean_list]
refs_robust = [r["original"] for r in res_robust_list]
assert refs_clean == refs_robust
refs = refs_clean
recs_clean = [r["reconstructed"] for r in res_clean_list]
recs_robust = [r["reconstructed"] for r in res_robust_list]
# get random sample of 10 and print
sample_size = 10
rng = np.random.default_rng(0)
sample_indices = rng.choice(len(refs), size=sample_size, replace=False)
res_clean_sample = [res_clean_list[i] for i in sample_indices]
res_robust_sample = [res_robust_list[i] for i in sample_indices]

for i in range(sample_size):
    print()
    print(f"sample {i}:")
    print(f"original: {refs[sample_indices[i]]}")
    print(f"reconstructed clean: {res_clean_sample[i]['reconstructed']}  ({res_clean_sample[i]['sim']:.4f})")
    print(f"reconstructed robust: {res_robust_sample[i]['reconstructed']}  ({res_robust_sample[i]['sim']:.4f})")
    print()


sample 0:
original: A car and a public transit vehicle on a road.
reconstructed clean: public transit car alongside a vehicle amongst partially road road ."  (0.9270)
reconstructed robust: jrnotified car and transit vehicle sit on a road ).   (0.9338)


sample 1:
original: An image of a hotel bathroom that is ugly.
reconstructed clean: ugly bathroom demonstrating poorly gross envirтkhobbhutto�  (0.8109)
reconstructed robust: ugly hotel bathroom showcasing concerns resemble �▂�magbbhutto.   (0.9489)


sample 2:
original: An older picture of a large kitchen with white appliances.
reconstructed clean: older earliest appenhistorical archival picture featuring older smaller large kitchen   (0.7671)
reconstructed robust: large kitchen pictured prior a a looked white appliances unidenti).   (0.9173)


sample 3:
original: A girl sitting on a bench in front of a stone wall.
reconstructed clean: prepped amina ssels sitting sitting bench near stone textured wall 🤟girl girl   (0.8355)
reconstruct

In [1]:
import os
import json
import numpy as np

REF_CAPTIONS_PATH = "coco_captions.txt"  # currently unused but kept for context
RECONSTRUCTIONS_DIR = "/mnt/cschlarmann37/project_bimodal-robust-clip/results_inversions"

FILES = {
    "vith_clean": "results-100smpls-3000iters-ViT-H-14-clean.json",
    "vith_robust": "results-100smpls-3000iters-ViT-H-14-robust.json",
    "vitg_clean": "results-100smpls-3000iters-ViT-g-14-clean.json",
    "vitg_robust": "results-100smpls-3000iters-ViT-g-14-robust.json",
}


# ---- helpers ---------------------------------------------------------------

def load_results(key: str):
    """Load the list of inversion results for a given key in FILES."""
    path = os.path.join(RECONSTRUCTIONS_DIR, FILES[key])
    with open(path, "r") as f:
        return json.load(f)["results"]

replacements = {

    "": "️"
}
def latex_escape(text: str) -> str:
    """Escape minimal set of special LaTeX characters likely to appear."""
    specials = {
        "&": r"\&",
        "%": r"\%",
        "_": r"\_",
        "#": r"\#",
        "$": r"\$",
        "^": r"\^",
        "🤟": r"[U+1F91F]", #r"\emoji{love-you-gesture}",
        "と繋": r"\begin{CJK}{UTF8}{min}と繋\end{CJK}",
        "▹": r"$\triangleright$",
        "т": r"[U+0442]",  #r"\foreignlanguage{russian}{т}",
        "و": "[U+0648]",
        "�▂�": "?",
        "�▂▂▂�": "??",
        "▂▂▂": "?",
        "�": "?",

    }
    for k, v in specials.items():
        text = text.replace(k, v)
    return text

# ---- load data -------------------------------------------------------------

res_vith_clean = load_results("vith_clean")
res_vith_robust = load_results("vith_robust")
res_vitg_clean = load_results("vitg_clean")
res_vitg_robust = load_results("vitg_robust")

# ensure all lists share identical ordering of references
refs = [r["original"] for r in res_vith_clean]
assert refs == [r["original"] for r in res_vith_robust] == \
       [r["original"] for r in res_vitg_clean] == \
       [r["original"] for r in res_vitg_robust], "Reference captions differ across files!"

# ---- get random sample -----------------------------------------------------

SAMPLE_SIZE = 10
rng = np.random.default_rng(0)
idx_sample = rng.choice(len(refs), size=SAMPLE_SIZE, replace=False)

# ---- pretty‑print individual samples ---------------------------------------

for i, idx in enumerate(idx_sample):
    print("\n" + "-" * 80)
    print(f"sample {i} (idx={idx}):")
    print(f"original               : {refs[idx]}")
    print(f"reconstructed vith clean: {res_vith_clean[idx]['reconstructed']}  (sim={res_vith_clean[idx]['sim']:.4f})")
    print(
        f"reconstructed vith robust: {res_vith_robust[idx]['reconstructed']}  (sim={res_vith_robust[idx]['sim']:.4f})"
        )
    print(f"reconstructed vitg clean: {res_vitg_clean[idx]['reconstructed']}  (sim={res_vitg_clean[idx]['sim']:.4f})")
    print(
        f"reconstructed vitg robust: {res_vitg_robust[idx]['reconstructed']}  (sim={res_vitg_robust[idx]['sim']:.4f})"
        )


--------------------------------------------------------------------------------
sample 0 (idx=77):
original               : A car and a public transit vehicle on a road.
reconstructed vith clean: public transit car alongside a vehicle amongst partially road road ."  (sim=0.9270)
reconstructed vith robust: jrnotified car and transit vehicle sit on a road ).   (sim=0.9338)
reconstructed vitg clean: partially tionally car sits alongside alongside roads public transit vehicle '.   (sim=0.9171)
reconstructed vitg robust: a car and eachother and a roadway public transit vehicle .   (sim=0.9701)

--------------------------------------------------------------------------------
sample 1 (idx=81):
original               : An image of a hotel bathroom that is ugly.
reconstructed vith clean: ugly bathroom demonstrating poorly gross envirтkhobbhutto�  (sim=0.8109)
reconstructed vith robust: ugly hotel bathroom showcasing concerns resemble �▂�magbbhutto.   (sim=0.9489)
reconstructed vitg clean: ap

In [2]:
def clean_word(word):
    word = word.lower()
    word = word.replace(".", "")
    return word

def get_highlighting(original, reconstructed_clean, reconstructed_robust):
    """Highlight words in original that are reconstructed clean and not robust and vice versa."""
    orig_words = original.split()
    rec_clean_words = reconstructed_clean.lower().split()
    rec_robust_words = reconstructed_robust.lower().split()
    orig_words_highlighted = []
    for word in orig_words:
        if clean_word(word) in rec_clean_words and clean_word(word) not in rec_robust_words:
            # in clean but not in robust -> highlight teal
            orig_words_highlighted.append(rf"\colorbox{{teal!30}}{{{word}}}")
        elif clean_word(word) in rec_robust_words and clean_word(word) not in rec_clean_words:
            # in robust but not in clean -> highlight red
            orig_words_highlighted.append(rf"\colorbox{{red!30}}{{{word}}}")
        elif clean_word(word) not in rec_clean_words and clean_word(word) not in rec_robust_words:
            # not in either -> highlight yellow
            orig_words_highlighted.append(rf"\colorbox{{yellow!40}}{{{word}}}")
        else:
            # in both -> no highlight
            orig_words_highlighted.append(word)
    return " ".join(orig_words_highlighted)

### Single Table

In [19]:


# ---- print LaTeX table -----------------------------------------------------
# print("\nLaTeX table (copy & paste):\n")
print(r"\begin{tabular}{lclll}")
print(r"    \toprule")
print(r"     Original & Robust & Reconstructed ViT-H/14 & Reconstructed ViT-g/14 \\")
print(r"     \midrule")

for i, idx in enumerate(idx_sample):
    orig = get_highlighting(original=refs[idx], reconstructed_clean=res_vitg_clean[idx]["reconstructed"], reconstructed_robust=res_vitg_robust[idx]["reconstructed"])
    orig = latex_escape(orig)
    mark = r"\xmark"
    rec_h_clean = latex_escape(res_vith_clean[idx]["reconstructed"])
    rec_g_clean = latex_escape(res_vitg_clean[idx]["reconstructed"])
    row = (
        fr"     \multirow{{2}}{{*}}{{\begin{{minipage}}{{\cwidth}}{orig}\end{{minipage}}}} & "
        fr"{mark} & "
        fr"\begin{{minipage}}{{\cwidth}}{rec_h_clean}\end{{minipage}} & "
        fr"\begin{{minipage}}{{\cwidth}}{rec_g_clean}\end{{minipage}} \\[10pt]")
    print(row)
    print(r"     \cmidrule(lr){2-4}")
    mark = r"\cmark"
    rec_h_robust = latex_escape(res_vith_robust[idx]["reconstructed"])
    rec_g_robust = latex_escape(res_vitg_robust[idx]["reconstructed"])
    row = (
        fr" & "
        fr"{mark} & "
        fr"\begin{{minipage}}{{\cwidth}}{rec_h_robust}\end{{minipage}} & "
        fr"\begin{{minipage}}{{\cwidth}}{rec_g_robust}\end{{minipage}} \\[10pt]")
    print(row)
    print(r"     \midrule") if i < len(idx_sample) - 1 else None
print(r"     \bottomrule")
print(r"\end{tabular}")

\begin{tabular}{lclll}
    \toprule
     Original & Robust & Reconstructed ViT-H/14 & Reconstructed ViT-g/14 \\
     \midrule
     \multirow{2}{*}{\begin{minipage}{\cwidth}\colorbox{red!30}{A} car \colorbox{red!30}{and} \colorbox{red!30}{a} public transit vehicle \colorbox{yellow!40}{on} \colorbox{red!30}{a} \colorbox{yellow!40}{road.}\end{minipage}} & \xmark & \begin{minipage}{\cwidth}public transit car alongside a vehicle amongst partially road road ."\end{minipage} & \begin{minipage}{\cwidth}partially tionally car sits alongside alongside roads public transit vehicle '. \end{minipage} \\[10pt]
     \cmidrule(lr){2-4}
 & \cmark & \begin{minipage}{\cwidth}jrnotified car and transit vehicle sit on a road ). \end{minipage} & \begin{minipage}{\cwidth}a car and eachother and a roadway public transit vehicle . \end{minipage} \\[10pt]
     \midrule
     \multirow{2}{*}{\begin{minipage}{\cwidth}\colorbox{red!30}{An} \colorbox{red!30}{image} \colorbox{red!30}{of} \colorbox{red!30}{a} hotel ba

### Two Tables

In [7]:


# ---- print LaTeX table -----------------------------------------------------
# print("\nLaTeX table (copy & paste):\n")
print(r"\begin{tabular}{lcll}")
print(r"    \toprule")
print(r"     Original & Robust & Reconstructed ViT-H/14 \\")
print(r"     \midrule")

for i, idx in enumerate(idx_sample):
    orig = get_highlighting(original=refs[idx], reconstructed_clean=res_vith_clean[idx]["reconstructed"], reconstructed_robust=res_vith_robust[idx]["reconstructed"])
    orig = latex_escape(orig)
    mark = r"\xmark"
    rec_h_clean = latex_escape(res_vith_clean[idx]["reconstructed"])
    # rec_g_clean = latex_escape(res_vitg_clean[idx]["reconstructed"])
    row = (
        fr"     \multirow{{2}}{{*}}{{\begin{{minipage}}{{\cwidthorig}}{orig}\end{{minipage}}}} & "
        fr"{mark} & "
        fr"\begin{{minipage}}{{\cwidthrec}}{rec_h_clean}\end{{minipage}} \\[10pt]"
        # fr"\begin{{minipage}}{{\cwidth}}{rec_g_clean}\end{{minipage}} \\[10pt]"
        )
    print(row)
    print(r"     \cmidrule(lr){2-3}")
    mark = r"\cmark"
    rec_h_robust = latex_escape(res_vith_robust[idx]["reconstructed"])
    # rec_g_robust = latex_escape(res_vitg_robust[idx]["reconstructed"])
    row = (
        fr" & "
        fr"{mark} & "
        fr"\begin{{minipage}}{{\cwidthrec}}{rec_h_robust}\end{{minipage}} \\[10pt]"
        # fr"\begin{{minipage}}{{\cwidth}}{rec_g_robust}\end{{minipage}} \\[10pt]"
    )
    print(row)
    print(r"     \midrule") if i < len(idx_sample) - 1 else None
print(r"     \bottomrule")
print(r"\end{tabular}")

\begin{tabular}{lcll}
    \toprule
     Original & Robust & Reconstructed ViT-H/14 \\
     \midrule
     \multirow{2}{*}{\begin{minipage}{\cwidthorig}A car \colorbox{red!30}{and} a \colorbox{teal!30}{public} transit vehicle \colorbox{red!30}{on} a road.\end{minipage}} & \xmark & \begin{minipage}{\cwidthrec}public transit car alongside a vehicle amongst partially road road ."\end{minipage} \\[10pt]
     \cmidrule(lr){2-3}
 & \cmark & \begin{minipage}{\cwidthrec}jrnotified car and transit vehicle sit on a road ). \end{minipage} \\[10pt]
     \midrule
     \multirow{2}{*}{\begin{minipage}{\cwidthorig}\colorbox{yellow!40}{An} \colorbox{yellow!40}{image} \colorbox{yellow!40}{of} \colorbox{yellow!40}{a} \colorbox{red!30}{hotel} bathroom \colorbox{yellow!40}{that} \colorbox{yellow!40}{is} ugly.\end{minipage}} & \xmark & \begin{minipage}{\cwidthrec}ugly bathroom demonstrating poorly gross envir\foreignlanguage{russian}{т}khobbhutto?\end{minipage} \\[10pt]
     \cmidrule(lr){2-3}
 & \cmark & \b

In [9]:
# ---- print LaTeX table -----------------------------------------------------
# print("\nLaTeX table (copy & paste):\n")
print(r"\begin{tabular}{lcll}")
print(r"    \toprule")
print(r"     Original & Robust & Reconstructed ViT-H/14 \\")
print(r"     \midrule")

for i, idx in enumerate(idx_sample):
    orig = get_highlighting(original=refs[idx], reconstructed_clean=res_vitg_clean[idx]["reconstructed"], reconstructed_robust=res_vitg_robust[idx]["reconstructed"])
    orig = latex_escape(orig)
    mark = r"\xmark"
    # rec_h_clean = latex_escape(res_vith_clean[idx]["reconstructed"])
    rec_g_clean = latex_escape(res_vitg_clean[idx]["reconstructed"])
    row = (
        fr"     \multirow{{2}}{{*}}{{\begin{{minipage}}{{\cwidthorig}}{orig}\end{{minipage}}}} & "
        fr"{mark} & "
        # fr"\begin{{minipage}}{{\cwidth}}{rec_h_clean}\end{{minipage}} \\[10pt]"
        fr"\begin{{minipage}}{{\cwidthrec}}{rec_g_clean}\end{{minipage}} \\[10pt]"
        )
    print(row)
    print(r"     \cmidrule(lr){2-3}")
    mark = r"\cmark"
    # rec_h_robust = latex_escape(res_vith_robust[idx]["reconstructed"])
    rec_g_robust = latex_escape(res_vitg_robust[idx]["reconstructed"])
    row = (
        fr" & "
        fr"{mark} & "
        # fr"\begin{{minipage}}{{\cwidth}}{rec_h_robust}\end{{minipage}} \\[10pt]"
        fr"\begin{{minipage}}{{\cwidthrec}}{rec_g_robust}\end{{minipage}} \\[10pt]"
    )
    print(row)
    print(r"     \midrule") if i < len(idx_sample) - 1 else None
print(r"     \bottomrule")
print(r"\end{tabular}")

\begin{tabular}{lcll}
    \toprule
     Original & Robust & Reconstructed ViT-H/14 \\
     \midrule
     \multirow{2}{*}{\begin{minipage}{\cwidthorig}\colorbox{red!30}{A} car \colorbox{red!30}{and} \colorbox{red!30}{a} public transit vehicle \colorbox{yellow!40}{on} \colorbox{red!30}{a} \colorbox{yellow!40}{road.}\end{minipage}} & \xmark & \begin{minipage}{\cwidthrec}partially tionally car sits alongside alongside roads public transit vehicle '. \end{minipage} \\[10pt]
     \cmidrule(lr){2-3}
 & \cmark & \begin{minipage}{\cwidthrec}a car and eachother and a roadway public transit vehicle . \end{minipage} \\[10pt]
     \midrule
     \multirow{2}{*}{\begin{minipage}{\cwidthorig}\colorbox{red!30}{An} \colorbox{red!30}{image} \colorbox{red!30}{of} \colorbox{red!30}{a} hotel bathroom \colorbox{yellow!40}{that} \colorbox{yellow!40}{is} ugly.\end{minipage}} & \xmark & \begin{minipage}{\cwidthrec}apparent nicely tered hotel bathroom containing looking ugly pfmage \end{minipage} \\[10pt]
     \

## Track down unicode characters

In [12]:
import os
import json
import numpy as np
import open_clip_pez

REF_CAPTIONS_PATH = "coco_captions.txt"  # currently unused but kept for context
RECONSTRUCTIONS_DIR = "/mnt/cschlarmann37/project_bimodal-robust-clip/results_inversions"

FILES = {
    "vith_clean": "results-100smpls-3000iters-ViT-H-14-clean.json",
    "vith_robust": "results-100smpls-3000iters-ViT-H-14-robust.json",
    "vitg_clean": "results-100smpls-3000iters-ViT-g-14-clean.json",
    "vitg_robust": "results-100smpls-3000iters-ViT-g-14-robust.json",
}

tokenizer = open_clip_pez.tokenizer._tokenizer

def load_results(key: str):
    """Load the list of inversion results for a given key in FILES."""
    path = os.path.join(RECONSTRUCTIONS_DIR, FILES[key])
    with open(path, "r") as f:
        return json.load(f)["results"]

In [8]:
res = load_results("vith_clean")

In [19]:
res[0]

{'original': 'A black Honda motorcycle parked in front of a garage.',
 'reconstructed': 'dark black honda honda giving modest motorcycle parked docked outside . ',
 'ids_rec': [1449,
  2442,
  16580,
  8553,
  2714,
  10297,
  16487,
  12726,
  16487,
  16487,
  18650],
 'sim': 0.8970907926559448,
 'ids_orig': [49406,
  320,
  1449,
  8553,
  10297,
  16487,
  530,
  2184,
  539,
  320,
  8474,
  269,
  49407,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0]}

In [22]:
print(res[81]["ids_rec"])

[16218, 8470, 33769, 30832, 30832, 30832, 30832, 377, 30832, 30832]


In [21]:
tokenizer.decode(res[0]["ids_rec"])

'black small ridgehonda performance motorcycle parked sits parked parked ."'

In [14]:
tokenizer.decode(res[81]["ids_orig"])

'<start_of_text>an image of a hotel bathroom that is ugly . <end_of_text>!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!'

In [16]:
tokenizer.decode([377])

'� '

In [18]:
tokenizer.decoder[377]

'½</w>'

In [75]:
[tokenizer.byte_decoder[c] for c in '½</w>']

[189, 60, 47, 119, 62]

In [80]:
bytearray([189, 60, 47, 119, 62]).decode('utf-8', errors="replace")

'�</w>'

In [63]:
tokenizer.encode('Â½')

[33613]

In [73]:
tokenizer.decoder[33613]

'Â½</w>'

In [49]:
bytearray([tokenizer.byte_decoder[c] for c in 'Â½</w>'])#.decode('utf-8')

bytearray(b'\xc2\xbd</w>')

In [54]:
bytearray([tokenizer.byte_decoder[c] for c in 'Âb'])#.decode('utf-8')

bytearray(b'\xc2b')

In [32]:
tokenizer.encoder['½</w>']

377

In [67]:
tokenizer.byte_encoder["c"]

KeyError: 'c'

In [59]:
''.join(tokenizer.byte_encoder[c] for c in '½</w>'.encode('utf-8'))

'Â½</w>'

In [60]:
tokenizer.bpe('Â½')

'Â½</w>'

In [71]:
[c for c in '½'.encode('utf-8')]

[194, 189]

In [90]:
tokenizer.decode([30832])

'�▂�'

In [87]:
tokenizer.decoder[30832]

'ĤâĸĤâĸ'

In [85]:
tokenizer.encoder['ĤâĸĤâĸ']

30832

In [96]:
tokenizer.encode('ĤâĸĤâĸ')

[128, 98, 26170, 128, 116, 128, 98, 26170, 128, 372]

In [95]:
tokenizer.decode([30832, 30832])

'�▂▂▂�'

In [91]:
for el in res[:15]:
    ids = el["ids_rec"]
    rec = tokenizer.decode(ids)
    print(rec)
    print(ids)
    print()

black small ridgehonda performance motorcycle parked sits parked parked ."
[1449, 2442, 16580, 8553, 2714, 10297, 16487, 12726, 16487, 16487, 18650]

multiple messy desktop monitors bü�▂▂▂▂▂▂▂▅ coworking area 
[6470, 15987, 14345, 30478, 38706, 30832, 30832, 30832, 30832, 483, 36034, 2445]

apparently �▂▂▂▂▂▂▂�possibly extremely small toilet ."
[5287, 30832, 30832, 30832, 30832, 8601, 6519, 2442, 11071, 18650]

,,mga alt _( newly ]: woman resting sit waitin pathway 
[21340, 23954, 10006, 36951, 7056, 21641, 2308, 18044, 4037, 44482, 23488]

beautifully luxe dessert appetizer supportsmallstreamers 🇮ae bbhuttolliae 
[10627, 26373, 9753, 36065, 36097, 8268, 4542, 28085, 47015, 4542]

generously hungry cat eating bird �▂▂▂▂▂▂▂�
[35113, 8451, 2368, 4371, 3329, 30832, 30832, 30832, 30832]

australians 🏼possibly older elderly man standing inthekitchen ."
[31488, 6193, 8601, 7700, 15455, 786, 2862, 7486, 4485, 18650]

discarded male cat patiently staring amongst two car using parking . 
[45197