# Import

In [2]:
import os
import pandas as pd
import json
import warnings

warnings.filterwarnings("ignore", category=FutureWarning)

# Add filenames to CSV

In [3]:
csv_file_path = "data/metadata.csv"
df = pd.DataFrame(columns=["file_name", "prompt"])
df

Unnamed: 0,file_name,prompt


In [31]:
data_folder = "data"
fnames = []
for f in os.listdir(data_folder):
    fnames.append(str(f))
df["file_name"] = sorted(fnames)
df["file_name"]

0      C-YOhrjRuO9_0.jpg
1      C-YOhrjRuO9_1.jpg
2      C-YOhrjRuO9_2.jpg
3      C-YOhrjRuO9_3.jpg
4      C-YOhrjRuO9_4.jpg
             ...        
857          turtle4.jpg
858          turtle5.jpg
859          turtle6.jpg
860          turtle7.jpg
861          turtle8.jpg
Name: file_name, Length: 862, dtype: object

In [32]:
df.to_csv(csv_file_path)

# Build metadata from generated items

In [4]:
csv_file_path = "data/metadata.csv"
gen_df = pd.DataFrame(columns=["file_name", "prompt"])
for i in range(0, 860, 10):
    try:
        with open("./labeler_outputs/dump_{}_{}.json".format(i, i+10), "rb") as json_file:
            file_content = json_file.read()
            json_content = json.loads(file_content)
            new_rows = json_content["items"]
            new_rows = new_rows[:10] # i've noticed that claude sometimes hallucinates extra items
            sub_df = pd.DataFrame(new_rows, columns=["file_name", "prompt"], index=range(i, i+len(new_rows)))
            gen_df = pd.concat([gen_df, sub_df])
    except Exception as e:
        if isinstance(e, FileNotFoundError):
            print("couldn't find ./labeler_outputs/dump_{}_{}.json".format(i, i+10))
        else:
            print(e)

In [5]:
gen_df

Unnamed: 0,file_name,prompt
0,C-YOhrjRuO9_0.jpg,four colorful bunnies playing in a forest
1,C-YOhrjRuO9_1.jpg,colorful animals sitting around a table with v...
2,C-YOhrjRuO9_2.jpg,blue bunny standing alone on a hill while othe...
3,C-YOhrjRuO9_3.jpg,blue bunny looking at the moon in a night sky
4,C-lEu5TRo8u_0.jpg,bear wearing a sun hat holding a red purse in ...
...,...,...
853,turtle4.jpg,green turtle eating brownie at cafeteria with ...
854,turtle5.jpg,green turtle with animal friends playing baske...
855,turtle6.jpg,green turtle watching airplane fly in blue sky
856,turtle7.jpg,green turtle on swing with fox and rabbit hold...


In [6]:
missing_values = []
fnames = sorted([fname for fname in os.listdir("./data") if ".jpg" in fname])
for fname in fnames:
    if fname not in list(gen_df["file_name"]):
        missing_values.append({"file_name": fname, "prompt": "incomplete"})

print(missing_values)
for missing_value in missing_values:
    os.remove("./data/" + missing_value["file_name"])

[]


In [7]:
gen_df

Unnamed: 0,file_name,prompt
0,C-YOhrjRuO9_0.jpg,four colorful bunnies playing in a forest
1,C-YOhrjRuO9_1.jpg,colorful animals sitting around a table with v...
2,C-YOhrjRuO9_2.jpg,blue bunny standing alone on a hill while othe...
3,C-YOhrjRuO9_3.jpg,blue bunny looking at the moon in a night sky
4,C-lEu5TRo8u_0.jpg,bear wearing a sun hat holding a red purse in ...
...,...,...
853,turtle4.jpg,green turtle eating brownie at cafeteria with ...
854,turtle5.jpg,green turtle with animal friends playing baske...
855,turtle6.jpg,green turtle watching airplane fly in blue sky
856,turtle7.jpg,green turtle on swing with fox and rabbit hold...


In [8]:
# manual changes necessary due to hallucinations
gen_df["prompt"][29] = "two children bundled up in winter clothing standing next to cat snowman in snowy forest with sunbeams"
gen_df["prompt"][30] = "two small cartoon characters waving goodbye in snowy landscape"
gen_df["prompt"][34] = "smiling cat cloud in sky with two small characters below"
gen_df["prompt"][35] = "two small characters shoveling snow in front of house"
gen_df["prompt"][36] = "group of small animal characters eating dinner together"
gen_df["prompt"][37] = "santa character with small animal characters around christmas tree"
gen_df["prompt"][38] = "family of bear characters walking in front of decorated house"
gen_df["prompt"][39] = "rabbit and spotted cat sitting on colorful patchwork quilt"
gen_df["prompt"][43] = "elephant wearing hat and two cats standing under striped awning"
gen_df["prompt"][52] = "yellow and blue cat faces peeking out from green background"
gen_df["prompt"][53] = "three colorful cats sleeping together in cozy bedroom"
gen_df["prompt"][54] = "blue and yellow cats floating in colorful abstract background"
gen_df["prompt"][55] = "two orange foxes in colorful forest with yellow spotlight"
gen_df["prompt"][56] = "two orange foxes shining flashlights on their faces in colorful forest"
gen_df["prompt"][57] = "two orange foxes holding microphones in blue forest"
gen_df["prompt"][58] = "two happy orange foxes jumping in green background with yellow lights"
gen_df["prompt"][59] = "smirking yellow cat with red background"
gen_df["prompt"][62] = "blue and yellow cats hugging with orange background"
gen_df["prompt"][64] = "blue and yellow cats walking through colorful flower patch with their tails forming a heart shape"
gen_df["prompt"][65] = "blue and yelllow cats sitting together on a couch"
gen_df["prompt"][66] = "blue and yellow cats dancing in front of a red house"
gen_df["prompt"][67] = "blue and yellow cats sitting together on a ledge"
gen_df["prompt"][68] = "green creature holding a pencil with stars in the background"
gen_df["prompt"][69] = "green cat holding a pencil with sunset background"
gen_df["prompt"][83] = "gray creature holding smaller green creature while sitting on sofa chair"
gen_df["prompt"][92] = "grey creature and brown creature wearing winter clothings walking into a warmly lit room"
gen_df["prompt"][93] = "cat and bear sitting at table by arched window"
gen_df["prompt"][95] = "small monkey in chair under blanket dreaming of cookie"
gen_df["prompt"][96] = "small character in winter clothes walking through snowy driveway"
gen_df["prompt"][99] = "child characters waving excitedly to bigger character holding cookie"
gen_df["prompt"][114] = "orange cat talking on phone with teary eyes"
gen_df["prompt"][115] = "orange cat answering phone call in colorful bedroom"
gen_df["prompt"][116] = "orange cat waving from window at night with stars"
gen_df["prompt"][117] = "orange cat looking up at starry night sky with phone cord"
gen_df["prompt"][118] = "white cat sitting on bench dreaming of sunset over village"
gen_df["prompt"][119] = "grey cat sitting on picnic blanket in forest dreaming of holding hands"
gen_df["prompt"][132] = "yellow and blue cat-like creatures touching paws in clouds"
gen_df["prompt"][133] = "yellow and blue cat-like creatures playing under moonlight"
gen_df["prompt"][134] = "small orange creature standing in magical forest with glowing lights"
gen_df["prompt"][135] = "small orange creature in forest with sunbeams and glowing lights"
gen_df["prompt"][136] = "small orange creature looking at shadowy figure in mirror in forest"
gen_df["prompt"][137] = "small orange creature looking at its reflection in ornate mirror in forest"
gen_df["prompt"][138] = "two small orange creatures sitting in magical forest at night"
gen_df["prompt"][141] = "small orange creature sitting on swing with box in hands"
gen_df["prompt"][160] = "turtle character sitting kitchen with cup of coffee and kettle"
gen_df["prompt"][161] = "turtle character carrying wooden canoe in front of house"
gen_df["prompt"][162] = "turtle in boat in river just past a wooden dock surrounded by trees"
gen_df["prompt"][163] = "turtle character relaxing in boat on turquoise waters"
gen_df["prompt"][164] = "orange cat wearing sunglasses and headphones sitting on steps"
gen_df["prompt"][165] = "yellow and blue cats sleeping under floral patterned blanket"
gen_df["prompt"][166] = "orange squirrel holding coffee cup in sunny garden"
gen_df["prompt"][167] = "three cats laughing and eating at table with mugs and plates"
gen_df["prompt"][168] = "pink blob character standing in spotlight on stage"
gen_df["prompt"][169] = "pink blob character and three cat characters standing on grass in park"
gen_df["prompt"][205] = "two orange cats sitting on grass under a colorful sky"
gen_df["prompt"][206] = "two brown bears sledding down a snowy hill under a starry sky"
gen_df["prompt"][207] = "two brown bears in a bathtub filled with bubbles and toy fish"
gen_df["prompt"][208] = "two brown bears reading under blanket fort with lamp"
gen_df["prompt"][209] = "large brown bear and three small brown bear characters playing with pots and pans in room"
gen_df["prompt"][232] = "yellow house with face looking thoughtful against colorful background"
gen_df["prompt"][233] = "small house on a hill with orange sky and winking cloud"
gen_df["prompt"][272] = "cat and turtle sitting with shopping bags and takeout food"
gen_df["prompt"][295] = "two brown bears hugging with smaller bear sleeping"
gen_df["prompt"][296] = "small orange bear sleeping on orange and yellow stairs"
gen_df["prompt"][297] = "large orange bear hugging small orange bear in bed at night with stars outside window"
gen_df["prompt"][298] = "small orange bear sleeping in blue bed with stars visible through window"
gen_df["prompt"][299] = "blue and yellow cats sidding on bench under starry night"
gen_df["prompt"][302] = "blue cat and orange cat holding hands in a grassy field with clouds in sky background"
gen_df["prompt"][303] = "blue cat and orange cat holding hands in a flowery meadow"
gen_df["prompt"][304] = "yellow and blue cats dancing together on a rock in a lake"
gen_df["prompt"][305] = "sad blue starfish sitting on rocks in a purple ocean"
gen_df["prompt"][306] = "white starfish cooking soup in a cozy kitchen"
gen_df["prompt"][307] = "purple starfish running on a pier at sunset"
gen_df["prompt"][308] = "white starfish walking with ducklings in a spring meadow"
gen_df["prompt"][309] = "three white star characters skating in a park"
gen_df["prompt"][317] = "two orange monkeys looking at plant with heart in pot against yellow background"
gen_df["prompt"][318] = "orange monkey standing at bathroom sink with yellow background"
gen_df["prompt"][319] = "orange monkey character with green sprout on head"
gen_df["prompt"][324] = "green frog waving from doorway of mushroom house in flowery garden"
gen_df["prompt"][325] = "green frog leaping over tall grass with colorful sky"
gen_df["prompt"][326] = "green frog jumping joyfully over small bridge with pink sky"
gen_df["prompt"][327] = "pink and purple striped creature with antennae on green background"
gen_df["prompt"][328] = "yellow butterfly with colorful wings on green background"
gen_df["prompt"][329] = "orange bug with striped body on blue background"
gen_df["prompt"][343] = "small gray and white animal wearing red bandana standing in grass"
gen_df["prompt"][344] = "small gray bear and white dog with red bandana in grassy field"
gen_df["prompt"][346] = "three white rabbits sitting in colorful car with belonging scattered below them"
gen_df["prompt"][347] = "orange van with two animals looking out window in countryside"
gen_df["prompt"][348] = "smiling cow holding flower in field with other cows and kite"
gen_df["prompt"][349] = "three white rabbits sitting and waving from back of car"
gen_df["prompt"][376] = "two orange characters playing with pool noodles with palm trees in background"
gen_df["prompt"][381] = "close up of two green frog faces leaning on each other and stars above"
gen_df["prompt"][385] = "two small characters hugging surrounded by colorful stars and swirls"
gen_df["prompt"][386] = "two brown monkeys poking each other on yellow and white checkered background"
gen_df["prompt"][394] = "cheerful flower character next to angry burning candle character with orange background"
gen_df["prompt"][395] = "smilling blue character cutting hair of fiery burning candle character"
gen_df["prompt"][402] = "yellow owl sitting on doorstep with ladybug and wind chime in garden"
gen_df["prompt"][403] = "yellow owl-like creature serving coffee to sleepy yellow creature at counter"
gen_df["prompt"][443] = "small rabbit looking at reflection in blue pond in grassy field"
gen_df["prompt"][453] = "blue cat wearing scarf and yellow cat standing together"
gen_df["prompt"][454] = "blue cat and yellow cat listening to music together on picnic blanket"
gen_df["prompt"][455] = "blue cat lifting yellow cat to reach fruit on tree"
gen_df["prompt"][456] = "small creature in orange suit under colorful explosion in sky"
gen_df["prompt"][457] = "two small creatures watching colorful explosion in sky"
gen_df["prompt"][458] = "five colorful creatures watching explosion of stars in sky"
gen_df["prompt"][459] = "huge band of alien creatures dancing in a circle on glowing green background"
gen_df["prompt"][470] = "moose with antlers walks into kitchen while monkey character stirs pot"
gen_df["prompt"][488] = "small orange monkey sleeping in bed with stuffed animals"
gen_df["prompt"][489] = "three cartoon characters in halloween outfits standing together"
gen_df["prompt"][490] = "three cartoon characters in halloween outfits jumping with joy"
gen_df["prompt"][491] = "three cartoon characters in halloween outfits walking out the door"
gen_df["prompt"][492] = "three cartoon characters dressed in halloween outfits standing by mailbox"
gen_df["prompt"][493] = "yellow cat and blue cat in autumn clothing getting ready to enter haunted house"
gen_df["prompt"][494] = "two cartoon cats scared in a haunted house with a mummy"
gen_df["prompt"][498] = "two cartoon cats hugging scared in a graveyard"
gen_df["prompt"][499] = "two cartoon cats laughing and running through a haunted house"
gen_df["prompt"][504] = "two small characters in halloween costume taking candy from bowl guarded by skeleton"
gen_df["prompt"][505] = "small skeleton and vampire characters spilling colorful candy"
gen_df["prompt"][506] = "small characters watching halloween movie and eating candy"
gen_df["prompt"][507] = "small bear characters in cozy home entrance"
gen_df["prompt"][508] = "small bear characters watching tv and eating popcorn"
gen_df["prompt"][509] = "small bear with green backpack hiking through forest"
gen_df["prompt"][518] = "blue cat with leafy head in kitchen holding red bowl"
gen_df["prompt"][519] = "blue cat with leafy head washing dishes in kitchen sink"
gen_df["prompt"][523] = "white dog sleeping in bed with snake while lamp glows on nightstand"
gen_df["prompt"][524] = "white dog tangled with snake while other animals watch in colorful background"
gen_df["prompt"][525] = "white dog holding snake in kitchen with floral wallpaper"
gen_df["prompt"][526] = "white dog petting snake on steps at night with stars and moon"
gen_df["prompt"][527] = "sunflower character crying under cracked red heart in colorful background"
gen_df["prompt"][528] = "sunflower character standing under red heart in colorful background"
gen_df["prompt"][529] = "colorful flower creature with yellow petals and green body surrounded by vibrant flowers"
gen_df["prompt"][530] = "colorful flower creature with yellow petals surrounded by small creatures and flowers"
gen_df["prompt"][531] = "colorful flower creature with yellow petals surrounded by small creatures and flowers"
gen_df["prompt"][536] = "black cat with orange ears in green grass"
gen_df["prompt"][537] = "black cat standing in puddle surrounded by green foliage"
gen_df["prompt"][538] = "black cat dancing in rain with green foliage background"
gen_df["prompt"][539] = "blue cat and orange cat in bed with quilted cover reading books"
gen_df["prompt"][541] = "yellow cat wearing hoodie opening door to blue cat wearing scarf holding food and coffee"
gen_df["prompt"][542] = "yellow cat and blue cat riding bicycles in autumn scenery"
gen_df["prompt"][543] = "blue cat and yellow cat playing guitar by campfire"
gen_df["prompt"][544] = "blue cat watching yellow cat rake leaves in yard into heart shape"
gen_df["prompt"][545] = "two small characters making snow angels on snowy colorful abstract surface"
gen_df["prompt"][547] = "two small characters looking up into sky standing on colorful abstract snow"
gen_df["prompt"][548] = "abstract winged creature with multiple eyes and colorful swirling patterns"
gen_df["prompt"][549] = "blue rabbit sitting at desk writing note"
gen_df["prompt"][550] = "blue rabbit putting note into bottle next to bowl of fruit on table"
gen_df["prompt"][551] = "blue rabbit walking on beach at night holding green bottle with moonlight reflecting on water"
gen_df["prompt"][553] = "green bottle floating in blue ocean waves"
gen_df["prompt"][554] = "green bottle floating in stormy ocean waves"
gen_df["prompt"][555] = "cartoon animals eating dinner together at table with colorful food"
gen_df["prompt"][556] = "small animal walking in orange cape under bright sun and swirling clouds"
gen_df["prompt"][557] = "small animal in orange cape sitting on log over waterfall"
gen_df["prompt"][558] = "small animal curled up under orange tree with flowers blooming"
gen_df["prompt"][559] = "small animal in orange cape walking with lantern into forest at night"
gen_df["prompt"][562] = "two mice standing in doorway of tree house"
gen_df["prompt"][564] = "mouse and mouse wearing winter clothes sitting under starry night sky"
gen_df["prompt"][564] = "smiling mouse with big ears on yellow background"
gen_df["prompt"][574] = "two small monkeys on orange sled about to go down white snowy slope"
gen_df["prompt"][575] = "two small monkeys flying through air on orange sled over snow"
gen_df["prompt"][576] = "two small monkeys sledding down snowy hill on orange sled"
gen_df["prompt"][577] = "two small monkeys flying off orange sled into snow"
gen_df["prompt"][578] = "two small monkeys buried in snow with orange sled nearby"
gen_df["prompt"][579] = "two small monkeys towing orange sled back up snowy hill"
gen_df["prompt"][582] = "small yellow lion creature and fluffy white animal in colorful forest"
gen_df["prompt"][583] = "small yellow lion creature dancing on patch of grass surrounded by snow and snow covered trees"
gen_df["prompt"][584] = "small yellow lion creature surrounded by animal friends around campfire"
gen_df["prompt"][585] = "small yellow creature with orange mane jumping in colorful clouds"
gen_df["prompt"][586] = "small creature wearing winter clothes sitting on bench in colorful city at night"
gen_df["prompt"][587] = "group of small animals gathered around table with birthday cake"
gen_df["prompt"][588] = "small yellow creature sitting on windowsill looking at sunset"
gen_df["prompt"][589] = "two yellow and orange dogs holding hands and walking down path in park"
gen_df["prompt"][602] = "small white rabbit carrying shield and sword and walking towards treehouse lit by lantern"
gen_df["prompt"][613] = "cat wizard and frog by pond with basket of oranges"
gen_df["prompt"][614] = "cat in blue robe sitting at table with yarn ball in cozy room"
gen_df["prompt"][615] = "wizard cat greeted by a group of animal characters at front door"
gen_df["prompt"][620] = "two orange kangaroos playing on a swing set in a park"
gen_df["prompt"][621] = "parent kangaroo bathing baby rabbit in a bubble bath"
gen_df["prompt"][627] = "yellow rabbit character dancing in a green forest clearing"
gen_df["prompt"][628] = "happy panda character dancing in a green meadow"
gen_df["prompt"][629] = "blue bear character sitting at quilted table holding cookies"
gen_df["prompt"][639] = "orange cat listening to spotted animal character playing guitar near brick wall"
gen_df["prompt"][649] = 'three small animals laying together on green grass'
gen_df["prompt"][672] = 'white rabbit reading red book while orange fox sits next to it in car'
gen_df["prompt"][676] = 'white bear in blue apron making pottery on yellow background'
gen_df["prompt"][681] = 'orange dog wearing headphones sitting in lounge chair with green blanket reading book by window'
gen_df["prompt"][682] = 'four colorful dogs sitting together watching sunset over mountains' 
gen_df["prompt"][683] = 'three smiling flower characters dancing together in sunny garden'
gen_df["prompt"][684] = 'smiling daisy character walking on path towards rainbow over hills'
gen_df["prompt"][685] = 'small yellow sunflower growing from sidewalk in front of brick wall'
gen_df["prompt"][686] = 'flower character relaxing on lounge chair in garden with watering can'
gen_df["prompt"][687] = 'small orange monkey in chef hat cooking at stove in kitchen'
gen_df["prompt"][688] = 'orange monkey chef flipping toast in kitchen at night'
gen_df["prompt"][689] = 'small orange monkey in chef hat making sandwich near stove in kitchen'
gen_df["prompt"][690] = 'small orange monkey eating sandwich at a table with a bowl of soup'
gen_df["prompt"][701] = 'small white character with colorful scarf sitting in orange boat on blue water watching boat in distance'
gen_df["prompt"][727] = 'cute gray cat character with purple bowtie holding yellow paper on red background'
gen_df["prompt"][730] = 'blue and yellow cat washing dishes together over sink'
gen_df["prompt"][731] = 'blue and yellow cat playing with sticks in a grassy field'
gen_df["prompt"][741] = 'orange cat on checkered floor and brown bear on phone with red circle'
gen_df["prompt"][742] = 'bear sitting reading book with fox standing nearby in nature scene'
gen_df["prompt"][743] = 'small fox and bear sitting in stormy landscape with colorful sky'
gen_df["prompt"][744] = 'bear carrying wooden chest with fox helping in green forest'
gen_df["prompt"][745] = 'bear in bed with nightcap reading book and tea on nightstand'
gen_df["prompt"][746] = 'bear in bed with nightcap and book with lightning outside window'
gen_df["prompt"][747] = 'bear peeking through rainy window with water droplets'
gen_df["prompt"][748] = 'bear sleeping in bed with nightcap and rainy window'
gen_df["prompt"][749] = 'white cat with orange cape reaching up to sea serpent monster emerging from stormy ocean waves'
gen_df["prompt"][771] = "brown squirrel character lying on picnic blanket in park doodling on paper"
gen_df["prompt"][772] = 'small orange monkey watering plant with yellow pot'
gen_df["prompt"][773] = 'small orange monkey drinking from cup while standing on stool'
gen_df["prompt"][774] = 'small orange monkey holding cup next to potted plant'
gen_df["prompt"][775] = 'small orange monkey drinking from cup while standing next to plant'
gen_df["prompt"][816] = 'orange cat standing in yellow autumn forest with broken bicycle'
gen_df["prompt"][817] = 'small orange cat crying and running towards large striped cat'
gen_df["prompt"][818] = 'large cat comforting injured small cat with first aid kit'
gen_df["prompt"][819] = 'large cat and small cat character smiling and eating cookies at kitchen table'
gen_df["prompt"][821] = 'yellow creature reading a book by a fireplace surrounded by other small creatures'
gen_df["prompt"][822] = 'blue and orange cats reading books on an orange plaid blanket'
gen_df["prompt"][823] = 'blue and yellow cats cooking together in a cozy kitchen'
gen_df["prompt"][824] = 'yellow cat opening door for blue cat carrying coffee and pastries'
gen_df["prompt"][825] = 'blue and yellow cats riding bicycles through autumn leaves'
gen_df["prompt"][826] = 'yellow cat raking leaves into heart shape while blue cat watches from inside'
gen_df["prompt"][827] = 'blue and yellow cats playing guitar by campfire at night'
gen_df["prompt"][828] = 'colorful alien creatures sharing love in beam of light'
gen_df["prompt"][829] = 'green alien and pink creature with antennae facing each other against bright background'
gen_df["prompt"][830] = 'pink creature reaching into green alien creature heart glowing beam'


# delete row 63
# delete row 253
# delete row 723

In [9]:
def add_annalaura_suffix(prompt):
    return prompt + " in an annalaura watercolor drawing style"

gen_df["prompt"] = gen_df["prompt"].apply(lambda x: add_annalaura_suffix(x))
gen_df

Unnamed: 0,file_name,prompt
0,C-YOhrjRuO9_0.jpg,four colorful bunnies playing in a forest in a...
1,C-YOhrjRuO9_1.jpg,colorful animals sitting around a table with v...
2,C-YOhrjRuO9_2.jpg,blue bunny standing alone on a hill while othe...
3,C-YOhrjRuO9_3.jpg,blue bunny looking at the moon in a night sky ...
4,C-lEu5TRo8u_0.jpg,bear wearing a sun hat holding a red purse in ...
...,...,...
853,turtle4.jpg,green turtle eating brownie at cafeteria with ...
854,turtle5.jpg,green turtle with animal friends playing baske...
855,turtle6.jpg,green turtle watching airplane fly in blue sky...
856,turtle7.jpg,green turtle on swing with fox and rabbit hold...


In [10]:
gen_df.to_csv(csv_file_path)