In [None]:
# Kernel sanity check: prints a fixed string; nothing later in the notebook reads it.
print("HELO")

In [None]:
from icecream import ic
import numpy as np
import rp
import torch
import torch.nn as nn
import source.stable_diffusion as sd
from easydict import EasyDict
from source.learnable_textures import (LearnableImageFourier,
                                       LearnableImageFourierBilateral,
                                       LearnableImageRaster,
                                       LearnableImageRasterBilateral,
                                       LearnableTexturePackFourier,
                                       LearnableTexturePackRaster)

In [None]:
# Build the Stable Diffusion wrapper only when the kernel does not already hold
# one under the name `s`, so re-running this cell reuses the existing instance
# (presumably because constructing it is slow - TODO confirm).
# At cell top level the notebook namespace is what globals() exposes.
if 's' not in globals():
    model_name = "CompVis/stable-diffusion-v1-4"
    gpu = 'cuda:1'
    s = sd.StableDiffusion(gpu, model_name)

# Device reported by the wrapper; later cells move tensors and modules onto it.
device = s.device

In [None]:
class BaseLabel:
    """A named conditioning target: a display name paired with its embedding.

    Author's notes on possible extensions: more sophisticated embeddings (such as
    averaging multiple prompts), associated colors for visualization, or
    relations between labels.
    """

    def __init__(self, name:str, embedding:torch.Tensor):
        self.name = name
        self.embedding = embedding

    def get_sample_image(self):
        """Return one sample image generated from this label's embedding.

        Calls the notebook-global wrapper `s` with autograd disabled, takes the
        first element of what `s.embeddings_to_imgs` returns, and asserts that
        `rp.is_image` accepts it before returning it.
        """
        with torch.no_grad():
            generated = s.embeddings_to_imgs(self.embedding)
            sample = generated[0]
        assert rp.is_image(sample)
        return sample

    def __repr__(self):
        # Uses the runtime class name so subclasses print under their own name.
        return f'{type(self).__name__}(name={self.name})'
        
class SimpleLabel(BaseLabel):
    """Label whose embedding is the text embedding of its own name (the prompt)."""

    def __init__(self, name:str):
        # Embed the prompt with the notebook-global wrapper `s` and move the
        # result to the notebook-global `device` before storing it on the label.
        prompt_embedding = s.get_text_embeddings(name).to(device)
        super().__init__(name, prompt_embedding)

In [None]:
#ONLY GOOD PROMPTS HERE
# prompt_w = 'A cute kitten in a cardboard box in times square'
# prompt_w = 'The Legend of Zelda landscape atmospheric, hyper realistic, 8k, epic composition, cinematic, octane render, artstation landscape vista photography by Carr Clifton & Galen Rowell, 16K resolution, Landscape veduta photo by Dustin Lefevre & tdraw, 8k resolution, detailed landscape painting by Ivan Shishkin, DeviantArt, Flickr, rendered in Enscape, Miyazaki, Nausicaa Ghibli, Breath of The Wild, 4k detailed post processing, artstation, rendering by octane, unreal engine —ar 16:9'
# prompt_w = 'ultra realistic photo portrait of Scarlett Leithold cosmic energy, colorful, painting burst, beautiful symmetrical face, nonchalant kind look, realistic round eyes, tone mapped, intricate, elegant, highly detailed, digital painting, artstation, concept art, smooth, sharp focus, illustration, dreamy magical atmosphere, art by artgerm and greg rutkowski and alphonse mucha, 4k, 8k'
# prompt_w = 'Insanely detailed studio portrait shot photo of intricately detailed beautiful yorkshire terrier dressed as santa clause, smirking mischievously at the camera with mischievous detailed yellow green eyes , very detailed, rim light, photo, rim light, ultra-realistic, photorealistic, hyper detailed, photography, shot on Canon DSLR, f/2. 8 , photography by Felix Kunze and Annie Leibovitz and retouched by Pratik Naik'
# prompt_w = 'professional portrait photograph of a gorgeous Norwegian girl in winter clothing with long wavy blonde hair, freckles, gorgeous symmetrical face, cute natural makeup, wearing elegant warm winter fashion clothing, ((standing outside in snowy city street)), mid shot, central image composition, (((professionally color graded))), (((bright soft diffused light)))'
# prompt_w = '8 k concept art from a hindu temple lost in the jungle by david mattingly and samuel araya and michael whelan and dave mckean and richard corben. realistic matte painting with photorealistic hdr volumetric lighting. composition and layout inspired by gregory crewdson. '
# prompt_w = 'a big sailing ship in heavy sea, hypermaximalistic, high details, cinematic, 8k resolution, beautiful detailed, insanely intricate details, artstation trending, octane render, unreal engine'
# prompt_w = 'giant standalone lighthouse from bioshock infinite in england 1 9 century, half - ruined, covered by mold, staying in 2 kilometers far from a coast, opposite the dark cave - crack of giant rocks. when you see this lighthouse it makes you anxious. deep ones is living under this. view from sea, and view from the coast, by greg rutkowski'
# prompt_w = 'photo of bunny hugging another bunny, dramatic light, pale sunrise, cinematic lighting'
# prompt_w = 'thomas the tank engine as a military tank, intricate, highly detailed, centered, digital painting, artstation, concept art, smooth, sharp focus, illustration, artgerm, tomasz alen kopera, peter mohrbacher, donato giancola, joseph christian leyendecker, wlop, boris vallejo'
# prompt_w = 'a wolf with a tail, standing heroically on a rock. adventurous, new adventure, with a tail, forest, rocks, stream, ripples, tribal armor, female, wolf wolf wolf, atmospheric lighting, stunning, brave. by makoto shinkai, stanley artgerm lau, wlop, rossdraws, james jean, andrei riabovitchev, marc simonetti, krenz cushart, sakimichan, d & d trending on artstation, digital art. '
# The one prompt left uncommented in this cell. A later cell re-assigns prompt_w
# from good_prompts.lolita_dress_girl, which holds this same text.
prompt_w = 'lolita dress, angelic pretty, award winning photograph trending on artstation'
# prompt_w = 'lolita dress, angelic pretty, portrait of magical lolita woman elf elven,  hyperrealism photography hdr 4k 3d, dreamy and ethereal, fantasy, intricate, elegant, many rainbow bubbles, rose tones, highly detailed, artstation, concept art, cyberpunk wearing, smooth, sharp focus, illustration, art by artgerm and greg rutkowskiand alphonse mucha'
# prompt_w = 'an intricate detailed hb pencil sketch of a giraffe head'
# prompt_w = 'an intricate detailed hb pencil sketch of a penguin'
# prompt_w = 'an intricate detailed hb pencil sketch of a violin'
# prompt_w = 'an orca whale spouting water intricate detailed hb pencil sketch of an black white spotted orca whale'
# prompt_w = 'an intricate detailed hb pencil sketch of a black white spotted cow'
# prompt_w = 'an intricate detailed hb pencil sketch of a walrus'
# prompt_w = 'an sketch of a cat head'
# prompt_w = 'a sketch of a penguin'

In [None]:
#ONLY GOOD PROMPTS HERE
# Catalogue of prompts keyed by a short identifier. It is an EasyDict, so entries
# are read by attribute (e.g. good_prompts.kitten_in_box) in the selection cell.
# The strings are fed to the text encoder verbatim, so spelling, spacing and
# punctuation inside them are left exactly as written.
good_prompts = EasyDict(
    kitten_in_box = 'A orange cute kitten in a cardboard box in times square',
    botw_landscape = 'The Legend of Zelda landscape atmospheric, hyper realistic, 8k, epic composition, cinematic, octane render, artstation landscape vista photography by Carr Clifton & Galen Rowell, 16K resolution, Landscape veduta photo by Dustin Lefevre & tdraw, 8k resolution, detailed landscape painting by Ivan Shishkin, DeviantArt, Flickr, rendered in Enscape, Miyazaki, Nausicaa Ghibli, Breath of The Wild, 4k detailed post processing, artstation, rendering by octane, unreal engine —ar 16:9',
    magic_emma_watson = 'ultra realistic photo portrait of Emma Watson cosmic energy, colorful, painting burst, beautiful symmetrical face, nonchalant kind look, realistic round eyes, tone mapped, intricate, elegant, highly detailed, digital painting, artstation, concept art, smooth, sharp focus, illustration, dreamy magical atmosphere, art by artgerm and greg rutkowski and alphonse mucha, 4k, 8k',
    yorkshire_terrier_santa = 'Insanely detailed studio portrait shot photo of intricately detailed beautiful yorkshire terrier dressed as santa clause, smirking mischievously at the camera with mischievous detailed yellow green eyes , very detailed, rim light, photo, rim light, ultra-realistic, photorealistic, hyper detailed, photography, shot on Canon DSLR, f/2. 8 , photography by Felix Kunze and Annie Leibovitz and retouched by Pratik Naik',
    norwegian_winter_girl = 'professional portrait photograph of a gorgeous Norwegian girl in winter clothing with long wavy blonde hair, freckles, gorgeous symmetrical face, cute natural makeup, wearing elegant warm winter fashion clothing, ((standing outside in snowy city street)), mid shot, central image composition, (((professionally color graded))), (((bright soft diffused light)))',
    hindu_temple = '8 k concept art from a hindu temple lost in the jungle by david mattingly and samuel araya and michael whelan and dave mckean and richard corben. realistic matte painting with photorealistic hdr volumetric lighting. composition and layout inspired by gregory crewdson. ',
    sailing_ship = 'a big sailing ship in heavy sea, hypermaximalistic, high details, cinematic, 8k resolution, beautiful detailed, insanely intricate details, artstation trending, octane render, unreal engine',
    bioshock_lighthouse = 'giant standalone lighthouse from bioshock infinite in england 1 9 century, half - ruined, covered by mold, staying in 2 kilometers far from a coast, opposite the dark cave - crack of giant rocks. when you see this lighthouse it makes you anxious. deep ones is living under this. view from sea, and view from the coast, by greg rutkowski',
    two_bunny_hug = 'photo of bunny hugging another bunny, dramatic light, pale sunrise, cinematic lighting',
    thomas_tank_military = 'thomas the tank engine as a military tank, intricate, highly detailed, centered, digital painting, artstation, concept art, smooth, sharp focus, illustration, artgerm, tomasz alen kopera, peter mohrbacher, donato giancola, joseph christian leyendecker, wlop, boris vallejo',
    wolf_on_rock = 'a wolf with a tail, standing heroically on a rock. adventurous, new adventure, with a tail, forest, rocks, stream, ripples, tribal armor, female, wolf wolf wolf, atmospheric lighting, stunning, brave. by makoto shinkai, stanley artgerm lau, wlop, rossdraws, james jean, andrei riabovitchev, marc simonetti, krenz cushart, sakimichan, d & d trending on artstation, digital art. ',
    lolita_dress_girl = 'lolita dress, angelic pretty, award winning photograph trending on artstation',
    lolita_dress_magical_elf = 'lolita dress, angelic pretty, portrait of magical lolita woman elf elven,  hyperrealism photography hdr 4k 3d, dreamy and ethereal, fantasy, intricate, elegant, many rainbow bubbles, rose tones, highly detailed, artstation, concept art, cyberpunk wearing, smooth, sharp focus, illustration, art by artgerm and greg rutkowskiand alphonse mucha',
    pencil_giraffe_head = 'an intricate detailed hb pencil sketch of a giraffe head',
    pencil_penguin = 'an intricate detailed hb pencil sketch of a penguin',
    pencil_violin = 'an intricate detailed hb pencil sketch of a violin',
    pencil_orca_whale = 'an orca whale spouting water intricate detailed hb pencil sketch of an black white spotted orca whale',
    pencil_cow = 'an intricate detailed hb pencil sketch of a black white spotted cow',
    pencil_walrus = 'an intricate detailed hb pencil sketch of a walrus',
    pencil_cat_head = 'an sketch of a cat head',
)

In [None]:
#ONLY GOOD PROMPTS HERE
# Choose the prompts for this run from the catalogue: w, x, y and z are the four
# factor prompts and a is the prompt for their product image.
prompt_w, prompt_x, prompt_y, prompt_z = (
    good_prompts.lolita_dress_girl,
    good_prompts.wolf_on_rock,
    good_prompts.kitten_in_box,
    good_prompts.sailing_ship,
)
prompt_a = good_prompts.hindu_temple

In [None]:
# The four factor labels are embedded while the wrapper's unconditional text is
# the product prompt (presumably so it acts as the "negative" side of guidance
# for the factors - TODO confirm against StableDiffusion.get_text_embeddings).
s.uncond_text = prompt_a
label_w, label_x, label_y, label_z = (
    SimpleLabel(factor_prompt)
    for factor_prompt in (prompt_w, prompt_x, prompt_y, prompt_z)
)

# The product label is embedded with the unconditional text set to the empty string.
s.uncond_text = ''
label_a = SimpleLabel(prompt_a)

In [None]:
# Show one sample image per label so each prompt can be eyeballed before training.
print("Factors")
for factor_label in (label_w, label_x, label_y, label_z):
    rp.display_image(factor_label.get_sample_image())

print("Products")
rp.display_image(label_a.get_sample_image())

In [None]:
#Mixing 4->1

#Random timestep vs this schedule. Should be faster.

from itertools import chain

#Select Learnable Image Type:
def learnable_image_maker():
    """Return a new default-configured LearnableImageFourier moved to the model's device."""
    return LearnableImageFourier().to(s.device)
# Alternative configuration kept for reference:
# learnable_image_maker = lambda:LearnableImageFourier(height=512,width=512,num_features=512,hidden_dim=512,scale=20).to(s.device)

# Four separately parameterised factor images.
learnable_image_w=learnable_image_maker()
learnable_image_x=learnable_image_maker()
learnable_image_y=learnable_image_maker()
learnable_image_z=learnable_image_maker()

def learnable_image_a():
    """Return the product image derived from the four factor images.

    Each call re-evaluates w, x, y and z, multiplies them together, scales the
    result by 4 and clamps it to [0, 1]. It has no parameters of its own, so it
    is called like the factor images but contributes nothing to `params`.
    """
    product=4*learnable_image_w()*learnable_image_x()*learnable_image_y()*learnable_image_z()
    return torch.clamp(product,min=0,max=1)

# Materialised as a list rather than left as a bare chain(): the optimiser
# constructor consumes whatever iterable it is given, so a chain object would be
# exhausted afterwards and any later use of `params` would silently see nothing.
params=list(chain(
    learnable_image_w.parameters(),
    learnable_image_x.parameters(),
    learnable_image_y.parameters(),
    learnable_image_z.parameters(),
))
optim=torch.optim.SGD(params,lr=1e-4)

In [None]:
# Parallel lists: entry i of each describes one optimisation target
# (the four factors first, the product last).
labels = [label_w, label_x, label_y, label_z, label_a]
learnable_images = [
    learnable_image_w,
    learnable_image_x,
    learnable_image_y,
    learnable_image_z,
    learnable_image_a,
]

# Equal raw weights, normalised to sum to 1 and then scaled by 4.
weights = rp.as_numpy_array([1, 1, 1, 1, 1])
weights = (weights / weights.sum()) * 4

In [None]:
# Training loop: every iteration runs one train_step per target (four factors
# plus the product), then applies a single optimiser update.
NUM_ITER=100000
MAX_STEP=990
# MIN_STEP=450
MIN_STEP=10
s.max_step=MAX_STEP
s.min_step=MIN_STEP

et=rp.eta(NUM_ITER)

# folder='sd_previewer_results2/'+prompt[:100]+rp.random_namespace_hash()
# rp.make_folder(folder)

# Start from clean gradients. If an earlier run of this cell was interrupted
# between train_step and optim.step(), its partial gradients would otherwise be
# applied by the first update below.
optim.zero_grad()

for iter_num in range(NUM_ITER):

    # Optional annealed schedule (disabled). The interpolated step was only ever
    # consumed by the commented-out assignment, so it is no longer computed.
    # step = rp.blend(MAX_STEP,MIN_STEP,iter_num/NUM_ITER)
    # s.min_step = s.max_step = int(step)

    et(iter_num)

    # image=learnable_image()
    # variants=list(get_variants(image,label))
    # num_variants=len(variants)

    # Holds only the current iteration's predictions; later cells read the
    # contents left over from the final iteration.
    preds=[]
    for label,learnable_image,weight in zip(labels,learnable_images,weights):
        # NOTE(review): this loop never calls backward() itself, so train_step
        # presumably back-propagates into the image's parameters - confirm in
        # source.stable_diffusion.
        pred=s.train_step(
            label.embedding,
            learnable_image()[None],  # [None] adds a leading axis of size 1

            #PRESETS (uncomment one):
            noise_coef=.1*weight,guidance_scale=50,#10
            # noise_coef=0,image_coef=-.01,guidance_scale=50,
            # noise_coef=0,image_coef=-.005,guidance_scale=50,
            # noise_coef=.1,image_coef=-.010,guidance_scale=50,
            # noise_coef=.1,image_coef=-.005,guidance_scale=50,
            # noise_coef=.1/num_variants, image_coef=-.005/num_variants, guidance_scale=50,
        )
        preds+=list(pred)

    # Every 200 iterations, show all five current images side by side.
    # if not iter_num%20:
    if not iter_num%200:
        with torch.no_grad():
            im=rp.tiled_images([rp.as_numpy_image(render()) for render in learnable_images])
            # rp.save_image(im,folder+'/%06i.png'%iter_num)
            rp.display_image(im)
    # if not iter_num%60:
    #     gim=rp.tiled_images(rp.as_numpy_images(s.decode_latents(pred)))
    #     rp.display_image(gim)
    #     rp.save_image(gim,folder+'/G%06i.png'%iter_num)

    optim.step()
    optim.zero_grad()

In [None]:
# Variant of the training loop that updates ONE randomly chosen target per
# iteration instead of all five. The cell is self-contained: it rebuilds
# labels / learnable_images / weights itself.
labels=[label_w,label_x,label_y,label_z,label_a]
learnable_images=[learnable_image_w,learnable_image_x,learnable_image_y,learnable_image_z,learnable_image_a]
weights=[1,1,1,1,1]

# Equal raw weights, normalised to sum to 1 and then scaled by 4.
weights=rp.as_numpy_array(weights)
weights=weights/weights.sum()
weights=weights*4

# (label, image, weight) triples are loop-invariant, so build the list once
# instead of re-zipping it on each of the NUM_ITER iterations.
# NOTE(review): assumes rp.random_batch does not modify the list it is given.
targets=list(zip(labels,learnable_images,weights))

NUM_ITER=100000
MAX_STEP=990
# MIN_STEP=450
MIN_STEP=10
s.max_step=MAX_STEP
s.min_step=MIN_STEP

et=rp.eta(NUM_ITER)

# folder='sd_previewer_results2/'+prompt[:100]+rp.random_namespace_hash()
# rp.make_folder(folder)

# Start from clean gradients. If an earlier run of a training cell was
# interrupted between train_step and optim.step(), its partial gradients would
# otherwise be applied by the first update below.
optim.zero_grad()

for iter_num in range(NUM_ITER):

    # Optional annealed schedule (disabled). The interpolated step was only ever
    # consumed by the commented-out assignment, so it is no longer computed.
    # step = rp.blend(MAX_STEP,MIN_STEP,iter_num/NUM_ITER)
    # s.min_step = s.max_step = int(step)

    et(iter_num)

    # image=learnable_image()
    # variants=list(get_variants(image,label))
    # num_variants=len(variants)

    # Holds only the current iteration's predictions; later cells read the
    # contents left over from the final iteration.
    preds=[]
    for label,learnable_image,weight in rp.random_batch(targets,1):
        # NOTE(review): this loop never calls backward() itself, so train_step
        # presumably back-propagates into the image's parameters - confirm in
        # source.stable_diffusion.
        pred=s.train_step(
            label.embedding,
            learnable_image()[None],  # [None] adds a leading axis of size 1

            #PRESETS (uncomment one):
            noise_coef=.1*weight,guidance_scale=50,#10
            # noise_coef=0,image_coef=-.01,guidance_scale=50,
            # noise_coef=0,image_coef=-.005,guidance_scale=50,
            # noise_coef=.1,image_coef=-.010,guidance_scale=50,
            # noise_coef=.1,image_coef=-.005,guidance_scale=50,
            # noise_coef=.1*weight, image_coef=-.005*weight, guidance_scale=50,
        )
        preds+=list(pred)

    # Every 200 iterations, show all five current images side by side.
    # if not iter_num%20:
    if not iter_num%200:
        with torch.no_grad():
            im=rp.tiled_images([rp.as_numpy_image(render()) for render in learnable_images])
            # rp.save_image(im,folder+'/%06i.png'%iter_num)
            rp.display_image(im)
    # if not iter_num%60:
    #     gim=rp.tiled_images(rp.as_numpy_images(s.decode_latents(pred)))
    #     rp.display_image(gim)
    #     rp.save_image(gim,folder+'/G%06i.png'%iter_num)

    optim.step()
    optim.zero_grad()

In [None]:
# Side-by-side view of the current state: the four factor images followed by
# their product image.
im = rp.tiled_images([
    rp.as_numpy_image(render())
    for render in (
        learnable_image_w,
        learnable_image_x,
        learnable_image_y,
        learnable_image_z,
        learnable_image_a,
    )
])
# rp.save_image(im,folder+'/%06i.png'%iter_num)
rp.display_image(im)

In [None]:
# Decode the predictions kept from the last training iteration and tile them.
# Written as one expression so no intermediate tensor is left bound to a global.
im = rp.tiled_images(
    list(rp.as_numpy_images(s.decode_latents(torch.stack(preds))))
)
# rp.save_image(im,folder+'/%06i.png'%iter_num)
rp.display_image(im)

In [None]:
# Inspect the shape of the stacked predictions; `preds` is reset at the start of
# every training iteration, so this reflects only the most recent one.
torch.stack(preds).shape

In [None]:
# Ask PyTorch's CUDA caching allocator to release cached memory it is not using.
# Tensors still referenced from the notebook namespace are not freed by this.
torch.cuda.empty_cache()