In [None]:
print("Installing CLIP...")
!git clone https://github.com/openai/CLIP                 &> /dev/null
 
print("Installing Python Libraries for AI...")
!git clone https://github.com/CompVis/taming-transformers &> /dev/null
!pip install transformers                                 &> /dev/null
!pip install torch==1.13.0                                &> /dev/null
!pip install torchvision=0.14.0                           &> /dev/null
!pip install torchtext==0.14.0                            &> /dev/null
!pip install torchaudio==0.13.0                           &> /dev/null
# The transformer is a component used in many neural network designs for processing long sequential data, such as natural language text, genome sequences, sound signals or time series data                          
!pip install ftfy regex tqdm omegaconf pytorch-lightning  &> /dev/null
!pip install kornia                                       &> /dev/null
!pip install einops                                       &> /dev/null
!pip install wget                                         &> /dev/null
# !pip install taming-transformers-rom1504
!pip install fastcore -U                                  &> /dev/null

!pip install stegano                                      &> /dev/null
!apt install exempi                                       &> /dev/null
!pip install python-xmp-toolkit                           &> /dev/null
!pip install imgtag                                       &> /dev/null
!pip install pillow==7.1.2                                &> /dev/null 
# The Pillow library contains all the basic image processing functionality. You can do image resizing, rotation and transformation                             

In [None]:
!curl -L -o celebahq.yaml -C - 'https://app.koofr.net/content/links/6dddf083-40c8-470a-9360-a9dab2a94e96/files/get/2021-04-23T18-11-19-project.yaml?path=%2F2021-04-23T18-11-19_celebahq_transformer%2Fconfigs%2F2021-04-23T18-11-19-project.yaml&force' #CelebA-HQ
!curl -L -o celebahq.ckpt -C - 'https://app.koofr.net/content/links/6dddf083-40c8-470a-9360-a9dab2a94e96/files/get/last.ckpt?path=%2F2021-04-23T18-11-19_celebahq_transformer%2Fcheckpoints%2Flast.ckpt&force' #CelebA-HQ


In [None]:
# Loading libraries and definitions
# from pytorch_lightning.utilities.rank_zero import rank_zero_only

# Manual patch for newer PyTorch: in taming-transformers/taming/data/utils.py, delete the `from torch._six import ...` line and replace every use of `string_classes` with `str`.

import argparse
import math
from pathlib import Path
import sys
 
sys.path.append('./taming-transformers')
from IPython import display
from base64 import b64encode
from omegaconf import OmegaConf
from PIL import Image
from taming.models import cond_transformer, vqgan
import torch
from torch import nn, optim
from torch.nn import functional as F
from torchvision import transforms
from torchvision.transforms import functional as TF
from tqdm.notebook import tqdm
 
from CLIP import clip
import kornia.augmentation as K
import numpy as np
import imageio
from PIL import ImageFile, Image
from imgtag import ImgTag    # metadatos 
import libxmp                # metadatos
from libxmp import *         # metadatos
from libxmp import consts
from stegano import lsb
import json
ImageFile.LOAD_TRUNCATED_IMAGES = True
 
def sinc(x):
    """Normalized sinc, sin(pi*x) / (pi*x), evaluated elementwise on tensor `x`.

    Entries of `x` that are exactly zero map to 1 instead of the 0/0 result.
    """
    scaled = math.pi * x
    return torch.where(x != 0, torch.sin(scaled) / scaled, x.new_ones([]))
  
def lanczos(x, a):
    """Lanczos window of half-width `a` sampled at positions `x`, normalized to sum to 1.

    Positions outside the open interval (-a, a) get weight zero.
    """
    inside = torch.logical_and(-a < x, x < a)
    window = sinc(x) * sinc(x/a)
    weights = torch.where(inside, window, x.new_zeros([]))
    return weights / weights.sum()
 
 
def ramp(ratio, width):
    """Symmetric 1-D grid of sample positions spaced `ratio` apart around zero.

    Builds 0, ratio, 2*ratio, ... (ceil(width / ratio + 1) values, accumulated
    by repeated addition), mirrors it to the negative side and drops the
    outermost value at each end.
    """
    count = math.ceil(width / ratio + 1)
    positions = []
    current = 0
    for _ in range(count):
        positions.append(current)
        current += ratio
    # Same dtype torch.empty([count]) would have produced.
    positive = torch.tensor(positions, dtype=torch.get_default_dtype())
    mirrored = -positive[1:].flip([0])
    return torch.cat([mirrored, positive])[1:-1]
 
 
def resample(input, size, align_corners=True):
    """Resize an (n, c, h, w) tensor to `size` = (dh, dw).

    When an axis is being shrunk, that axis is first low-pass filtered with a
    Lanczos kernel (reflect-padded so h and w are unchanged); the final resize
    is bicubic interpolation.
    """
    n, c, h, w = input.shape
    dh, dw = size

    # Fold batch and channels together so every plane is filtered on its own.
    planes = input.view([n * c, 1, h, w])

    if dh < h:
        taps_h = lanczos(ramp(dh / h, 2), 2).to(planes.device, planes.dtype)
        margin_h = (taps_h.shape[0] - 1) // 2
        planes = F.pad(planes, (0, 0, margin_h, margin_h), 'reflect')
        planes = F.conv2d(planes, taps_h[None, None, :, None])

    if dw < w:
        taps_w = lanczos(ramp(dw / w, 2), 2).to(planes.device, planes.dtype)
        margin_w = (taps_w.shape[0] - 1) // 2
        planes = F.pad(planes, (margin_w, margin_w, 0, 0), 'reflect')
        planes = F.conv2d(planes, taps_w[None, None, None, :])

    restored = planes.view([n, c, h, w])
    return F.interpolate(restored, size, mode='bicubic', align_corners=align_corners)
 
 
class ReplaceGrad(torch.autograd.Function):
    """Autograd function that returns one tensor but routes gradients to another.

    The forward pass yields `x_forward` unchanged. The backward pass gives no
    gradient to `x_forward` and hands the incoming gradient, summed down to the
    shape of `x_backward`, to `x_backward`.
    """

    @staticmethod
    def forward(ctx, x_forward, x_backward):
        # Only the shape of x_backward is needed later, not its values.
        ctx.shape = x_backward.shape
        return x_forward

    @staticmethod
    def backward(ctx, grad_in):
        grad_for_backward = grad_in.sum_to_size(ctx.shape)
        return None, grad_for_backward


replace_grad = ReplaceGrad.apply
 
 
class ClampWithGrad(torch.autograd.Function):
    """Clamp to [min, max] in the forward pass with a selective backward pass.

    The incoming gradient is kept for an element when the input was inside the
    range, or when the gradient has the same sign as the amount by which the
    input exceeded the range; otherwise it is zeroed. `min` and `max` receive
    no gradient.
    """

    @staticmethod
    def forward(ctx, input, min, max):
        ctx.min, ctx.max = min, max
        ctx.save_for_backward(input)
        return input.clamp(min, max)

    @staticmethod
    def backward(ctx, grad_in):
        (saved,) = ctx.saved_tensors
        # Zero inside the range, signed distance to the range outside it.
        overshoot = saved - saved.clamp(ctx.min, ctx.max)
        keep = grad_in * overshoot >= 0
        return grad_in * keep, None, None


clamp_with_grad = ClampWithGrad.apply
 
 
def vector_quantize(x, codebook):
    """Snap each vector along the last axis of `x` to its nearest `codebook` row.

    Nearest is by squared Euclidean distance. The quantized values are returned
    in the forward pass, while gradients flow back to `x` through `replace_grad`.
    """
    x_sq = x.pow(2).sum(dim=-1, keepdim=True)
    code_sq = codebook.pow(2).sum(dim=1)
    # ||x - c||^2 expanded as ||x||^2 + ||c||^2 - 2 x.c
    d = x_sq + code_sq - 2 * x @ codebook.T
    nearest = d.argmin(-1)
    selector = F.one_hot(nearest, codebook.shape[0]).to(d.dtype)
    x_q = selector @ codebook
    return replace_grad(x_q, x)
 
 

# Scores how similar a batch of image embeddings is to one prompt embedding.
class Prompt(nn.Module):
    """Weighted distance loss between image embeddings and a prompt embedding.

    `embed`, `weight` and `stop` are stored as buffers, so they move with the
    module on `.to(device)`.
    """

    def __init__(self, embed, weight=1., stop=float('-inf')):
        super().__init__()
        buffers = (
            ('embed', embed),
            ('weight', torch.as_tensor(weight)),
            ('stop', torch.as_tensor(stop)),
        )
        for name, value in buffers:
            self.register_buffer(name, value)

    def forward(self, input):
        # input: embeddings of a batch of image cutouts
        image_dirs = F.normalize(input.unsqueeze(1), dim=2)
        prompt_dirs = F.normalize(self.embed.unsqueeze(0), dim=2)
        # Distance between unit vectors, mapped through 2 * arcsin(d / 2) ** 2.
        chord = (image_dirs - prompt_dirs).norm(dim=2)
        dists = chord.div(2).arcsin().pow(2).mul(2)
        # A negative weight flips the sign of the distance.
        dists = dists * self.weight.sign()
        # Forward value is `dists`; the gradient is taken through the version
        # floored at `stop`.
        floored = torch.maximum(dists, self.stop)
        return self.weight.abs() * replace_grad(dists, floored).mean()
 
 
def parse_prompt(prompt):
    """Split a prompt of the form ``text[:weight[:stop]]`` into its parts.

    The numeric suffixes are optional. A colon that is not followed by a number
    (for example in a URL or in ordinary prose such as "Portrait: ...") is
    treated as part of the text instead of raising ValueError.

    Args:
        prompt: The raw prompt string.

    Returns:
        A tuple ``(text, weight, stop)`` where `weight` defaults to 1.0 and
        `stop` defaults to negative infinity.
    """
    # Try the longest numeric suffix first (weight and stop), then weight only.
    for n_numeric in (2, 1):
        parts = prompt.rsplit(':', n_numeric)
        if len(parts) <= n_numeric:
            continue  # not enough colons for this many numeric fields
        try:
            numbers = [float(part) for part in parts[1:]]
        except ValueError:
            continue  # the trailing fields are text, not numbers
        weight = numbers[0]
        stop = numbers[1] if len(numbers) > 1 else float('-inf')
        return parts[0], weight, stop
    # No numeric suffix at all: the whole string is the prompt text.
    return prompt, 1.0, float('-inf')
 

class MakeCutouts(nn.Module):
    """Produce `cutn` random square crops of an image batch, resized to `cut_size`.

    Each crop side is drawn between min(side, cut_size) and the shorter image
    side (skewed by `cut_pow`), the crops are resized with `resample`, run
    through the kornia augmentation pipeline, and finally perturbed with
    Gaussian noise whose per-crop scale is uniform in [0, noise_fac].
    """

    def __init__(self, cut_size, cutn, cut_pow=1.):
        super().__init__()
        self.cut_size = cut_size
        self.cutn = cutn
        self.cut_pow = cut_pow
        self.noise_fac = 0.1
        augmentations = [
            K.RandomHorizontalFlip(p=0.5),
            # K.RandomSolarize(0.01, 0.01, p=0.7),
            K.RandomSharpness(0.3,p=0.4),
            K.RandomAffine(degrees=30, translate=0.1, p=0.8, padding_mode='border'),
            K.RandomPerspective(0.2,p=0.4),
            K.ColorJitter(hue=0.01, saturation=0.01, p=0.7),
        ]
        self.augs = nn.Sequential(*augmentations)

    def forward(self, input):
        height, width = input.shape[2:4]
        largest = min(width, height)
        smallest = min(width, height, self.cut_size)
        span = largest - smallest
        crops = []
        for _ in range(self.cutn):
            # Draw order (side, x offset, y offset) is kept so seeded runs match.
            side = int(torch.rand([])**self.cut_pow * span + smallest)
            left = torch.randint(0, width - side + 1, ())
            top = torch.randint(0, height - side + 1, ())
            crop = input[:, :, top:top + side, left:left + side]
            crops.append(resample(crop, (self.cut_size, self.cut_size)))
        batch = self.augs(torch.cat(crops, dim=0))
        if self.noise_fac:
            scales = batch.new_empty([self.cutn, 1, 1, 1]).uniform_(0, self.noise_fac)
            batch = batch + scales * torch.randn_like(batch)
        return batch
 
 
def load_vqgan_model(config_path, checkpoint_path):
    """Load the first-stage VQGAN out of a Net2NetTransformer checkpoint.

    Args:
        config_path: Path to the OmegaConf YAML describing the model.
        checkpoint_path: Path to the checkpoint passed to `init_from_ckpt`.

    Returns:
        The transformer's `first_stage_model`, in eval mode with gradients
        disabled and its `loss` attribute deleted.

    Raises:
        ValueError: If the config's model target is not
            ``taming.models.cond_transformer.Net2NetTransformer``.
    """
    config = OmegaConf.load(config_path)
    if config.model.target != 'taming.models.cond_transformer.Net2NetTransformer':
        raise ValueError(f'unknown model type: {config.model.target}')

    parent_model = cond_transformer.Net2NetTransformer(**config.model.params)
    parent_model.eval().requires_grad_(False)
    parent_model.init_from_ckpt(checkpoint_path)

    model = parent_model.first_stage_model
    del model.loss
    return model
 
 
def resize_image(image, out_size):
    """Resize a PIL image to at most the area of `out_size`, keeping its aspect ratio.

    The target area is the smaller of the image's own area and
    out_size[0] * out_size[1]; the result is resampled with Lanczos.
    """
    width, height = image.size[0], image.size[1]
    ratio = width / height
    area = min(width * height, out_size[0] * out_size[1])
    new_width = round((area * ratio)**0.5)
    new_height = round((area / ratio)**0.5)
    return image.resize((new_width, new_height), Image.LANCZOS)

def download_img(img_url):
    """Best-effort download of `img_url` to ``input.jpg``.

    Args:
        img_url: URL of the image to fetch.

    Returns:
        Whatever `wget.download` returns on success (presumably the saved file
        name - confirm against the wget package), or None if the download fails.

    Raises:
        ImportError: If the `wget` package is not installed.
    """
    # `wget` is pip-installed by the setup cell but was never imported, so the
    # call below always raised NameError, which the old bare `except:` hid;
    # the function could only ever return None.
    import wget

    try:
        return wget.download(img_url, out="input.jpg")
    except Exception:
        # Still best-effort, but KeyboardInterrupt/SystemExit now propagate.
        return None


In [None]:
import base64
lossArray = []
def startProcessing(text):
    """Generate an image for `text` by optimising a VQGAN latent against CLIP.

    Loads the VQGAN described by ``celebahq.yaml`` / ``celebahq.ckpt`` and the
    CLIP model ``ViT-B/32``, then runs Adam on the latent `z` so that the CLIP
    embeddings of random cutouts of the decoded image approach the prompt
    embedding.

    Side effects:
        * creates the ``steps`` directory if it is missing;
        * writes every step's frame to ``steps/{i:04}.png`` and the frame of
          each check-in to ``progress.png``, both with JSON run metadata
          embedded via LSB steganography;
        * appends the summed loss of each check-in to the module-level
          `lossArray` (it is never cleared here);
        * prints the device, prompts, seed and per-check-in losses.

    Args:
        text: Prompt string, optionally suffixed with ``:weight`` or
            ``:weight:stop`` (see `parse_prompt`).

    Returns:
        None.
    """
    texts = [text]
    target_images = []
    is_gumbel = False
    name_model = "CelebA-HQ"
    input_images = False

    max_iterations = 10000

    # Parameters
    args = argparse.Namespace(
        prompts=texts,
        image_prompts=target_images,
        noise_prompt_seeds=[],
        noise_prompt_weights=[],
        size=[300, 300],
        init_image=None,
        init_weight=0.,
        clip_model='ViT-B/32',   #ViT-L/14
        vqgan_config='celebahq.yaml',
        vqgan_checkpoint='celebahq.ckpt',
        step_size=0.1,
        cutn=64,
        cut_pow=1.,
        display_freq=1,
        seed=-1,
    )

    # ascend_txt() writes a frame into this directory on every step. Create it
    # up front so the run does not fail after the models have been loaded just
    # because a separate `mkdir` cell was skipped or the folder was deleted.
    Path('steps').mkdir(parents=True, exist_ok=True)

    # Start Processing
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    print('Using device:', device)
    if texts:
        print('Using texts:', texts)
    if target_images:
        print('Using image prompts:', target_images)
    # NOTE(review): args.seed is -1, never None, so the random-seed branch is
    # not taken and every run is seeded with -1. If -1 was meant as "pick a
    # random seed", map it to None before this check - confirm the intent.
    if args.seed is None:
        seed = torch.seed()
    else:
        seed = args.seed
    torch.manual_seed(seed)
    print('Using seed:', seed)

    model = load_vqgan_model(args.vqgan_config, args.vqgan_checkpoint).to(device)
    perceptor = clip.load(args.clip_model, jit=False)[0].eval().requires_grad_(False).to(device)

    cut_size = perceptor.visual.input_resolution

    # The two quantizer variants expose the same information under different
    # attribute names; resolve them once instead of branching at every use.
    if is_gumbel:
        e_dim = model.quantize.embedding_dim
        n_toks = model.quantize.n_embed
        codebook = model.quantize.embed.weight
    else:
        e_dim = model.quantize.e_dim
        n_toks = model.quantize.n_e
        codebook = model.quantize.embedding.weight

    f = 2**(model.decoder.num_resolutions - 1)
    make_cutouts = MakeCutouts(cut_size, args.cutn, cut_pow=args.cut_pow)

    # Round the requested size down to a whole number of latent tokens.
    toksX, toksY = args.size[0] // f, args.size[1] // f
    sideX, sideY = toksX * f, toksY * f

    # Per-channel codebook extremes, used to clamp z after every update.
    z_min = codebook.min(dim=0).values[None, :, None, None]
    z_max = codebook.max(dim=0).values[None, :, None, None]

    # z : initial VQGAN-encoded image vector
    if args.init_image:
        pil_image = Image.open(args.init_image).convert('RGB')
        pil_image = pil_image.resize((sideX, sideY), Image.LANCZOS)
        z, *_ = model.encode(TF.to_tensor(pil_image).to(device).unsqueeze(0) * 2 - 1)
    else:
        # No init image: start from a random codebook entry per token.
        one_hot = F.one_hot(torch.randint(n_toks, [toksY * toksX], device=device), n_toks).float()
        z = one_hot @ codebook
        z = z.view([-1, toksY, toksX, e_dim]).permute(0, 3, 1, 2)
    z_orig = z.clone()
    z.requires_grad_(True)
    opt = optim.Adam([z], lr=args.step_size)

    normalize = transforms.Normalize(mean=[0.48145466, 0.4578275, 0.40821073],
                                     std=[0.26862954, 0.26130258, 0.27577711])

    # One Prompt module per text / image / noise prompt.
    pMs = []

    for prompt in args.prompts:
        txt, weight, stop = parse_prompt(prompt)
        embed = perceptor.encode_text(clip.tokenize(txt).to(device)).float()
        pMs.append(Prompt(embed, weight, stop).to(device))

    for prompt in args.image_prompts:
        path, weight, stop = parse_prompt(prompt)
        img = resize_image(Image.open(path).convert('RGB'), (sideX, sideY))
        batch = make_cutouts(TF.to_tensor(img).unsqueeze(0).to(device))
        embed = perceptor.encode_image(normalize(batch)).float()
        pMs.append(Prompt(embed, weight, stop).to(device))

    # The loop variable is `noise_seed`, not `seed`: add_stegano_data() records
    # the run seed through its closure and must not see it overwritten here.
    for noise_seed, weight in zip(args.noise_prompt_seeds, args.noise_prompt_weights):
        gen = torch.Generator().manual_seed(noise_seed)
        embed = torch.empty([1, perceptor.visual.output_dim]).normal_(generator=gen)
        pMs.append(Prompt(embed, weight).to(device))

    def synth(z):
        """Quantize `z` against the codebook, decode it and map the result into [0, 1]."""
        z_q = vector_quantize(z.movedim(1, 3), codebook).movedim(3, 1)
        return clamp_with_grad(model.decode(z_q).add(1).div(2), 0, 1)

    def add_stegano_data(filename):
        """Embed the run metadata as JSON into `filename` (the file is overwritten)."""
        data = {
            "title": " | ".join(args.prompts) if args.prompts else None,
            "notebook": "VQGAN+CLIP",
            "i": i,  # current step, read from the enclosing loop variable
            "model": name_model,
            "seed": str(seed),
            "input_images": input_images
        }
        lsb.hide(filename, json.dumps(data)).save(filename)

    @torch.no_grad()
    def checkin(i, losses):
        """Log the losses of step `i`, record their sum and save ``progress.png``."""
        losses_str = ', '.join(f'{loss.item():g}' for loss in losses)
        total = sum(losses).item()
        lossArray.append(total)
        tqdm.write(f'i: {i}, loss: {total:g}, losses: {losses_str}')
        out = synth(z)
        TF.to_pil_image(out[0].cpu()).save('progress.png')
        add_stegano_data('progress.png')

    def ascend_txt(i):
        """Return the list of per-prompt losses for step `i` and save the step's frame."""
        out = synth(z)  # image synthesizing (generation)
        # Create cutouts and encode them with CLIP.
        iii = perceptor.encode_image(normalize(make_cutouts(out))).float()

        result = []
        if args.init_weight:
            result.append(F.mse_loss(z, z_orig) * args.init_weight / 2)
        for prompt in pMs:
            result.append(prompt(iii))

        # Save this step's frame as an 8-bit image in height x width x channel order.
        img = out.mul(255).clamp(0, 255)[0].cpu().detach().numpy().astype(np.uint8)
        img = np.transpose(img, (1, 2, 0))
        filename = f"steps/{i:04}.png"
        imageio.imwrite(filename, np.array(img))
        add_stegano_data(filename)
        return result

    def train(i):
        """Run one optimisation step on `z`."""
        opt.zero_grad()  # clear the gradient left over from the previous step
        lossAll = ascend_txt(i)  # calculate the losses
        if i % args.display_freq == 0:
            checkin(i, lossAll)
        # One loss tensor per prompt: sum them so backward() runs once.
        loss = sum(lossAll)
        loss.backward()
        opt.step()  # update the image vector z
        with torch.no_grad():
            # Keep z inside the range spanned by the codebook.
            z.copy_(z.maximum(z_min).minimum(z_max))

    # Steps 0..max_iterations inclusive, i.e. max_iterations + 1 updates.
    # `i` must remain a local of startProcessing: add_stegano_data() reads it.
    for i in range(max_iterations + 1):
        train(i)


In [None]:
!mkdir steps

In [None]:
import shutil
shutil.rmtree('steps', ignore_errors=True)

In [None]:
def getImagesOutput(count, max_iterations=100, steps_dir="/content/steps"):
    """Return the last `count` step frames before `max_iterations`, base64-encoded.

    Reads ``{steps_dir}/{n:04}.png`` for n = max_iterations - count up to
    max_iterations - 1, in that order.

    Args:
        count: Number of frames to return. Zero or a negative value yields an
            empty list instead of looping on non-existent files.
        max_iterations: Step number just past the last frame to read.
        steps_dir: Directory holding the numbered frames.

    Returns:
        A list of `bytes` objects, each the base64 encoding of one PNG file.

    Raises:
        OSError: If one of the expected frame files cannot be opened.
    """
    print("Getting Imges\n")
    images = []
    for remaining in range(count, 0, -1):
        filename = f"{steps_dir}/{(max_iterations - remaining):04}.png"
        with open(filename, "rb") as img_file:
            images.append(base64.b64encode(img_file.read()))
    print(len(images))
    return images

The woman has oval face and high cheekbones. She has straight hair which is brown in color. She has arched eyebrows. The smiling young attractive woman has heavy makeup. She is wearing earrings and lipstick.



The woman has an oval face and high cheekbones. She is a young woman with dark skin. She has straight blonde hair. She is smiling and wears heavy makeup.

The man has a round, flat face and high cheekbones. He is a young man with pale skin. He has black hair and smaller eyes.

round cheeks, small chins

An average Indian boy, 18 years old, has a round face with dark hair and brown eyes. He has a straight nose and a light to medium skin tone with minimal facial hair

A young boy with White Skin, curly blonde hair, bright blue eyes, and a narrow, pointed chin.



-------------------------------------------------------------

Male, Female.

Arched eyebrows, Bushy eyebrows, Normal eyebrows,

Narrow eyes, Normal eyes,

Big nose, Pointy nose, Normal nose,

Big lips, Normal lips,

Black hair, Blond hair, Brown hair, Gray hair,

Straight hair, Wavy hair, Receding hairline, Bald,

Mustache, No mustache,

Five-o-clock shadow, Goatee, Sideburns, No beard,

Fair, No fair,

Bags under eyes, No bags under eyes,

Bangs, No bangs,

Chubby, No chubby,

Double chin, No double chin,

High cheekbones, No high cheekbones,

Rosy cheeks, No rosy cheeks,

Oval face, No oval face,

Pale skin, Normal skin,

Wearing earrings, No wearing earrings,

Wearing lipstick, No wearing lipstick,

Wearing eye glass, No wearing eye glass,

Heavy makeup, No heavy makeup, 

Young, Old.



# **Before running the cell below, create a `steps` folder and check that `lossArray` is empty.**

In [None]:
# json_data = request.get_json(force=True)
text_prompt =  "An elderly man with a thin, wispy mustache, a serene expression, and deep wrinkles around the eyes."
startProcessing(text_prompt)

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Plot every `li`-th recorded loss against its index in lossArray and label
# each plotted point with its value rounded to 3 decimals.
li = 25
fig, ax = plt.subplots(figsize=(12,8))

ys = lossArray[::li]
print(ys)
xs = [li * k for k in range(len(ys))]

plt.xticks(range(0, len(lossArray), li))
plt.yticks([])  # the y axis has no ticks; values are shown by the point labels
plt.ylabel('LOSS')
plt.xlabel('ITERATIONS')
plt.plot(xs, ys)
for x_pos, y_val in zip(xs, ys):
    ax.text(x_pos, y_val, round(y_val, 3))
plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline

import os
count = 0  # not read in this cell; kept in case another cell references it

# Read frames from the same relative folder that startProcessing writes to.
# The old code listed "steps/" but opened "/content/steps/...", which only
# agree when the working directory is /content.
steps_dir = "steps"

# Use the frames that actually exist, ordered by step number. A plain string
# sort would misplace names with more than four digits, such as 10000.png.
frame_names = sorted(
    (name for name in os.listdir(steps_dir)
     if name.endswith(".png") and name[:-4].isdigit()),
    key=lambda name: int(name[:-4]),
)

# Keep every 10th frame; imread returns the image data as an array.
images1 = [mpimg.imread(os.path.join(steps_dir, name)) for name in frame_names[::10]]
print(len(images1))

plt.figure(figsize=(40,30))
columns = 11
for i, image in enumerate(images1):
    plt.subplot(int(len(images1) / columns + 4), columns, i + 1)
    plt.axis('off')
    plt.imshow(image)

Store the images in Google Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!cp -r /content/steps/ /content/drive/MyDrive/GANRES/

In [None]:
!pip install pipreqs

In [None]:
!pipreqs . --force