# Setup
Folgende Befehle müssen vor dem Verwenden ausgeführt werden. Die aktuelle Implementierung verwendet ein lokales Google Drive, weshalb Dateipfade vor dem Verwenden angepasst werden müssen.

Check GPU, CPU and CUDA

In [None]:
import locale
# Replace locale.getpreferredencoding with a function that always reports UTF-8.
# NOTE(review): presumably a workaround for "!" shell commands failing under a
# non-UTF-8 locale in Colab — confirm it is still needed.
locale.getpreferredencoding = lambda: "UTF-8"
# Check GPU and CUDA
!nvidia-smi
!nvcc --version

Mount your Google Drive containing all files you want to use later on. This allows easy loading and saving operations.

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the Drive paths used below live under it.
drive.mount('/content/drive')

Install all needed requirements and adjust system path

In [None]:
# Install the additional packages einops, ninja and gdown.
!pip install einops ninja gdown
import sys
# Put the stylegan3 checkout on the import path.
# NOTE(review): presumably the source of the dnnlib / legacy / training imports
# used below — confirm the path matches your Drive layout.
sys.path.append('/content/drive/My Drive/Bachelorarbeit/bachelorarbeit/stylegan3')

## Imports
Alle verwendeten Imports

In [None]:
import os
from skimage import io as ios

import io
import re
from typing import List, Optional, Tuple, Union

import click
import dnnlib
import numpy as np
import PIL.Image
import torch
import torchvision
import legacy
import time
from typing import BinaryIO, Union, IO
from training.networks_stylegan2 import Generator
from matplotlib import pyplot as plt
import matplotlib
import cv2
import IPython.display
import dlib
import glob

import requests
import html
import hashlib
import PIL.ImageFile
import scipy.ndimage
import threading
import queue
import time
import json
import uuid
import argparse
import itertools
import shutil
from collections import OrderedDict, defaultdict
import collections

from pickle import NONE

import math
import torch.nn.functional as F

import pickle


#TSNE Imports
from sklearn.manifold import TSNE
from keras.datasets import mnist
from sklearn.datasets import load_iris
from numpy import reshape
import seaborn as sns
import pandas as pd  
import random
import pylab
from mpl_toolkits.mplot3d import Axes3D
from matplotlib.colors import ListedColormap

## Utility functions

Helferfunktionen

In [None]:
# Torch device string the helper functions below move their tensors to.
DEVICE = 'cuda'



def generate_z_from_seed(seed: int, num_samples: int, truncation_psi: float, device: torch.device, dim: int) -> torch.Tensor:
    '''Generate a batch of latent vectors z from a fixed random seed.

    Args:
        seed: Value passed to torch.manual_seed. Note that this reseeds
            torch's global default generator as a side effect.
        num_samples: Number of latent vectors (rows) to draw.
        truncation_psi: Factor every sample is multiplied by. There is no
            default value; the caller must pass it (e.g. 0.7).
        device: Device the tensor is created on.
        dim: Length of each latent vector.

    Returns:
        Tensor of shape (num_samples, dim) holding standard-normal samples
        scaled by truncation_psi.
    '''
    torch.manual_seed(seed)
    # randn already allocates on `device`, so no extra .to(device) is needed.
    return torch.randn(num_samples, dim, device=device) * truncation_psi

def open_img_tensor(img_tensor_path: Union[str, os.PathLike, BinaryIO, IO[bytes]]) -> torch.Tensor:
    '''Load an image and return it as a normalised NCHW tensor on DEVICE.

    The image is converted to RGB and its real height/width are used, so
    greyscale/RGBA files and sizes other than 256x256 no longer fail (the
    previous version hard-coded view(1, 256, 256, 3)).

    Args:
        img_tensor_path: Path or binary file object of the image to open.

    Returns:
        float32 tensor of shape (1, 3, H, W); every pixel value p is mapped
        to (p - 128) / 127.5.
    '''
    # The context manager closes the file PIL opened once the pixels are read.
    with PIL.Image.open(img_tensor_path) as img:
        pixels = np.array(img.convert('RGB'), dtype=np.float32)  # (H, W, 3), writable copy

    img_tensor = torch.from_numpy(pixels).unsqueeze(0).to(DEVICE)  # (1, H, W, 3)
    img_tensor = (img_tensor - 128) / 127.5
    return img_tensor.permute(0, 3, 1, 2)


def plot_generator_img(img_tensor: torch.Tensor, title: str) -> None:
    '''Show the first image of a generator output batch with matplotlib.

    The NCHW tensor is rearranged to NHWC, mapped to pixel values via
    x * 127.5 + 128, clamped to [0, 255] and converted to uint8 before
    plotting. Only element 0 of the batch is displayed.
    '''
    channels_last = img_tensor.permute(0, 2, 3, 1)
    as_uint8 = (channels_last * 127.5 + 128).clamp(0, 255).to(torch.uint8)
    first_image = as_uint8[0].cpu().numpy()

    plt.title(title)
    plt.axis('off')
    plt.imshow(first_image)
    plt.show()

def plot_multiple_generator_img(img_tensors: torch.Tensor, title: str, start_seed, count, col) -> None:
    '''Show a batch of generator images in a grid, one subplot per image.

    Args:
        img_tensors: NCHW batch of generator outputs.
        title: Title of the whole figure.
        start_seed: Seed of the first image; image i is labelled
            "Seed {start_seed + i}".
        count: Number of images to show (capped at the batch size).
        col: Number of grid columns.

    Changes compared to the previous version: the row count is a ceiling
    division (no extra empty row when the batch divides evenly), unused grid
    cells stay empty instead of showing a black image with a seed label, and
    the title is set on the figure instead of overwriting the last subplot's
    title.
    '''
    img = (img_tensors.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)
    img = img.cpu().numpy()

    n_imgs = img.shape[0]
    shown = min(count, n_imgs)  # never index past the batch
    row = max(1, (n_imgs + col - 1) // col)  # ceiling division, at least one row
    fig = plt.figure(figsize=(40, 40))

    for i in range(row * col):
        ax = fig.add_subplot(row, col, i + 1)
        if i < shown:
            ax.set_title(f'Seed {start_seed+i}')
            ax.imshow(img[i])
        else:
            # Padding cell: keep it blank rather than labelling a fake image.
            ax.axis('off')

    fig.suptitle(title)
    plt.show()



def resize_img(img_path: Union[str, os.PathLike, BinaryIO, IO[bytes]], size:int) -> None:
    '''Resize the image at img_path to size x size pixels and overwrite it in place.'''
    resized = PIL.Image.open(img_path).resize((size, size))
    resized.save(img_path)

def gen_rand_z(gen: Generator) -> torch.Tensor:
    '''Draw one random latent code z of shape (1, gen.z_dim) and move it to DEVICE.'''
    noise = np.random.randn(1, gen.z_dim)
    latent = torch.from_numpy(noise)
    return latent.to(DEVICE)

def save_generator_img(img_tensor: torch.Tensor, path: Union[str, os.PathLike, BinaryIO, IO[bytes]]) -> None:
    '''Write the first image of a generator output batch to path as an RGB image.

    The tensor is converted exactly as for plotting: NCHW -> NHWC,
    x * 127.5 + 128, clamp to [0, 255], cast to uint8.
    '''
    channels_last = img_tensor.permute(0, 2, 3, 1)
    as_uint8 = (channels_last * 127.5 + 128).clamp(0, 255).to(torch.uint8)
    first_image = as_uint8[0].cpu().numpy()

    picture = PIL.Image.fromarray(first_image, 'RGB')
    picture.save(path)

def init_generator(PKL):
  '''Load the 'G_ema' generator from a network pickle and move it to CUDA.

  Args:
    PKL: URL or path of the network pickle; opened via dnnlib.util.open_url.

  Returns:
    The object stored under the 'G_ema' key of the loaded pickle, on the
    'cuda' device.
  '''
  device = torch.device('cuda')
  # Open the pickle passed by the caller. The previous version read the global
  # NETWORK_PKL here, so the PKL argument was silently ignored.
  with dnnlib.util.open_url(PKL) as f:
    G = legacy.load_network_pkl(f)['G_ema'].to(device)  # type: ignore
  return G


def load_tensor(path):
  '''Return the object that torch.load reads from the given path.'''
  return torch.load(path)

def save_tensor(tensor, path):
  '''Serialise the given torch tensor to path using torch.save.'''
  torch.save(obj=tensor, f=path)

def imshow(images, col, viz_size=256):
  """Tile a batch of images into one grid and display it inline as a JPEG.

  images must be a 4-D array (num, height, width, channels) whose num is a
  multiple of col. Every tile is resized to viz_size x viz_size if needed.
  Returns whatever IPython.display.display returns.
  """
  num, height, width, channels = images.shape
  assert num % col == 0
  row = num // col

  needs_resize = not (height == viz_size and width == viz_size)
  canvas = np.zeros((viz_size * row, viz_size * col, channels), dtype=np.uint8)

  for idx, tile in enumerate(images):
    grid_row, grid_col = idx // col, idx % col
    top = grid_row * viz_size
    left = grid_col * viz_size
    if needs_resize:
      tile = cv2.resize(tile, (viz_size, viz_size))
    canvas[top:top + viz_size, left:left + viz_size] = tile

  # Encode the grid in memory and hand the bytes to IPython for display.
  buffer = io.BytesIO()
  PIL.Image.fromarray(canvas).save(buffer, 'jpeg')
  return IPython.display.display(IPython.display.Image(buffer.getvalue()))




def prepare_image(img_path, dim):
  '''Load an image for projection and average-pool it down towards dim pixels.

  The image is read with open_img_tensor, plotted ("OG"), downsampled by an
  integer factor (height // dim) via block averaging when its height exceeds
  dim, plotted again ("reshaped") and returned as a tensor on 'cuda'.
  '''
  pixels = np.asarray(open_img_tensor(img_path).to('cpu'), dtype='float32')
  plot_generator_img(torch.from_numpy(pixels), "OG")

  shape = pixels.shape
  print(shape)
  if shape[2] > dim:
      factor = shape[2] // dim
      # Split both spatial axes into (blocks, factor) and average each block.
      blocked_shape = [-1, shape[1], shape[2] // factor, factor, shape[3] // factor, factor]
      pixels = np.reshape(pixels, blocked_shape).mean((3, 5))
  plot_generator_img(torch.from_numpy(pixels), "reshaped")

  return torch.from_numpy(pixels).to('cuda')

## VGG Perceptual loss


In [None]:
class VGGPerceptualLoss(torch.nn.Module):
    '''Perceptual (and optional style) loss on frozen, pretrained VGG16 features.

    Source: https://gist.github.com/alper111/8233cdb0414b4cb5853f2f730ab95a49

    The VGG16 feature extractor is cut into four consecutive blocks
    (layers 0-3, 4-8, 9-15, 16-22). The loss sums L1 distances between the
    block outputs of `input` and `target`.
    '''
    def __init__(self, resize=True):
        '''
        Args:
            resize: If True, both images are bilinearly resized to 224x224
                before they are fed through the blocks.
        '''
        super().__init__()
        # Load the pretrained network once and slice it; the previous version
        # instantiated a full pretrained VGG16 for each of the four blocks.
        features = torchvision.models.vgg16(pretrained=True).features
        blocks = [
            features[:4].eval(),
            features[4:9].eval(),
            features[9:16].eval(),
            features[16:23].eval(),
        ]
        # The VGG weights are fixed; only the inputs may receive gradients.
        for bl in blocks:
            for p in bl.parameters():
                p.requires_grad = False
        self.blocks = torch.nn.ModuleList(blocks)
        self.transform = torch.nn.functional.interpolate
        self.resize = resize
        # Per-channel normalisation constants.
        # NOTE(review): these look like the ImageNet statistics, which would
        # imply inputs scaled to [0, 1] — confirm against the callers.
        self.register_buffer("mean", torch.tensor([0.485, 0.456, 0.406]).view(1, 3, 1, 1))
        self.register_buffer("std", torch.tensor([0.229, 0.224, 0.225]).view(1, 3, 1, 1))

    def forward(self, input, target, feature_layers=(0, 1, 2, 3), style_layers=()):
        '''Return the summed L1 feature (and Gram-matrix) loss of input vs. target.

        Args:
            input: Image batch with the channel axis at dim 1; batches whose
                channel count is not 3 are repeated three times along it.
            target: Image batch treated exactly like input.
            feature_layers: Indices of the blocks whose activations contribute
                an L1 loss term.
            style_layers: Indices of the blocks whose Gram matrices contribute
                an L1 loss term.

        Returns:
            The accumulated loss (0.0 if no block index matches).
        '''
        if input.shape[1] != 3:
            input = input.repeat(1, 3, 1, 1)
            target = target.repeat(1, 3, 1, 1)
        input = (input - self.mean) / self.std
        target = (target - self.mean) / self.std
        if self.resize:
            input = self.transform(input, mode='bilinear', size=(224, 224), align_corners=False)
            target = self.transform(target, mode='bilinear', size=(224, 224), align_corners=False)
        loss = 0.0
        x = input
        y = target
        for i, block in enumerate(self.blocks):
            # Blocks are chained: each one consumes the previous block's output.
            x = block(x)
            y = block(y)
            if i in feature_layers:
                loss += torch.nn.functional.l1_loss(x, y)
            if i in style_layers:
                # Gram matrix over the flattened spatial positions.
                act_x = x.reshape(x.shape[0], x.shape[1], -1)
                act_y = y.reshape(y.shape[0], y.shape[1], -1)
                gram_x = act_x @ act_x.permute(0, 2, 1)
                gram_y = act_y @ act_y.permute(0, 2, 1)
                loss += torch.nn.functional.l1_loss(gram_x, gram_y)
        return loss


# Morphing
Our first application is morphing between two randomly generated images. We will morph in z-space and w-space and compare the paths taken by linear morphing.

## Morphing in z-space
We start by generating two z-tensors, morphing between them in z-space and evaluating the effect on the generated image.

In [None]:
# Network pickle handed to init_generator.
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
# Directory the frames and latent tensors of the morph are written to.
PATH = "/content/drive/My Drive/Bachelorarbeit/ausarbeitung/gfx/morphing/z-space"
# Torch device string used for all tensors in this cell.
DEVICE = 'cuda'
def morph(zs, steps):
    '''Compute the latent code for every step of a piecewise-linear interpolation.

    Args:
        zs: Sequence of key latent codes; consecutive pairs are interpolated.
        steps: Number of interpolation steps per pair.

    Returns:
        List with steps entries per pair. For each pair the blend factor t runs
        over 0, 1/steps, ..., (steps-1)/steps, so every segment starts at its
        first code and stops just before its second; the last code of zs
        itself is therefore never part of the result.
    '''
    # The previous version indented the body deeper than the docstring, which
    # is an IndentationError and kept the whole cell from running.
    morphed_zs = []
    for start, end in zip(zs, zs[1:]):
        for index in range(steps):
            t = index/float(steps)
            morphed_zs.append(end*t + start*(1-t))
    return morphed_zs

def z_morph(seed1, seed2, steps):
  '''Linearly morph in Z space from the latent code of seed1 to that of seed2.

  For each of the interpolated codes the image is generated, its MSE to the
  seed2 image is recorded, and the frame plus its latent tensor are saved under
  PATH. Every tenth frame is also plotted. Finally the MSE curve is plotted
  and the raw values are printed.
  '''
  truncation_psi = 0.7
  noise_mode = 'const'

  # Generator and (empty) conditioning label
  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=DEVICE)

  # Latent codes of both endpoints
  start_z, end_z = (np.random.RandomState(seed).randn(1, G.z_dim) for seed in (seed1, seed2))

  # Loss function and target image (the image of the second seed)
  mse = torch.nn.MSELoss()
  target = G(torch.from_numpy(end_z).to(DEVICE), label, truncation_psi=truncation_psi, noise_mode=noise_mode)

  error = []
  for idx, z_np in enumerate(morph([start_z, end_z], steps)):
    z = torch.from_numpy(z_np).to(DEVICE)
    img = G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode)
    error.append(mse(target, img).item())

    if idx % 10 == 0:
      plot_generator_img(img, f"image{idx}")
    save_generator_img(img,  f'{PATH}/frame-{idx:04d}.png')
    save_tensor(z, PATH + f"/tensor{idx}.pt")

  # Plot the loss with matplotlib's default style
  matplotlib.rcParams.update(matplotlib.rcParamsDefault)
  plt.plot(error)
  plt.xlabel("Interpolations Schritt")
  plt.ylabel("MSE-Loss")
  plt.title("MSE-Loss im Z-Raum")
  plt.show()

  print(error)

#Function call
# Morph in Z space between seeds 337 and 338 using 50 interpolation steps.
z_morph(337, 338, 50)






Convert frames into a video

In [None]:
# Encode the frame-%04d.png sequence into morph.mp4 (libx264, yuv420p, -r 24).
# NOTE(review): the z-space morph cell saves its frames under
# ".../My Drive/Bachelorarbeit/ausarbeitung/gfx/morphing/z-space", which is not
# the input directory used here — confirm the paths before running.
!ffmpeg -i /content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/morphing/z-space/frame-%04d.png -r 24 -vcodec libx264 -pix_fmt yuv420p /content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/morphing/z-space/morph.mp4

## Morphing in w-space
We start by generating two z-tensors for our start and end images. We map the tensors to their corresponding w-tensors, morph between them in w-space and evaluate the effect on the generated image.

In [None]:
# Network pickle handed to init_generator.
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
# Directory the frames and latent tensors of the W-space morph are written to.
PATH = "/content/drive/My Drive/Bachelorarbeit/ausarbeitung/gfx/morphing/w-space"
# Torch device string used for all tensors in this cell.
DEVICE = 'cuda'

def morph(zs, steps):
    '''Return the piecewise-linear interpolation between consecutive codes in zs.

    Each pair contributes `steps` codes with blend factors 0, 1/steps, ...,
    (steps-1)/steps; the second code of a pair is not included in its segment.
    '''
    return [
        zs[pair + 1] * (step / float(steps)) + zs[pair] * (1 - step / float(steps))
        for pair in range(len(zs) - 1)
        for step in range(steps)
    ]

def w_morph(seed1, seed2, steps):
  '''Like z_morph, but interpolate in W space.

  Both seeds are turned into z codes, mapped to their intermediate latent codes
  w by G.mapping, and the linear interpolation runs between the two w codes.
  Each frame and its w tensor are saved under PATH, every tenth frame is
  plotted, and the MSE to the seed2 image is plotted at the end.
  '''
  truncation_psi = 0.7
  noise_mode = 'const'

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=DEVICE)

  # Draw each z code and map it to its w code (seed1 first, then seed2).
  endpoints = []
  for seed in (seed1, seed2):
    z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(DEVICE)
    endpoints.append(G.mapping(z, label, truncation_psi=truncation_psi, truncation_cutoff=8))
  w_start, w_end = endpoints

  mse = torch.nn.MSELoss()
  target = G.synthesis(w_end, noise_mode=noise_mode, force_fp32=False)
  error = []

  for idx, w in enumerate(morph([w_start, w_end], steps)):
    print(w.shape)
    img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
    print(img.shape)
    error.append(mse(target, img).item())

    if idx % 10 == 0:
      plot_generator_img(img, f"image{idx:04d}")
    save_generator_img(img, f'{PATH}/frame-{idx:04d}.png')
    save_tensor(w, PATH + f"/tensor{idx:04d}.pt")

  plt.plot(error)
  plt.xlabel("Interpolations Schritt")
  plt.ylabel("MSE-Loss")
  plt.title("MSE-Loss im W-Raum")
  plt.show()



# Morph in W space between seeds 337 and 338 using 50 interpolation steps.
w_morph(337, 338, 50)



Convert Pictures into a video

In [None]:
# Encode the frame-%04d.png sequence into morph.mp4 (libx264, yuv420p, -r 24).
# NOTE(review): the w-space morph cell saves its frames under
# ".../My Drive/Bachelorarbeit/ausarbeitung/gfx/morphing/w-space", which is not
# the input directory used here — confirm the paths before running.
!ffmpeg -i /content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/morphing/w-space/frame-%04d.png -r 24 -vcodec libx264 -pix_fmt yuv420p /content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/morphing/w-space/morph.mp4

In [None]:
# Encode the frames in presentation/morphs/morph2 into morph.mp4 (libx264, yuv420p, -r 24).
!ffmpeg -i /content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/presentation/morphs/morph2/frame-%04d.png -r 24 -vcodec libx264 -pix_fmt yuv420p /content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/presentation/morphs/morph2/morph.mp4

## Boilerplate
Only used for generating graphics; otherwise not needed.

In [None]:
# Network pickle handed to init_generator.
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
# Output directory constant; the functions defined in this cell do not write to it.
PATH = "/content/drive/My Drive/Bachelorarbeit/ausarbeitung/gfx/morphing/z-space"
# Torch device string used for all tensors in this cell.
DEVICE = 'cuda'

def morph(zs, steps):
    '''Linearly interpolate between each pair of neighbouring codes in zs.

    Returns a flat list with `steps` codes per pair; the blend factor of a
    segment runs from 0 up to (steps-1)/steps, so it never reaches the
    segment's second code.
    '''
    blended = []
    segment = 0
    while segment < len(zs) - 1:
        blended.extend(
            zs[segment + 1] * (k / float(steps)) + zs[segment] * (1 - k / float(steps))
            for k in range(steps)
        )
        segment += 1
    return blended

def z_morph(seed1, seed2, steps):
  '''Plot the MSE-to-target curves of a Z-space and a W-space morph side by side.

  Both morphs run between the codes of seed1 and seed2 with `steps` steps; the
  target is the image generated from seed2's z code. Nothing is saved to disk.
  '''
  truncation_psi = 0.7
  noise_mode = 'const'

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=DEVICE)

  z1 = np.random.RandomState(seed1).randn(1, G.z_dim)
  z2 = np.random.RandomState(seed2).randn(1, G.z_dim)

  # Map both z codes to W (seed1 first, then seed2).
  w1, w2 = (
      G.mapping(torch.from_numpy(z).to(DEVICE) , label, truncation_psi=truncation_psi, truncation_cutoff=8)
      for z in (z1, z2)
  )

  z_path = morph([z1, z2], steps)
  w_path = morph([w1, w2], steps)

  mse = torch.nn.MSELoss()
  target = G(torch.from_numpy(z2).to(DEVICE), label, truncation_psi=truncation_psi, noise_mode=noise_mode)

  # Error along the Z-space path
  error_z = []
  for z in z_path:
    img = G(torch.from_numpy(z).to(DEVICE), label, truncation_psi=truncation_psi, noise_mode=noise_mode)
    error_z.append(mse(target, img).item())

  # Error along the W-space path
  error_w = []
  for w in w_path:
    img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
    error_w.append(mse(target, img).item())

  figure, axis = plt.subplots(1, 2)
  panels = ((error_z, "MSE-Loss in Z-Raum"), (error_w, "MSE-Loss in W-Raum"))
  for ax, (errors, panel_title) in zip(axis, panels):
    ax.plot(errors)
    ax.set_xlabel("Interpolations Schritt")
    ax.set_ylabel("MSE-Loss")
    ax.set_title(panel_title)

  plt.show()


# Compare the Z- and W-space morph losses for seeds 335 and 336 with 50 steps.
z_morph(335, 336, 50)






# Latent Arithmetics

## Distance calculations in StyleGAN's latent spaces

In [None]:
# Network pickle handed to init_generator.
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
# Output directory constant; the distance functions in this cell do not write to it.
PATH = "/content/drive/My Drive/Bachelorarbeit/bachelorarbeit/morphing/w-space"
# Torch device string used for all tensors in this cell.
DEVICE = 'cuda'


def morph(zs, steps):
  '''Build the linear interpolation between every consecutive pair of codes in zs.

  Per pair, `steps` codes are produced for t = 0, 1/steps, ..., (steps-1)/steps.
  '''
  fractions = [index/float(steps) for index in range(steps)]
  path = []
  for first in range(len(zs)-1):
    second = first + 1
    for t in fractions:
      path.append(zs[second]*t + zs[first]*(1-t))
  return path

def z_distance(seed1, seed2, steps):
  '''Plot image MSE against latent distance while moving through Z space.

  The path runs linearly from seed1's z code towards seed2's. For every code
  on it, the norm of its difference to the start code and the MSE between its
  image and the start image are recorded and plotted against each other.
  '''
  truncation_psi = 0.7
  noise_mode = 'const'

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=DEVICE)

  z1 = np.random.RandomState(seed1).randn(1, G.z_dim)
  z2 = np.random.RandomState(seed2).randn(1, G.z_dim)
  path = morph([z1, z2], steps)

  mse = torch.nn.MSELoss()
  start = G(torch.from_numpy(z1).to(DEVICE), label, truncation_psi=truncation_psi, noise_mode=noise_mode)
  z1_t = torch.from_numpy(z1).to(DEVICE)

  error, distance = [], []
  for z_np in path:
    z = torch.from_numpy(z_np).to(DEVICE)
    length = torch.linalg.norm(z1_t - z)
    img = G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode)
    error.append(mse(start, img).item())
    distance.append(length.item())

  plt.plot(distance, error)
  plt.xlabel("distance")
  plt.ylabel("MSE error")
  plt.title("MSE error for different distances")
  plt.show()

def w_distance(seed1, seed2, steps):
  '''Plot image MSE against latent distance while moving through W space.

  Both seeds are mapped to w codes via G.mapping; the path runs linearly from
  the first towards the second. For every code on it, the norm of its
  difference to the start code and the MSE between its image and the start
  image are recorded and plotted against each other.
  '''
  truncation_psi = 0.7
  noise_mode = 'const'

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=DEVICE)

  # z -> w for both endpoints (seed1 first, then seed2)
  mapped = []
  for seed in (seed1, seed2):
    z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(DEVICE)
    mapped.append(G.mapping(z, label, truncation_psi=truncation_psi, truncation_cutoff=8))
  w1, w2 = mapped

  mse = torch.nn.MSELoss()
  start = G.synthesis(w1, noise_mode=noise_mode, force_fp32=False)

  error, distance = [], []
  for w in morph([w1, w2], steps):
    length = torch.linalg.norm(w1 - w)
    img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
    error.append(mse(start, img).item())
    distance.append(length.item())

  plt.plot(distance, error)
  plt.xlabel("distance")
  plt.ylabel("MSE error")
  plt.title("MSE error for different distances")
  plt.show()



In [None]:
# Plot MSE against latent distance for seeds 200 -> 201 with 100 steps,
# first in Z space, then in W space.
z_distance(200,201,100)
w_distance(200,201,100)

## Finding desired features

In [None]:
# Network pickle handed to init_generator.
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
# Torch device string used for all tensors in this cell.
DEVICE = 'cuda'

def display_seeds(start_seed, count):
  '''Generate and show the images of `count` consecutive seeds.

  Seeds start_seed, start_seed + 1, ..., start_seed + count - 1 are each turned
  into a z code; all codes are generated in one batch and shown in a grid with
  6 columns.
  '''
  truncation_psi = 0.7
  noise_mode = 'const'

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=DEVICE)

  # One (1, z_dim) code per seed, stacked into a single batch.
  latents = []
  for seed in range(start_seed, start_seed + count):
    noise = np.random.RandomState(seed).randn(1, G.z_dim)
    latents.append(torch.from_numpy(noise).to(DEVICE))
  zs = torch.cat(latents).to(DEVICE)

  img_tensor = G(zs, label, truncation_psi=truncation_psi, noise_mode=noise_mode)

  plot_multiple_generator_img(img_tensor, f'Seeds {start_seed} to {start_seed+count}', start_seed, count, 6 )


In [None]:
# Show the images of the 70 seeds starting at seed 140.
display_seeds(140, 70)


#Hand classified Arrays for certain attributes.
# Each list holds integer seeds. NOTE(review): presumably the seeds whose
# generated images (see display_seeds) show the named attribute — confirm.

# Gender
# NOTE(review): 222 appears twice in `women`.
men = [0,4,7,8,12,13,14,15,16,18,21,27,28,29,33,34,35,37,40,43,44,45,46,47,48,49,51,52,58,59,65,66,71,72,73,74,75,78,80,82,83,100,103,105,107,108,109,115,123,204,205,206,207,208,324,326,336]
women = [6,9,17,22,23,24,25,26,30,31,32,39,41,42,50,53,55,56,57,60,62,64,67,68,70,76,77,79,101,102,104,106,110,111,112,113,114,116,117,118,119,120,121,122,124,127,128,200,201,202,210,211,213,214,222,221,222,223,304,305, 309,346,347,360,361,362,363,364]
men2= [540,550,551,555,556,559,568,614,612,623,624,628,635,637,645,648,651,655,662,672,680,681,708,711,712,717,718]
women2 = [547,548,549,553,554,557,560,561,563,564,565,569,625,631,632,641,642,650,653,665,682,683,685,688,690,692,694,700,701,702,703,710,713,714,715,720,722,723,728,729]



# Age groups
# NOTE(review): 501 appears twice in `child`, and 345 is listed in both
# `child` and `adult`.
child = [203,209,215,216,217,226,230,322,325,345,400,401,409,415,422,424,425,466,468,479,495,501,499,501,514,516,542,546,573,582,583,592,596,597,630,644,639,640,646,656,677]
child2 = [686,689,691,693,696,704,709,719,724,737]
teen = [205,225,245,302,303,313,336,349,344,356]
adult = [200,201,202,204,206,207,208,227,228,232,238,240,241,300,402,404,403,405,406,407,410,412,413,414,416,417,418,419,420,421,423,427,428,429,439,431,432,433,434,345]
adult2 = [547,548,549,553,554,557,560,561,563,564,540,550,551,555,556,559,568,614,612,623]
old = [218,222,224,233,236,249,261,265,309,311,316,334,367,368,411,426,474,540,574,589,638,645,667,655,673,707,717,747,727]

# Further attributes
glasses_men =[206, 311, 334, 336, 339, 348, 352, 355, 356, 373, 374, 376, 775, 758,776,780,788,790,796,821,831,840,843,846,878,891,915, 916,917,944, 962, 965, 970, 975, 988,990,993,1006,517]
glasses_women =[255, 295, 291, 286, 309, 347, 384,683,770,798,851,874,900,935,977,1001,513,195]
# Combined glasses list: men followed by women.
glasses = glasses_men + glasses_women
smiling_men = [256,259, 273, 275, 300, 316, 320, 324]
smiling_women = [210, 214, 260, 272, 274, 276, 302, 309, 313, 338, 347,346, 350]
neutral_men = [218,257,258, 261, 264, 265, 307, 318, 319]
neutral_women = [211, 310, 312, 334, 351, 359]
smiling_child = [209, 216, 366]
neutral_child = [322,325,328, 341, 345, 356, 357]
beard = [218, 308, 318, 373,970,1013,16,123,181]
lipstick = [222, 225, 303, 305, 310, 763,888,907,1017,1029,547,198,192]
darker_skin_women = [272, 290, 384, 504,121,128]
darker_skin_men = [229, 271, 348, 349, 375, 399, 386,804,807,824,860,80,82]


### Geschlecht
Visualization of latent codes for different genders in w-space

In [None]:
# Network pickle handed to init_generator.
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"

# Generation settings for this cell. Note the lower-case `device`; the mapping
# calls further down in this cell use the upper-case DEVICE global instead.
device = 'cuda'
truncation_psi = 0.7
noise_mode = 'const'

# Hand-classified seed lists (same values as in the seed-list cell above).
men = [0,4,7,8,12,13,14,15,16,18,21,27,28,29,33,34,35,37,40,43,44,45,46,47,48,49,51,52,58,59,65,66,71,72,73,74,75,78,80,82,83,100,103,105,107,108,109,115,123,204,205,206,207,208,324,326,336]
women = [6,9,17,22,23,24,25,26,30,31,32,39,41,42,50,53,55,56,57,60,62,64,67,68,70,76,77,79,101,102,104,106,110,111,112,113,114,116,117,118,119,120,121,122,124,127,128,200,201,202,210,211,213,214,222,221,222,223,304,305, 309,346,347,360,361,362,363,364]
men2= [540,550,551,555,556,559,568,614,612,623,624,628,635,637,645,648,651,655,662,672,680,681,708,711,712,717,718]
women2 = [547,548,549,553,554,557,560,561,563,564,565,569,625,631,632,641,642,650,653,665,682,683,685,688,690,692,694,700,701,702,703,710,713,714,715,720,722,723,728,729]

# Merge both batches of labelled seeds per gender.
men = men + men2
women = women + women2


# Age lists are defined here as well; the gender plot in this cell only uses
# `men` and `women`.
child = [203,209,215,216,217,226,230,322,325,345,400,401,409,415,422,424,425,466,468,479,495,501,499,501,514,516,542,546,573,582,583,592,596,597,630,644,639,640,646,656,677]
child2 = [686,689,691,693,696,704,709,719,724,737,]
teen = [205,225,245,302,303,313,336,349,344,356]
adult = [200,201,202,204,206,207,208,227,228,232,238,240,241,300,402,404,403,405,406,407,410,412,413,414,416,417,418,419,420,421,423,427,428,429,439,431,432,433,434,345]
adult2 = [547,548,549,553,554,557,560,561,563,564,540,550,551,555,556,559,568,614,612,623]
old = [218,222,224,233,236,249,261,265,309,311,316,334,367,368,411,426,474,540,574,589,638,645,667,655,673,707,717,747,727]

G = init_generator(NETWORK_PKL)
label = torch.zeros([1, G.c_dim], device=device)

size = 300

# Map every hand-classified seed to its w code: seed -> z (1, z_dim) -> G.mapping.
w_men = np.array([G.mapping(torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(DEVICE), label, truncation_psi=truncation_psi).cpu().numpy().astype(np.float32) for seed in men])
w_women = np.array([G.mapping(torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(DEVICE), label, truncation_psi=truncation_psi).cpu().numpy().astype(np.float32) for seed in women])


# Flatten each 4-D stack to one row vector per seed, as t-SNE expects 2-D input.
w_men = reshape(w_men, [w_men.shape[0], w_men.shape[1]*w_men.shape[2]*w_men.shape[3]])
w_women = reshape(w_women, [w_women.shape[0], w_women.shape[1]*w_women.shape[2]*w_women.shape[3]])


# One class label per row, in the same order as the rows of `ws`.
color1 = ["Männer" for  i in range(len(men))]
color2 = ["Frauen" for  i in range(len(women))]

color =  color1 + color2
ws = np.concatenate((w_men, w_women))
# The former `imgs = np.concatenate((img_men, img_women))` line was removed:
# img_men / img_women are not defined in this cell (NameError) and `imgs` was
# never used.


# 2-D t-SNE embedding of the flattened w codes.
tsne = TSNE(n_components=2, verbose=1, random_state=123, perplexity=40)
embed = tsne.fit_transform(ws)


# Collect labels and embedding coordinates for seaborn.
df = pd.DataFrame()
df["y"] = color
df["comp-1"] = embed[:,0]
df["comp-2"] = embed[:,1]


# Scatter plot of the embedding, coloured by gender label.
sns.scatterplot(x="comp-1", y="comp-2",hue=df.y.tolist(),
                palette=sns.color_palette("hls", 2),
                data=df).set(title="Geschlecht W Raum") 



### Geschlecht Z-Space
Visualization of latent codes for different genders in z-space

In [None]:
# Network pickle handed to init_generator.
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"

# Generation settings for this cell.
device = 'cuda'
truncation_psi = 0.7
noise_mode = 'const'

# Hand-classified seed lists (same values as in the seed-list cell above).
men = [0,4,7,8,12,13,14,15,16,18,21,27,28,29,33,34,35,37,40,43,44,45,46,47,48,49,51,52,58,59,65,66,71,72,73,74,75,78,80,82,83,100,103,105,107,108,109,115,123,204,205,206,207,208,324,326,336]
women = [6,9,17,22,23,24,25,26,30,31,32,39,41,42,50,53,55,56,57,60,62,64,67,68,70,76,77,79,101,102,104,106,110,111,112,113,114,116,117,118,119,120,121,122,124,127,128,200,201,202,210,211,213,214,222,221,222,223,304,305, 309,346,347,360,361,362,363,364]
men2= [540,550,551,555,556,559,568,614,612,623,624,628,635,637,645,648,651,655,662,672,680,681,708,711,712,717,718]
women2 = [547,548,549,553,554,557,560,561,563,564,565,569,625,631,632,641,642,650,653,665,682,683,685,688,690,692,694,700,701,702,703,710,713,714,715,720,722,723,728,729]

# Merge both batches of labelled seeds per gender.
men = men + men2
women = women + women2


# Age lists are defined here as well; the gender plot in this cell only uses
# `men` and `women`.
child = [203,209,215,216,217,226,230,322,325,345,400,401,409,415,422,424,425,466,468,479,495,501,499,501,514,516,542,546,573,582,583,592,596,597,630,644,639,640,646,656,677]
child2 = [686,689,691,693,696,704,709,719,724,737,]
teen = [205,225,245,302,303,313,336,349,344,356]
adult = [200,201,202,204,206,207,208,227,228,232,238,240,241,300,402,404,403,405,406,407,410,412,413,414,416,417,418,419,420,421,423,427,428,429,439,431,432,433,434,345]
adult2 = [547,548,549,553,554,557,560,561,563,564,540,550,551,555,556,559,568,614,612,623]
old = [218,222,224,233,236,249,261,265,309,311,316,334,367,368,411,426,474,540,574,589,638,645,667,655,673,707,717,747,727]

G = init_generator(NETWORK_PKL)
label = torch.zeros([1, G.c_dim], device=device)

size = 300

# z codes of the hand-classified seeds, one (1, z_dim) sample per seed
z_men = np.array([np.random.RandomState(seed).randn(1, G.z_dim) for seed in men])
z_women = np.array([np.random.RandomState(seed).randn(1, G.z_dim) for seed in women])

# Collapse the singleton axis so every seed becomes one row vector
z_men = z_men.reshape(z_men.shape[0], -1)
z_women = z_women.reshape(z_women.shape[0], -1)

# Class labels in row order: all men first, then all women
color1 = ["Männer"] * len(men)
color2 = ["Frauen"] * len(women)
color = color1 + color2
zs = np.concatenate((z_men, z_women))

# 2-D t-SNE embedding of the z codes
tsne = TSNE(n_components=2, verbose=1, random_state=123, perplexity=30)
embed = tsne.fit_transform(zs)

# Labels and embedding coordinates in one frame for seaborn
df = pd.DataFrame({
    "y": color,
    "comp-1": embed[:, 0],
    "comp-2": embed[:, 1],
})

sns.scatterplot(
    data=df,
    x="comp-1",
    y="comp-2",
    hue=df.y.tolist(),
    palette=sns.color_palette("hls", 2),
).set(title="Geschlecht Z Raum")

### Alter
Visualization of latent codes for different ages in w-space

In [None]:
##Visualisation

# Network pickle handed to init_generator.
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"

# Generation settings for this cell.
device = 'cuda'
truncation_psi = 0.7
noise_mode = 'const'

# Hand-classified seed lists (same values as in the seed-list cell above).
men = [0,4,7,8,12,13,14,15,16,18,21,27,28,29,33,34,35,37,40,43,44,45,46,47,48,49,51,52,58,59,65,66,71,72,73,74,75,78,80,82,83,100,103,105,107,108,109,115,123,204,205,206,207,208,324,326,336]
women = [6,9,17,22,23,24,25,26,30,31,32,39,41,42,50,53,55,56,57,60,62,64,67,68,70,76,77,79,101,102,104,106,110,111,112,113,114,116,117,118,119,120,121,122,124,127,128,200,201,202,210,211,213,214,222,221,222,223,304,305, 309,346,347,360,361,362,363,364]
men2= [540,550,551,555,556,559,568,614,612,623,624,628,635,637,645,648,651,655,662,672,680,681,708,711,712,717,718]
women2 = [547,548,549,553,554,557,560,561,563,564,565,569,625,631,632,641,642,650,653,665,682,683,685,688,690,692,694,700,701,702,703,710,713,714,715,720,722,723,728,729]

# Merge both batches of labelled seeds per gender.
men = men + men2
women = women + women2


child = [203,209,215,216,217,226,230,322,325,345,400,401,409,415,422,424,425,466,468,479,495,501,499,501,514,516,542,546,573,582,583,592,596,597,630,644,639,640,646,656,677]
child2 = [686,689,691,693,696,704,709,719,724,737,]
teen = [205,225,245,302,303,313,336,349,344,356]
adult = [200,201,202,204,206,207,208,227,228,232,238,240,241,300,402,404,403,405,406,407,410,412,413,414,416,417,418,419,420,421,423,427,428,429,439,431,432,433,434,345]
adult2 = [547,548,549,553,554,557,560,561,563,564,540,550,551,555,556,559,568,614,612,623]
old = [218,222,224,233,236,249,261,265,309,311,316,334,367,368,411,426,474,540,574,589,638,645,667,655,673,707,717,747,727]

#These latent codes work best to show the separation in w space
# `child` is extended by the second batch; `adult` is REPLACED by the `old`
# seeds, so everything labelled as adult below actually uses the `old` list.
child = child + child2
adult = old 
G = init_generator(NETWORK_PKL)
label = torch.zeros([1, G.c_dim], device=device)

size = 300

# Map the seeds of every age group to their w codes: seed -> z -> G.mapping
w_child = np.array([G.mapping(torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(DEVICE), label, truncation_psi=truncation_psi).cpu().numpy().astype(np.float32) for seed in child])
w_teen = np.array([G.mapping(torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(DEVICE), label, truncation_psi=truncation_psi).cpu().numpy().astype(np.float32) for seed in teen])
w_adult = np.array([G.mapping(torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(DEVICE), label, truncation_psi=truncation_psi).cpu().numpy().astype(np.float32) for seed in adult])
w_old = np.array([G.mapping(torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(DEVICE), label, truncation_psi=truncation_psi).cpu().numpy().astype(np.float32) for seed in old])

# Flatten each 4-D stack to one row vector per seed
w_child = w_child.reshape(w_child.shape[0], -1)
w_teen = w_teen.reshape(w_teen.shape[0], -1)
w_adult = w_adult.reshape(w_adult.shape[0], -1)
w_old = w_old.reshape(w_old.shape[0], -1)

# Only the child and adult groups are embedded and plotted; w_teen and w_old
# are computed above but not used in the plot.
color1 = ["Kinder"] * len(child)
color3 = ["Erwachsene"] * len(adult)
color = color1 + color3
ws = np.concatenate((w_child, w_adult))

print(ws.shape)

# 2-D t-SNE embedding of the flattened w codes
tsne = TSNE(n_components=2, verbose=1, random_state=123, perplexity=49)
embed = tsne.fit_transform(ws)

# Labels and embedding coordinates in one frame for seaborn
df = pd.DataFrame({
    "y": color,
    "comp-1": embed[:, 0],
    "comp-2": embed[:, 1],
})

sns.scatterplot(
    data=df,
    x="comp-1",
    y="comp-2",
    hue=df.y.tolist(),
    palette=sns.color_palette("hls", 2),
).set(title="Alter W Raum")


### Alter Z Space
Visualization of latent codes for different ages in z-space

In [None]:


# Pretrained StyleGAN2 checkpoint (FFHQ, 256x256) used for this cell.
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"

device = 'cuda'
truncation_psi = 0.7
noise_mode = 'const'

# Hand-labelled seeds grouped by gender. They are defined here but not used
# by the age plot below.
men = [0,4,7,8,12,13,14,15,16,18,21,27,28,29,33,34,35,37,40,43,44,45,46,47,48,49,51,52,58,59,65,66,71,72,73,74,75,78,80,82,83,100,103,105,107,108,109,115,123,204,205,206,207,208,324,326,336]
women = [6,9,17,22,23,24,25,26,30,31,32,39,41,42,50,53,55,56,57,60,62,64,67,68,70,76,77,79,101,102,104,106,110,111,112,113,114,116,117,118,119,120,121,122,124,127,128,200,201,202,210,211,213,214,222,221,222,223,304,305, 309,346,347,360,361,362,363,364]
men2 = [540,550,551,555,556,559,568,614,612,623,624,628,635,637,645,648,651,655,662,672,680,681,708,711,712,717,718]
women2 = [547,548,549,553,554,557,560,561,563,564,565,569,625,631,632,641,642,650,653,665,682,683,685,688,690,692,694,700,701,702,703,710,713,714,715,720,722,723,728,729]

men += men2
women += women2

# Hand-labelled seeds grouped by age. Only `child` (+ `child2`) and `old` feed
# the plot; `teen` and `adult2` are defined but unused in this cell.
# NOTE(review): seed 501 is listed twice in `child`, so that latent code is
# embedded twice - confirm whether this is intended.
child = [203,209,215,216,217,226,230,322,325,345,400,401,409,415,422,424,425,466,468,479,495,501,499,501,514,516,542,546,573,582,583,592,596,597,630,644,639,640,646,656,677]
child2 = [686,689,691,693,696,704,709,719,724,737]
teen = [205,225,245,302,303,313,336,349,344,356]
adult = [200,201,202,204,206,207,208,227,228,232,238,240,241,300,402,404,403,405,406,407,410,412,413,414,416,417,418,419,420,421,423,427,428,429,439,431,432,433,434,345]
adult2 = [547,548,549,553,554,557,560,561,563,564,540,550,551,555,556,559,568,614,612,623]
old = [218,222,224,233,236,249,261,265,309,311,316,334,367,368,411,426,474,540,574,589,638,645,667,655,673,707,717,747,727]

child += child2
# NOTE(review): from here on `adult` is the `old` seed list, so the points
# labelled "Erwachsene" in the plot come from the `old` seeds - confirm.
adult = old
G = init_generator(NETWORK_PKL)
label = torch.zeros([1, G.c_dim], device=device)

# One z vector per seed, stacked row-wise into an (n_seeds, z_dim) array.
z_child = np.concatenate(
    [np.random.RandomState(seed).randn(1, G.z_dim) for seed in child], axis=0)
z_adult = np.concatenate(
    [np.random.RandomState(seed).randn(1, G.z_dim) for seed in old], axis=0)

# Class label per row, in the same order in which the rows are stacked below.
color1 = ["Kinder"] * len(child)
color3 = ["Erwachsene"] * len(adult)
color = color1 + color3

ws = np.vstack([z_child, z_adult])
print(ws.shape)

# Project the z vectors to 2-D with t-SNE.
tsne = TSNE(n_components=2, verbose=1, random_state=123, perplexity=48)
embed = tsne.fit_transform(ws)

df = pd.DataFrame({
    "y": color,
    "comp-1": embed[:, 0],
    "comp-2": embed[:, 1],
})

# Scatter plot of the embedding, coloured by age group.
age_z_axes = sns.scatterplot(x="comp-1", y="comp-2", hue=df["y"].tolist(),
                             palette=sns.color_palette("hls", 2),
                             data=df)
age_z_axes.set(title="Alter im Z Raum")

###Z und W Raum
Tried to visualize the overall shape of the images distributed in z-space and w-space, but the images were too high-dimensional. The results did not contain any meaningful semantics.

In [None]:
## Visualisation: t-SNE of generated images (flattened pixel space)

NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"

device = 'cuda'
truncation_psi = 0.7
noise_mode = 'const'

G = init_generator(NETWORK_PKL)
label = torch.zeros([1, G.c_dim], device=device)

# Number of random samples to generate and embed.
size = 1000

# Generate `size` images from random z vectors and collect them as float32 arrays.
img = np.array([
    G(gen_rand_z(G), label, truncation_psi=truncation_psi, noise_mode=noise_mode)
    .cpu().numpy().astype(np.float32)
    for _ in range(size)
])

print(img.shape)
# Flatten every sample to a single feature vector (one row per image).
img = img.reshape(img.shape[0], -1)
print(img.shape)

# All samples share one dummy class, so the plot has a single hue.
color = ["1"] * size

tsne = TSNE(n_components=2, verbose=1, random_state=123, perplexity=50)
embed_img = tsne.fit_transform(img)

df_img = pd.DataFrame({
    "y": color,
    "comp-1": embed_img[:, 0],
    "comp-2": embed_img[:, 1],
})

img_space_axes = sns.scatterplot(x="comp-1", y="comp-2", hue=df_img["y"].tolist(),
                                 palette=sns.color_palette("hls", 2),
                                 data=df_img)
img_space_axes.set(title="IMG Raum")


In [None]:
## Visualisation: t-SNE of random latent codes in w-space

NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"

device = 'cuda'
truncation_psi = 0.7
noise_mode = 'const'

G = init_generator(NETWORK_PKL)
label = torch.zeros([1, G.c_dim], device=device)

# Number of random samples to map and embed.
size = 1000

# Map `size` random z vectors through the mapping network and collect the
# resulting w codes as float32 arrays.
ws = np.array([
    G.mapping(gen_rand_z(G), label, truncation_psi=truncation_psi)
    .cpu().numpy().astype(np.float32)
    for _ in range(size)
])

print(ws.shape)
# Flatten every w code to a single feature vector (one row per sample).
ws = ws.reshape(ws.shape[0], -1)
print(ws.shape)

# All samples share one dummy class, so the plot has a single hue.
color = ["1"] * size

tsne = TSNE(n_components=2, verbose=1, random_state=123, perplexity=20)
embed_ws = tsne.fit_transform(ws)

df_ws = pd.DataFrame({
    "y": color,
    "comp-1": embed_ws[:, 0],
    "comp-2": embed_ws[:, 1],
})

w_space_axes = sns.scatterplot(x="comp-1", y="comp-2", hue=df_ws["y"].tolist(),
                               palette=sns.color_palette("hls", 2),
                               data=df_ws)
w_space_axes.set(title="W Raum")



In [None]:
## Visualisation: t-SNE of random latent codes in z-space

NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"

device = 'cuda'
truncation_psi = 0.7
noise_mode = 'const'

# The generator is only consulted for its latent dimensionality (G.z_dim).
G = init_generator(NETWORK_PKL)
label = torch.zeros([1, G.c_dim], device=device)

# Number of random samples to draw and embed.
size = 1000

# Draw `size` standard-normal z vectors from NumPy's global RNG.
zs = np.array([np.random.randn(1, G.z_dim) for _ in range(size)])

print(zs.shape)
# Drop the singleton axis so that every sample becomes one row.
zs = zs.reshape(zs.shape[0], -1)
print(zs.shape)

# All samples share one dummy class, so the plot has a single hue.
color = ["1"] * size

tsne = TSNE(n_components=2, verbose=1, random_state=123, perplexity=100)
embed_zs = tsne.fit_transform(zs)

df_zs = pd.DataFrame({
    "y": color,
    "comp-1": embed_zs[:, 0],
    "comp-2": embed_zs[:, 1],
})

z_space_axes = sns.scatterplot(x="comp-1", y="comp-2", hue=df_zs["y"].tolist(),
                               palette=sns.color_palette("hls", 2),
                               data=df_zs)
z_space_axes.set(title="Z Raum")



## Arithmetics
Use our latent codes to do vector arithmetic in latent space. See section 3.2.2 of the thesis.

In [None]:
DEVICE = 'cuda'
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
PATH = "/content/drive/My Drive/Bachelorarbeit/ausarbeitung/gfx/Arithmetik/attributes_w3"

def latent_arithmetic():
  """Run the vector-arithmetic experiment in w-space.

  For several hand-classified seed groups the function maps every seed to a
  w code, averages the codes per group and plots the image synthesized from
  each average. Differences between group averages are then used as
  attribute directions (lipstick, glasses, beard) and added to the w codes
  of test seeds (1300..1314 and 1200..1209). The resulting images are
  plotted and written below PATH.

  Returns:
    None. Results are plotted and saved as PNG files.
  """

  # hand-classified seeds
  # NOTE(review): seed 222 is listed twice in `women`, and 501 twice in
  # `child`; after the merge further down, 209, 216, 322, 325 and 345 also
  # occur twice in `child`. Duplicates get double weight in the group
  # average - confirm whether this is intended.
  men = [0,4,7,8,12,13,14,15,16,18,21,27,28,29,33,34,35,37,40,43,44,45,46,47,48,49,51,52,58,59,65,66,71,72,73,74,75,78,80,82,83,100,103,105,107,108,109,115,123,204,205,206,207,208,324,326,336]
  women = [6,9,17,22,23,24,25,26,30,31,32,39,41,42,50,53,55,56,57,60,62,64,67,68,70,76,77,79,101,102,104,106,110,111,112,113,114,116,117,118,119,120,121,122,124,127,128,200,201,202,210,211,213,214,222,221,222,223,304,305, 309,346,347,360,361,362,363,364]
  men2= [540,550,551,555,556,559,568,614,612,623,624,628,635,637,645,648,651,655,662,672,680,681,708,711,712,717,718]
  women2 = [547,548,549,553,554,557,560,561,563,564,565,569,625,631,632,641,642,650,653,665,682,683,685,688,690,692,694,700,701,702,703,710,713,714,715,720,722,723,728,729]

  men = men + men2
  women = women + women2
  person = men + women

  child = [203,209,215,216,217,226,230,322,325,345,400,401,409,415,422,424,425,466,468,479,495,501,499,501,514,516,542,546,573,582,583,592,596,597,630,644,639,640,646,656,677]
  child2 = [686,689,691,693,696,704,709,719,724,737]

  glasses_men =[206, 311, 334, 336, 339, 348, 352, 355, 356, 373, 374, 376, 775, 758,776,780,788,790,796,821,831,840,843,846,878,891,915, 916,917,944, 962, 965, 970, 975, 988,990,993,1006,517]
  glasses_women =[255, 295, 291, 286, 309, 347, 384,683,770,798,851,874,900,935,977,1001,513,195]
  smiling_men = [256,259, 273, 275, 300, 316, 320, 324]
  smiling_child = [209, 216, 366]
  neutral_child = [322,325,328, 341, 345, 356, 357]
  beard = [218, 308, 318, 373,970,1013,16,123,181]
  lipstick = [222, 225, 303, 305, 310, 763,888,907,1017,1029,547,198,192]

  # Labelled seed groups that are not used by the experiment below; kept as
  # hand-classified reference data.
  smiling_women = [210, 214, 260, 272, 274, 276, 302, 309, 313, 338, 347,346, 350]
  neutral_men = [218,257,258, 261, 264, 265, 307, 318, 319]
  neutral_women = [211, 310, 312, 334, 351, 359]
  darker_skin_women = [272, 290, 384, 504,121,128]
  darker_skin_men = [229, 271, 348, 349, 375, 399, 386,804,807,824,860,80,82]

  glasses = glasses_men + glasses_women
  child = smiling_child + neutral_child + child + child2


  #init hyperparameters and generator
  truncation_psi = 0.7
  noise_mode = 'random'

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=DEVICE)

  def seed_to_w(seed):
    # Draw the z vector for `seed` and map it to a truncated w code.
    z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(DEVICE)
    return G.mapping(z, label, truncation_psi=truncation_psi, truncation_cutoff=8)

  def synthesize(w):
    # Render the image belonging to a w code.
    return G.synthesis(w, noise_mode=noise_mode, force_fp32=False)

  def average_w(seeds, title):
    # Average the w codes of `seeds`, plot the image of that average under
    # `title`, and return the averaged code.
    codes = torch.cat([seed_to_w(seed) for seed in seeds]).to(DEVICE)
    mean_code = torch.mean(codes, 0, True)
    plot_generator_img(synthesize(mean_code), title)
    return mean_code

  #compute the average latent code for an attribute and plot the image generated by the average code.
  average_w(smiling_men, "average_smiling_men")  # only plotted, not used further
  average_men = average_w(men, "average_men")
  average_women = average_w(women, "average_women")
  average_child = average_w(child, "average_child")
  average_lipstick = average_w(lipstick, "average_lipstick")
  average_men_glasses = average_w(glasses_men, "men with glasses")
  average_person = average_w(person, "average person")
  average_glasses = average_w(glasses, "person with glasses")
  average_beard = average_w(beard, "men with beard")

  # "men with glasses" - "men" + "child" = "child with glasses"
  child_glasses = average_men_glasses - average_men + average_child
  plot_generator_img(synthesize(child_glasses), "child with glasses")

  #compute subspace for image manipulation
  # (attribute direction = average with attribute - average of the base group)
  lipstick_direction = average_lipstick - average_women
  glasses_direction = average_glasses - average_person
  beard_direction = average_beard - average_men

  #Uncomment if u want to use your own latent code from a torch Tensor to be manipulated
  #w_load = load_tensor("/content/drive/MyDrive/Bachelorarbeit/ausarbeitung/gfx/projection/w_tensors/macron_with_noise.pt")

  #plot_generator_img(synthesize(w_load), "loaded image")
  #plot_generator_img(synthesize(w_load + beard_direction), "loaded image with beard")
  #plot_generator_img(synthesize(w_load + glasses_direction), "loaded image with glasses")
  #plot_generator_img(synthesize(w_load + lipstick_direction), "loaded image with makeup")

  #Use computed subspace to perform manipulation on random images
  for i in range(15):
    test_men = seed_to_w(1300+i)
    img_test_men = synthesize(test_men)
    plot_generator_img(img_test_men, "test men")

    img_men_lipstick = synthesize(test_men + lipstick_direction)
    plot_generator_img(img_men_lipstick, "men with lipstick")

    img_men_beard = synthesize(test_men + beard_direction)
    plot_generator_img(img_men_beard, "men with beard")

    img_men_glasses = synthesize(test_men + glasses_direction)
    plot_generator_img(img_men_glasses, "test men with glasses")

    save_generator_img(img_men_glasses, f'{PATH}/men_glasses-{i}.png')
    save_generator_img(img_men_lipstick, f'{PATH}/men_lipstick-{i}.png')
    save_generator_img(img_men_beard, f'{PATH}/men_beard-{i}.png')
    save_generator_img(img_test_men, f'{PATH}/original_men-{i}.png')


  for i in range(10):
    test_women = seed_to_w(1200+i)
    img_original = synthesize(test_women)
    plot_generator_img(img_original, "test women")

    img_test_lipstick = synthesize(test_women + lipstick_direction)
    plot_generator_img(img_test_lipstick, "test lipstick women")

    # The glasses variant is saved but, as before, not plotted.
    img_test_women_glasses = synthesize(test_women + glasses_direction)

    save_generator_img(img_original, f'{PATH}/original_women-{i}.png')
    save_generator_img(img_test_lipstick, f'{PATH}/women_lipstick-{i}.png')
    save_generator_img(img_test_women_glasses, f'{PATH}/women_glasses-{i}.png')



latent_arithmetic()


In [None]:
# Runs the w-space arithmetic experiment again; the cell that defines
# latent_arithmetic() already calls it once at its end.
latent_arithmetic()

## Arithmetics in Z
Same as the section Arithmetics, but in z-space, to test our hypothesis from thesis section 3.2.2.

In [None]:
DEVICE = 'cuda'
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
PATH = "/content/drive/My Drive/Bachelorarbeit/ausarbeitung/gfx/Arithmetik/attributes_z3"

def latent_arithmetic_z():
  """Run the vector-arithmetic experiment in z-space.

  Counterpart of the w-space experiment: the z vectors of hand-classified
  seed groups are averaged, the image of every average is plotted, and the
  differences between averages are used as attribute directions (lipstick,
  glasses, beard). The directions are added to the z vectors of the test
  seeds 1300..1314, and the generated images are written below PATH.

  Returns:
    None. Results are plotted and saved as PNG files.
  """

  # hand-classified seeds
  # NOTE(review): seed 222 is listed twice in `women`, so it gets double
  # weight in the averages - confirm whether this is intended.
  men = [0,4,7,8,12,13,14,15,16,18,21,27,28,29,33,34,35,37,40,43,44,45,46,47,48,49,51,52,58,59,65,66,71,72,73,74,75,78,80,82,83,100,103,105,107,108,109,115,123,204,205,206,207,208,324,326,336]
  women = [6,9,17,22,23,24,25,26,30,31,32,39,41,42,50,53,55,56,57,60,62,64,67,68,70,76,77,79,101,102,104,106,110,111,112,113,114,116,117,118,119,120,121,122,124,127,128,200,201,202,210,211,213,214,222,221,222,223,304,305, 309,346,347,360,361,362,363,364]
  men2= [540,550,551,555,556,559,568,614,612,623,624,628,635,637,645,648,651,655,662,672,680,681,708,711,712,717,718]
  women2 = [547,548,549,553,554,557,560,561,563,564,565,569,625,631,632,641,642,650,653,665,682,683,685,688,690,692,694,700,701,702,703,710,713,714,715,720,722,723,728,729]

  men = men + men2
  women = women +  women2
  person = men + women

  glasses_men =[206, 311, 334, 336, 339, 348, 352, 355, 356, 373, 374, 376, 775, 758,776,780,788,790,796,821,831,840,843,846,878,891,915, 916,917,944, 962, 965, 970, 975, 988,990,993,1006,517]
  glasses_women =[255, 295, 291, 286, 309, 347, 384,683,770,798,851,874,900,935,977,1001,513,195]
  beard = [218, 308, 318, 373,970,1013,16,123,181]
  lipstick = [222, 225, 303, 305, 310, 763,888,907,1017,1029,547,198,192]

  # Labelled seed groups that are not used by the z-space experiment below;
  # kept as hand-classified reference data.
  child = [203,209,215,216,217,226,230,322,325,345,400,401,409,415,422,424,425,466,468,479,495,501,499,501,514,516,542,546,573,582,583,592,596,597,630,644,639,640,646,656,677]
  child2 = [686,689,691,693,696,704,709,719,724,737]
  smiling_men = [256,259, 273, 275, 300, 316, 320, 324]
  smiling_women = [210, 214, 260, 272, 274, 276, 302, 309, 313, 338, 347,346, 350]
  neutral_men = [218,257,258, 261, 264, 265, 307, 318, 319]
  neutral_women = [211, 310, 312, 334, 351, 359]
  smiling_child = [209, 216, 366]
  neutral_child = [322,325,328, 341, 345, 356, 357]
  darker_skin_women = [272, 290, 384, 504,121,128]
  darker_skin_men = [229, 271, 348, 349, 375, 399, 386,804,807,824,860,80,82]

  glasses = glasses_men + glasses_women


  truncation_psi = 0.7
  noise_mode = 'const'

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=DEVICE)

  def seed_to_z(seed):
    # Draw the z vector that belongs to `seed` and move it to DEVICE.
    return torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(DEVICE)

  def generate(z):
    # Run the full generator (mapping + synthesis) on a z vector.
    return G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode)

  def average_z(seeds, title):
    # Average the z vectors of `seeds`, plot the image of that average under
    # `title`, and return the averaged vector.
    vectors = torch.cat([seed_to_z(seed) for seed in seeds])
    mean_vector = torch.mean(vectors, 0, True)
    plot_generator_img(generate(mean_vector), title)
    return mean_vector

  average_men = average_z(men, "average_men")
  average_women = average_z(women, "average_women")
  average_lipstick = average_z(lipstick, "average_lipstick")
  average_person = average_z(person, "average person")
  average_glasses = average_z(glasses, "person with glasses")
  average_beard = average_z(beard, "men with beard")

  # attribute direction = average with attribute - average of the base group
  lipstick_direction = average_lipstick - average_women
  glasses_direction = average_glasses - average_person
  beard_direction = average_beard - average_men


  #uncomment the plot_generator_img lines if you want every image plotted
  for i in range(15):
    test_men = seed_to_z(1300+i)
    img_test_men = generate(test_men)
    #plot_generator_img(img_test_men, "test men")

    img_men_lipstick = generate(test_men + lipstick_direction)
    #plot_generator_img(img_men_lipstick, "men with lipstick")

    img_men_beard = generate(test_men + beard_direction)
    #plot_generator_img(img_men_beard, "men with beard")

    img_men_glasses = generate(test_men + glasses_direction)
    #plot_generator_img(img_men_glasses, "test men with glasses")

    save_generator_img(img_men_glasses, f'{PATH}/men_glasses-{i}.png')
    save_generator_img(img_men_lipstick, f'{PATH}/men_lipstick-{i}.png')
    save_generator_img(img_men_beard, f'{PATH}/men_beard-{i}.png')
    save_generator_img(img_test_men, f'{PATH}/original_men-{i}.png')

  #uncomment for more images (only female)
  #for i in range(10):
  #  test_women = seed_to_z(women[10+i])
  #  img_original = generate(test_women)
  #  plot_generator_img(img_original, "test women")

  #  img_test_lipstick = generate(test_women + lipstick_direction)
  #  plot_generator_img(img_test_lipstick, "test lipstick women")

  #  img_test_women_glasses = generate(test_women + glasses_direction)
  #  plot_generator_img(img_test_women_glasses, "test women glasses")

  #  save_generator_img(img_original, f'{PATH}/original_women-{i}.png')
  #  save_generator_img(img_test_lipstick, f'{PATH}/women_lipstick-{i}.png')
  #  save_generator_img(img_test_women_glasses, f'{PATH}/women_glasses-{i}.png')







In [None]:
# Run the z-space arithmetic experiment defined in the previous cell.
latent_arithmetic_z()

#Projection

##Preprocessing
Reference: https://github.com/NVlabs/ffhq-dataset/blob/master/download_ffhq.py

###Downloads
Only needs to be executed once.

In [None]:
# Install the package that provides the mount_google_drive / copy_file helpers used below.
!pip install Google-Colab-Transfer
import colab_transfer
colab_transfer.mount_google_drive()

# File name of the bz2-compressed dlib 68-point face landmark model.
model_file = 'shape_predictor_68_face_landmarks.dat.bz2'

# Download the archive from dlib.net, then copy it into the project folder on Drive.
# NOTE(review): the file is copied still compressed (.bz2), while
# detect_face_landmarks defaults to the uncompressed
# 'shape_predictor_68_face_landmarks.dat' - the decompression step is not
# part of this cell; confirm it is done elsewhere.
!wget http://dlib.net/files/$model_file
colab_transfer.copy_file(file_name=model_file,
                         destination='/content/drive/My Drive/Bachelorarbeit/bachelorarbeit/stylegan3/project_images')

###Preprocessing functions

In [None]:
def detect_face_landmarks(face_file_path=None,
                          predictor_path=None,
                          img=None):
  """Detect all faces in an image and predict their dlib face landmarks.

  References:
  -   http://dlib.net/face_landmark_detection.py.html
  -   http://dlib.net/face_alignment.py.html

  Args:
    face_file_path: path of the image file; it is loaded with dlib when
      `img` is not given.
    predictor_path: path of the dlib shape-predictor model. Defaults to the
      68-landmark model inside the project folder on Google Drive.
    img: already loaded image. When given, nothing is read from
      `face_file_path`.

  Returns:
    A dlib.full_object_detections holding one predicted shape per detected
    face (empty when no face is found).

  Raises:
    ValueError: if neither `face_file_path` nor `img` is provided.
  """
  # Fail early with a clear message, before the models are loaded.
  if img is None and face_file_path is None:
    raise ValueError("Either face_file_path or img must be provided.")

  if predictor_path is None:
    predictor_path = '/content/drive/My Drive/Bachelorarbeit/bachelorarbeit/stylegan3/project_images/shape_predictor_68_face_landmarks.dat'

  # Load all the models we need: a detector to find the faces, a shape predictor
  # to find face landmarks so we can precisely localize the face
  detector = dlib.get_frontal_face_detector()
  shape_predictor = dlib.shape_predictor(predictor_path)

  if img is None:
    # Load the image using Dlib
    print("Processing file: {}".format(face_file_path))
    img = dlib.load_rgb_image(face_file_path)

  # Ask the detector to find the bounding boxes of each face. The 1 in the
  # second argument indicates that we should upsample the image 1 time. This
  # will make everything bigger and allow us to detect more faces.
  dets = detector(img, 1)

  num_faces = len(dets)
  print("Number of faces detected: {}".format(num_faces))

  # Find the face landmarks we need to do the alignment.
  faces = dlib.full_object_detections()
  for d in dets:
      print("Left: {} Top: {} Right: {} Bottom: {}".format(
          d.left(), d.top(), d.right(), d.bottom()
      ))

      faces.append(shape_predictor(img, d))

  return faces

Displaying Landmarks

In [None]:
import collections

# Shared matplotlib line/marker settings for every landmark group.
plot_style = {
    'marker': 'o',
    'markersize': 4,
    'linestyle': '-',
    'lw': 2,
}

# A landmark group: the index slice into the 68 landmarks and its RGBA colour.
pred_type = collections.namedtuple('prediction_type', ['slice', 'color'])

# Landmark groups in drawing order, built from (name, start, stop, rgba) rows.
pred_types = {
    part: pred_type(slice(start, stop), rgba)
    for part, start, stop, rgba in [
        ('face', 0, 17, (0.682, 0.780, 0.909, 0.5)),
        ('eyebrow1', 17, 22, (1.0, 0.498, 0.055, 0.4)),
        ('eyebrow2', 22, 27, (1.0, 0.498, 0.055, 0.4)),
        ('nose', 27, 31, (0.345, 0.239, 0.443, 0.4)),
        ('nostril', 31, 36, (0.345, 0.239, 0.443, 0.4)),
        ('eye1', 36, 42, (0.596, 0.875, 0.541, 0.3)),
        ('eye2', 42, 48, (0.596, 0.875, 0.541, 0.3)),
        ('lips', 48, 60, (0.596, 0.875, 0.541, 0.3)),
        ('teeth', 60, 68, (0.596, 0.875, 0.541, 0.4)),
    ]
}

In [None]:
def display_landmarks_raw(input_img, preds=None, fig_size=None):
  """Show an image and, optionally, draw landmark groups on top of it.

  Based on:
  https://github.com/1adrianb/face-alignment/blob/master/examples/detect_landmarks_in_image.py

  Args:
    input_img: image passed to matplotlib's imshow.
    preds: landmark array indexed as preds[slice, 0] (x) and preds[slice, 1]
      (y); when None only the image is shown.
    fig_size: figure size; defaults to plt.figaspect(.5).
  """
  figure = plt.figure(figsize=plt.figaspect(.5) if fig_size is None else fig_size)
  axes = figure.add_subplot(1, 1, 1) # only display one image
  axes.imshow(input_img)

  # One polyline per landmark group; nothing is drawn without predictions.
  groups = pred_types.values() if preds is not None else ()
  for group in groups:
    xs = preds[group.slice, 0]
    ys = preds[group.slice, 1]
    axes.plot(xs, ys, color=group.color, **plot_style)

  axes.axis('off')

In [None]:
def display_landmarks(image_name, 
                      dlib_output_faces=None, 
                      face_no=0,
                      fig_size=None):
  """Plot an image together with the landmarks of one detected face.

  Args:
    image_name: path of the image; it is read with skimage's imread.
    dlib_output_faces: detections as returned by detect_face_landmarks; they
      are computed from the image when omitted.
    face_no: index of the face whose landmarks are drawn.
    fig_size: figure size, defaults to [15, 15].
  """
  if fig_size is None:
    fig_size = [15, 15]

  input_img = ios.imread(image_name)

  if dlib_output_faces is None:
    dlib_output_faces = detect_face_landmarks(face_file_path=image_name,
                                              img=input_img)

  # Pick the requested face; fall back to "no landmarks" if the index is invalid.
  try:
    selected_face = dlib_output_faces[face_no]
  except IndexError:
    selected_face = None
    print('No face found for index n°{} (max={}).'.format(
        face_no, 
        len(dlib_output_faces)-1,
        ))

  # Convert the dlib points into an array with one [x, y] row per landmark.
  preds = None if selected_face is None else np.array(
      [[point.x, point.y] for point in selected_face.parts()])

  display_landmarks_raw(input_img=input_img, 
                        preds=preds,
                        fig_size=fig_size)

Align with Landmarks

In [None]:
def recreate_aligned_images(json_data,
                            save=False,
                            output_path=None,
                            output_size=256, 
                            transform_size=4096, 
                            enable_padding=True):
    """Align and crop a face image based on its 68 face landmarks.

    Adapted from the FFHQ dataset script (download_ffhq.py). The function
    returns from inside the loop, so only the FIRST entry of ``json_data``
    is processed.

    Args:
        json_data: mapping whose values provide
            ``item['in_the_wild']['face_landmarks']`` (68 landmark points
            with x and y) and ``item['in_the_wild']['file_path']``.
        save: if truthy and ``output_path`` is given, the aligned image is
            written to ``output_path``.
        output_path: destination file for the aligned image.
        output_size: edge length of the returned square image.
        transform_size: edge length used for the intermediate quad transform.
        enable_padding: whether to reflect-pad and blur the border when the
            crop rectangle reaches beyond the image.

    Returns:
        The aligned PIL image of the first entry, or None if ``json_data``
        is empty.
    """
    for item_idx, item in enumerate(json_data.values()):
        print('\r%d / %d ... ' % (item_idx, len(json_data)), end='', flush=True)

        # Parse landmarks. Only the eye and outer-mouth groups are needed
        # for the alignment.
        lm = np.array(item['in_the_wild']['face_landmarks'])
        lm_eye_left      = lm[36 : 42]  # left-clockwise
        lm_eye_right     = lm[42 : 48]  # left-clockwise
        lm_mouth_outer   = lm[48 : 60]  # left-clockwise

        # Calculate auxiliary vectors.
        eye_left     = np.mean(lm_eye_left, axis=0)
        eye_right    = np.mean(lm_eye_right, axis=0)
        eye_avg      = (eye_left + eye_right) * 0.5
        eye_to_eye   = eye_right - eye_left
        mouth_left   = lm_mouth_outer[0]
        mouth_right  = lm_mouth_outer[6]
        mouth_avg    = (mouth_left + mouth_right) * 0.5
        eye_to_mouth = mouth_avg - eye_avg

        # Choose oriented crop rectangle.
        x = eye_to_eye - np.flipud(eye_to_mouth) * [-1, 1]
        x /= np.hypot(*x)
        x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8)
        y = np.flipud(x) * [-1, 1]
        c = eye_avg + eye_to_mouth * 0.1
        quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y])
        qsize = np.hypot(*x) * 2

        # Load in-the-wild image.
        src_file = item['in_the_wild']['file_path']
        img = PIL.Image.open(src_file)

        # Shrink.
        shrink = int(np.floor(qsize / output_size * 0.5))
        if shrink > 1:
            rsize = (int(np.rint(float(img.size[0]) / shrink)), int(np.rint(float(img.size[1]) / shrink)))
            # LANCZOS is the filter formerly exposed as ANTIALIAS, an alias
            # that was removed in Pillow 10.
            img = img.resize(rsize, PIL.Image.LANCZOS)
            quad /= shrink
            qsize /= shrink

        # Crop.
        border = max(int(np.rint(qsize * 0.1)), 3)
        crop = (int(np.floor(min(quad[:,0]))), int(np.floor(min(quad[:,1]))), int(np.ceil(max(quad[:,0]))), int(np.ceil(max(quad[:,1]))))
        crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), min(crop[2] + border, img.size[0]), min(crop[3] + border, img.size[1]))
        if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]:
            img = img.crop(crop)
            quad -= crop[0:2]

        # Pad.
        pad = (int(np.floor(min(quad[:,0]))), int(np.floor(min(quad[:,1]))), int(np.ceil(max(quad[:,0]))), int(np.ceil(max(quad[:,1]))))
        pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - img.size[0] + border, 0), max(pad[3] - img.size[1] + border, 0))
        if enable_padding and max(pad) > border - 4:
            pad = np.maximum(pad, int(np.rint(qsize * 0.3)))
            img = np.pad(np.float32(img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect')
            h, w, _ = img.shape
            y, x, _ = np.ogrid[:h, :w, :1]
            mask = np.maximum(1.0 - np.minimum(np.float32(x) / pad[0], np.float32(w-1-x) / pad[2]), 1.0 - np.minimum(np.float32(y) / pad[1], np.float32(h-1-y) / pad[3]))
            blur = qsize * 0.02
            img += (scipy.ndimage.gaussian_filter(img, [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0)
            img += (np.median(img, axis=(0,1)) - img) * np.clip(mask, 0.0, 1.0)
            img = PIL.Image.fromarray(np.uint8(np.clip(np.rint(img), 0, 255)), 'RGB')
            quad += pad[:2]

        # Transform.
        img = img.transform((transform_size, transform_size), PIL.Image.QUAD, (quad + 0.5).flatten(), PIL.Image.BILINEAR)
        if output_size < transform_size:
            img = img.resize((output_size, output_size), PIL.Image.LANCZOS)

        # Save aligned image (optional) and hand it back to the caller.
        if save and output_path is not None:
            img.save(output_path)
        else:
            print("returned image")
        return img

    # Only reached when json_data is empty.
    print('\r%d / %d ... done' % (len(json_data), len(json_data)))

    return

Full Workflow

In [None]:
# Full preprocessing workflow for one target image: detect the face
# landmarks, display them, then align the face and save the result.
face_file_path = "/content/drive/My Drive/Bachelorarbeit/bachelorarbeit/stylegan3/project_images/target_images/hemsworth.jpg"
faces = detect_face_landmarks(face_file_path=face_file_path)

#Display landmarks on face
face_no=0
fig_size=[15,15]

display_landmarks(image_name=face_file_path,
                  dlib_output_faces=faces,
                  face_no=face_no,
                  fig_size=fig_size)

# The first face which is detected:
# NB: we assume that there is exactly one face per picture!
f = faces[0]

parts = f.parts()

num_face_landmarks=68

# Copy the dlib points into a (68, 2) array with one [x, y] row per landmark.
v = np.zeros(shape=(num_face_landmarks, 2))
for k, e in enumerate(parts):
  v[k, :] = [e.x, e.y]

# Build the single-entry structure that recreate_aligned_images reads.
json_data = dict()

item_idx = 0

json_data[item_idx] = dict()
json_data[item_idx]['in_the_wild'] = dict()
json_data[item_idx]['in_the_wild']['file_path'] = face_file_path
json_data[item_idx]['in_the_wild']['face_landmarks'] = v

# The path must go to `output_path` together with save=True. Passed
# positionally it lands in `save` and the aligned image is never written.
recreate_aligned_images(json_data,
                        save=True,
                        output_path="/content/drive/My Drive/Bachelorarbeit/bachelorarbeit/stylegan3/project_images/aligned_target_images/img12.png")

##Preprocessing Pipeline
Combines all preprocessing steps




In [None]:

def preprocess_pipeline(img_path, save=False, out_path=None):
    """Run landmark detection, landmark display and alignment for one image.

    Args:
        img_path: Path of the image to preprocess.
        save: Forwarded unchanged to ``recreate_aligned_images``.
        out_path: Forwarded unchanged to ``recreate_aligned_images``.

    Returns:
        Whatever ``recreate_aligned_images`` returns for the single entry
        built here.
    """
    detected_faces = detect_face_landmarks(face_file_path=img_path)

    display_landmarks(image_name=img_path,
                      dlib_output_faces=detected_faces,
                      face_no=0,
                      fig_size=[15, 15])

    # Only the first detection is used: exactly one face per picture is assumed.
    landmark_points = detected_faces[0].parts()

    # Copy the landmarks into a (68, 2) array of (x, y) rows.
    landmark_count = 68
    landmarks = np.zeros(shape=(landmark_count, 2))
    for row, point in enumerate(landmark_points):
        landmarks[row, :] = [point.x, point.y]

    # Single-entry structure in the layout recreate_aligned_images is fed with.
    entry = {
        'in_the_wild': {
            'file_path': img_path,
            'face_landmarks': landmarks,
        },
    }
    return recreate_aligned_images({0: entry}, save, out_path)

## Projection into w-space
First attempt at projection into W-space without preprocessing. Can only be used for 256x256 images.

In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
DEVICE = 'cuda'

def projection_w(img_path: Union[str, os.PathLike, BinaryIO, IO[bytes]],
                 num_epochs: int,
                 learning_rate,
                 save_path_projection: Union[str, os.PathLike, BinaryIO, IO[bytes]],
                 name,
                 save_path_aligned: Union[str, os.PathLike, BinaryIO, IO[bytes]] = None,
                 continue_training: bool = False,
                 pre_trained_image_path: Union[str, os.PathLike, BinaryIO, IO[bytes]] = None):
    """Optimise a W latent so the generator reproduces ``img_path`` (no alignment).

    Args:
        img_path: Target image, loaded through ``prepare_image(img_path, 256)``.
        num_epochs: Number of optimisation steps. Any non-negative value works;
            it no longer has to be a multiple of 50 or 100.
        learning_rate: Initial Adam learning rate. NOTE: the OneCycleLR
            schedule (peak 0.1) takes over the step size once it is created.
        save_path_projection: Directory prefix; the latent goes to
            ``/w_tensors/{name}.pt`` and the image to ``/images/{name}.png``.
        name: File stem for both outputs.
        save_path_aligned: Unused here, kept for signature parity with the
            other projection functions.
        continue_training: If true, start from ``pre_trained_image_path``
            instead of a random latent.
        pre_trained_image_path: Saved latent, read with ``load_tensor``.
    """
    # Get image in the right tensor shape and plot target image
    target_img = prepare_image(img_path, 256)
    plot_generator_img(target_img, "Target Image")

    # Init generator
    G = init_generator(NETWORK_PKL)
    label = torch.zeros([1, G.c_dim], device=DEVICE)
    noise_mode = 'const'

    # Only the VGG loss is used; mixing in MSE did not help during testing.
    loss_fn_vgg = VGGPerceptualLoss().to(DEVICE)

    if not continue_training:
        # Draw random candidates and start from the one closest to the target.
        # Scored one at a time without autograd so only the best is kept.
        num_candidates = 1
        best_w, best_loss = None, None
        with torch.no_grad():
            for _ in range(num_candidates):
                cand_w = G.mapping(gen_rand_z(G), label)
                cand_img = G.synthesis(cand_w, noise_mode=noise_mode, force_fp32=False).to(DEVICE)
                cand_loss = loss_fn_vgg(cand_img, target_img).item()
                if best_loss is None or cand_loss < best_loss:
                    best_w, best_loss = cand_w, cand_loss
        w = best_w.clone().detach().requires_grad_(True)
    else:
        # Load pre-trained latent
        w = load_tensor(pre_trained_image_path).clone().detach().requires_grad_(True)

    # Initialise optimizer/scheduler
    optimizer = torch.optim.Adam([w], lr=learning_rate)
    # Size the cycle by the real number of steps: the former
    # steps_per_epoch=50 / epochs=int(num_epochs/50) pair under-counted when
    # num_epochs was not a multiple of 50, and OneCycleLR raises once it is
    # stepped past its total.
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, total_steps=max(num_epochs, 1))

    with torch.no_grad():
        new_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
    plot_generator_img(new_img, "Starting  Point " )

    # Running mean of the loss since the last log point, kept as Python floats
    # so no autograd graph is retained between steps.
    window_loss = 0.0
    window_steps = 0
    plot_x = []
    plot_loss = []

    # Projection step
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        # Generate image from w
        pred = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
        loss_vgg = loss_fn_vgg(pred, target_img)
        loss = loss_vgg
        window_loss += loss.item()
        window_steps += 1

        # Optimise w
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Print loss and current state of the projection
        if epoch % 100 == 0:
            avg_loss = window_loss / window_steps
            print('Epoch: %d, avgloss: %f' % (epoch, avg_loss))
            print(loss_vgg)
            with torch.no_grad():
                new_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
            plot_generator_img(new_img, "Projection after " +  str(epoch) +  " epochs: " )
            # The epoch-0 point covers a single step and is left off the plot.
            if epoch > 0:
                plot_x.append(epoch)
                plot_loss.append(avg_loss)
            window_loss = 0.0
            window_steps = 0

    # Close the last (possibly shorter) window; x and y always stay in sync.
    if window_steps:
        plot_x.append(num_epochs)
        plot_loss.append(window_loss / window_steps)

    # Plot loss during projection (not used in thesis)
    plt.plot(plot_x, plot_loss)
    plt.xlabel("num_epochs")
    plt.ylabel("avg_loss in last 100 epochs")
    plt.title("Loss during training")
    plt.show()

    # Plot and save final image and optimised latent code
    with torch.no_grad():
        final_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
    plot_generator_img(final_img, 'Projiziertes Bild')
    plot_generator_img(target_img, 'Original Bild')

    save_tensor(w, save_path_projection + f'/w_tensors/{name}.pt')
    save_generator_img(final_img,  save_path_projection + f'/images/{name}.png')




In [None]:
# Learning rates tried during the experiments.
learning_rates = [0.002, 0.005, 0.01, 0.02, 0.05, 0.1, 0.2]

# Project the unaligned target image for 2000 steps.
projection_w(
    img_path="/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/projection/target_images/cat.jpg",
    num_epochs=2000,
    learning_rate=0.002,
    save_path_projection="/content/drive/MyDrive/Bachelorarbeit/ausarbeitung/gfx/projection",
    name='hemsworth_ohne_preprocessing',
)

## Projection into z-space
Projection into z-space with preprocessing and optimized noise inputs. For z-space, a mix of MSE and VGG loss sometimes yields better results, but in general VGG loss alone yields comparable results.



In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
DEVICE = 'cuda'

def projection_z(img_path: Union[str, os.PathLike, BinaryIO, IO[bytes]],
                 num_epochs: int,
                 learning_rate,
                 save_path_projection: Union[str, os.PathLike, BinaryIO, IO[bytes]],
                 name,
                 save_path_aligned: Union[str, os.PathLike, BinaryIO, IO[bytes]] = None,
                 continue_training: bool = False,
                 pre_trained_image_path: Union[str, os.PathLike, BinaryIO, IO[bytes]] = None):
    """Optimise a z latent and the synthesis noise buffers to match a face image.

    The image is first run through ``preprocess_pipeline``. The loss is
    ``0.2 * MSE + 0.8 * VGG`` plus a noise regulariser weighted by 1e5.

    Args:
        img_path: Image to project.
        num_epochs: Number of optimisation steps (any non-negative value).
        learning_rate: Adam learning rate.
        save_path_projection: Directory prefix; the latent goes to
            ``/z_tensors/{name}.pt`` and the image to ``/images_z/{name}.png``.
        name: File stem for both outputs.
        save_path_aligned: If given, the aligned image is also saved there.
        continue_training: If true, start from ``pre_trained_image_path``
            instead of the best of 2000 random latents.
        pre_trained_image_path: Saved latent, read with ``load_tensor``.
    """
    if save_path_aligned is None:
        aligned_img = preprocess_pipeline(img_path)
    else:
        aligned_img = preprocess_pipeline(img_path, True, save_path_aligned)

    # HWC image -> NCHW float tensor; the view() requires a 256x256 RGB image.
    target_img = torch.Tensor(np.asarray(aligned_img))
    target_img = target_img.view(1, 256, 256, 3).to(DEVICE)
    target_img = (target_img - 128) / 127.5
    target_img = target_img.permute(0, 3, 1, 2)

    plot_generator_img(target_img, "Target Image")

    # Init generator
    G = init_generator(NETWORK_PKL)
    label = torch.zeros([1, G.c_dim], device=DEVICE)

    truncation_psi = 0.7
    noise_mode = 'const'

    # Constant noise buffers of the synthesis network, optimised with z.
    noise_bufs = {buf_name: buf for (buf_name, buf) in G.synthesis.named_buffers() if 'noise_const' in buf_name}

    # Init loss functions
    loss_fn_mse = torch.nn.MSELoss()
    loss_fn_vgg = VGGPerceptualLoss().to(DEVICE)

    if not continue_training:
        # Draw random latents and start from the one closest to the target.
        # Candidates are scored one at a time without autograd so that 2000
        # images (and their graphs) are never held on the GPU at once.
        num_candidates = 2000
        best_z, best_loss = None, None
        with torch.no_grad():
            for _ in range(num_candidates):
                cand_z = gen_rand_z(G)
                cand_img = G(cand_z, label, truncation_psi=truncation_psi, noise_mode=noise_mode).to(DEVICE)
                cand_loss = (0.2 * loss_fn_mse(cand_img, target_img)
                             + 0.8 * loss_fn_vgg(cand_img, target_img)).item()
                if best_loss is None or cand_loss < best_loss:
                    best_z, best_loss = cand_z, cand_loss
        z = best_z.clone().detach().requires_grad_(True)
    else:
        # Load pre-trained latent
        z = load_tensor(pre_trained_image_path).clone().detach().requires_grad_(True)

    # Initialise optimizer (no scheduler is used for z-space)
    optimizer = torch.optim.Adam([z] + list(noise_bufs.values()), lr=learning_rate)

    # Randomise the noise buffers and make them trainable.
    with torch.no_grad():
        for buf in noise_bufs.values():
            buf[:] = torch.randn_like(buf)
    for buf in noise_bufs.values():
        buf.requires_grad = True

    # Running mean of the loss since the last log point, kept as Python floats
    # so no autograd graph is retained between steps.
    window_loss = 0.0
    window_steps = 0
    plot_x = []
    plot_loss = []

    # Projection step
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        pred = G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode)

        # Penalise correlation of each noise map with itself shifted by one
        # pixel, at every resolution down to 8x8.
        reg_loss = 0.0
        for v in noise_bufs.values():
            noise = v[None, None, :, :]  # must be [1,1,H,W] for F.avg_pool2d()
            while True:
                reg_loss += (noise * torch.roll(noise, shifts=1, dims=3)).mean() ** 2
                reg_loss += (noise * torch.roll(noise, shifts=1, dims=2)).mean() ** 2
                if noise.shape[2] <= 8:
                    break
                noise = F.avg_pool2d(noise, kernel_size=2)

        # Calculate loss
        loss_mse = loss_fn_mse(pred, target_img)
        loss_vgg = loss_fn_vgg(pred, target_img)
        loss = 0.2 * loss_mse + 0.8 * loss_vgg + 1e5 * reg_loss
        window_loss += loss.item()
        window_steps += 1

        # Optimise z and the noise buffers
        loss.backward()
        optimizer.step()

        # Print loss and current state of the projection
        if epoch % 100 == 0:
            avg_loss = window_loss / window_steps
            print('Epoch: %d, avgloss: %f' % (epoch, avg_loss))
            print(loss_mse, loss_vgg)
            with torch.no_grad():
                new_img = G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode)
            plot_generator_img(new_img, "Projection after " +  str(epoch) +  " epochs: " )
            # The epoch-0 point covers a single step and is left off the plot.
            if epoch > 0:
                plot_x.append(epoch)
                plot_loss.append(avg_loss)
            window_loss = 0.0
            window_steps = 0

        # Re-normalise every noise map to zero mean and unit variance.
        with torch.no_grad():
            for buf in noise_bufs.values():
                buf -= buf.mean()
                buf *= buf.square().mean().rsqrt()

    # Close the last (possibly shorter) window; x and y always stay in sync.
    if window_steps:
        plot_x.append(num_epochs)
        plot_loss.append(window_loss / window_steps)

    plt.plot(plot_x, plot_loss)
    plt.xlabel("num_epochs")
    plt.ylabel("avg_loss in last 100 epochs")
    plt.title("Loss during training")
    plt.show()

    with torch.no_grad():
        final_img = G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode)
    plot_generator_img(final_img, 'final for ' + str(num_epochs) + ' epochs using Adam Optimizer and Plateau schedule')
    plot_generator_img(target_img, 'Original Image')

    save_tensor(z, save_path_projection + f'/z_tensors/{name}.pt')
    save_generator_img(final_img,  save_path_projection + f'/images_z/{name}.png')




In [None]:
# Project the aligned face into z-space for 2000 steps and keep the aligned crop.
projection_z(
    img_path='/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/projection/target_images/blonde_frau.jpg',
    num_epochs=2000,
    learning_rate=0.002,
    save_path_projection="/content/drive/MyDrive/Bachelorarbeit/ausarbeitung/gfx/projection",
    name='blonde_frau',
    save_path_aligned="/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/projection/aligned_target_images/blonde_frau.jpg",
)

## Projection noise

In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
DEVICE = 'cuda'



def projection_w_noise(img_path: Union[str, os.PathLike, BinaryIO, IO[bytes]],
                 num_epochs: int,
                 learning_rate,
                 save_path_projection: Union[str, os.PathLike, BinaryIO, IO[bytes]],
                 name,
                 save_path_aligned: Union[str, os.PathLike, BinaryIO, IO[bytes]] = None,
                 continue_training: bool = False,
                 pre_trained_image_path: Union[str, os.PathLike, BinaryIO, IO[bytes]] = None):
    """Optimise a W latent and the synthesis noise buffers to match a face image.

    The image is first run through ``preprocess_pipeline``. The loss is
    ``VGG**2 + 1e5 * noise_regulariser + MSE / (3*256*256)``.

    Args:
        img_path: Image to project.
        num_epochs: Number of optimisation steps. Any non-negative value works;
            it no longer has to be a multiple of 50.
        learning_rate: Initial Adam learning rate. NOTE: the OneCycleLR
            schedule (peak 0.1) takes over the step size once it is created.
        save_path_projection: Directory prefix; the latent goes to
            ``/w_tensors/{name}.pt`` and the image to ``/images/{name}.png``.
        name: File stem for both outputs.
        save_path_aligned: If given, the aligned image is also saved there.
        continue_training: If true, start from ``pre_trained_image_path``
            instead of the best of 2000 random latents.
        pre_trained_image_path: Saved latent, read with ``load_tensor``.

    Returns:
        List of numpy snapshots of ``w``, one taken every 10th step.
    """
    if save_path_aligned is None:
        aligned_img = preprocess_pipeline(img_path)
    else:
        aligned_img = preprocess_pipeline(img_path, True, save_path_aligned)

    # HWC image -> NCHW float tensor; the view() requires a 256x256 RGB image.
    target_img = torch.Tensor(np.asarray(aligned_img))
    target_img = target_img.view(1, 256, 256, 3).to(DEVICE)
    target_img = (target_img - 128) / 127.5
    target_img = target_img.permute(0, 3, 1, 2)

    plot_generator_img(target_img, "Target Image")

    # Init generator
    G = init_generator(NETWORK_PKL)
    label = torch.zeros([1, G.c_dim], device=DEVICE)

    noise_mode = 'const'
    N = 3 * 256 * 256  # number of values in the target image, scales the MSE term

    # Constant noise buffers of the synthesis network, optimised with w.
    noise_bufs = {buf_name: buf for (buf_name, buf) in G.synthesis.named_buffers() if 'noise_const' in buf_name}

    # Init loss functions
    loss_fn_mse = torch.nn.MSELoss()
    loss_fn_vgg = VGGPerceptualLoss().to(DEVICE)

    if not continue_training:
        # Draw random latents and start from the one closest to the target.
        # Candidates are scored one at a time without autograd so that 2000
        # images (and their graphs) are never held on the GPU at once.
        num_candidates = 2000
        best_w, best_loss = None, None
        with torch.no_grad():
            for _ in range(num_candidates):
                cand_w = G.mapping(gen_rand_z(G), label)
                cand_img = G.synthesis(cand_w, noise_mode=noise_mode, force_fp32=False).to(DEVICE)
                cand_loss = loss_fn_vgg(cand_img, target_img).item()
                if best_loss is None or cand_loss < best_loss:
                    best_w, best_loss = cand_w, cand_loss
        w = best_w.clone().detach().requires_grad_(True)
    else:
        # Load pre-trained latent
        w = load_tensor(pre_trained_image_path).clone().detach().requires_grad_(True)

    # Initialise optimizer/scheduler
    optimizer = torch.optim.Adam([w] + list(noise_bufs.values()), betas=(0.9, 0.999), lr=learning_rate)
    # Size the cycle by the real number of steps: the former
    # steps_per_epoch=50 / epochs=int(num_epochs/50) pair under-counted when
    # num_epochs was not a multiple of 50, and OneCycleLR raises once it is
    # stepped past its total.
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, total_steps=max(num_epochs, 1))

    # Randomise the noise buffers and make them trainable.
    with torch.no_grad():
        for buf in noise_bufs.values():
            buf[:] = torch.randn_like(buf)
    for buf in noise_bufs.values():
        buf.requires_grad = True

    with torch.no_grad():
        new_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
    plot_generator_img(new_img, "Starting  Point " )

    # Running mean of the loss since the last log point, kept as Python floats
    # so no autograd graph is retained between steps.
    window_loss = 0.0
    window_steps = 0
    plot_w = []

    # Projection step
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        pred = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)

        # Regularise the noise so that it carries no semantics: penalise
        # correlation of each map with itself shifted by one pixel, at every
        # resolution down to 8x8.
        reg_loss = 0.0
        for v in noise_bufs.values():
            noise = v[None, None, :, :]  # must be [1,1,H,W] for F.avg_pool2d()
            while True:
                reg_loss += (noise * torch.roll(noise, shifts=1, dims=3)).mean() ** 2
                reg_loss += (noise * torch.roll(noise, shifts=1, dims=2)).mean() ** 2
                if noise.shape[2] <= 8:
                    break
                noise = F.avg_pool2d(noise, kernel_size=2)

        # Calculate loss
        loss_mse = loss_fn_mse(pred, target_img)
        loss_vgg = loss_fn_vgg(pred, target_img)
        loss =  loss_vgg ** 2 + reg_loss * 1e5 + loss_mse * 1 / N
        window_loss += loss.item()
        window_steps += 1

        # Optimise w and the noise buffers
        loss.backward()
        optimizer.step()

        # Snapshot of w for the trajectory plot
        if epoch % 10 == 0:
            plot_w.append(w.clone().detach().cpu().numpy())

        scheduler.step()

        # Print loss and current state of the projection
        if epoch % 100 == 0:
            print('Epoch: %d, avgloss: %f' % (epoch, window_loss / window_steps))
            print(loss_mse, loss_vgg)
            with torch.no_grad():
                new_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
            plot_generator_img(new_img, "Projection after " +  str(epoch) +  " epochs: " )
            window_loss = 0.0
            window_steps = 0

        # Re-normalise every noise map to zero mean and unit variance.
        with torch.no_grad():
            for buf in noise_bufs.values():
                buf -= buf.mean()
                buf *= buf.square().mean().rsqrt()

    with torch.no_grad():
        final_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
    plot_generator_img(final_img, 'final for ' + str(num_epochs) + ' epochs using Adam Optimizer and Plateau schedule')
    plot_generator_img(target_img, 'Original Bild')

    save_tensor(w, save_path_projection + f'/w_tensors/{name}.pt')
    save_generator_img(final_img,  save_path_projection + f'/images/{name}.png')

    return plot_w




In [None]:

# Project the face and keep the recorded w snapshots for the t-SNE plot.
plot_w = projection_w_noise(
    img_path='/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/projection/target_images/paul.jpeg',
    num_epochs=2000,
    learning_rate=0.002,
    save_path_projection="/content/drive/MyDrive/Bachelorarbeit/ausarbeitung/gfx/projection",
    name='paul',
)




In [None]:

# 3-D t-SNE embedding of the w snapshots recorded during projection,
# coloured by their order in the trajectory.
Perplexity = 30

# TSNE expects a 2-D (n_samples, n_features) matrix, but every snapshot in
# plot_w is a multi-dimensional array, so each one is flattened to a row.
w_snapshots = np.asarray(plot_w)
w_snapshots = w_snapshots.reshape(len(w_snapshots), -1)

tsne = TSNE(n_components=3, verbose=1, random_state=123, perplexity=Perplexity)
embed = tsne.fit_transform(w_snapshots)

x, y, z = (list(axis) for axis in zip(*embed))

print(len(x))
print(len(y))
print(len(z))
print(max(x), max(y), max(z))
print(min(x), min(y), min(z))

# Filter outliers: keep only points inside the box |x| < 22, |y| < 20, |z| < 20.
x_filtered = []
y_filtered = []
z_filtered = []
for xi, yi, zi in zip(x, y, z):
    if abs(xi) < 22 and abs(yi) < 20 and abs(zi) < 20:
        x_filtered.append(xi)
        y_filtered.append(yi)
        z_filtered.append(zi)

# Colour value = 1-based position among the kept points.
n = len(x_filtered)
color = list(range(1, n + 1))

# axes instance
fig = plt.figure(figsize=(6, 6))
ax = Axes3D(fig, auto_add_to_figure=False)
fig.add_axes(ax)

cmap = ListedColormap(sns.color_palette("blend:#F00,#00F").as_hex())

# plot
sc = ax.scatter(x_filtered, y_filtered, z_filtered, s=20, marker='o', alpha=1, c=color, cmap=cmap)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_zlabel('Z')
ax.set_title(f'Perplexity: {Perplexity}')
plt.show()


##Projection with noise 2nd
Includes some experiments which didn't work out (a different initialisation and adding noise to the latent code to stabilise finding the global optimum). Further work may be done here. The current state does not run.

In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
DEVICE = 'cuda'

def projection_w_noise(img_path: Union[str, os.PathLike, BinaryIO, IO[bytes]],
                 num_epochs: int,
                 learning_rate,
                 save_path_projection: Union[str, os.PathLike, BinaryIO, IO[bytes]],
                 name,
                 save_path_aligned: Union[str, os.PathLike, BinaryIO, IO[bytes]] = None,
                 continue_training: bool = False,
                 pre_trained_image_path: Union[str, os.PathLike, BinaryIO, IO[bytes]] = None):
    """Project a face into W with trainable noise buffers and latent noise.

    Experimental variant: Gaussian noise is added to ``w`` for each forward
    pass. Its scale starts at ``0.05 * w_std`` and decays quadratically to
    zero over the first three quarters of the run. The loss is
    ``VGG**2 + 1e6 * noise_regulariser``.

    Args:
        img_path: Image to project (run through ``preprocess_pipeline``).
        num_epochs: Number of optimisation steps. Any non-negative value works;
            it no longer has to be a multiple of 50 or 100.
        learning_rate: Initial Adam learning rate. NOTE: the OneCycleLR
            schedule (peak 0.1) takes over the step size once it is created.
        save_path_projection: Directory prefix; the latent goes to
            ``/w_tensors/{name}.pt`` and the image to ``/images/{name}.png``.
        name: File stem for both outputs.
        save_path_aligned: If given, the aligned image is also saved there.
        continue_training: If true, start from ``pre_trained_image_path``
            instead of the best of 1000 random latents.
        pre_trained_image_path: Saved latent, read with ``load_tensor``.
    """
    if save_path_aligned is None:
        aligned_img = preprocess_pipeline(img_path)
    else:
        aligned_img = preprocess_pipeline(img_path, True, save_path_aligned)

    # HWC image -> NCHW float tensor; the view() requires a 256x256 RGB image.
    target_img = torch.Tensor(np.asarray(aligned_img))
    target_img = target_img.view(1, 256, 256, 3).to(DEVICE)
    target_img = (target_img - 128) / 127.5
    target_img = target_img.permute(0, 3, 1, 2)

    plot_generator_img(target_img, "Target Image")

    # Init generator
    G = init_generator(NETWORK_PKL)
    label = torch.zeros([1, G.c_dim], device=DEVICE)

    # Constant noise buffers of the synthesis network, optimised with w.
    init_noise_factor = 0.05
    noise_bufs = {buf_name: buf for (buf_name, buf) in G.synthesis.named_buffers() if 'noise_const' in buf_name}

    noise_mode = 'const'

    # Init loss functions
    loss_fn_mse = torch.nn.MSELoss()
    loss_fn_vgg = VGGPerceptualLoss().to(DEVICE)

    if not continue_training:
        # Draw random latents and start from the one closest to the target.
        # Candidates are scored one at a time without autograd so that 1000
        # images (and their graphs) are never held on the GPU at once.
        num_candidates = 1000
        best_w, best_loss = None, None
        with torch.no_grad():
            for _ in range(num_candidates):
                cand_w = G.mapping(gen_rand_z(G), label)
                cand_img = G.synthesis(cand_w, noise_mode=noise_mode, force_fp32=False).to(DEVICE)
                cand_loss = loss_fn_vgg(cand_img, target_img).item()
                if best_loss is None or cand_loss < best_loss:
                    best_w, best_loss = cand_w, cand_loss
        w = best_w.clone().detach().requires_grad_(True)
    else:
        # Load pre-trained latent
        w = load_tensor(pre_trained_image_path).clone().detach().requires_grad_(True)

    # Spread of W, used to scale the latent noise. Only the first per-layer
    # copy of each sample is used, as in NVIDIA's reference projector; summing
    # over all copies would inflate w_std.
    # NOTE(review): assumes G.mapping returns [1, num_ws, w_dim] with identical
    # rows - confirm against training.networks_stylegan2.
    num_samples = 10000
    with torch.no_grad():
        w_samples = torch.cat([G.mapping(gen_rand_z(G), label)[:, :1, :] for _ in range(num_samples)])
    w_samples = w_samples.cpu().numpy().astype(np.float32)
    w_avg = np.mean(w_samples, axis=0, keepdims=True)
    w_std = float((np.sum((w_samples - w_avg) ** 2) / num_samples) ** 0.5)

    # Initialise optimizer/scheduler
    optimizer = torch.optim.Adam([w] + list(noise_bufs.values()), betas=(0.9, 0.999), lr=learning_rate)

    # Randomise the noise buffers and make them trainable.
    with torch.no_grad():
        for buf in noise_bufs.values():
            buf[:] = torch.randn_like(buf)
    for buf in noise_bufs.values():
        buf.requires_grad = True

    # Size the cycle by the real number of steps: the former
    # steps_per_epoch=50 / epochs=int(num_epochs/50) pair under-counted when
    # num_epochs was not a multiple of 50, and OneCycleLR raises once it is
    # stepped past its total.
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, total_steps=max(num_epochs, 1))

    with torch.no_grad():
        new_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
    plot_generator_img(new_img, "Starting  Point " )

    # Running mean of the loss since the last log point, kept as Python floats
    # so no autograd graph is retained between steps.
    window_loss = 0.0
    window_steps = 0
    plot_x = []
    plot_loss = []

    three_quarters = num_epochs * 3 / 4

    # Projection step
    for epoch in range(num_epochs):
        # Noise scale decays quadratically to 0 at three quarters of the run.
        factor = max(0, 1 - (epoch / three_quarters))
        noise_scale = init_noise_factor * w_std * (factor ** 2)
        # Perturb a copy: w itself must stay the leaf tensor the optimizer
        # updates. Rebinding w (w = w + noise) detached it from the optimizer
        # and made the second backward() fail.
        w_noisy = w + torch.randn_like(w) * noise_scale

        optimizer.zero_grad()
        pred = G.synthesis(w_noisy, noise_mode=noise_mode, force_fp32=False)

        # Regularise the noise so that it carries no semantics: penalise
        # correlation of each map with itself shifted by one pixel, at every
        # resolution down to 8x8.
        reg_loss = 0.0
        for v in noise_bufs.values():
            noise = v[None, None, :, :]  # must be [1,1,H,W] for F.avg_pool2d()
            while True:
                reg_loss += (noise * torch.roll(noise, shifts=1, dims=3)).mean() ** 2
                reg_loss += (noise * torch.roll(noise, shifts=1, dims=2)).mean() ** 2
                if noise.shape[2] <= 8:
                    break
                noise = F.avg_pool2d(noise, kernel_size=2)

        # MSE is only reported, it is not part of the objective.
        loss_mse = loss_fn_mse(pred, target_img)
        loss_vgg = loss_fn_vgg(pred, target_img)
        loss = loss_vgg ** 2 + reg_loss * 1e6
        window_loss += loss.item()
        window_steps += 1

        # Optimise w and the noise buffers
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Print loss and current state of the projection
        if epoch % 100 == 0:
            avg_loss = window_loss / window_steps
            print('Epoch: %d, avgloss: %f' % (epoch, avg_loss))
            print(loss_mse, loss_vgg)
            with torch.no_grad():
                new_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
            plot_generator_img(new_img, "Projection after " +  str(epoch) +  " epochs: " )
            # The epoch-0 point covers a single step and is left off the plot.
            if epoch > 0:
                plot_x.append(epoch)
                plot_loss.append(avg_loss)
            window_loss = 0.0
            window_steps = 0

        # Re-normalise every noise map to zero mean and unit variance.
        with torch.no_grad():
            for buf in noise_bufs.values():
                buf -= buf.mean()
                buf *= buf.square().mean().rsqrt()

    # Close the last (possibly shorter) window; x and y always stay in sync.
    if window_steps:
        plot_x.append(num_epochs)
        plot_loss.append(window_loss / window_steps)

    plt.plot(plot_x, plot_loss)
    plt.xlabel("num_epochs")
    plt.ylabel("avg_loss in last 100 epochs")
    plt.title("Loss during training")
    plt.show()

    with torch.no_grad():
        final_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
    plot_generator_img(final_img, 'final for ' + str(num_epochs) + ' epochs using Adam Optimizer and Plateau schedule')
    plot_generator_img(target_img, 'Original Image')

    save_tensor(w, save_path_projection + f'/w_tensors/{name}.pt')
    save_generator_img(final_img,  save_path_projection + f'/images/{name}.png')




In [None]:
# Short test run (1000 steps) of the experimental latent-noise projection.
projection_w_noise(
    img_path="/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/projection/target_images/til.jpg",
    num_epochs=1000,
    learning_rate=0.002,
    save_path_projection="/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/projection/result_images",
    name='test3',
)

##Projection with noise
Another experimental version which doesn't compile.

In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
DEVICE = 'cuda'

def projection_w(img_path: Union[str, os.PathLike, BinaryIO, IO[bytes]],
                 num_epochs: int,
                 learning_rate,
                 save_path_projection: Union[str, os.PathLike, BinaryIO, IO[bytes]],
                 name,
                 save_path_aligned: Union[str, os.PathLike, BinaryIO, IO[bytes]] = None,
                 continue_training: bool = False,
                 pre_trained_image_path: Union[str, os.PathLike, BinaryIO, IO[bytes]] = None):
    """Project a face into W with decaying Gaussian noise added to the latent.

    Experimental variant: the noise buffers of the generator are left
    untouched. Gaussian noise is added to ``w`` for each forward pass; its
    scale starts at ``0.05 * w_std`` and decays quadratically to zero over the
    first three quarters of the run. The loss is ``0.2 * MSE + 0.8 * VGG``.

    Args:
        img_path: Image to project (run through ``preprocess_pipeline``).
        num_epochs: Number of optimisation steps. Any non-negative value works;
            it no longer has to be a multiple of 50 or 100.
        learning_rate: Initial Adam learning rate. NOTE: the OneCycleLR
            schedule (peak 0.1) takes over the step size once it is created.
        save_path_projection: Directory prefix; the latent goes to
            ``/w_tensors/{name}.pt`` and the image to ``/images/{name}.png``.
        name: File stem for both outputs.
        save_path_aligned: If given, the aligned image is also saved there.
        continue_training: If true, start from ``pre_trained_image_path``
            instead of the best of 2000 random latents.
        pre_trained_image_path: Saved latent, read with ``load_tensor``.
    """
    if save_path_aligned is None:
        aligned_img = preprocess_pipeline(img_path)
    else:
        aligned_img = preprocess_pipeline(img_path, True, save_path_aligned)

    # HWC image -> NCHW float tensor; the view() requires a 256x256 RGB image.
    target_img = torch.Tensor(np.asarray(aligned_img))
    target_img = target_img.view(1, 256, 256, 3).to(DEVICE)
    target_img = (target_img - 128) / 127.5
    target_img = target_img.permute(0, 3, 1, 2)

    plot_generator_img(target_img, "Target Image")

    # Init generator
    G = init_generator(NETWORK_PKL)
    label = torch.zeros([1, G.c_dim], device=DEVICE)

    init_noise_factor = 0.05
    noise_mode = 'const'

    # Init loss functions
    loss_fn_mse = torch.nn.MSELoss()
    loss_fn_vgg = VGGPerceptualLoss().to(DEVICE)

    if not continue_training:
        # Draw random latents and start from the one closest to the target.
        # Candidates are scored one at a time without autograd so that 2000
        # images (and their graphs) are never held on the GPU at once.
        # NOTE(review): MSE is weighted 1.0 here but 0.2 in the training loss
        # below - confirm whether that difference is intended.
        num_candidates = 2000
        best_w, best_loss = None, None
        with torch.no_grad():
            for _ in range(num_candidates):
                cand_w = G.mapping(gen_rand_z(G), label)
                cand_img = G.synthesis(cand_w, noise_mode=noise_mode, force_fp32=False).to(DEVICE)
                cand_loss = (loss_fn_mse(cand_img, target_img)
                             + 0.8 * loss_fn_vgg(cand_img, target_img)).item()
                if best_loss is None or cand_loss < best_loss:
                    best_w, best_loss = cand_w, cand_loss
        w = best_w.clone().detach().requires_grad_(True)
    else:
        # Load pre-trained latent
        w = load_tensor(pre_trained_image_path).clone().detach().requires_grad_(True)

    # Spread of W, used to scale the latent noise. Sampled independently of
    # the candidate search so it is also available when continue_training is
    # set, and divided by the real sample count. Only the first per-layer copy
    # of each sample is used, as in NVIDIA's reference projector.
    # NOTE(review): assumes G.mapping returns [1, num_ws, w_dim] with identical
    # rows - confirm against training.networks_stylegan2.
    num_samples = 10000
    with torch.no_grad():
        w_samples = torch.cat([G.mapping(gen_rand_z(G), label)[:, :1, :] for _ in range(num_samples)])
    w_samples = w_samples.cpu().numpy().astype(np.float32)
    w_avg = np.mean(w_samples, axis=0, keepdims=True)
    w_std = float((np.sum((w_samples - w_avg) ** 2) / num_samples) ** 0.5)

    # Initialise optimizer/scheduler. The noise buffers are not trained in
    # this variant, so only w is optimised.
    optimizer = torch.optim.Adam([w], betas=(0.9, 0.999), lr=learning_rate)
    # Size the cycle by the real number of steps: the former
    # steps_per_epoch=50 / epochs=int(num_epochs/50) pair under-counted when
    # num_epochs was not a multiple of 50, and OneCycleLR raises once it is
    # stepped past its total.
    scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, total_steps=max(num_epochs, 1))

    with torch.no_grad():
        new_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
    plot_generator_img(new_img, "Starting  Point " )

    # Running mean of the loss since the last log point, kept as Python floats
    # so no autograd graph is retained between steps.
    window_loss = 0.0
    window_steps = 0
    plot_x = []
    plot_loss = []

    three_quarters = num_epochs * 3 / 4

    # Projection step
    for epoch in range(num_epochs):
        # Decay with the current epoch. Using num_epochs here made the factor
        # a constant 0, so no noise was ever applied.
        factor = max(0, 1 - (epoch / three_quarters))
        noise_scale = init_noise_factor * w_std * factor ** 2
        # Perturb a copy: w must stay the leaf tensor the optimizer updates.
        # No repeat() is applied because w already comes from G.mapping with
        # one row per synthesis layer.
        w_noisy = w + torch.randn_like(w) * noise_scale

        optimizer.zero_grad()
        # Generate image from the perturbed w
        pred = G.synthesis(w_noisy, noise_mode=noise_mode, force_fp32=False)

        # Calculate loss
        loss_mse = loss_fn_mse(pred, target_img)
        loss_vgg = loss_fn_vgg(pred, target_img)
        loss = 0.2 * loss_mse + 0.8 * loss_vgg
        window_loss += loss.item()
        window_steps += 1

        # Optimise w
        loss.backward()
        optimizer.step()
        scheduler.step()

        # Print loss and current state of the projection
        if epoch % 100 == 0:
            avg_loss = window_loss / window_steps
            print('Epoch: %d, avgloss: %f' % (epoch, avg_loss))
            print(loss_mse, loss_vgg)
            with torch.no_grad():
                new_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
            plot_generator_img(new_img, "Projection after " +  str(epoch) +  " epochs: " )
            # The epoch-0 point covers a single step and is left off the plot.
            if epoch > 0:
                plot_x.append(epoch)
                plot_loss.append(avg_loss)
            window_loss = 0.0
            window_steps = 0

    # Close the last (possibly shorter) window; x and y always stay in sync.
    if window_steps:
        plot_x.append(num_epochs)
        plot_loss.append(window_loss / window_steps)

    plt.plot(plot_x, plot_loss)
    plt.xlabel("num_epochs")
    plt.ylabel("avg_loss in last 100 epochs")
    plt.title("Loss during training")
    plt.show()

    with torch.no_grad():
        final_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
    plot_generator_img(final_img, 'final for ' + str(num_epochs) + ' epochs using Adam Optimizer and Plateau schedule')
    plot_generator_img(target_img, 'Original Image')

    save_tensor(w, save_path_projection + f'/w_tensors/{name}.pt')
    save_generator_img(final_img,  save_path_projection + f'/images/{name}.png')




In [None]:
# Run the latent-noise projection on the aligned face and keep the aligned crop.
projection_w(
    img_path="/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/projection/target_images/til.jpg",
    num_epochs=2000,
    learning_rate=0.002,
    save_path_projection="/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/projection/result_images",
    name='til_with_noise',
    save_path_aligned="/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/projection/aligned_target_images/til2.jpg",
)

## Projection for non face images into ffhq
Used in Section 3.3.4 to embed non-facial images in the latent space of the generator trained on FFHQ. Only use 256x256 images if there is no recognizable face in the image; otherwise any image is possible.

In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
DEVICE = 'cuda'



def projection_w_noise_limit(img_path: Union[str, os.PathLike, BinaryIO, IO[bytes]],
                 num_epochs: int,
                 learning_rate,
                 save_path_projection: Union[str, os.PathLike, BinaryIO, IO[bytes]],
                 name,
                 save_path_aligned: Union[str, os.PathLike, BinaryIO, IO[bytes]] = None,
                 continue_training: bool = False,
                 pre_trained_image_path: Union[str, os.PathLike, BinaryIO, IO[bytes]] = None):
  """Project an image into the latent space of the generator by optimizing w.

  The target is loaded with ``prepare_image(img_path, 256)``. Unless
  ``continue_training`` is set, 2000 random latent codes are drawn and the one
  whose image has the lowest VGG perceptual loss against the target becomes
  the starting point. w is then optimized with Adam and a OneCycleLR schedule
  on the loss ``loss_vgg ** 2 + loss_mse / N``.

  Args:
    img_path: Path of the target image.
    num_epochs: Number of optimization steps.
    learning_rate: Initial learning rate handed to Adam.
      NOTE(review): OneCycleLR derives its own learning-rate range from
      ``max_lr=0.1``, so this value probably has little effect — confirm.
    save_path_projection: Result directory. It is concatenated with a string,
      so in practice it has to be a ``str``; w goes to
      ``<dir>/w_tensors/<name>.pt`` and the final image to
      ``<dir>/images/<name>.png``.
    name: File name (without extension) of the stored results.
    save_path_aligned: Only used by the commented-out face-alignment path.
    continue_training: If True, start from a stored latent code instead of
      running the random search.
    pre_trained_image_path: Path of the stored latent code; read only when
      ``continue_training`` is True.

  Returns:
    A list with a numpy copy of w taken every 10th step.

  NOTE(review): ``N`` is not defined inside this function; it is expected to
  exist as a notebook-level global — TODO confirm where it is set.
  """

  #Uncomment if there is a recognizable face in the image.
  #if(save_path_aligned == None):
  #  target_img = preprocess_pipeline(img_path)
  #else:
  #  target_img = preprocess_pipeline(img_path, True, save_path_aligned)

  #target_img = np.asarray(target_img)
  #target_img = torch.Tensor(target_img)
  #target_img = target_img.view(1, 256, 256, 3).to(DEVICE)
  #target_img = (target_img - 128) / 127.5
  #target_img = target_img.permute(0, 3, 1, 2)

  #plot_generator_img(target_img, "Target Image")

  target_img = prepare_image(img_path, 256)
  plot_generator_img(target_img, "Target Image")

  #Init Generator
  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=DEVICE)

  noise_mode = 'const'

  # Collect the constant noise buffers of the synthesis network.
  noise_bufs = { buf_name: buf for (buf_name, buf) in G.synthesis.named_buffers() if 'noise_const' in buf_name }

  #Init loss functions
  loss_fn_mse = torch.nn.MSELoss()
  loss_fn_vgg = VGGPerceptualLoss().to(DEVICE)

  if not continue_training:
      # Random search for a starting point: keep only the best candidate seen
      # so far instead of holding all 2000 rendered images in memory at once.
      # No gradients are needed for this search.
      best_w = None
      best_error = None
      with torch.no_grad():
        for _ in range(2000):
          candidate_w = G.mapping(gen_rand_z(G), label)
          candidate_img = G.synthesis(candidate_w, noise_mode=noise_mode, force_fp32=False).to(DEVICE)
          candidate_error = loss_fn_vgg(candidate_img, target_img)
          # Strict '<' keeps the first candidate on ties, like a stable sort.
          if best_error is None or candidate_error < best_error:
            best_w, best_error = candidate_w, candidate_error

      #copy best latent code to w
      w = best_w.clone().detach().requires_grad_(True)

  else:
      #load pre trained latent code
      w = load_tensor(pre_trained_image_path).clone().detach().requires_grad_(True)

  # Used for experimental approach, which didn't work out.
  #init_noise_factor = 0.05
  #noise_ramp_length =  0.75
  #w_samples_number = 10000
  #w_samples = [G.mapping(gen_rand_z(G), label).cpu().numpy().astype(np.float32)  for i in range(w_samples_number)]
  #w_avg = np.mean(w_samples, axis=0, keepdims=True)
  #w_std = (np.sum((w_samples - w_avg) ** 2) / w_samples_number) ** 0.5

  #initialize optimizer/scheduler
  optimizer = torch.optim.Adam([w], betas=(0.9, 0.999), lr=learning_rate)

  #scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
  #scheduler = torch.optim.lr_scheduler.ExponentialLR(optimizer, 0.995)
  # total_steps must match the number of scheduler.step() calls below. The
  # previous steps_per_epoch=50 / epochs=int(num_epochs/50) form produced
  # fewer steps whenever num_epochs was not a multiple of 50, which makes
  # OneCycleLR raise once the loop runs past its schedule.
  scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.1, total_steps=num_epochs)

  # Re-draw the constant noise inputs. They are flagged as requiring grad but
  # are not handed to the optimizer, so Adam only updates w.
  for buf in noise_bufs.values():
    buf[:] = torch.randn_like(buf)
    buf.requires_grad = True


  new_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
  plot_generator_img(new_img, "Starting  Point " )


  # Despite its name, avg_loss is the SUM of the losses since the last report.
  avg_loss = 0
  plot_loss = []
  plot_w = []

  #Projection Step
  for epoch in range(num_epochs):
    optimizer.zero_grad()

    #experimental approach, which didn't work out.
    #t = epoch / num_epochs
    #noise_scale = w_std * init_noise_factor * max(0.0, 1.0 - t / noise_ramp_length) ** 2
    #w_noise = torch.randn_like(w) * noise_scale
    #print( noise_scale)
    #w_pred = (w + w_noise).repeat([1,1, 1])

    #Generate image from w
    pred = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)

    #Experiments show that even a high regularisation leaves semantic content in the noise inputs. Only uncomment for testing.
    #Regularise the noise so that it carries no semantic content.
    #reg_loss = 0.0
    #for v in noise_bufs.values():
    #    noise = v[None,None,:,:] # must be [1,1,H,W] for F.avg_pool2d()
    #    while True:
    #        reg_loss += (noise*torch.roll(noise, shifts=1, dims=3)).mean()**2
    #        reg_loss += (noise*torch.roll(noise, shifts=1, dims=2)).mean()**2
    #        if noise.shape[2] <= 8:
    #            break
    #        noise = F.avg_pool2d(noise, kernel_size=2)

    #Calculate loss
    loss_mse = loss_fn_mse(pred, target_img)
    loss_vgg = loss_fn_vgg(pred, target_img)
    loss = loss_vgg ** 2 + loss_mse * 1 / N
    # Detach before accumulating so the running sum does not hold on to the
    # autograd history of every step.
    avg_loss += loss.detach()

    #Optimize w
    loss.backward()
    optimizer.step()

    # Record a snapshot of w every 10th step; this list is the return value.
    if epoch % 10 == 0:
      plot_w.append(w.clone().detach().cpu().numpy())

    scheduler.step()

    #Print loss and current state of the projection
    if epoch % 100 == 0:
        print('Epoch: %d, avgloss: %f' % (epoch, avg_loss))
        print(loss_mse, loss_vgg)
        new_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
        plot_generator_img(new_img, "Projection after " +  str(epoch) +  " epochs: " )
        plot_loss.append(avg_loss)
        avg_loss = 0

    #dont use noise inputs
    #with torch.no_grad():
    #  for buf in noise_bufs.values():
    #    buf -= buf.mean()
    #    buf *= buf.square().mean().rsqrt()



  #Uncomment if you want plots of the loss value during projection.
  #plot_loss.append(avg_loss)
  #plot_loss = [x.cpu().detach().numpy() for x in plot_loss]
  #plot_loss = plot_loss[1:]
  #plot_x = [x for  x in range(0, num_epochs+1, 100)]
  #plot_x = plot_x[1:]
  #plt.plot(plot_x, plot_loss)
  #plt.xlabel("num_epochs")
  #plt.ylabel("avg_loss in last 100 epochs")
  #plt.title("Loss during training")
  #plt.show()


  final_img = G.synthesis(w, noise_mode=noise_mode, force_fp32=False)
  # The title now names the scheduler that is actually used (OneCycleLR).
  plot_generator_img(final_img, 'final for ' + str(num_epochs) + ' epochs using Adam Optimizer and OneCycle schedule')
  plot_generator_img(target_img, 'Original Bild')

  save_tensor(w, save_path_projection + f'/w_tensors/{name}.pt')
  save_generator_img(final_img,  save_path_projection + f'/images/{name}.png')


  return plot_w




In [None]:

# Project the non-face image "Zimmer.jpg" for 3000 steps and store the results as 'zimmer2'.
# The returned list of intermediate w snapshots is kept in plot_w.
plot_w = projection_w_noise_limit(f'/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/projection/target_images/Zimmer.jpg',
            3000,
            0.002,
            "/content/drive/MyDrive/Bachelorarbeit/ausarbeitung/gfx/projection",
            f'zimmer2')




#Feature Vectors

In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
DEVICE = 'cuda'

#Reference: https://github.com/rosinality/stylegan2-pytorch/blob/master/closed_form_factorization.py

def generate_eigvec(pkl):
  """Compute candidate feature directions from the style (affine) layer weights.

  Loads ``G_ema`` from the network pickle ``pkl``, gathers the weight of every
  parameter whose name contains both "affine" and "weight" — parameters with
  "torgb" in their name are only kept when the name also contains "b4" —
  stacks them along dimension 0 and runs an SVD on the result.

  Returns:
    The ``Vh`` factor of the SVD, moved to the CPU. Its rows are the right
    singular vectors of the stacked weight matrix.
  """
  target_device = torch.device(DEVICE)
  with dnnlib.util.open_url(pkl) as stream:
    G = legacy.load_network_pkl(stream)['G_ema'].to(target_device)

  def is_selected(param_name):
    # Only weights of the affine style transformations are of interest.
    if "affine" not in param_name or "weight" not in param_name:
      return False
    # toRGB layers are excluded, except for the one in block "b4".
    return "torgb" not in param_name or "b4" in param_name

  # named_parameters() yields each parameter once, in module order.
  affine_weights = [param for param_name, param in G.named_parameters() if is_selected(param_name)]

  # Stack all affine weight matrices and decompose the result.
  stacked_weights = torch.cat(affine_weights, 0)
  return torch.linalg.svd(stacked_weights).Vh.to("cpu")

In [None]:
# Compute the feature vectors once and show the shape of the resulting matrix.
eigvec = generate_eigvec(NETWORK_PKL)
print(eigvec.shape)

Now we experiment with the computed feature vectors

In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
DEVICE = 'cuda'

def add_feature_vector_to_image(seed, degree, feature_vector_index, steps):
  """Show how one feature vector changes the image generated from ``seed``.

  The latent code w of the seed image is shifted along the selected feature
  vector in evenly spaced steps from ``-degree`` to ``+degree`` and all
  resulting images are displayed side by side, with the unmodified image in
  the centre.

  Args:
    seed: Seed of the random latent vector z.
    degree: Strength of the largest shift; 0 changes nothing. The original
      author suggests values between 0 and +-10.
    feature_vector_index: Row of the matrix returned by ``generate_eigvec``
      that is used as direction.
    steps: Number of images to show. Even values are increased by one so that
      the unmodified image sits exactly in the middle.
  """

  #Number of steps must be odd
  if steps % 2 == 0:
    steps = steps + 1

  truncation_psi = 0.7
  noise_mode = 'const'

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=DEVICE)

  z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(DEVICE)
  w = G.mapping(z, label, truncation_psi=truncation_psi).cpu()

  eigvec = generate_eigvec(NETWORK_PKL)
  direction = degree * eigvec[feature_vector_index]

  # Relative shift of every image: -1, ..., 0, ..., +1 in ascending order.
  # Previously the positive half was emitted strongest-first
  # (e.g. -1, -0.5, 0, +1, +0.5), so the strip was not a monotonic sweep.
  half = steps // 2
  if half > 0:
    offsets = [k / half for k in range(-half, half + 1)]
  else:
    # A single step only shows the unmodified image.
    offsets = [0.0]

  ws = torch.cat([w + offset * direction for offset in offsets]).to(DEVICE)

  # Rendering only; no gradients are needed.
  with torch.no_grad():
    img = G.synthesis(ws, noise_mode=noise_mode, force_fp32=False)
  # Map the generator output to uint8 pixel values in HWC layout.
  img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)

  imshow(img.cpu(), col=steps)

In [None]:

# Visualise feature vectors 2..11 for seed 175 with degree 3 and 5 steps each.
for i in range(10):
    add_feature_vector_to_image(175, 3, i+2, 5)

#Style Transfer

## Switching Styleblocks
Implementation of Style Mixing for two generated Images

In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
DEVICE = 'cuda'

def switch_styleblocks(seed1, seed2, indexes, i , j):
  """Style-mix two generated images and save the mixed result.

  The latent code of ``seed1`` is copied and the style layers listed in
  ``indexes`` are replaced by those of ``seed2``. Only the mixed image is
  rendered and written to disk.

  Args:
    seed1: Seed of the image that provides the base latent code.
    seed2: Seed of the image whose style layers are inserted.
    indexes: Iterable of layer indices (second axis of w) to take from seed2.
    i, j: Only used to build the output file name ``Bild{i}{j}.png``.

  NOTE: the output directory ("8-13") is hard-coded and does not follow
  ``indexes``.
  """

  truncation_psi = 0.7
  noise_mode = 'const'

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=DEVICE)

  z1 = torch.from_numpy(np.random.RandomState(seed1).randn(1, G.z_dim)).to(DEVICE)
  z2 = torch.from_numpy(np.random.RandomState(seed2).randn(1, G.z_dim)).to(DEVICE)

  w1 = G.mapping(z1, label, truncation_psi=truncation_psi, truncation_cutoff=8)
  w2 = G.mapping(z2, label, truncation_psi=truncation_psi, truncation_cutoff=8)

  w_mixed = w1.clone()
  for index in indexes:
    w_mixed[:, index, :] = w2[:, index, :]

  # Only the mixed image is saved, so only w_mixed is rendered. The former
  # batch of three images and its uint8 conversion were never used here.
  # The generator output already carries the batch dimension, so no reshape
  # to a fixed 256x256 size is required.
  with torch.no_grad():
    img_mixed = G.synthesis(w_mixed, noise_mode=noise_mode, force_fp32=False)

  #plot_generator_img(img_mixed, "mixed image")
  save_generator_img(img_mixed,  f'/content/drive/MyDrive/Bachelorarbeit/ausarbeitung/gfx/style_mixing/mixed_bilder/8-13/Bild{i}{j}.png')

In [None]:
# Build a 5x5 grid of mixed images: seeds 1300..1304 receive the style layers 8-13
# of seeds 1500..1504; i and j end up in the file name of each result.
for i in range(5):
  for j in range(5):
    switch_styleblocks(1300+i, 1500+j, [8,9,10,11,12,13], i, j)

##Switching Styleblocks Tensor
Implementation of Style Mixing for one generated image and one image from our projection (latent code already computed).

In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
DEVICE = 'cuda'

def switch_styleblocks_custom(seed, tensor_path, indexes):
  """Style-mix a generated image with a stored (projected) latent code.

  The latent code of ``seed`` is copied and the style layers listed in
  ``indexes`` are replaced by those of the latent code loaded from
  ``tensor_path``. The seed image, the mixed image and the projected image are
  displayed, and the mixed image is saved.

  Args:
    seed: Seed of the generated image that provides the base latent code.
    tensor_path: Path of the stored latent code w of the projected image.
    indexes: Iterable of layer indices (second axis of w) to take from the
      stored latent code.

  NOTE: the output directory ("0-3") is hard-coded and does not follow
  ``indexes``.
  """

  truncation_psi = 0.7
  noise_mode = 'const'

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=DEVICE)

  z1 = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(DEVICE)

  # Detach and move the stored code next to w1: torch.cat needs both on the
  # same device, and no gradients are required for mixing.
  w2 = load_tensor(tensor_path).detach().to(DEVICE)
  w1 = G.mapping(z1, label, truncation_psi=truncation_psi, truncation_cutoff=8)

  w_mixed = w1.clone()
  for index in indexes:
    w_mixed[:, index, :] = w2[:, index, :]

  ws = torch.cat([w1, w_mixed, w2]).to(DEVICE)

  with torch.no_grad():
    img = G.synthesis(ws, noise_mode=noise_mode, force_fp32=False)

  # Keep the mixed image as a batch of one, independent of the resolution.
  img_mixed = img[1:2].clone()

  img = (img.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)

  plot_generator_img(img_mixed,"mixed image")

  # The file name used to be built from `i` and `j`, which are not parameters
  # of this function (it only worked with leftover notebook globals), and it
  # ended in ".pt" although an image is written. It is now derived from the
  # seed and the name of the tensor file; ".png" matches every other
  # save_generator_img call in this notebook.
  tensor_name = os.path.splitext(os.path.basename(tensor_path))[0]
  save_generator_img(img_mixed,  f'/content/drive/MyDrive/Bachelorarbeit/ausarbeitung/gfx/style_mixing/mixed_bilder/0-3/Bild{seed}_{tensor_name}.png')

  imshow(img.cpu(), len(img))

In [None]:
# The bare string below is a leftover path; as an expression statement it has no effect.
"/content/drive/MyDrive/Bachelorarbeit/ausarbeitung/gfx/projection/w_tensors/macron_with_noise.pt"

# For the seeds 820..829: replace style layers 0-3 of the generated image with
# those of the stored projection.
for i in range(820,830,1): 
  switch_styleblocks_custom(i, '/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/projection/result_images/w_tensors/hendric_onecycle_high_0.002.pt', [0,1,2,3])
  #switch_styleblocks_custom(i, "/content/drive/MyDrive/Bachelorarbeit/ausarbeitung/gfx/projection/w_tensors/blonde_frau.pt", [4,5,6,7])

##Transfer Style (Single)
First computes the latent code embedding for the given image and then computes the style mixing between the projected image and the image generated from the given seed.

In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
DEVICE = 'cuda'

def style_transfer_single(img1_target_path, img1_save_path, img1_aligned_target_save_path, img1_name, epochs, result_save_path, indexes, seed):
  """Project one image and style-mix it with an image generated from ``seed``.

  Args:
    img1_target_path: Path of the image that is projected.
    img1_save_path: Result directory handed to ``projection_w``; the latent
      code is read back from ``<img1_save_path>/w_tensors/<img1_name>.pt``.
    img1_aligned_target_save_path: Passed through to ``projection_w``.
    img1_name: Name under which the projection is stored.
    epochs: Number of projection epochs.
    result_save_path: File the mixed image is written to.
    indexes: Iterable of layer indices (second axis of w) that are taken from
      the projected image.
    seed: Seed of the generated image that provides the remaining layers.
  """

  # Step 1: embed the target image (fixed learning rate of 0.02).
  projection_w(img1_target_path, epochs, 0.02, img1_save_path, img1_name, img1_aligned_target_save_path)

  # Step 2: latent code of the seed image.
  generator = init_generator(NETWORK_PKL)
  class_label = torch.zeros([1, generator.c_dim], device=DEVICE)
  rng = np.random.RandomState(seed)
  latent_z = torch.from_numpy(rng.randn(1, generator.z_dim)).to(DEVICE)
  w_seed = generator.mapping(latent_z, class_label, truncation_psi=0.7, truncation_cutoff=8)

  # Step 3: latent code of the projected image.
  w_projected = load_tensor(img1_save_path + f'/w_tensors/{img1_name}.pt')

  # Step 4: start from the seed code and overwrite the selected layers.
  w_blend = w_seed.clone()
  for layer in indexes:
    w_blend[:, layer, :] = w_projected[:, layer, :]

  # Step 5: render seed image, mixed image and projected image in one batch.
  batch = torch.cat([w_seed, w_blend, w_projected]).to(DEVICE)
  rendered = generator.synthesis(batch, noise_mode='const', force_fp32=False)

  mixed_only = rendered[1].clone().reshape((1, 3,256,256))
  # uint8 pixel values in HWC layout for the overview plot.
  overview = (rendered.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)

  imshow(overview.cpu(), len(overview))
  plot_generator_img(mixed_only, 'mixed image')
  save_generator_img(mixed_only, result_save_path)



  



In [None]:
# Project hemsworth.jpg for 2000 epochs and insert its style layers 0-3 into the
# image generated from seed 3000; the mixed image is written to the .jpg path below.
style_transfer_single("/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/style-transfer/target_images/hemsworth.jpg",
               "/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/style-transfer/projection_results",
               "/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/style-transfer/aligned_target_images/hemsworth_single_4.jpg",
               "hemsworth_single_5",
               2000,
               "/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/style-transfer/result_images/hemsworth_seed3000_single_0123_optimized.jpg",
               [0,1,2,3],
               3000)


## Transfer Style (Double)
First computes the latent code embedding for two given images and then computes the style mixing between the projected images. This is computationally very expensive and needs many projection epochs for stable results.

In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
DEVICE = 'cuda'

def style_transfer(img1_target_path, img1_save_path, img1_aligned_target_save_path, img1_name, img2_target_path, img2_save_path, img2_aligned_target_save_path, img2_name, epochs, result_save_path, indexes):
  """Project two images and style-mix their latent codes.

  Both images are projected with ``projection_w`` (learning rate 0.02,
  ``epochs`` epochs each). The mixed latent code is the code of image 1 with
  the layers listed in ``indexes`` taken from image 2. Image 1, the mixed
  image and image 2 are displayed; the mixed image is saved to
  ``result_save_path``.

  Args:
    img1_target_path, img2_target_path: Paths of the images to project.
    img1_save_path, img2_save_path: Result directories of the projections;
      the latent codes are read back from ``<dir>/w_tensors/<name>.pt``.
    img1_aligned_target_save_path, img2_aligned_target_save_path: Passed
      through to ``projection_w``.
    img1_name, img2_name: Names under which the projections are stored.
    epochs: Number of projection epochs per image.
    result_save_path: File the mixed image is written to.
    indexes: Iterable of layer indices (second axis of w) taken from image 2.
  """

  # Run both projections first, image 1 before image 2.
  projection_jobs = (
      (img1_target_path, img1_save_path, img1_name, img1_aligned_target_save_path),
      (img2_target_path, img2_save_path, img2_name, img2_aligned_target_save_path),
  )
  for target_path, save_dir, result_name, aligned_path in projection_jobs:
    projection_w(target_path, epochs, 0.02, save_dir, result_name, aligned_path)

  # Read the stored latent codes back.
  w_first = load_tensor(img1_save_path + f'/w_tensors/{img1_name}.pt')
  w_second = load_tensor(img2_save_path + f'/w_tensors/{img2_name}.pt')

  truncation_psi = 0.7  # not used below
  generator = init_generator(NETWORK_PKL)
  label = torch.zeros([1, generator.c_dim], device=DEVICE)  # not used below

  # Image 1 is the base; the selected layers come from image 2.
  w_blend = w_first.clone()
  for layer in indexes:
    w_blend[:, layer, :] = w_second[:, layer, :]

  batch = torch.cat([w_first, w_blend, w_second]).to(DEVICE)
  rendered = generator.synthesis(batch, noise_mode='const', force_fp32=False)

  mixed_only = rendered[1].clone().reshape((1, 3,256,256))
  # uint8 pixel values in HWC layout for the overview plot.
  overview = (rendered.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)

  imshow(overview.cpu(), len(overview))
  plot_generator_img(mixed_only, 'mixed image')
  save_generator_img(mixed_only, result_save_path)
  



In [None]:
# Project macron.jpg and hemsworth.jpg for 3000 epochs each. The mixed image keeps
# the latent code of macron, except for layers 0-3, which come from hemsworth.
style_transfer("/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/style-transfer/target_images/macron.jpg",
               "/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/style-transfer/projection_results",
               "/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/style-transfer/aligned_target_images/macron.jpg",
               "macron_high_3",
               "/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/style-transfer/target_images/hemsworth.jpg",
               "/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/style-transfer/projection_results",
               "/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/style-transfer/aligned_target_images/hemsworth.jpg",
               "hemsworth_high_3",
               3000,
               "/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/style-transfer/result_images/hemsworth-macron_high_3_01234.jpg",
               [0,1,2,3])


## Transfer Style (Double) Tensor
Style mixing between two already projected images. The projection needs many epochs for good results.

In [None]:
def style_transfer_tensor(img1_path, img2_path, result_save_path, indexes):
  """Style-mix two stored latent codes.

  The mixed latent code is the code loaded from ``img1_path`` with the layers
  listed in ``indexes`` taken from the code loaded from ``img2_path``. Image 1,
  the mixed image and image 2 are displayed; the mixed image is saved to
  ``result_save_path``.

  Relies on ``NETWORK_PKL`` and ``DEVICE`` having been set by an earlier cell.

  Args:
    img1_path: Path of the stored latent code that serves as base.
    img2_path: Path of the stored latent code that provides the style layers.
    result_save_path: File the mixed image is written to.
    indexes: Iterable of layer indices (second axis of w) taken from image 2.
  """
  w_base, w_style = load_tensor(img1_path), load_tensor(img2_path)

  truncation_psi = 0.7  # not used below
  generator = init_generator(NETWORK_PKL)
  label = torch.zeros([1, generator.c_dim], device=DEVICE)  # not used below

  # Start from the base code and overwrite the selected layers.
  w_blend = w_base.clone()
  for layer in indexes:
    w_blend[:, layer, :] = w_style[:, layer, :]

  stacked_codes = torch.cat([w_base, w_blend, w_style]).to(DEVICE)
  rendered = generator.synthesis(stacked_codes, noise_mode='const', force_fp32=False)

  mixed_only = rendered[1].clone().reshape((1, 3,256,256))
  # uint8 pixel values in HWC layout for the overview plot.
  overview = (rendered.permute(0, 2, 3, 1) * 127.5 + 128).clamp(0, 255).to(torch.uint8)

  imshow(overview.cpu(), len(overview))
  plot_generator_img(mixed_only, 'mixed image')
  save_generator_img(mixed_only, result_save_path)
  


In [None]:
# Mix two stored projections: the code from hendric_noise.pt is the base and
# layers 0-2 are taken from til_noise.pt.
style_transfer_tensor('/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/projection/result_images/w_tensors/hendric_noise.pt',
               '/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/projection/result_images/w_tensors/til_noise.pt',
               "/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/style-transfer/result_images/hendric_mona_high.jpg",
               [0,1,2])

#Testing
Testing different functionalities.

##Loading and saving

In [None]:
#Generate a random image, display image, save image, load image and display again
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
Path = "/content/drive/MyDrive/Bachelorarbeit/bachelorarbeit/testing"
def load_save_test_z():
  """Round-trip test for a latent vector z.

  Generates a random z, shows its image, stores z as ``Path + '/test.pt'``,
  loads it again and shows the image of the reloaded z.
  """
  device = 'cuda'
  generator_kwargs = dict(truncation_psi=0.7, noise_mode='const')

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=device)

  z = gen_rand_z(G)
  plot_generator_img(G(z, label, **generator_kwargs), 'Image to Save')

  # Write z to disk and read it back.
  tensor_file = Path + '/test.pt'
  save_tensor(z, tensor_file)
  restored_z = load_tensor(tensor_file)

  plot_generator_img(G(restored_z, label, **generator_kwargs), "Reloaded image")


def load_save_test_w():
  """Round-trip test for a latent code w.

  Maps a random z to w, shows its image, stores w as ``Path + '/test.pt'``,
  loads it again and shows the image of the reloaded w.
  """
  device = 'cuda'
  synthesis_kwargs = dict(noise_mode='const', force_fp32=False)

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=device)

  w = G.mapping(gen_rand_z(G), label, truncation_psi=0.7, truncation_cutoff=8)
  plot_generator_img(G.synthesis(w, **synthesis_kwargs), 'Image to Save')

  # Write w to disk and read it back.
  tensor_file = Path + '/test.pt'
  save_tensor(w, tensor_file)
  restored_w = load_tensor(tensor_file)

  plot_generator_img(G.synthesis(restored_w, **synthesis_kwargs), "Reloaded image")

def save_img(seed,i):
  """Generate the image for ``seed`` and save it as ``Bild_A{i}.png``.

  Args:
    seed: Seed of the random latent vector z.
    i: Index used in the output file name.
  """

  device = 'cuda'
  truncation_psi = 0.7
  noise_mode = 'const'

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=device)

  # z was never defined here (its defining line was commented out), so the
  # function either failed or silently picked up an unrelated global `z`.
  # Derive it from the seed, like the other seed-based functions do.
  z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(device)

  img = G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode)

  #plot_generator_img(img, 'Image to Save')

  save_generator_img(img, f'/content/drive/MyDrive/Bachelorarbeit/ausarbeitung/gfx/style_mixing/original_bilder/Bild_A{i}.png')


def load_img(Path):
  """Render a stored latent code w, show the image and save it.

  Args:
    Path: Path of the stored latent code. The parameter shadows the
      module-level ``Path`` constant inside this function.

  The image is always written to the fixed file ``see_reloaded.png``.
  """
  device = 'cuda'
  truncation_psi = 0.7  # not used below

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=device)  # not used below

  stored_w = load_tensor(Path)
  rendered = G.synthesis(stored_w, noise_mode='const', force_fp32=False)

  plot_generator_img(rendered, "Loaded Image")
  save_generator_img(rendered, "/content/drive/MyDrive/Bachelorarbeit/ausarbeitung/gfx/projection/images/see_reloaded.png")

# Render and re-save the stored projection "paul.pt".
load_img("/content/drive/MyDrive/Bachelorarbeit/ausarbeitung/gfx/projection/w_tensors/paul.pt")

##Preprocessing

In [None]:
# Input image and output location for the preprocessing test.
IMG_PATH = "/content/drive/My Drive/Bachelorarbeit/bachelorarbeit/projection/target_images/picasso.jpg"
SAVE_PATH = "/content/drive/My Drive/Bachelorarbeit/bachelorarbeit/projection/aligned_target_images/picasso.jpg"

# Presumably aligns the face in IMG_PATH and stores the result at SAVE_PATH —
# confirm against preprocess_pipeline.
preprocess_pipeline(IMG_PATH, True, SAVE_PATH)

## Noise Mode Testing

In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
DEVICE = 'cuda'

def test_noise_modes(seed, count):
  """Generate the image for ``seed`` with noise mode 'none', save it and show it.

  Args:
    seed: Seed of the random latent vector z.
    count: Running index used in the output file name ``none_noise{count}.png``.
  """

  truncation_psi = 0.7
  noise_mode = 'none'

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=DEVICE)

  z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(DEVICE)
  img_tensor = G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode)

  # The file name used to read the notebook-global loop variable `i` while the
  # `count` parameter was ignored. Use the parameter so the function no longer
  # depends on the caller's variable names.
  save_generator_img(img_tensor,  f"/content/drive/MyDrive/Bachelorarbeit/ausarbeitung/gfx/projection/none_noise{count}.png")

  plot_generator_img(img_tensor, "")


In [None]:
# Run the noise-mode test once per seed; i is the position of the seed in the list.
seeds = [12,14,16]
for i, noise_seed in enumerate(seeds):
 test_noise_modes(noise_seed, i)

##Analysis

In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"
DEVICE = 'cuda'

def analysis_testing(seed):
  """Scratch function: print noise buffers and statistics of random w samples.

  Randomises the constant noise buffers of the generator, prints them, draws a
  few random latent codes w and prints the samples, their mean and a scalar
  standard deviation.

  Args:
    seed: Currently unused; the samples come from ``gen_rand_z``.
  """

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=DEVICE)

  noise_bufs = { name: buf for (name, buf) in G.synthesis.named_buffers() if 'noise_const' in name }

  # Overwrite every constant noise buffer with fresh Gaussian noise.
  for buf in noise_bufs.values():
      buf[:] = torch.randn_like(buf)
      buf.requires_grad = True

  print(noise_bufs)

  num_samples = 2
  w_list = [G.mapping(gen_rand_z(G), label).cpu().numpy().astype(np.float32)  for i in range(num_samples)]
  print(w_list)

  w_avg = np.mean(w_list, axis=0, keepdims=True)
  print(w_avg)

  # Normalise by the number of samples actually drawn. The divisor used to be
  # a hard-coded 10 although only 2 samples are generated.
  w_std = (np.sum((w_list - w_avg) ** 2) / num_samples) ** 0.5
  print(w_std)
 

# Run the analysis scratch function.
analysis_testing(10)

##T-SNE

In [None]:
# T-SNE of random latent vectors z: embed `size` samples into 3 dimensions and
# show them as a 3D scatter plot.
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"

device = 'cuda'
truncation_psi = 0.7
noise_mode = 'const'

G = init_generator(NETWORK_PKL)
label = torch.zeros([1, G.c_dim], device=device)

# Number of latent vectors to embed.
size = 300

z_list = [gen_rand_z(G).cpu().numpy().astype(np.float32) for i in range(size)]
# Drop axis 1 of the stacked samples — assumes every z has shape (1, z_dim); TODO confirm.
np_zs = np.array(z_list)[:,0,:]
print(np_zs.shape)

tsne = TSNE(n_components=3, verbose=1, random_state=123, perplexity=40)
embed = tsne.fit_transform(np_zs)

# Split the embedding into one coordinate tuple per axis.
# NOTE: this rebinds the notebook-global names x, y and z.
x, y, z = list(zip(*embed))


fig = pylab.figure()
ax = fig.add_subplot(111, projection = '3d')
sc = ax.scatter(x,y,z)


#array = ["Male", "Female"]

#color = np.random.randint(0,2,size)
#color = random.choices(array, k=size)

#df = pd.DataFrame()
#df["y"] = color
#df["comp-1"] = embed[:,0]
#df["comp-2"] = embed[:,1]


# hue=df.y.tolist()
#sns.scatterplot(x="comp-1", y="comp-2",hue=df.y.tolist(),
#                palette=sns.color_palette("hls", 3),
#                data=df).set(title="Random Test data") 


#z_img_list = [G(z, label,  truncation_psi=truncation_psi, noise_mode=noise_mode).to(DEVICE) for z in z_list]

#img = G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode)

In [None]:
# Second T-SNE experiment: same data generation as in the previous cell, but with
# learning_rate='auto' and perplexity=20; the embedding is plotted twice.
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"

device = 'cuda'
truncation_psi = 0.7
noise_mode = 'const'

G = init_generator(NETWORK_PKL)
label = torch.zeros([1, G.c_dim], device=device)

# Number of latent vectors to embed.
size = 300

z_list = [gen_rand_z(G).cpu().numpy().astype(np.float32) for i in range(size)]
# Drop axis 1 of the stacked samples — assumes every z has shape (1, z_dim); TODO confirm.
np_zs = np.array(z_list)[:,0,:]
print(np_zs.shape)

tsne = TSNE(n_components=3, learning_rate='auto',
                  perplexity=20)
embed = tsne.fit_transform(np_zs)

# Split the embedding into one coordinate tuple per axis.
# NOTE: this rebinds the notebook-global names x, y and z.
x, y, z = list(zip(*embed))

# axes instance
fig = plt.figure(figsize=(6,6))
ax = Axes3D(fig, auto_add_to_figure=False)
fig.add_axes(ax)

#color = np.random.randint(0,2,size)
#color = random.choices(array, k=size)


# get colormap from seaborn (cmap is not used by the plots below)
cmap = ListedColormap(sns.color_palette("husl", 256).as_hex())

# plot
sc = ax.scatter(x, y, z, s=40, c='r', marker='o', alpha=1)
ax.set_xlabel('X Label')
ax.set_ylabel('Y Label')
ax.set_zlabel('Z Label')

# Same coordinates again, for a second plot in a pylab figure.
x, y, z = list(zip(*embed))


fig = pylab.figure()
ax = fig.add_subplot(111, projection = '3d')
sc = ax.scatter(x,y,z)


#array = ["Male", "Female"]

#color = np.random.randint(0,2,size)
#color = random.choices(array, k=size)

#df = pd.DataFrame()
#df["y"] = color
#df["comp-1"] = embed[:,0]
#df["comp-2"] = embed[:,1]


# hue=df.y.tolist()
#sns.scatterplot(x="comp-1", y="comp-2",hue=df.y.tolist(),
#                palette=sns.color_palette("hls", 3),
#                data=df).set(title="Random Test data") 


#z_img_list = [G(z, label,  truncation_psi=truncation_psi, noise_mode=noise_mode).to(DEVICE) for z in z_list]

#img = G(z, label, truncation_psi=truncation_psi, noise_mode=noise_mode)

## Truncation Psi

In [None]:
NETWORK_PKL = "https://api.ngc.nvidia.com/v2/models/nvidia/research/stylegan2/versions/1/files/stylegan2-ffhq-256x256.pkl"


def generate_img_psi(seed, psi):
  """Generate the image for ``seed`` with truncation ``psi`` and save it.

  Args:
    seed: Seed of the random latent vector z.
    psi: Truncation psi handed to the generator.

  NOTE: the output file name ``BildB{seed}.png`` does not contain ``psi``, so
  calls with the same seed and different psi values overwrite each other.
  """
  device = 'cuda'
  noise_mode = 'const'

  G = init_generator(NETWORK_PKL)
  label = torch.zeros([1, G.c_dim], device=device)

  z = torch.from_numpy(np.random.RandomState(seed).randn(1, G.z_dim)).to(device)

  # Use the psi argument; it was previously ignored in favour of a hard-coded 0.7.
  img = G(z, label, truncation_psi=psi, noise_mode=noise_mode)
  save_generator_img(img,  f'/content/drive/MyDrive/Bachelorarbeit/ausarbeitung/gfx/style_mixing/original_bilder/BildB{seed}.png')

In [None]:
# Save the images of the seeds 1500..1504, passing a truncation psi of 0.7.
seeds = [1500,1501,1502,1503,1504]
for seed in seeds:
  generate_img_psi(seed, 0.7)