
# **CAT Faces Generator - Denoising Diffusion Probabilistic Model**

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Deyht/AI_astro_ED_AAIF/blob/main/codes/CNN/diffusion/cat_faces_diffusion.ipynb)

---


### **CIANNA BETA DEV installation**

/!\ WARNING /!\
This beta version is not suited for general applications and has been modified for the specific case covered in this notebook. Some functions might behave differently from what is expected.
Do not use it outside this notebook!

#### Query GPU allocation and properties

If nvidia-smi fails, it might indicate that you launched the Colab session without GPU reservation.  
To change the type of reservation go to "Runtime"->"Change runtime type" and select "GPU" as your hardware accelerator.

In [None]:
%%shell

# Show the GPU attached to this session.
nvidia-smi

cd /content/

# Fetch NVIDIA's CUDA samples, which contain the deviceQuery utility.
git clone https://github.com/NVIDIA/cuda-samples/

cd /content/cuda-samples/Samples/1_Utilities/deviceQuery/

# Configure and build deviceQuery inside its own source directory.
cmake CMakeLists.txt

make SMS="50 60 70 80"

# Write two lines to ~/cuda_infos.txt, read back by the compilation cell:
#   line 1: tail of the "Capability" line (from column 50 onward)
#   line 2: tail of the "CUDA Driver Version / Runtime Version" line
#           (from column 57 onward; presumably the runtime version - confirm)
./deviceQuery | grep Capability | cut -c50- > ~/cuda_infos.txt
./deviceQuery | grep "CUDA Driver Version / Runtime Version" | cut -c57- >> ~/cuda_infos.txt

cd ~/

If you are granted a GPU that supports high FP16 compute scaling (e.g., the Tesla T4), it is advised to change the mixed_precision parameter in the last cell to "FP16C_FP32A".  
See the detailed description of mixed precision support in CIANNA on the [System Requirements](https://github.com/Deyht/CIANNA/wiki/1\)-System-Requirements) wiki page.

#### Clone CIANNA git repository

In [None]:
%%shell

cd /content/

# Download and unpack the CIANNA experimental archive, then rename the
# extracted directory to /content/CIANNA, the path used by the later cells.
wget https://share.obspm.fr/s/KNw8aYAEjfxJzsR/download/CIANNA_exp_07_02_25.tar.gz
tar -xvzf CIANNA_exp_07_02_25.tar.gz
mv CIANNA_exp_07_02_25 CIANNA

# Enter the source tree (the following cells cd into it again explicitly).
cd CIANNA

#### Compiling CIANNA for the allocated GPU generation

There is no guaranteed forward or backward compatibility between Nvidia GPU generations, and some capabilities are generation-specific. For these reasons, CIANNA must be given the platform GPU generation at compile time.
The following cell will automatically update all the necessary files based on the detected GPU and compile CIANNA.

In [None]:
%%shell

cd /content/CIANNA

# Factor turning a compute capability such as 7.5 into an sm number such as 75.
mult="10"
cat ~/cuda_infos.txt
# Line 1 of cuda_infos.txt is the compute capability, line 2 the CUDA version.
comp_cap="$(sed '1!d' ~/cuda_infos.txt)"
cuda_vers="$(sed '2!d' ~/cuda_infos.txt)"

# Add "-D CUDA_OLD" when the detected CUDA version is numerically below 11.1.
lim="11.1"
old_arg=$(awk '{if ($1 < $2) print "-D CUDA_OLD";}' <<<"${cuda_vers} ${lim}")

# sm number = compute capability * 10.
sm_val=$(awk '{print $1*$2}' <<<"${mult} ${comp_cap}")

# Generation define: GEN_AMPERE for sm >= 80, GEN_VOLTA for sm >= 70, empty otherwise.
gen_val=$(awk '{if ($1 >= 80) print "-D GEN_AMPERE"; else if($1 >= 70) print "-D GEN_VOLTA";}' <<<"${sm_val}")

# Replace every line of compile.cp that contains "arch=sm" with a cuda_arg
# assignment carrying the detected sm number and the optional defines above.
sed -i "s/.*arch=sm.*/\\t\tcuda_arg=\"\$cuda_arg -D CUDA -D comp_CUDA -lcublas -lcudart -arch=sm_$sm_val $old_arg $gen_val\"/g" compile.cp
# Point "/cuda-XX.Y" path fragments at the detected CUDA version in both files.
sed -i "s/\/cuda-[0-9][0-9].[0-9]/\/cuda-$cuda_vers/g" compile.cp
sed -i "s/\/cuda-[0-9][0-9].[0-9]/\/cuda-$cuda_vers/g" src/python_module_setup.py

# Build with the CUDA and PY_INTERF arguments of the project's compile script.
./compile.cp CUDA PY_INTERF

# Rename the version-suffixed build directory to a fixed name.
mv src/build/lib.linux-x86_64-* src/build/lib.linux-x86_64

#### Testing CIANNA installation

**IMPORTANT NOTE**   
CIANNA is mainly used in a script fashion and was not designed to run in notebooks. Every code cell that directly invokes CIANNA functions must be run as a script to avoid possible errors.  
To do so, the cell must have the following structure.

```
%%shell

cd /content/CIANNA

python3 - <<EOF

[... your python code ...]

EOF
```

This syntax allows one to easily edit Python code in the notebook while running the cell as a script. Note that notebook variables cannot be accessed from the cell in this context.


### **CAT Faces for generative models**

This dataset comprises 15747 close-up images of cat faces at a 64x64 resolution. There are no labels, as it is intended for training generative models.

#### Data Handling

In [None]:
%cd /content/

import numpy as np
import glob
import matplotlib.pyplot as plt
from PIL import Image

import os
import albumentations as A

def cosin_schedule(t, T, s):
	"""Return the squared-cosine schedule value for step(s) t.

	t is the step (scalar or NumPy array), T the total number of steps and
	s a small offset added to the normalised step t/T. The result is
	cos(((t/T + s) / (1 + s)) * pi/2) squared, with the same shape as t.
	"""
	quarter_turn = np.pi/2
	phase = (t/T+s)/(1.0+s)*quarter_turn
	return np.cos(phase)**2

# Image geometry: square side length in pixels and number of colour channels.
im_size = 64
im_depth = 3

# Download and unpack the dataset archive only when the "cats" directory is absent.
if(not os.path.isdir("cats")):
		os.system("wget https://share.obspm.fr/s/58BkbEaALbsSWNG/download/cat_faces_gen.tar.gz")
		os.system("tar -xzf cat_faces_gen.tar.gz")


# Paths of all JPEG files found in cats/ (glob does not sort its results).
file_names = glob.glob("cats/*.jpg")
# NOTE(review): hard-coded count used as the upper bound for random indices
# into file_names; presumably equal to len(file_names) - confirm.
nb_raw_images = 15747

# Number of points in the noise schedule.
n_step = 200

# Bounds onto which the schedule is rescaled below.
min_signal_rate = 0.05**2
max_signal_rate = 0.95**2

# Cosine schedule sampled at n_step evenly spaced points in [0, n_step], then
# shifted and scaled using the min and max of all but its last element.
p_alpha_t = cosin_schedule(np.linspace(0,n_step, n_step),n_step,0.0008)
p_alpha_t -= np.min(p_alpha_t[:-1])
p_alpha_t /= np.max(p_alpha_t[:-1])

# Affine map of the normalised schedule onto [min_signal_rate, max_signal_rate].
# The last element was left out of the normalisation, so it may fall slightly
# outside that interval.
p_alpha_t = p_alpha_t * (max_signal_rate - min_signal_rate) + min_signal_rate

# Augmentation pipeline: colour jitter on every image (p=1.0) followed by a
# horizontal flip applied with probability 0.5.
transform = A.Compose([
	A.ColorJitter(brightness=(0.9,1.1), contrast=(0.9,1.1), saturation=(0.9,1.1), hue=0.05, p=1.0),
	A.HorizontalFlip(p=0.5),
])

def create_batch(nb_size):
	"""Build one random training batch of noised images and noise targets.

	For each sample, a random dataset image is augmented with the
	module-level transform, rescaled from [0, 255] to [-1, 1], and mixed
	with Gaussian noise according to a randomly drawn entry of p_alpha_t.

	Parameters:
		nb_size: number of samples in the batch.

	Returns:
		(data, targets), both float32:
		data has shape (nb_size, im_size*im_size*(im_depth+1)); each row
		holds the noised image stored channel after channel, followed by
		one extra plane filled with the noise level sqrt(1 - p_alpha_t).
		targets has shape (nb_size, im_size*im_size*im_depth); each row
		holds the noise that was added, in the same channel-major layout.
	"""
	plane = im_size*im_size
	im_flat = plane*im_depth

	data = np.zeros((nb_size,plane*(im_depth+1)), dtype="float32")
	targets = np.zeros((nb_size,im_flat), dtype="float32")

	for i in range(0,nb_size):
		i_d = int(np.random.random()*nb_raw_images)

		# Close the file handle as soon as the pixels are copied into memory.
		with Image.open(file_names[i_d]) as img:
			patch = np.asarray(img)
		transformed = transform(image=patch)
		patch = (transformed['image']/255.0)*2.0 - 1.0

		# Schedule entries 1 .. n_step-2 are used (index 0 is never drawn).
		step = np.random.randint(0,n_step-2)
		signal_rate = p_alpha_t[step+1]
		noise_level = np.sqrt(1.0-signal_rate)

		noise_patch = np.random.normal(loc=0.0, scale=1.0, size=(im_size,im_size,im_depth))

		patch_in = np.sqrt(signal_rate)*patch + noise_level*noise_patch

		# Move channels first, then flatten in C order: this yields each
		# channel plane flattened row by row, one plane after the other.
		data[i,:im_flat] = np.transpose(patch_in, (2,0,1)).reshape(-1)
		targets[i,:] = np.transpose(noise_patch, (2,0,1)).reshape(-1)
		# The trailing plane carries the noise level; its offset follows
		# im_depth instead of a hard-coded channel count.
		data[i,im_flat:] = noise_level

	return data, targets


#### Visualize random subset of raw data

In [None]:
# Show a sq_size x sq_size grid of randomly picked, unmodified dataset images.
sq_size = 5
grid_side = 1.4*sq_size

# One random image index per grid cell (indices may repeat).
im_index = np.random.randint(0,nb_raw_images, sq_size**2)

fig, axs = plt.subplots(sq_size, sq_size, figsize=(grid_side,grid_side), dpi=250, constrained_layout=True)

patch = np.zeros((im_size, im_size,3))
for cell in range(sq_size**2):
  # Row-major position of this cell in the grid.
  i, j = divmod(cell, sq_size)
  panel = axs[i][j]
  panel.set_axis_off()
  patch = np.asarray(Image.open(file_names[im_index[cell]]))
  panel.imshow(patch)
plt.show()

#### Visualize a training noise chain




In [None]:
# Display one augmented image at increasing noise levels of the schedule.
steps_list = np.arange(0,n_step, 20)
nb_shown = len(steps_list)

print (steps_list)

im_id = 0

# Load, augment and rescale the chosen image from [0, 255] to [-1, 1].
patch = np.asarray(Image.open(file_names[im_id]))
transformed = transform(image=patch)
patch = (transformed['image']/255.0)*2.0 - 1.0

fig, axs = plt.subplots(1, nb_shown, figsize=(2*nb_shown,2), dpi=250, constrained_layout=True)

for i, step in enumerate(steps_list):
  signal_rate = p_alpha_t[step]
  # Fresh noise is drawn for every displayed step.
  noise_patch = np.random.normal(loc=0.0, scale=1.0, size=(im_size,im_size,im_depth))
  patch_in = np.sqrt(signal_rate)*patch + np.sqrt(1.0-signal_rate)*noise_patch

  axs[i].set_axis_off()
  # Map back from [-1, 1] to [0, 1] for display.
  axs[i].imshow(np.clip((patch_in+1.0)*0.5,0.0,1.0))

plt.show()

#### Visualize a training batch

In [None]:
# Display the noised inputs of one freshly generated training batch.
sq_size = 5
grid_side = 1.4*sq_size
plane = im_size*im_size

data_augm, target_augm = create_batch(sq_size**2)

fig, axs = plt.subplots(sq_size, sq_size, figsize=(grid_side,grid_side), dpi=250, constrained_layout=True)

patch = np.zeros((im_size, im_size,3))
for cell in range(sq_size**2):
  i, j = divmod(cell, sq_size)
  axs[i][j].set_axis_off()
  # Map the flat sample from [-1, 1] to [0, 1] for display.
  raw_patch = np.clip((data_augm[cell]+1.0)*0.5,0.0,1.0)
  # The first im_depth planes are the image channels, stored one after the
  # other; rebuild the (rows, cols, channel) layout expected by imshow.
  channels_first = raw_patch[:im_depth*plane].reshape(im_depth, im_size, im_size)
  patch[:,:,:im_depth] = channels_first.transpose(1, 2, 0)
  axs[i][j].imshow(patch)
plt.show()

#### Generate a batch of images from the pre-trained model using probabilistic sampling

In [None]:
%%shell

cd /content/

# Feed the Python code below to the interpreter on stdin so that it runs as a
# standalone script. The heredoc delimiter is unquoted, so the shell expands
# dollar signs, backticks and backslash sequences in the script before Python
# reads it.
python3 - <<EOF

import numpy as np
import glob
import matplotlib.pyplot as plt
from PIL import Image

import os, sys
sys.path.insert(0,glob.glob('/content/CIANNA/src/build/lib.*/')[-1])
import CIANNA_exp as cnn

def i_ar(int_list):
	"""Return int_list as a new NumPy array of the default integer dtype."""
	as_int_array = np.array(int_list, dtype="int")
	return as_int_array

def f_ar(float_list):
	"""Return float_list as a new NumPy array of dtype float32."""
	as_float_array = np.array(float_list, dtype="float32")
	return as_float_array

def cosin_schedule(t, T, s):
	"""Evaluate the squared-cosine schedule at step(s) t.

	t is the step (scalar or NumPy array), T the total number of steps and
	s a small offset added to t/T before normalising by (1 + s). The angle
	spans a quarter turn, so the value is cos(angle) squared.
	"""
	angle = (t/T+s)/(1.0+s)*(np.pi/2)
	cos_value = np.cos(angle)
	return cos_value**2

#Image geometry: square side length in pixels and number of colour channels
im_size = 64
im_depth = 3

#Flat size of one channel plane and of a full image (all channels)
f_im_s = im_size*im_size
im_s = f_im_s*im_depth

#Number of schedule points used for sampling.
#The model was trained with 200, but this value can be modified
n_step = 40

#Bounds onto which the schedule is rescaled below
min_signal_rate = 0.05**2
max_signal_rate = 0.95**2

#Cosine schedule sampled at n_step evenly spaced points in [0, n_step], then
#shifted and scaled using the min and max of all but its last element
p_alpha_t = cosin_schedule(np.linspace(0,n_step, n_step),n_step,0.0008)
p_alpha_t -= np.min(p_alpha_t[:-1])
p_alpha_t /= np.max(p_alpha_t[:-1])

#Affine map of the normalised schedule onto [min_signal_rate, max_signal_rate]
p_alpha_t = p_alpha_t * (max_signal_rate - min_signal_rate) + min_signal_rate


#Initialise CIANNA for im_size x im_size inputs with im_depth+1 channels
#(the image channels plus the noise-level plane filled in further down) and a
#flat output of im_s values
cnn.init(in_dim=i_ar([im_size,im_size]), in_nb_ch=im_depth+1, out_dim=im_s, \
		bias=0.1, b_size=8, use_wema=1, comp_meth="C_CUDA", dynamic_load=1, mixed_precision="FP32C_FP32A") #Change to C_BLAS or C_NAIV

#Fetch the pre-trained weights when they are not already present.
#NOTE(review): the guard and cnn.load use cat_gen_diff_beta_large.dat while the
#URL ends in cat_gen_diff_beta_large_bin.dat - confirm which file name wget
#actually writes and whether the load arguments match that file format
if(not os.path.isfile("cat_gen_diff_beta_large.dat")):
		os.system("wget https://share.obspm.fr/s/QRBHAbt2wprxZNR/download/cat_gen_diff_beta_large_bin.dat")

cnn.load("cat_gen_diff_beta_large.dat",0,0)

#nb_test: number of images generated in parallel
#n_dif_chain x n_dif_steps: rows x columns of the two diffusion-chain figures
#sq_size: side of the final grid of generated images
nb_test = 32
n_dif_chain = 5
n_dif_steps = 10
sq_size = 5

#Sampling mode: "probabilistic" or "deterministic"
inference_mode = "probabilistic"
#Any value other than "probabilistic" sets the eta noise term to 0 in the sampling loop

#Network input and (all-zero) target buffers, plus the initial Gaussian noise
input_images = np.zeros((nb_test,f_im_s*(im_depth+1)), dtype="float32")
targets_diff = np.zeros((nb_test,f_im_s*im_depth), dtype="float32")
input_noise = np.random.normal(loc=0.0, scale=1.0, size=(nb_test,f_im_s*im_depth))

#Scratch image reused when drawing, and interpolation passed to imshow
patch = np.zeros((im_size, im_size,im_depth))
interp = "bilinear"


#Start from pure noise in the image channels; the extra plane holds the noise
#level of the second-to-last schedule entry
input_images[:,0:im_s] = input_noise[:,:]
input_images[:,im_s:] = np.sqrt(1.0-p_alpha_t[-2])

cnn.create_dataset("TEST", nb_test, input_images, targets_diff)

#For display of several diffusion step of the same input
fig1, axs1 = plt.subplots(n_dif_chain, n_dif_steps, figsize=(n_dif_steps*2, n_dif_chain*2), dpi=250, constrained_layout=True)
fig2, axs2 = plt.subplots(n_dif_chain, n_dif_steps, figsize=(n_dif_steps*2, n_dif_chain*2), dpi=250, constrained_layout=True)

block_size = n_step // n_dif_steps

dif_im_count = 0
for step in range(1,n_step-1):
	cnn.forward(saving=2, no_error=1, silent=0)

	file_name = "fwd_res/net0_%04d.dat"%(0)
	fwd_dat = np.fromfile(file_name, dtype="float32")
	fwd_dat = np.reshape(fwd_dat,(nb_test,im_s))

	np1 = n_step-(step+1)
	np2 = n_step-(step+2)

	new_images = (input_images[:,0:im_s] - np.sqrt(1.0-p_alpha_t[np1])*fwd_dat[:,:])/(np.sqrt(p_alpha_t[np1]))
	pred_noise = fwd_dat

	if(inference_mode == "probabilistic"):
		eta = np.sqrt((1-p_alpha_t[np2])/(1-p_alpha_t[np1]))*np.sqrt(1-p_alpha_t[np1]/p_alpha_t[np2])
	else:
		eta = 0.0

	new_noise = np.random.normal(loc=0.0, scale=1.0, size=(nb_test,im_s))
	input_images[:,0:im_s] = np.sqrt(p_alpha_t[np2])*new_images[:,:] + np.sqrt(1.0-p_alpha_t[np2] - eta**2)*pred_noise + eta*new_noise[:,:]
	input_images[:,im_s:] =  np.sqrt(1.0-p_alpha_t[np2])

	j = int(step/block_size)

	if((step+1)%block_size == 0):
		for k in range(0,n_dif_chain):
			for depth in range(0,im_depth):
				patch[:,:,depth] = np.clip(np.reshape(new_images[k][depth*f_im_s:(depth+1)*f_im_s],(im_size,im_size)),-1.0,1.0)
			axs1[k][j].imshow((patch[:,:,:]+1.0)*0.5, vmax=0.99, vmin=0.01, interpolation=interp)
			axs1[k][j].axis('off')
			for depth in range(0,im_depth):
				patch[:,:,depth] = np.clip(np.reshape(input_images[k][depth*f_im_s:(depth+1)*f_im_s],(im_size,im_size)),-1.0,1.0)
			axs2[k][j].imshow((patch[:,:,:]+1.0)*0.5, vmax=0.99, vmin=0.01, interpolation=interp)
			axs2[k][j].axis('off')

	cnn.delete_dataset("TEST", silent=0)
	cnn.create_dataset("TEST", nb_test, input_images, targets_diff, silent=0)


#Last forward pass on the inputs left by the loop, then drop the dataset
cnn.forward(saving=2, no_error=1, silent=0)
cnn.delete_dataset("TEST", silent=0)

#file_name is the path set inside the loop above
fwd_dat = np.fromfile(file_name, dtype="float32")
fwd_dat = np.reshape(fwd_dat,(nb_test,im_s))

#Final clean-image estimate, computed with the first schedule entry
new_images = (input_images[:,0:im_s] - np.sqrt(1.0-p_alpha_t[0])*fwd_dat[:,:])/(np.sqrt(p_alpha_t[0]))

#Fill the last column of both chain figures with the same final image
for k in range(0,n_dif_chain):
	for depth in range(0,im_depth):
		patch[:,:,depth] = np.clip(np.reshape(new_images[k][depth*f_im_s:(depth+1)*f_im_s],(im_size,im_size)),-1.0,1.0)
	axs1[k][n_dif_steps-1].imshow((patch[:,:,:]+1.0)*0.5, vmax=0.99, vmin=0.01, interpolation=interp)
	axs1[k][n_dif_steps-1].axis('off')
	axs2[k][n_dif_steps-1].imshow((patch[:,:,:]+1.0)*0.5, vmax=0.99, vmin=0.01, interpolation=interp)
	axs2[k][n_dif_steps-1].axis('off')

fig1.savefig("dif_chain_img_fig.png", dpi=250)
fig2.savefig("dif_chain_noisy_img_fig.png", dpi=250)

#For display of multiple final generations for multiple inputs.
#Draws the first sq_size*sq_size rows of new_images on a square grid and saves
#the figure as all_dif_fig.png
fig, axs = plt.subplots(sq_size, sq_size, figsize=(1.4*sq_size,1.4*sq_size), dpi=250, constrained_layout=True)

#Dedicated display buffer for this grid, sized from im_depth, so the scratch
#image used by the chain figures is left untouched
fpatch = np.zeros((im_size, im_size,im_depth))
for i in range(0, sq_size):
	for j in range(0, sq_size):
		axs[i][j].set_axis_off()
		#Map the flat image from [-1, 1] to [0, 1] for display
		raw_patch = np.clip((new_images[i*sq_size+j]+1.0)*0.5,0.0,1.0)
		#Each channel is stored as one contiguous plane of f_im_s values
		for k in range(0,im_depth):
			fpatch[:,:,k] = np.reshape(raw_patch[k*f_im_s:(k+1)*f_im_s], (im_size, im_size))
		axs[i][j].imshow(fpatch)
#Save through the figure handle instead of relying on the current pyplot figure
fig.savefig("all_dif_fig.png", dpi=250)

EOF


In [None]:
from PIL import Image
import matplotlib.pyplot as plt

# Display the three figures written by the generation script, in order:
# denoised chain, noisy chain, then the grid of final generations.
# Each entry is (file name, side of the square figure in inches).
saved_figures = (
  ("dif_chain_img_fig.png", 8),
  ("dif_chain_noisy_img_fig.png", 8),
  ("all_dif_fig.png", 4),
)

for fig_file, fig_side in saved_figures:
  im = Image.open(fig_file)
  plt.figure(figsize=(fig_side,fig_side), dpi=250)
  plt.imshow(im)
  plt.gca().axis('off')
  plt.show()