# Import et préparation de l'environnement



In [None]:
import shutil
import time
import os
import random
import imageio
import subprocess
from IPython import display
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

import numpy as np
import PIL
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

import torch
import torchvision
import torch.nn as nn
from torch.utils.tensorboard import SummaryWriter

## Redémarrer l'environnement d'exécution du notebook après avoir exécuté la cellule suivante pour prendre en compte les changements.

Réinstallation de PyTorch dans la version correspondant à la version de CUDA installée.

In [None]:
# Ask nvcc for its version banner and keep the comma-separated field that
# starts with "release"; its last word is the CUDA version string.
nvcc_banner = subprocess.check_output(["nvcc", "--version"]).decode("UTF-8")
release_fields = [field for field in nvcc_banner.split(", ") if field.startswith("release")]
CUDA_version = release_fields[0].split(" ")[-1]
print("CUDA version:", CUDA_version)

# Wheel suffix per CUDA release; any version not listed falls back to "+cu110".
suffix_by_cuda_version = {
    "10.0": "+cu100",
    "10.1": "+cu101",
    "10.2": "",
}
torch_version_suffix = suffix_by_cuda_version.get(CUDA_version, "+cu110")

# Install torch 1.7.1 / torchvision 0.8.2 with the wheel suffix selected above
# (IPython substitutes {torch_version_suffix}), together with ftfy and regex.
! pip install torch==1.7.1{torch_version_suffix} torchvision==0.8.2{torch_version_suffix} -f https://download.pytorch.org/whl/torch_stable.html ftfy regex

Récupération du package CLIP pour télécharger les modèles pré-entraînés.


In [None]:
# Clone from /content/ so the repository ends up in /content/CLIP.
%cd /content/

!git clone https://github.com/openai/CLIP.git

# Move into the clone before importing `clip`.
# NOTE(review): CLIP itself is not pip-installed in this cell, so the import
# presumably resolves from the working directory -- confirm.
%cd /content/CLIP/

!pip install ftfy

import clip

# Return to /content/ for the rest of the notebook.
%cd /content/

Connexion à Drive pour sauvegarder et charger les archives de logs

In [None]:
# Mount Google Drive under /content/drive; the log archive is written to and
# read from a path below this mount point later in the notebook.
from google.colab import drive
drive.mount('/content/drive')

# Implémentation de SIREN et du modèle d'entraînement

In [None]:
class SineLayer(nn.Module):
    """Linear layer followed by a scaled sine: ``sin(omega_0 * linear(x))``.

    Args:
        in_features: Number of input features of the linear map.
        out_features: Number of output features of the linear map.
        bias: Whether the linear map has a bias term.
        is_first: Selects which weight-initialisation range is used
            (see ``init_weights``).
        omega_0: Factor applied to the linear output before the sine.
    """

    def __init__(self, in_features, out_features, bias=True,
                 is_first=False, omega_0=30):
        super().__init__()
        self.in_features = in_features
        self.is_first = is_first
        self.omega_0 = omega_0

        self.linear = nn.Linear(in_features, out_features, bias=bias)
        self.init_weights()

    def init_weights(self):
        """Redraw the linear weights uniformly in ``[-bound, bound]``.

        ``bound`` is ``1 / in_features`` when ``is_first`` is set and
        ``sqrt(6 / in_features) / omega_0`` otherwise. The bias is not touched.
        """
        if self.is_first:
            bound = 1 / self.in_features
        else:
            bound = np.sqrt(6 / self.in_features) / self.omega_0

        with torch.no_grad():
            self.linear.weight.uniform_(-bound, bound)

    def forward(self, input):
        """Return ``sin(omega_0 * linear(input))``."""
        scaled = self.omega_0 * self.linear(input)
        return torch.sin(scaled)

    def forward_with_intermediate(self, input):
        """Return ``(sin(z), z)`` where ``z = omega_0 * linear(input)``."""
        scaled = self.omega_0 * self.linear(input)
        return torch.sin(scaled), scaled
    
    
class Siren(nn.Module):
    """Stack of ``SineLayer`` blocks mapping 2-D coordinates to a 3-channel image.

    Args:
        img_side_size: Side length used to reshape the output into a square
            image; ``forward`` therefore expects ``img_side_size ** 2``
            coordinate rows.
        hidden_layers: Number of hidden ``SineLayer`` blocks after the first one.
        neurons_par_layer: Width of every hidden layer.
        outermost_linear: If true the last layer is a plain ``nn.Linear``
            (no sine); otherwise it is a ``SineLayer``.
        first_omega_0: ``omega_0`` of the first layer.
        hidden_omega_0: ``omega_0`` of all other layers; also used to scale the
            initialisation range of the final linear layer.
    """

    def __init__(self, img_side_size, hidden_layers, neurons_par_layer, outermost_linear=True,
                 first_omega_0=30, hidden_omega_0=30.):
        super().__init__()

        self.img_size = img_side_size

        layers = [SineLayer(2, neurons_par_layer,
                            is_first=True, omega_0=first_omega_0)]

        for _ in range(hidden_layers):
            layers.append(SineLayer(neurons_par_layer, neurons_par_layer,
                                    is_first=False, omega_0=hidden_omega_0))

        if outermost_linear:
            final_linear = nn.Linear(neurons_par_layer, 3)

            # Same uniform range as the hidden sine layers.
            bound = np.sqrt(6 / neurons_par_layer) / hidden_omega_0
            with torch.no_grad():
                final_linear.weight.uniform_(-bound, bound)

            layers.append(final_linear)
        else:
            layers.append(SineLayer(neurons_par_layer, 3,
                                    is_first=False, omega_0=hidden_omega_0))

        self.net = nn.Sequential(*layers)

    def _device(self):
        """Return the device holding this model's parameters."""
        return next(self.parameters()).device

    def forward(self, coords):
        """Evaluate the network on ``coords`` and return a ``(1, 3, S, S)`` tensor.

        ``coords`` is copied, detached and moved to the model's device, so the
        caller's tensor is left untouched and the model works on CPU as well
        as on GPU. The raw network output is returned (no sigmoid is applied).
        """
        coords = coords.clone().detach().to(self._device()).requires_grad_(True)
        output = self.net(coords)
        return output.view(1, self.img_size, self.img_size, 3).permute(0, 3, 1, 2)

    def forward_with_activations(self, coords, retain_grad=False):
        """Run the network and return every intermediate activation.

        Only intended for visualising activations.

        Args:
            coords: Input coordinates; copied, detached and moved to the
                model's device before use.
            retain_grad: If true, ``retain_grad()`` is called on each stored
                tensor so its gradient is kept after a backward pass.

        Returns:
            An ``OrderedDict`` whose first entry ``'input'`` is the coordinate
            tensor fed to the network. Every following key is
            ``'<layer class>_<counter>'``. A ``SineLayer`` contributes two
            consecutive entries (value before the sine, then the sine output);
            any other layer contributes one entry (its output).
        """
        # Imported here because the notebook's import cell does not provide it.
        from collections import OrderedDict

        activations = OrderedDict()

        activation_count = 0
        x = coords.clone().detach().to(self._device()).requires_grad_(True)
        activations['input'] = x
        for layer in self.net:
            if isinstance(layer, SineLayer):
                x, intermed = layer.forward_with_intermediate(x)

                if retain_grad:
                    x.retain_grad()
                    intermed.retain_grad()

                activations['_'.join((str(layer.__class__), "%d" % activation_count))] = intermed
                activation_count += 1
            else:
                x = layer(x)

                if retain_grad:
                    x.retain_grad()

            # Layer output, stored for both kinds of layer.
            activations['_'.join((str(layer.__class__), "%d" % activation_count))] = x
            activation_count += 1

        return activations


def get_mgrid(sidelen, dim=2):
    """Return a flattened grid of coordinates spanning [-1, 1] on every axis.

    Args:
        sidelen: Number of evenly spaced samples per axis.
        dim: Number of axes.

    Returns:
        Tensor of shape ``(sidelen ** dim, dim)``; each row is one grid point,
        with the last axis varying fastest.
    """
    axis = torch.linspace(-1, 1, steps=sidelen)
    grids = torch.meshgrid(*([axis] * dim))
    return torch.stack(grids, dim=-1).reshape(-1, dim)

In [None]:

class SirenWrapper():
    """Optimise an image-generating model against a tokenised text prompt.

    The model output is cropped, resized, normalised and encoded by
    ``perceptor``; the loss is the negated, scaled cosine similarity between
    the crop encodings and the text encoding. Metrics and the generated image
    are written to TensorBoard.

    Args:
        starting_text: Tokenised prompt passed to ``perceptor.encode_text``.
        img_size: Side length of the generated square image.
        perceptor: Model exposing ``encode_image`` and ``encode_text``.
        siren: Image model called with the coordinate grid.
        optimizer: Optimizer class, called as ``optimizer(params, lr)``.
        lr: Learning rate handed to the optimizer.
        log_dir: Directory given to ``SummaryWriter``.
    """

    def __init__(self, starting_text, img_size, perceptor, siren, optimizer, lr=1e-5, log_dir=None):
        self.starting_text = starting_text
        self.img_size = img_size
        self.perceptor = perceptor
        # Per-channel mean / std applied before encoding images.
        # NOTE(review): presumably the statistics expected by the perceptor -- confirm.
        self.nom = torchvision.transforms.Normalize((0.48145466, 0.4578275, 0.40821073),
                                                    (0.26862954, 0.26130258, 0.27577711))
        self.model = siren
        self.optimizer = optimizer(self.model.parameters(), lr)
        self.writer = SummaryWriter(log_dir)

    def log_metrics(self, loss, epoch, exec_time):
        """Write the loss, the epoch duration and the current image to TensorBoard.

        Args:
            loss: Value logged under ``'Loss/train'``.
            epoch: Step index attached to every logged item.
            exec_time: Epoch duration in seconds.
        """
        with torch.no_grad():
            img = self.nom(self.model(get_mgrid(self.img_size)).cpu()).numpy()[0]

        # Channels-first -> channels-last, then round-trip through a PNG file so
        # the logged picture is exactly what imageio writes to disk.
        img = np.transpose(img, (1, 2, 0))
        imageio.imwrite('tmp.png', img)

        self.writer.add_scalar('Loss/train', loss, epoch)
        self.writer.add_scalar('Epoch processing time in s', exec_time, epoch)
        with PIL.Image.open("tmp.png") as tmp_img:
            self.writer.add_image("Generated image", np.array(tmp_img), epoch, dataformats='HWC')

    def ascend_txt(self):
        """Return the loss: ``-100 *`` mean cosine similarity between text and crops."""
        out = self.model(get_mgrid(self.img_size))

        cutn = 64  # number of random square crops per evaluation
        min_size = int(.5 * self.img_size)
        max_size = int(.98 * self.img_size)

        crops = []
        for _ in range(cutn):
            size = torch.randint(min_size, max_size, ())
            offsetx = torch.randint(0, self.img_size - size, ())
            offsety = torch.randint(0, self.img_size - size, ())
            crop = out[:, :, offsetx:offsetx + size, offsety:offsety + size]
            crop = torch.nn.functional.interpolate(crop, (224, 224), mode='bilinear')
            crops.append(self.nom(crop))
        batch = torch.cat(crops, 0)

        image_features = self.perceptor.encode_image(batch)
        # Send the tokens to wherever the image batch lives instead of assuming CUDA.
        text_features = self.perceptor.encode_text(self.starting_text.to(batch.device))
        return torch.cosine_similarity(text_features, image_features, dim=-1).mean() * -100

    def train_iter(self, epoch):
        """Run one optimisation step and return its loss (``epoch`` is unused)."""
        loss = self.ascend_txt()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        return loss

    def train(self, epochs, iter_per_epoch):
        """Train for ``epochs`` epochs of ``iter_per_epoch`` steps, logging once per epoch.

        The loss logged for an epoch is the one of its last step. Epochs that
        run no step are not logged. The writer is flushed before returning so
        the event files on disk are complete when the caller reads them.
        """
        for epoch in range(epochs):
            start_time = time.time()
            loss = None
            for _ in tqdm(range(iter_per_epoch)):
                loss = self.train_iter(epoch)

            if loss is not None:
                self.log_metrics(loss, epoch, time.time() - start_time)

        self.writer.flush()


# Entraînement et grid-search

Chaque entraînement se fait sur 500 itérations au total (découpées en 10 epochs de 50 itérations).

In [None]:
# Grid search values: every combination of the lists below is trained once.

# Prompts, tokenised with clip.tokenize before training.
txt_list = [
    "A knight fights a red dragon in a volcano.",
    "An ogre takes a mud bath in a swamp.",
    "Two birds make a nest in a tall maple.",
    "It all starts with a mouse searching for food.",
]

# Model names handed to clip.load.
clip_model_list = ["RN50", "RN101", "ViT-B/32"]

# Number of hidden layers of the Siren network.
hidden_layers_list = [12, 16, 20]

# Width of each Siren layer.
neurons_per_layer_list = [128, 256, 512]

# Learning rates given to the optimizer.
learning_rate_list = [1e-4, 1e-5]

In [None]:
# Exhaustive grid search: one training run per (prompt, perceptor, depth,
# width, learning rate) combination, logged under runs/<indices>.
img_side_size = 128
optimizer = torch.optim.Adam

for txt_idx, starting_text in enumerate(txt_list):
    txt = clip.tokenize(starting_text)

    for perc_idx, perceptor_name in enumerate(clip_model_list):
        perceptor, _ = clip.load(perceptor_name)

        for hl_idx, hidden_layers in enumerate(hidden_layers_list):
            for npl_idx, neurons_per_layer in enumerate(neurons_per_layer_list):
                for lr_idx, learning_rate in enumerate(learning_rate_list):
                    # Build a fresh network for every run: reusing one model
                    # across learning rates would start the second run from
                    # weights already trained by the first, making the
                    # comparison between learning rates meaningless.
                    siren_model = Siren(img_side_size, hidden_layers, neurons_per_layer).cuda()

                    log_dir = f"runs/{txt_idx}-{perc_idx}-{hl_idx}-{npl_idx}-{lr_idx}"
                    siren = SirenWrapper(txt, img_side_size, perceptor, siren_model, optimizer, learning_rate, log_dir)
                    siren.train(10, 50)

                    # Flush and release the event file before archiving so the
                    # zip contains the complete logs of this run.
                    siren.writer.close()

                    # Re-archive after every run so finished runs are already on
                    # Drive if the session stops before the search ends.
                    shutil.make_archive("/content/drive/MyDrive/Colab Notebooks/siren_training_logs", 'zip', "/content/runs/")

# Décompression et visualisation des logs via TensorBoard

In [None]:
# Extract the log archive saved on Drive into /content/logs.
!unzip "/content/drive/MyDrive/Colab Notebooks/siren_training_logs.zip" -d "/content/logs"

In [None]:
# Load the TensorBoard notebook extension (provides the %tensorboard magic).
%load_ext tensorboard

In [None]:
# Open TensorBoard on the extracted logs.
%tensorboard --logdir /content/logs