<a href="https://colab.research.google.com/github/Dnt-1991/Neural-Style-Transfer/blob/main/Neural_Style_Transfer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Panoramica
Utilizzo il deep learning tramite il "neural style transfer", una tecnica che, partendo da due immagini (la prima da modificare e la seconda raffigurante lo stile in cui trasformarla), produce un'immagine di output simile alla prima ma resa nello stile della seconda.

### Collego Google Drive

In [None]:
# Mount Google Drive under /content/drive; the image paths used later in this
# notebook all live below that mount point.
from google.colab import drive
drive.mount('/content/drive')

### Import e configurazione dei percorsi dei file

In [None]:
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['figure.figsize'] = (10,10)
mpl.rcParams['axes.grid'] = False
import numpy as np
from PIL import Image
import time
import functools

%tensorflow_version 1.x
import tensorflow as tf
from tensorflow.python.keras.preprocessing import image as kp_image
from tensorflow.python.keras import models 
from tensorflow.python.keras import losses
from tensorflow.python.keras import layers
from tensorflow.python.keras import backend as K

from __future__ import print_function
import binascii
import struct
import scipy
import scipy.misc
import scipy.cluster 

# Enable eager execution and print whether it is actually active
tf.enable_eager_execution()
print("Eager execution: {}".format(tf.executing_eagerly()))

### Verifica del colore predominante

In [None]:
def color_image(img):
  """Choose a style image based on the dominant color of ``img``.

  The picture is converted to RGB, shrunk to 300x300 and its pixels are
  grouped with k-means (5 clusters). The centroid of the most populated
  cluster is taken as the dominant color, and its red/green/blue components
  are compared against fixed thresholds to pick one of the predefined style
  pictures.

  Arguments:
    img: The image to analyse; it is passed straight to ``Image.open``.

  Returns:
    The path (a string) of the style image matched to the dominant color.
  """
  NUM_OF_CLUSTERS = 5
  FILTERS_DIR = '/content/drive/MyDrive/Colab Notebooks/files/images/filters/'

  with Image.open(img) as imm:
    # Force three channels so grayscale / palette / RGBA files do not break
    # the reshape below, then shrink the picture to cut the run time.
    imm = imm.convert('RGB').resize((300, 300))
    array = np.asarray(imm)

  # One row per pixel, one column per color channel.
  pixels = array.reshape(-1, array.shape[-1]).astype(float)

  print('finding clusters')
  codes, _ = scipy.cluster.vq.kmeans(pixels, NUM_OF_CLUSTERS)

  # Assign every pixel to its nearest centroid and count pixels per centroid.
  # bincount indexes directly by cluster id, so the count stays aligned with
  # ``codes`` even when some centroid receives no pixel.
  labels, _ = scipy.cluster.vq.vq(pixels, codes)
  counts = np.bincount(labels, minlength=len(codes))

  # Centroid of the most frequent cluster
  red, green, blue = codes[np.argmax(counts)][:3]

  # Pick the filter according to the RGB values of the dominant color
  if red > 100 and green < 100 and blue < 100:
    # red
    print("L'immagine tende al rosso")
    style_name = 'abstract.jpg'
  elif red < 100 and green < 100 and blue > 100:
    # blue
    print("L'immagine tende al blu")
    style_name = 'van_gogh.jpg'
  elif red < 100 and green > 100 and blue < 100:
    # green
    print("L'immagine tende al verde")
    style_name = 'georgia.jpg'
  elif red > 100 and green < 100 and blue > 100:
    # violet
    print("L'immagine tende al viola")
    style_name = 'flower.jpg'
  elif red > 100 and green > 100 and blue < 100:
    # yellow
    print("L'immagine tende al giallo")
    style_name = 'van_gogh2.jpg'
  elif red < 100 and green > 100 and blue > 130:
    # cyan (note: the blue threshold here is 130, not 100)
    print("L'immagine tende all'azzurro")
    style_name = 'kanagawa.jpg'
  elif red < 128 and green < 128 and blue < 128:
    # dark
    print("L'immagine tende al nero")
    style_name = 'guernica.jpg'
  else:
    # light (also the fallback for every combination not matched above)
    print("L'immagine tende al bianco")
    style_name = 'wave.jpg'

  return FILTERS_DIR + style_name

In [None]:
# Source (content) images available for selection
meadow = '/content/drive/MyDrive/Colab Notebooks/files/images/photo/meadow.jpg'
cats = '/content/drive/MyDrive/Colab Notebooks/files/images/photo/cats.jpg'
city = '/content/drive/MyDrive/Colab Notebooks/files/images/photo/city.jpg'
desert = '/content/drive/MyDrive/Colab Notebooks/files/images/photo/desert.jpg'
landscape = '/content/drive/MyDrive/Colab Notebooks/files/images/photo/landscape.jpg'
roses = '/content/drive/MyDrive/Colab Notebooks/files/images/photo/roses.jpg'
sunset = '/content/drive/MyDrive/Colab Notebooks/files/images/photo/sunset.jpg'
snow = '/content/drive/MyDrive/Colab Notebooks/files/images/photo/snow.jpg'



# Interactive selection: the trailing #@param annotation is Colab form markup
# listing the variables defined above, so it must stay on the assignment line.
content_path = landscape #@param ["meadow", "cats", "city" , "desert" , "landscape", "roses" , "sunset" , "snow" ] {type:"raw"}
# The style image is picked automatically from the content image's dominant color
style_path = color_image(content_path)





## Preprocessing delle immagini
Eseguo lo stesso processo di pre-elaborazione previsto da VGG.

In [None]:
def load_img(path_to_img):
  """Load an image, scale its longest side to 512 px and add a batch axis.

  Arguments:
    path_to_img: The image to load; it is passed straight to ``Image.open``.

  Returns:
    The array produced by ``kp_image.img_to_array`` for the resized RGB
    picture, with an extra leading axis of size 1.
  """
  max_dim = 512
  with Image.open(path_to_img) as source:
    # Force three color channels so grayscale, palette and RGBA files yield
    # the same layout as plain RGB ones. convert() also loads the pixel
    # data, so the file can be closed right away.
    img = source.convert('RGB')

  scale = max_dim / max(img.size)
  # Keep the aspect ratio; never let a side collapse to zero pixels.
  new_size = tuple(max(1, round(side * scale)) for side in img.size)
  # Image.LANCZOS is the filter formerly aliased as Image.ANTIALIAS, an alias
  # that newer Pillow releases no longer provide.
  img = img.resize(new_size, Image.LANCZOS)
  img = kp_image.img_to_array(img)
  return np.expand_dims(img, axis=0)


In [None]:
def imshow(img, title=None):
  """Draw a batched image on the current matplotlib axes, hiding the axis.

  Arguments:
    img: Array with a leading axis of size 1 (it is squeezed away); the
      values are cast to ``uint8`` before drawing.
    title: Optional title for the axes; nothing is set when it is ``None``.
  """
  out = np.squeeze(img, axis=0).astype('uint8')
  # Draw once: a second imshow call would only stack an identical artist.
  plt.imshow(out)
  plt.axis(False)
  if title is not None:
    plt.title(title)

In [None]:
def load_and_process_img(path_to_img):
  """Load an image with ``load_img`` and apply the VGG19 input preprocessing.

  Arguments:
    path_to_img: Path of the image, forwarded to ``load_img``.

  Returns:
    The result of ``tf.keras.applications.vgg19.preprocess_input`` applied to
    the loaded image.
  """
  raw_image = load_img(path_to_img)
  return tf.keras.applications.vgg19.preprocess_input(raw_image)

def deprocess_img(processed_img):
  """Turn a preprocessed image back into a displayable ``uint8`` picture.

  A copy of the input is taken, so the caller's array is left untouched. The
  per-channel offsets 103.939, 116.779 and 123.68 are added back, the channel
  order is reversed, and the values are clipped to [0, 255].

  Arguments:
    processed_img: Array of shape [1, height, width, channel] or
      [height, width, channel].

  Returns:
    A ``uint8`` array of shape [height, width, channel].

  Raises:
    ValueError: If the input is not 3-dimensional after the optional leading
      axis has been removed.
  """
  x = processed_img.copy()
  if x.ndim == 4:
    x = np.squeeze(x, 0)
  # Explicit check instead of ``assert`` so the validation also runs under -O.
  if x.ndim != 3:
    raise ValueError("L'immagine di input deve avere "
                     "dimensione [1, height, width, channel] o [height, width, channel]")

  # Add back the per-channel offsets
  x[:, :, 0] += 103.939
  x[:, :, 1] += 116.779
  x[:, :, 2] += 123.68
  # Reverse the channel order
  x = x[:, :, ::-1]

  return np.clip(x, 0, 255).astype('uint8')

### Definisci il contenuto e le rappresentazioni di stile
Per ottenere sia il contenuto che le rappresentazioni di stile della nostra immagine, esamineremo alcuni livelli intermedi all'interno del nostro modello. Man mano che andiamo più in profondità nel modello, questi strati intermedi rappresentano caratteristiche di ordine sempre più elevato. 
In questo caso, stiamo utilizzando l'architettura di rete VGG19, una rete di classificazione delle immagini pre-addestrata. 
Questi strati intermedi sono necessari per definire la rappresentazione del contenuto e dello stile dalle nostre immagini. 
Per un'immagine di input, cercheremo di far corrispondere le sue rappresentazioni di stile e di contenuto a quelle di destinazione calcolate su questi livelli intermedi.

Affinché una rete possa eseguire la classificazione delle immagini (cosa che la nostra rete è stata addestrata a fare), deve comprendere l'immagine. 
Ciò implica prendere l'immagine grezza come pixel di input e costruire una rappresentazione interna attraverso trasformazioni che convertono i pixel grezzi in una comprensione complessa delle caratteristiche presenti nell'immagine. 
Questo è anche in parte il motivo per cui le reti neurali convoluzionali sono in grado di generalizzare bene: sono in grado di catturare le invarianze e definire le caratteristiche all'interno delle classi.
Il modello funge da estrattore di caratteristiche complesse, quindi, accedendo ai livelli intermedi, siamo in grado di descrivere il contenuto e lo stile delle immagini di input.


Nello specifico, estrarremo questi livelli intermedi dalla nostra rete:


In [None]:
# Content layer from which the feature maps are extracted
content_layers = ['block5_conv2']

# Style layers of interest: the first convolution of block1 through block5
style_layers = ['block{}_conv1'.format(block) for block in range(1, 6)]

num_content_layers, num_style_layers = len(content_layers), len(style_layers)

## Costruire il modello
In questo caso, carichiamo [VGG19](https://keras.io/applications/#vgg19) e forniamo la nostra immagine di input al modello. 

In [None]:
def get_model():
  """Build a Keras model that exposes the selected VGG19 intermediate layers.

  VGG19 is loaded without its top layers and with ImageNet weights, and is
  marked as non-trainable. The outputs of the layers named in the
  module-level ``style_layers`` and ``content_layers`` lists are collected,
  style layers first.

  Returns:
    A Keras model mapping the VGG19 input to the list of style-layer outputs
    followed by the content-layer outputs.
  """
  vgg = tf.keras.applications.vgg19.VGG19(include_top=False, weights='imagenet')
  vgg.trainable = False

  # Style outputs come first; callers split the list at num_style_layers.
  wanted_layers = style_layers + content_layers
  outputs = [vgg.get_layer(layer_name).output for layer_name in wanted_layers]

  return models.Model(vgg.input, outputs)

In [None]:
def get_content_loss(base_content, target):
  """Return the mean of the squared element-wise difference of the inputs."""
  difference = base_content - target
  squared = tf.square(difference)
  return tf.reduce_mean(squared)

In [None]:
def gram_matrix(input_tensor):
  """Compute the Gram matrix of a feature tensor over its last axis.

  The tensor is flattened to two dimensions, keeping the last axis as the
  columns. The product of the transposed flattened tensor with itself is
  then divided by the number of rows of the flattened tensor.

  Arguments:
    input_tensor: Tensor whose last dimension is statically known.

  Returns:
    A square tensor with one row and one column per entry of the last axis.
  """
  num_channels = int(input_tensor.shape[-1])
  flat = tf.reshape(input_tensor, [-1, num_channels])
  num_rows = tf.cast(tf.shape(flat)[0], tf.float32)
  return tf.matmul(flat, flat, transpose_a=True) / num_rows

def get_style_loss(base_style, gram_target):
  """Return the mean squared difference between two Gram matrices.

  The Gram matrix of ``base_style`` is computed with ``gram_matrix`` and
  compared with the already computed ``gram_target``.

  Arguments:
    base_style: Feature tensor of the image being generated (the callers in
      this notebook pass a height x width x channels tensor).
    gram_target: Precomputed Gram matrix to compare against.

  Returns:
    A scalar tensor with the mean of the squared element-wise difference.
  """
  gram_style = gram_matrix(base_style)
  return tf.reduce_mean(tf.square(gram_style - gram_target))

## Style transfer


### Esegui Gradient Descent

In questo caso, utilizziamo l'ottimizzatore [Adam](https://www.tensorflow.org/api_docs/python/tf/keras/optimizers/Adam) per ridurre al minimo la nostra perdita: aggiorniamo iterativamente l'immagine di output in modo da minimizzarla.

In [None]:
def get_feature_representations(model, content_path, style_path):
  """Compute the style and content features of the two source images.

  Both images are loaded and preprocessed from their paths and run through
  ``model``. The first ``num_style_layers`` outputs of the style image and
  the remaining outputs of the content image are kept, each with its leading
  (batch) axis removed by indexing element 0.

  Arguments:
    model: The model returning the intermediate layer outputs.
    content_path: The path to the content image.
    style_path: The path to the style image.

  Returns:
    A tuple ``(style_features, content_features)`` of two lists.
  """
  content_image = load_and_process_img(content_path)
  style_image = load_and_process_img(style_path)

  # Each image goes through the network on its own
  style_outputs = model(style_image)
  content_outputs = model(content_image)

  # The model lists its style outputs first, then its content outputs
  style_features = []
  for layer_output in style_outputs[:num_style_layers]:
    style_features.append(layer_output[0])

  content_features = []
  for layer_output in content_outputs[num_style_layers:]:
    content_features.append(layer_output[0])

  return style_features, content_features

### Calcolo della perdita e dei gradienti
Qui usiamo [**tf.GradientTape**](https://www.tensorflow.org/programmers_guide/eager#computing_gradients) per calcolare il gradiente.

In [None]:
def compute_loss(model, loss_weights, init_image, gram_style_features, content_features):
  """Compute the weighted style, content and total loss of ``init_image``.

  Arguments:
    model: The model giving access to the intermediate layer outputs.
    loss_weights: A pair ``(style_weight, content_weight)``.
    init_image: The image being optimised; it is fed through ``model``.
    gram_style_features: Precomputed Gram matrices, one per style layer.
    content_features: Precomputed outputs of the content layers.

  Returns:
    A tuple ``(total_loss, style_score, content_score)`` where the two scores
    are already multiplied by their weights and the total is their sum.
  """
  style_weight, content_weight = loss_weights

  # The model returns the style outputs first, followed by the content ones
  outputs = model(init_image)
  style_outputs = outputs[:num_style_layers]
  content_outputs = outputs[num_style_layers:]

  # Every style layer contributes equally
  per_style_layer = 1.0 / float(num_style_layers)
  style_score = sum(
      per_style_layer * get_style_loss(generated[0], target_gram)
      for target_gram, generated in zip(gram_style_features, style_outputs))

  # Every content layer contributes equally
  per_content_layer = 1.0 / float(num_content_layers)
  content_score = sum(
      per_content_layer * get_content_loss(generated[0], target_features)
      for target_features, generated in zip(content_features, content_outputs))

  style_score = style_score * style_weight
  content_score = content_score * content_weight

  return style_score + content_score, style_score, content_score

In [None]:
def compute_grads(cfg):
  """Compute the gradient of the total loss with respect to the image.

  Arguments:
    cfg: Dict of keyword arguments for ``compute_loss``; its ``'init_image'``
      entry is the tensor the gradient is taken with respect to.

  Returns:
    A tuple ``(gradients, all_losses)`` where ``all_losses`` is the full
    result of ``compute_loss`` and its first element is the differentiated
    total loss.
  """
  with tf.GradientTape() as tape:
    all_losses = compute_loss(**cfg)
  gradients = tape.gradient(all_losses[0], cfg['init_image'])
  return gradients, all_losses

### Optimization loop

In [None]:
import IPython.display

def run_style_transfer(content_path, style_path, num_iterations=1000, content_weight=1e3, style_weight=1e-2):
  """Optimise an image so it keeps the content of one picture and the style of another.

  The image is initialised from the content picture and updated with Adam for
  ``num_iterations`` steps. After every step it is clipped to the valid range
  of the preprocessed pixel values. Up to ten intermediate snapshots are
  displayed while running and plotted in a 2x5 grid at the end.

  Arguments:
    content_path: The path to the content image.
    style_path: The path to the style image.
    num_iterations: Number of optimisation steps.
    content_weight: Weight of the content loss.
    style_weight: Weight of the style loss.

  Returns:
    A tuple ``(best_img, best_loss)``: the deprocessed image with the lowest
    total loss seen (``None`` if no step was run) and that loss.
  """
  # No layer of the model is trained, so mark them all as non-trainable.
  model = get_model()
  for layer in model.layers:
    layer.trainable = False

  # Style and content feature representations from the chosen intermediate layers
  style_features, content_features = get_feature_representations(model, content_path, style_path)
  gram_style_features = [gram_matrix(style_feature) for style_feature in style_features]

  # The optimised image starts as the preprocessed content image
  init_image = load_and_process_img(content_path)
  init_image = tf.Variable(init_image, dtype=tf.float32)
  opt = tf.train.AdamOptimizer(learning_rate=5, beta1=0.99, epsilon=1e-1)

  # Best result seen so far
  best_loss, best_img = float('inf'), None

  loss_weights = (style_weight, content_weight)
  cfg = {
      'model': model,
      'loss_weights': loss_weights,
      'init_image': init_image,
      'gram_style_features': gram_style_features,
      'content_features': content_features
  }

  # Snapshot grid. The interval is an integer >= 1 so that ``i % interval``
  # behaves predictably for any num_iterations, not only multiples of ten.
  num_rows = 2
  num_cols = 5
  max_snapshots = num_rows * num_cols
  display_interval = max(1, num_iterations // max_snapshots)
  global_start = time.time()
  start_time = global_start

  # Bounds of the preprocessed pixel values: [0, 255] shifted by the channel offsets
  norm_means = np.array([103.939, 116.779, 123.68])
  min_vals = -norm_means
  max_vals = 255 - norm_means

  imgs = []
  for i in range(num_iterations):
    grads, all_loss = compute_grads(cfg)
    loss, style_score, content_score = all_loss
    opt.apply_gradients([(grads, init_image)])
    clipped = tf.clip_by_value(init_image, min_vals, max_vals)
    init_image.assign(clipped)

    if loss < best_loss:
      # Track the best total loss and the image that produced it
      best_loss = loss
      best_img = deprocess_img(init_image.numpy())

    if i % display_interval == 0:
      # .numpy() yields the concrete array behind the variable
      plot_img = deprocess_img(init_image.numpy())
      # Never keep more snapshots than the final grid has cells
      if len(imgs) < max_snapshots:
        imgs.append(plot_img)
      IPython.display.clear_output(wait=True)
      IPython.display.display_png(Image.fromarray(plot_img))
      print('Iteration: {}'.format(i))
      # The reported time is the time elapsed since the previous report
      print('Total loss: {:.4e}, '
            'style loss: {:.4e}, '
            'content loss: {:.4e}, '
            'time: {:.4f}s'.format(loss, style_score, content_score, time.time() - start_time))
      start_time = time.time()

  # Clear first, so the total time is not wiped by the clear request
  IPython.display.clear_output(wait=True)
  print('Total time: {:.4f}s'.format(time.time() - global_start))
  plt.figure(figsize=(14,4))
  for position, img in enumerate(imgs, start=1):
      plt.subplot(num_rows, num_cols, position)
      plt.imshow(img)
      plt.xticks([])
      plt.yticks([])

  return best_img, best_loss

In [None]:
def show_results(best_img, content_path, style_path, show_large_final=True):
  """Plot the content and style images side by side, then the final result.

  Arguments:
    best_img: The final image, drawn with ``plt.imshow``.
    content_path: The path to the content image.
    style_path: The path to the style image.
    show_large_final: When true, the final image is drawn in a second,
      larger figure and ``plt.show()`` is called.
  """
  plt.figure(figsize=(10, 5))
  content = load_img(content_path)
  style = load_img(style_path)

  panels = ((content, 'Immagine di partenza'), (style, 'Stile applicato'))
  for position, (picture, caption) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    imshow(picture, caption)

  if not show_large_final:
    return

  plt.figure(figsize=(10, 10))
  plt.imshow(best_img)
  plt.title('Immagine finale')
  plt.show()

## Risultati


In [None]:
# Load the selected content image and the automatically chosen style image
content = load_img(content_path).astype(np.uint8)
style = load_img(style_path).astype(np.uint8)

# Show them side by side in one large figure
plt.figure(figsize=(25, 25))

plt.subplot(121)
imshow(content, title='Immagine di partenza')

plt.subplot(122)
imshow(style, title='Stile selezionato')

plt.show()

In [None]:
# Run the optimisation on the selected images, then plot the inputs and the
# final picture. A bare ``Image.fromarray(best)`` expression in the middle of
# a cell displays nothing, so the result is shown by show_results only.
best, best_loss = run_style_transfer(content_path,
                                     style_path, num_iterations=1000)
show_results(best, content_path, style_path)
