In [131]:
import os
# GPU selection through CUDA environment variables: order devices by PCI bus
# id and expose only the device with index 1 to this process.
# NOTE(review): these are set before any keras/tensorflow import in the
# notebook — presumably on purpose, so the setting is seen when CUDA starts.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="1" 

In [132]:
import keras
from matplotlib import pyplot as plt
import numpy as np
import gzip
%matplotlib inline
from keras.layers import Input,Conv2D,MaxPooling2D,UpSampling2D
from keras.models import Model
from keras.optimizers import RMSprop

from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.vgg16 import preprocess_input,decode_predictions

In [133]:
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

In [134]:
# Authenticate the Colab user, then build a PyDrive client whose credentials
# are the application-default Google credentials.
# NOTE(review): `drive` is not used in the visible part of the notebook —
# confirm it is still needed.
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [135]:
# Folder the image helpers below read from (absolute Colab path).
bin_path = '/content/photos_produits/'

In [136]:
# Switch the working directory to the image folder and echo it as a check.
os.chdir('/content/photos_produits/')
print(f"The current working directory: {os.getcwd()} ")

The current working directory: /content/photos_produits 


In [137]:
# Import the images of a folder and turn them into model-ready numpy arrays.

def import_process_img(bin_path):
  """Load and preprocess every JPG/PNG image stored in ``bin_path``.

  For each selected file the image is:
    1. loaded as RGB and resized to 224x224 (PIL image),
    2. converted to a 3D numpy array,
    3. given a leading axis (the "number of images" dimension),
    4. passed through the VGG16 ``preprocess_input`` function.

  Files are opened through their full path (``bin_path`` + file name), so the
  result does not depend on the current working directory.

  Only names ending in ``'jpg'`` or ``'png'`` are kept. This is the same
  filter as ``zip_id_vecteur``, which pairs file names with vectors by
  position: both functions must select the same files in the same order.

  Args:
    bin_path: path of the folder that contains the images.

  Returns:
    A list with one preprocessed array per image, in ``os.listdir`` order.
    The list is empty when the folder holds no matching file.
  """
  liste_img_prep = []
  SIZE = (224, 224)

  for nom_fichier in os.listdir(bin_path):
    # Skip anything that is not a jpg/png (other files, sub-folders, ...).
    if not nom_fichier.endswith(('jpg', 'png')):
      continue

    chemin = os.path.join(bin_path, nom_fichier)

    img = image.load_img(chemin, color_mode='rgb', target_size=SIZE)
    img = image.img_to_array(img)
    img = np.expand_dims(img, axis=0)

    liste_img_prep.append(preprocess_input(img))

  return liste_img_prep


In [138]:
# Preprocess every image of bin_path, then check the shape of the first array.
liste_test = import_process_img(bin_path)
liste_test[0].shape

(1, 224, 224, 3)

In [139]:
# Use a pre-trained VGG16 model (ImageNet weights).

from keras.applications.vgg16 import VGG16
model = VGG16(weights='imagenet')
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the output.
model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_6 (InputLayer)         [(None, 224, 224, 3)]     0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [140]:
from keras import backend as K

In [141]:
# The images are pushed through the model with K.function, and the output of
# the flatten layer is kept for every image of liste_images.

def vectorisation_image(liste_images):
  """Turn preprocessed images into feature vectors.

  Relies on the notebook-level ``model`` and on ``K`` (keras backend): a
  backend function mapping ``model.input`` to the output of
  ``model.layers[-4]`` is built once and applied to each image in turn.

  Args:
    liste_images: iterable of preprocessed image arrays.

  Returns:
    A list with one entry per image, in input order. Each entry is what the
    backend function returns: a one-element list holding the layer output.
  """
  extraire = K.function([model.input], [model.layers[-4].output])
  return [extraire(une_image) for une_image in liste_images]

In [142]:
# Vectorise the preprocessed images.
vecteurs = vectorisation_image(liste_test)

In [165]:
# Number of vectorised images.
len(vecteurs)

13

In [143]:
# Each entry of vecteurs is a one-element list; show the shape of the array
# stored in the first one.
display(vecteurs[0][0].shape)

(1, 25088)

In [172]:
# File names of bin_path ending in 'jpg' or 'png', then a check of the
# container type.
pictures = [file for file in os.listdir(bin_path) if file.endswith(('jpg', 'png' ))]
print(type(pictures))

<class 'list'>


In [173]:
# Pair every image id (its file name) with the matching feature vector.

def zip_id_vecteur(vecteurs, bin_path):
  """Return a list of ``(file_name, vector)`` tuples.

  File names are the entries of ``bin_path`` ending in ``'jpg'`` or ``'png'``,
  in ``os.listdir`` order. The pairing is purely positional: ``vecteurs`` must
  have been computed from the same folder, over the same files, in the same
  order.

  Args:
    vecteurs: iterable of vectorised images, one per image file.
    bin_path: path of the folder where the images are stored.

  Returns:
    A list of ``(file_name, vector)`` tuples.

  Raises:
    ValueError: if the number of vectors differs from the number of image
      files found. Zipping anyway would silently drop items and could attach
      vectors to the wrong image ids.
  """
  pictures = [file for file in os.listdir(bin_path) if file.endswith(('jpg', 'png'))]
  # Materialise so that any iterable (not only lists) can be length-checked.
  vecteurs = list(vecteurs)

  if len(pictures) != len(vecteurs):
    raise ValueError(
        f"{len(pictures)} image file(s) found in {bin_path!r} "
        f"but {len(vecteurs)} vector(s) given"
    )

  return list(zip(pictures, vecteurs))

In [174]:
# Associate each image file name with its vector.
id_vecteurs = zip_id_vecteur(vecteurs, bin_path)

In [175]:
# Dump every (file name, vector) pair.
# NOTE(review): this prints all arrays in full; consider showing only the
# file names and array shapes to keep the output readable.
print(id_vecteurs)

[('00658034.jpg', [array([[ 0.       ,  0.       ,  0.       , ..., 11.126054 ,  0.7029545,
         0.       ]], dtype=float32)]), ('00660709.jpg', [array([[0.        , 4.818705  , 0.        , ..., 0.76157254, 0.        ,
        0.        ]], dtype=float32)]), ('573595.jpg', [array([[0.       , 0.       , 0.       , ..., 0.       , 5.2320356,
        0.       ]], dtype=float32)]), ('00657181.jpg', [array([[0., 0., 0., ..., 0., 0., 0.]], dtype=float32)]), ('00660396.jpg', [array([[0.        , 0.        , 0.02093965, ..., 0.        , 0.        ,
        0.        ]], dtype=float32)]), ('415660.jpg', [array([[0.       , 0.       , 0.       , ..., 3.1109273, 0.       ,
        0.       ]], dtype=float32)]), ('074331.jpg', [array([[0., 0., 0., ..., 0., 0., 0.]], dtype=float32)]), ('074332.jpg', [array([[0., 0., 0., ..., 0., 0., 0.]], dtype=float32)]), ('152190.jpg', [array([[0.       , 0.       , 0.       , ..., 0.       , 7.8705616,
        0.       ]], dtype=float32)]), ('00657501.jpg',