In [None]:
import numpy as np
import os
from PIL import Image
import unittest
from multiprocessing import Pool
from sklearn.model_selection import train_test_split


import matplotlib.pyplot as plt      # plotting routines
from keras.models import Model       # Model type to be used
from keras.layers.core import Dense, Dropout, Activation # Types of layers to be used in our model
from keras.utils import np_utils                        # NumPy related tools
import keras                          # high-level neural networks API and interface to TensorFlow
import tensorflow as tf               #for numerical computation using data flow graphs

from keras.models import load_model
from keras.models import load_model, Model


# Colab only: mount Google Drive so that paths under /content/drive are readable.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# ***Loading Data***

In [None]:
def load_images(data_dir, batch_size, img_size):
    """
    Lazily load the '.jpg' images of a directory as fixed-size batches.

    File names are sorted so that batch contents are reproducible. Only
    complete batches are produced: when the number of images is not a
    multiple of ``batch_size`` the trailing images are skipped, which keeps
    every batch the same shape so the batches can be stacked with ``np.array``.

    Every image is converted to RGB before resizing, so greyscale or CMYK
    JPEGs do not produce arrays with a different number of channels.

    Args:
    - data_dir: string, path to the directory containing the images.
    - batch_size: int (> 0), number of images in each batch.
    - img_size: tuple handed unchanged to ``PIL.Image.resize``, which expects
      (width, height).

    Returns:
    - generator yielding one numpy array per batch, of shape
      (batch_size, height, width, 3), with pixel values scaled to [0, 1].

    Raises:
    - ValueError: if batch_size is not positive (raised when the generator is
      first iterated, like any error inside a generator body).
    """
    if batch_size <= 0:
        raise ValueError(f"batch_size must be a positive integer, got {batch_size!r}")

    # Sorted list of full paths to the JPEG files
    image_files = sorted(
        os.path.join(data_dir, file)
        for file in os.listdir(data_dir)
        if file.endswith('.jpg')
    )

    # Number of complete batches (a trailing partial batch is dropped)
    num_batches = len(image_files) // batch_size

    for i in range(num_batches):
        batch_files = image_files[i * batch_size:(i + 1) * batch_size]
        batch_images = []
        for file in batch_files:
            # The context manager releases the file handle as soon as the
            # pixel data has been copied into the converted/resized image.
            with Image.open(file) as image:
                rgb_image = image.convert('RGB').resize(img_size)
            batch_images.append(np.array(rgb_image) / 255.0)

        # One array per batch: (batch_size, height, width, 3)
        yield np.array(batch_images)

################
#Testing#
################
# Smoke test: load the whole dataset once and check the resulting shape.

data_dir = '/content/drive/MyDrive/4A_NF/Projet_4BIM/selectedData'

batch_size = 32
img_size = (64, 64)

images_generator = load_images(data_dir, batch_size, img_size)
images_list = list(images_generator)
assert images_list, f"No complete batch of {batch_size} '.jpg' images found in {data_dir}"

# Stack the batches into a single array of shape
# (num_batches, batch_size, height, width, 3). Each innermost triple holds the
# R, G, B values of one pixel, normalized between 0 and 1 by load_images.
images_array = np.array(images_list)

# Show only the shape: printing the array itself would dump every pixel value.
print(images_array.shape)
assert images_array.shape[1:] == (batch_size, img_size[1], img_size[0], 3), images_array.shape


## ***Autoencoder***

In [None]:
# Encoder half of the convolutional autoencoder: compresses an input image
# into a smaller feature map.
#
# Three Conv2D -> BatchNormalization -> MaxPooling2D stages (64, 128 and 256
# filters) each halve the spatial resolution of the 64x64x3 input; a final
# Conv2D with `encoded_dim` filters produces the bottleneck tensor `encoded`,
# of shape (8, 8, encoded_dim) per image.
#
#   input_shape : (height, width, channels) of the images fed to the network
#   encoded_dim : number of channels of the bottleneck feature map
#   input_img   : symbolic Keras input tensor
#   encoded     : symbolic bottleneck tensor (the encoder's output)

input_shape = (64, 64, 3)
encoded_dim = 64
input_img = keras.Input(shape=input_shape)

x = input_img
for n_filters in (64, 128, 256):
    x = keras.layers.Conv2D(n_filters, (3, 3), activation='relu', padding='same')(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.MaxPooling2D((2, 2), padding='same')(x)

encoded = keras.layers.Conv2D(encoded_dim, (3, 3), activation='relu', padding='same')(x)
    



In [None]:
# Decoder half of the convolutional autoencoder: rebuilds an image from the
# bottleneck tensor `encoded`.
#
# Three Conv2D -> BatchNormalization -> UpSampling2D stages (256, 128 and 64
# filters) each double the spatial resolution; a final 3-filter Conv2D with a
# sigmoid activation produces `decoded`, whose values lie in [0, 1].
#
#   autoencoder : input image -> reconstructed image
#   encoder     : input image -> bottleneck tensor

x = encoded
for n_filters in (256, 128, 64):
    x = keras.layers.Conv2D(n_filters, (3, 3), activation='relu', padding='same')(x)
    x = keras.layers.BatchNormalization()(x)
    x = keras.layers.UpSampling2D((2, 2))(x)

decoded = keras.layers.Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

autoencoder = keras.Model(inputs=input_img, outputs=decoded)
encoder = keras.Model(inputs=input_img, outputs=encoded)

In [None]:
# Draw the autoencoder graph (top-to-bottom, with tensor shapes and layer
# names) and write it to model.png.
plot_options = dict(
    to_file="model.png",
    show_shapes=True,
    show_dtype=False,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=False,
    dpi=70,
    layer_range=None,
)
tf.keras.utils.plot_model(autoencoder, **plot_options)

In [None]:
# Pixel values and the sigmoid output both lie in [0, 1], so the
# reconstruction is scored with per-pixel binary cross-entropy.
autoencoder.compile(
    loss='binary_crossentropy',
    optimizer='adam',
)

In [None]:
# Merge the (num_batches, batch_size) leading axes into a single sample axis so
# the split is made per image rather than per batch of 32 images.
# The -1 in reshape lets NumPy infer the number of samples from the array size.
all_images = images_array.reshape(-1, *images_array.shape[2:])

# Split the dataset into training and testing sets (80 % / 20 % of the images)
X_train, X_test = train_test_split(all_images, test_size=0.2, random_state=42)

In [None]:
# Train the network to reproduce its own input: the images serve both as
# features and as targets, for training and for validation.
autoencoder.fit(
    x=X_train,
    y=X_train,
    validation_data=(X_test, X_test),
    epochs=100,
    batch_size=32,
    shuffle=True,
)

In [None]:
# Layer-by-layer overview of the model's architecture
autoencoder.summary()

# Per-epoch values recorded during training, keyed by metric name
history = autoencoder.history.history


# ***Model Evaluation***

In [None]:
# Training vs. validation loss per epoch.
# Author's note: the two curves are reported to stay close to each other.
ax = plt.gca()
for metric, curve_label in (('val_loss', "test"), ('loss', "training")):
    ax.plot(history[metric], label=curve_label)
ax.set_xlabel("epochs")
ax.set_ylabel("Loss")
ax.legend()

In [None]:
# Reconstruct every test image with the trained autoencoder
decoded_imgs = autoencoder.predict(X_test)

# How many images to display: at most 10, and never more than the test set holds
n = min(10, len(X_test))
plt.figure(figsize=(20, 4))
for i in range(n):
    # Top row: original image
    ax = plt.subplot(2, n, i + 1)
    ax.imshow(X_test[i])
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    # Bottom row: its reconstruction
    ax = plt.subplot(2, n, i + 1 + n)
    ax.imshow(decoded_imgs[i])
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()

# ***Saving the trained decoder***

In [None]:

# Build a stand-alone decoder that maps a bottleneck tensor to an image.
# A model going from `autoencoder.input` to the last layer's output would just
# be the whole autoencoder again, so the decoder gets its own input instead.

# The encoder's layers are the first len(encoder.layers) layers of the
# autoencoder (input layer included); everything after them is the decoder.
decoder_layers = autoencoder.layers[len(encoder.layers):]

# New input with the shape of one encoded image (batch axis dropped)
decoder_input = keras.Input(shape=encoder.output.shape[1:])

# Re-apply the trained decoder layers to the new input; the layer objects are
# shared with the autoencoder, so the decoder uses the trained weights.
decoder_output = decoder_input
for layer in decoder_layers:
    decoder_output = layer(decoder_output)

decoder = Model(inputs=decoder_input, outputs=decoder_output)

# Save the decoder to a file in HDF5 (Hierarchical Data Format version 5) format
decoder.save('decoder_model.h5')


# ***How to use this decoder part?***

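Here `encoded_img` is assumed to be an output of the encoder. Note that the bottleneck layer has 64 channels (`encoded_dim = 64`) on an 8×8 spatial grid, so each encoded image has shape `(8, 8, 64)` rather than being a flat vector of size 64.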

**The following code has not been tested; it only serves as an example of how to use the decoder!**

In [None]:
# Reload the model stored in 'decoder_model.h5' and run it on `encoded_img`.
# NOTE(review): `encoded_img` is not assigned anywhere in the visible notebook;
# it must be defined, with the input shape the loaded model expects, before
# this cell can run.
decoder = load_model('decoder_model.h5')
generated_image = decoder.predict(encoded_img) 

# ***Saving 2 outputs of the encoder :)***

In [None]:
# Retrieve the first two images of the initial set
first2Batches = images_array[:2]  # the first two batches

# Merge the batch axis into the sample axis, then keep the first two images
first2Images = first2Batches.reshape(-1, *input_shape)[:2]
print(first2Images.shape)  # should print (2, 64, 64, 3)

# Predict the encoded representation of these images
encoded_imgs = encoder.predict(first2Images)

# np.savetxt only accepts 1-D or 2-D arrays: flatten each encoded image into
# one row, giving one line per image in the output file.
encoded_imgs_flat = np.reshape(encoded_imgs, (len(first2Images), -1))

# Save the encoded vectors to a file, values separated by a space
np.savetxt('encoded_vectors.txt', encoded_imgs_flat, delimiter=' ')




 