"""
Création d'un Dataset à partir des images scrapées de ZARA. Le dateset est sauvegardé dans le dossier DataSets du répoertoire du projet.
Le Dataset se présente sous la forme d'un dictionnaire contenant les données et les labels
sous différents formats ainsi que des caractéristiques  (global features, hog features) extraites des images 

Le répertoire de travail se présente sous la forme suivante : 

Projet_Classification_ZARA (MAINDIR) 
    Images (IMAGEDIR)
        jupes (category)
        pantalons
        robes
        t-shirts
    DataSets (DATASETDIR)
       
"""

# Packages 

In [2]:
# Jupyter Package
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# standard packages
## https://docs.python.org/3/library/io.html
import io
import os
import sys
import csv
import time 
import random
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# image processing and features extraction packages
## https://python-guide-pt-br.readthedocs.io/fr/latest/scenarios/imaging.html
## https://docs.python.org/fr/3/library/hashlib.html
from PIL import Image, ImageOps, ImageEnhance
import os, sys
import numpy as np
import mahotas
import cv2
import h5py
import hashlib # permet de chiffrer 
from skimage.feature import hog
from skimage.color import rgb2grey
from skimage import color
from skimage.io import imread
from skimage.transform import rescale, resize

# Dataset creation packages
from mlxtend.preprocessing import shuffle_arrays_unison
import pickle


# Définition du répertoire de Travail

In [3]:
#######################
## work directories ##
######################

MAINDIR='C:/Users/utilisateur/Documents/DataIA/Projet/Projet_Classification_ZARA/' # répertoire du projet
IMAGEDIR= MAINDIR + 'Images/'+'Images_models/' # répertoire contenant les images réparties  dans les dossiers categories
CATEGORIES=os.listdir(IMAGEDIR) #ie : jupes, pantalons, robes, t-shirts (attention j'ai un s à la fin des termes)
DATASETDIR=MAINDIR + 'DataSets/'

# Renommer les images 

In [None]:
############################################################
## FONCTION POUR RENOMMER LES IMAGES en "category"_xx.jpg ##
############################################################
def rename_file(CATEGORIES):
    """ Rename image files, in category folder, in the form "category"_xx.jpg ; ie: jupe_0.jpg, jupe_1.jpg etc... """
    for category in CATEGORIES:
        path=os.path.join(IMAGEDIR,category)
        for i, filename in enumerate(os.listdir(path)):
            os.rename(path + "/" + filename, path + "/" + category[:-1]+'_'+ str(i) + ".jpg") # folder[:-1] enlever le s

#  Extraction  des features et traitement des images 

In [4]:
###################################
## Features Extraction functions ##
###################################

## Fixed Variables ##
fixed_size = tuple((56, 56))  # size for resizing 
bins=8

# feature-descriptor-1 : Hu-Moments that quantifies shape 
def fd_hu_moments(image):
    # convert the image to grayscale
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # compute the Hu moment feature vector
    feature = cv2.HuMoments(cv2.moments(image)).flatten()
    return feature


# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
    # convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # compute the haralick texture feature vector
    haralick = mahotas.features.haralick(gray).mean(axis=0)
    # return the result
    return haralick

# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
    # convert the image to HSV color-space
    image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
    # compute the color histogram
    hist  = cv2.calcHist([image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    # normalize the histogram
    cv2.normalize(hist, hist)
    # return the histogram
    return hist.flatten()

# histogramme of oriented gradient 
def fd_Hog(image):
    # convert the image to grayscale
    image = color.rgb2gray(image)
    # compute hog 
    cell_size = (16, 16)  # h x w in pixels
    block_size = (4, 4)  # h x w in cells
    nbins = 9  # number of orientation bins

    # winSize is the size of the image cropped to an multiple of the cell size
    # cell_size is the size of the cells of the img patch over which to calculate the histograms
    # block_size is the number of cells which fit in the patch

    fd,hog_image = hog(image, orientations=8, pixels_per_cell=cell_size,cells_per_block=block_size,block_norm='L2-Hys',visualise=True)
    return fd,hog_image


# Put all those features-descriptor into a function
def Global_feature_extraction(resized_image):
    ####################################
    # Global Feature extraction
    ####################################
    fv_hu_moments = fd_hu_moments(resized_image)
    fv_haralick   = fd_haralick(resized_image)
    fv_histogram  = fd_histogram(resized_image)

    ###################################
    # Concatenate global features
    ###################################
    global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments])
    return global_feature



##############################################
## Image treatment : resizing and grayscale ##
##############################################
def traitement_image(image_path):
    ### Load image ###
    img = cv2.imread(image_path)
#   plt.imshow(img)
#   plt.show()

    ### resize ###
    resized=cv2.resize(img, fixed_size, interpolation=cv2.INTER_AREA)
#   plt.imshow(resized)
#   plt.show()

    ### convert the image to grayscale ###
    imgGray = cv2.cvtColor(resized, cv2.COLOR_BGR2GRAY)  
#   cv2.imshow('title',imgGray)
#   cv2.waitKey(0)
#   cv2.destroyAllWindows()
    return imgGray,resized


###################################################################
##  dress segmentation : traitement pour  masquer les mannequins ##
###################################################################

def traitement_image_mask(image_path):
    #img= cv2.imread(img)
    img = cv2.imread(image_path)
    img = cv2.resize(img,(224,224))
        
    mask = np.zeros(img.shape[:2],np.uint8)
    bgdModel = np.zeros((1,65),np.float64)
    fgdModel = np.zeros((1,65),np.float64)
    height, width = img.shape[:2]
    #rect = (50,60,width-100,height-60) ### pantalons
    rect = (50,50,width-100,height-60) #### robes
    #rect = (20,20,width-40,height-20) #### jupes
    cv2.grabCut(img,mask,rect,bgdModel,fgdModel,5,cv2.GC_INIT_WITH_RECT)
    mask2 = np.where((mask==2)|(mask==0),0,1).astype('uint8')
    img2 = img*mask2[:,:,np.newaxis]
    img2[mask2 == 0] = (255, 255, 255)
    
    final = np.ones(img.shape,np.uint8)*0 + img2
    
    return cv2.cvtColor(final, cv2.COLOR_BGR2GRAY)

# Création  et sauvegarde du Dataset

In [5]:
#########################################################
## CREATION D'un Dataset à partir des images traitées ##
########################################################
# empty lists
Data = [] # Data contient la matrice des pixels de l'image en ligne => 1 lignes = 1 images (par ex Image[0] : 1x(3136) si on fait resize 56par56 )
Target_name = [] # Target_Name C'est la classe de chaque image sous forme ('jupe', 'robe', 't-shirt', 'pantalon')
Data_name = [] # Data_name contient le nom de l'image  par exemple : jupe_01
Image=[] #Image est une matrice de matrices => 1 ligne = 1 images en format matrice (par ex  Image[0] : 1x56x56)
Global_features=[]
Dataset_dic={} # Le DataSet sous fome de dictionnaire qui contiendra data, images, target etc etc 
Hog_images = [] # hog images
Hog_features = [] # hog features

def DataSet():
    for category in CATEGORIES: 
        clothe_class=category[:-1]  #ie jupe, pantalon, robe or t-shirt
        for image in os.listdir(IMAGEDIR+category):  #image : nom de l'image ie jupe_0.jpg
            image_path =os.path.join(IMAGEDIR+category+'/'+image) 
            # image treatment 
            imgGray,resized=traitement_image(image_path)
#           # matrix to line with flatten()
            imgGray_flat=np.array(imgGray).flatten()
#             # global feature extraction
            global_feature=Global_feature_extraction(resized)
            # Hog
            
            img_hog=imread(image_path)
            img_hog = resize(img_hog, (200,200))
            fd,hog_image=fd_Hog(img_hog)
            # update the lists
            try:
                Image.append(imgGray)
                Target_name.append(clothe_class)
                Data.append(imgGray_flat)
                Data_name.append(image[:-4])
                Global_features.append(global_feature)
                Hog_images.append(hog_image)
                Hog_features.append(fd)
            except Exception as e:
                print(e)
    return np.array(Data),np.array(Image), np.array(Target_name),np.array(Data_name), np.array(Hog_features), np.array(Hog_images), np.array(Global_features)

def DataSetCreation(name:str):
    ## on crée le data set
    Data, Image, Target_name, Data_Name ,Hog_features,Hog_images, Global_features = DataSet()  
    
    ## vérification des dimensions 
    print("[STATUS] Data (pixel matrix in line) size {}".format(Data.shape) )
    print("[STATUS] Target name (Labels) size {}".format(Target_name.shape) )
    print("[STATUS] Data Name (Image names) size {}".format(Data_Name.shape) )
    print("[STATUS] Image  (Image matrix) size {}".format(Image.shape) )
    print("[STATUS] global features (Image names) size {}".format(Global_features.shape) )
    print("[STATUS] Hog features  size {}".format(np.array(Hog_features).shape) )
    print("[STATUS] Hog images  size {}".format(Hog_images.shape) )
    
    ### mélange des données avec shuffle ; unison c'est pour que tous les arrays soient mélanger de la même façon et q'uon garde la correspondace entre eux
    Data, Image,Target_name, Data_Name, Hog_features,Hog_images, Global_features = shuffle_arrays_unison(arrays=[Data, Image,Target_name, Data_Name, Hog_features,Hog_images,Global_features], random_seed=0)
   
    ### encodage  pantalon to 1 ; jupe to 0 ;  t-shirt to 3 and robe to 2 pour chaque images 
    
    Target=pd.Series(Target_name).astype('category').cat.codes
    #print(Target)
    
    #### création d'un dictionnaire pour savoir la correspondance code <-> vetement 
    Target_name_list = dict(enumerate(pd.Series(Target_name).astype('category').cat.categories)) # ie {0: 'jupe', 1: 'pantalon', 2: 'robe', 3: 't-shirt'}
    print("[STATUS] Correspondance Class <-> Code {}".format(Target_name_list) )
    ### Creation du data set sous forme d'un dictionnaire comme quand on load les dataset scikitlearn
    Dataset_dic={
                        'data': Data,
                        'target':Target,
                        'target_names':Target_name,
                        'target_name_list':list(Target_name_list.values()),
                        'target_name_code': Target_name_list,
                        'data_Name':Data_Name,
                        'images': Image,
                         'global_features':Global_features,
                         'hog_features': Hog_features,
                         'hog_images': Hog_images
                       }
    
    ## pour sauver le dataset  (dans le dossier DataSets) et pouvoir le reloader plus tard dans un autre notebook
    if not os.path.exists(DATASETDIR):
        os.makedirs(DATASETDIR)
    with open(DATASETDIR+'/'+ name +'.pickle',"wb") as outpout:
        pickle.dump(Dataset_dic, outpout)
    
    return Dataset_dic

In [6]:
## Création du datase nommé 'ZARA_DataSet_models'
ZARA_DataSet_models=DataSetCreation('ZARA_DataSet_models_git')   

C:\Users\utilisateur\Anaconda3\lib\site-packages\skimage\feature\_hog.py:239: skimage_deprecation: Argument `visualise` is deprecated and will be changed to `visualize` in v0.16
  'be changed to `visualize` in v0.16', skimage_deprecation)


[STATUS] Data (pixel matrix in line) size (1307, 3136)
[STATUS] Target name (Labels) size (1307,)
[STATUS] Data Name (Image names) size (1307,)
[STATUS] Image  (Image matrix) size (1307, 56, 56)
[STATUS] global features (Image names) size (1307, 532)
[STATUS] Hog features  size (1307, 10368)
[STATUS] Hog images  size (1307, 200, 200)


NameError: name 'shuffle_arrays_unison' is not defined

# Vérification 

In [None]:
# vérification du contenu du dataset
print(ZARA_DataSet_models.keys())

In [None]:
# transformation des keys du dictionnaire dataset en variables globales
for key in ZARA_DataSet_models.keys():
    globals()[str(key)] =ZARA_DataSet_no_models[key]
    
# affichage de quelques images du dataset 
for i in range(0,6):
    plt.imshow(images[i])
    plt.imshow(hog_image[i], cmap='gray')
    plt.show()
    print()

In [None]:
## Pour charger le dataset dans un autre notebook
# with open(DATASETDIR+'ZARA_DataSet_models.pickle', 'rb') as data:
#     ZARA_DataSet_models = pickle.load(data)  

In [None]:
ZARA_DataSet_models_with_HOG.keys()
for key in ZARA_DataSet_models_with_HOG.keys():
    globals()[str(key)] =ZARA_DataSet_models_with_HOG[key]
    
for i in range(0,6):
    print(data_Name[i])
    print(target_names[i])
    
    plt.imshow(images[i])
    plt.show()
    plt.imshow(hog_images[i])
    plt.show()
    print('--------------')