#### ***Importación de las librerías útiles***

In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from skimage import feature
from tqdm import tqdm
from skimage.io import imread_collection
from sklearn.cluster import KMeans
import pywt
from scipy.stats import gennorm
import os

#### ***Funciones auxiliares***

In [2]:
def convert2image(data_list, size):
    """Arrange pixel data as a square ``size`` x ``size`` array.

    Parameters
    ----------
    data_list : array-like
        Pixel values; converted with ``np.array`` before reshaping, so it
        must hold exactly ``size * size`` elements.
    size : int
        Side length of the resulting square array.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(size, size)``.
    """
    pixels = np.array(data_list)
    return pixels.reshape((size, size))

def save_new_csv(path, data):
    """Write ``data`` to ``path`` as CSV unless something already exists there.

    Parameters
    ----------
    path : str
        Destination file path. When it already exists nothing is written and
        the existing file is left untouched.
    data : object exposing ``to_csv``
        Written with ``data.to_csv(path, index=False)``.
    """
    # Never overwrite a previously exported file.
    if not os.path.exists(path):
        data.to_csv(path, index=False)

### ***Lectura de datos de entrenamiento y prueba***

In [3]:
# CSV table shipped with the dataset, plus the train and test image collections.
train = pd.read_csv('dataset2/CSV/clothes.csv')
x_train = imread_collection('dataset2/images_train/*.jpg')
x_test = imread_collection('dataset2/images_test/*.jpg')

# Report the number of images in each collection (train first, then test).
print(len(x_train), len(x_test), sep='\n')

1600
399


### ***Haralick***

In [4]:
def haralick_descriptor(images, size, distance, angles, moments):
    """Compute GLCM-based (Haralick) texture features for a set of images.

    For every image a gray-level co-occurrence matrix is built with
    ``feature.graycomatrix`` (256 levels, symmetric, normalised) and each
    requested property is read from it with ``feature.graycoprops``. The
    flattened property values are concatenated, in the order given by
    ``moments``, into one feature row per image.

    Parameters
    ----------
    images : indexable sequence
        Images to describe. Each one is passed through
        ``convert2image(image, size)``, so it must hold ``size * size`` values.
    size : int
        Side length used to reshape every image.
    distance : list of int
        Pixel-pair distances forwarded to ``graycomatrix``.
    angles : list of float
        Pixel-pair angles (radians) forwarded to ``graycomatrix``.
    moments : list of str
        Property names forwarded one at a time to ``graycoprops``.

    Returns
    -------
    numpy.ndarray
        One row per image; for example 6 moments with 4 distances and
        4 angles give 96 columns. The result is 2-D for any non-empty input,
        including a single image. An empty ``images`` yields an empty 1-D
        array.
    """
    rows = []

    for i in tqdm(range(0, len(images))):
        # The co-occurrence matrix depends only on the image, so build it once
        # and reuse it for every requested property instead of recomputing it
        # per moment.
        glcm = feature.graycomatrix(
            convert2image(images[i], size),
            distance,
            angles,
            levels=256,
            symmetric=True,
            normed=True,
        )

        row = []
        for moment in moments:
            row += feature.graycoprops(glcm, prop=moment).ravel().tolist()
        rows.append(row)

    if not rows:
        return np.array([])

    # Build the matrix in one step; stacking inside the loop would copy all
    # previously computed rows on every iteration.
    return np.array(rows)

#### ***Se hallan los descriptores de Haralick para cada imagen***

In [34]:
# Haralick features for the training images: 4 distances x 4 angles x 6 properties.
Haralick_features_train = haralick_descriptor(
    images=x_train,
    size=224,
    distance=[1, 2, 3, 4],
    angles=[0, np.pi/4, np.pi/2, 3*np.pi/4],
    moments=['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM'],
)

Haralick_features_train.shape

100%|██████████| 1600/1600 [05:55<00:00,  4.50it/s]


(1600, 96)

In [35]:
# Haralick features for the test images: 4 distances x 4 angles x 6 properties.
Haralick_features_test = haralick_descriptor(
    images=x_test,
    size=224,
    distance=[1, 2, 3, 4],
    angles=[0, np.pi/4, np.pi/2, 3*np.pi/4],
    moments=['contrast', 'dissimilarity', 'homogeneity', 'energy', 'correlation', 'ASM'],
)

Haralick_features_test.shape

100%|██████████| 399/399 [01:35<00:00,  4.20it/s]


(399, 96)

#### ***Se almacenan los datos obtenidos en un archivo CSV***

In [36]:
# Wrap both Haralick feature matrices in DataFrames.
pd_Haralick_features_train = pd.DataFrame(data=Haralick_features_train)
pd_Haralick_features_test = pd.DataFrame(data=Haralick_features_test)

# save_new_csv only writes when the target file does not exist yet.
save_new_csv(
    path='dataset2/CSV/Haralick_Descriptor_clothes_Train.csv',
    data=pd_Haralick_features_train,
)
save_new_csv(
    path='dataset2/CSV/Haralick_Descriptor_clothes_Test.csv',
    data=pd_Haralick_features_test,
)

### ***WAVELET***

In [24]:
def WAVELET(images, levels = 3):
    """Describe each image by generalized-normal fits of its Haar wavelet sub-bands.

    Every image is decomposed with ``pywt.wavedec2`` (``'haar'`` wavelet,
    ``'symmetric'`` mode, ``levels`` decomposition levels). A generalized
    normal distribution is fitted with ``gennorm.fit`` to the flattened
    coefficients of the first (approximation) entry and then to every detail
    array of every level, in the order ``wavedec2`` returns them. The fitted
    parameter tuples are concatenated into one feature row per image.

    Parameters
    ----------
    images : indexable sequence
        Images to decompose; each ``images[i]`` is passed to ``pywt.wavedec2``.
    levels : int, optional
        Number of decomposition levels (default 3).

    Returns
    -------
    numpy.ndarray
        One row per image. With ``levels=3`` there are 10 sub-bands and the
        row has 30 values. An empty ``images`` yields an empty 1-D array.
    """
    rows = []

    for i in tqdm(range(0, len(images))):
        coefficients = pywt.wavedec2(images[i], 'haar', mode='symmetric', level=levels)

        # First entry: approximation coefficients.
        features = list(gennorm.fit(coefficients[0].ravel()))

        # Remaining entries: one group of detail arrays per level.
        for detail_group in coefficients[1:]:
            for detail in detail_group:
                features.extend(gennorm.fit(detail.ravel()))

        rows.append(features)

    if not rows:
        return np.array([])

    # Build the matrix once; stacking inside the loop would copy all previous
    # rows on every iteration.
    return np.array(rows)

#### ***Se hallan los descriptores Wavelet para cada imagen***

In [25]:
# Wavelet descriptor for the training images, using 3 decomposition levels.
WAVELET_descriptor_Train = WAVELET(images=x_train, levels=3)

WAVELET_descriptor_Train.shape

100%|██████████| 1600/1600 [34:39<00:00,  1.30s/it]


(1600, 30)

In [26]:
# Wavelet descriptor for the test images, using 3 decomposition levels.
WAVELET_descriptor_Test = WAVELET(images=x_test, levels=3)

WAVELET_descriptor_Test.shape

100%|██████████| 399/399 [11:48<00:00,  1.78s/it]


(399, 30)

#### ***Se almacenan los datos obtenidos en un archivo CSV***

In [27]:
# Wrap both wavelet feature matrices in DataFrames.
pdWAVELET_train = pd.DataFrame(data=WAVELET_descriptor_Train)
pdWAVELET_test = pd.DataFrame(data=WAVELET_descriptor_Test)

# save_new_csv only writes when the target file does not exist yet.
save_new_csv(
    path='dataset2/CSV/WAVELET_Descriptor_clothes_Train.csv',
    data=pdWAVELET_train,
)
save_new_csv(
    path='dataset2/CSV/WAVELET_Descriptor_clothes_Test.csv',
    data=pdWAVELET_test,
)