In [1]:
import os
import cv2
import h5py
import random
import numpy as np
from skimage import img_as_float32
from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin
from sklearn.model_selection import StratifiedShuffleSplit

In [2]:
class ImageProcessing(BaseEstimator,TransformerMixin):
    """Build a labelled grayscale image dataset from class folders and save it as HDF5.

    Each entry of ``attributes`` names a sub-folder of ``path`` that contains
    the ``.jpg`` images of one class. The integer label of a class is the
    position of its folder name in ``attributes``.

    Parameters
    ----------
    path : str
        Root folder that contains one sub-folder per class.
    attributes : list of str
        Class sub-folder names.

    Attributes
    ----------
    dados_X : list or numpy.ndarray
        Empty list until ``make_sets`` runs; afterwards the stacked images.
    dados_y : list or numpy.ndarray
        Empty list until ``make_sets`` runs; afterwards the integer labels.
    """

    def __init__(self, path, attributes):
        self.path = path
        self.attributes = attributes
        self.dados_X = []
        self.dados_y = []

    def fit(self, *_):
        """Do nothing and return the instance (there is nothing to learn)."""
        return self

    def get_files(self, attribute):
        """Return the shuffled paths of every ``.jpg`` file of one class.

        Parameters
        ----------
        attribute : str
            Class sub-folder name, relative to ``self.path``.

        Returns
        -------
        list of str
            Paths built as ``path/attribute/file``. The order is randomised
            with the global ``random`` state, so it is only reproducible if
            the caller seeds ``random`` beforehand.
        """
        folder = os.path.join(self.path, attribute)
        files = sorted(os.path.join(folder, file)
                       for file in os.listdir(folder)
                       if file.endswith('.jpg'))
        random.shuffle(files)
        return files

    def load_image(self, item):
        """Load one image as a 255x255 grayscale float32 array.

        Steps: read the file, convert BGR to gray, resize to 255x255 with
        linear interpolation, then convert to float32 with ``img_as_float32``
        (which, per the original notes, normalises pixel values to 0..1).

        Parameters
        ----------
        item : str
            Path of the image file.

        Raises
        ------
        IOError
            If OpenCV cannot read the file.
        """
        image = cv2.imread(item)
        # cv2.imread signals failure by returning None instead of raising;
        # fail here with the offending path rather than inside cvtColor.
        if image is None:
            raise IOError("Não foi possível ler a imagem '{}'.".format(item))

        gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
        gray = cv2.resize(gray, (255, 255), interpolation=cv2.INTER_LINEAR)
        return img_as_float32(gray)

    def make_sets(self, *_):
        """Load every image of every class into ``dados_X`` / ``dados_y``.

        The arrays are rebuilt from scratch on each call, so calling this
        method more than once is safe.
        """
        images = []
        labels = []

        for attribute in self.attributes:
            # Use the instance's own class list, not a notebook-level global.
            label = self.attributes.index(attribute)
            for item in self.get_files(attribute):
                images.append(self.load_image(item))
                labels.append(label)

        self.dados_X = np.array(images)
        self.dados_y = np.array(labels)

    def save_h5(self, h5_filename, data, label, data_dtype='float64', label_dtype='int64'):
        """Write ``data`` and ``label`` to a new gzip-compressed HDF5 file.

        Parameters
        ----------
        h5_filename : str
            Destination file; it must not exist yet.
        data, label : array-like
            Stored as the datasets ``'data'`` and ``'label'``.
        data_dtype, label_dtype : str
            On-disk dtypes of the two datasets.

        Raises
        ------
        ValueError
            If ``h5_filename`` already exists.
        """
        if os.path.isfile(h5_filename):
            raise ValueError("O arquivo '{}' já existe e não pode "
                             "ser apagado.".format(h5_filename))

        # An explicit create-only mode is required: recent h5py versions open
        # files read-only by default. The context manager closes the file even
        # when a write fails.
        with h5py.File(h5_filename, 'w-') as h5_fout:
            h5_fout.create_dataset('data', data=data, compression='gzip',
                                   compression_opts=4, dtype=data_dtype)
            h5_fout.create_dataset('label', data=label, compression='gzip',
                                   compression_opts=1, dtype=label_dtype)

    def transform(self, Name=""):
        """Build the dataset and save it to the HDF5 file ``Name``.

        Returns ``None``; the arrays stay available as ``dados_X`` and
        ``dados_y``.
        """
        self.make_sets()
        self.save_h5(Name, self.dados_X, self.dados_y)
        print("Sucesso. Acessar os arquivos na pasta.")

In [3]:
# Root folder of the image collection; it is expected to hold one sub-folder
# of .jpg files per class (relative path, resolved from the working directory).
path = "DATA/100-300"
# Class sub-folder names. The position of a name in this list is used as the
# integer label of that class, so reordering the list changes the labels.
attributes = ["Ar_Bru","Ca_PodVer","Mil_ManTur","Tri_Bru","Tri_Fer","Tri_Oid"]

In [4]:
# Create the dataset builder. The constructor only stores its arguments;
# no image is read until transform() is called.
MakeDataSet = ImageProcessing(path,attributes)

In [5]:
# Load every image and write the arrays to an HDF5 file named "DataSet" in the
# working directory. Raises ValueError if that file already exists, so delete
# it before re-running this cell. transform() returns None; `_` just discards it.
_ = MakeDataSet.transform("DataSet")

Sucesso. Acessar os arquivos na pasta.


In [None]:
# Step 2 - Hold out a stratified validation set.
# X and y were not defined anywhere in the notebook; take them from the arrays
# that MakeDataSet.transform() built in the cell above.
X, y = MakeDataSet.dados_X, MakeDataSet.dados_y

# StratifiedShuffleSplit is already imported in the first cell.
split = StratifiedShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
# n_splits=1 yields exactly one (train, validation) pair of index arrays.
train_index, val_index = next(split.split(X, y))
X_train, X_val = X[train_index], X[val_index]
y_train, y_val = y[train_index], y[val_index]

In [None]:
    def get_files(self, attribute, train_fraction=0.7):
        """Split the ``.jpg`` files of one class folder into training and test lists.

        Files are listed in sorted order and cut at a single index, so the two
        lists never overlap and no file is left out. Because the cut is made
        per class folder, each class keeps the same training/test proportion.

        Parameters
        ----------
        attribute : str
            Class sub-folder name, relative to ``self.path``.
        train_fraction : float, optional
            Share of the files that goes to the training list (default 0.7).

        Returns
        -------
        tuple of (list of str, list of str)
            ``(training, test)`` file paths.

        Raises
        ------
        ValueError
            If ``train_fraction`` is outside ``[0, 1]``.
        """
        if not 0.0 <= train_fraction <= 1.0:
            raise ValueError("train_fraction deve estar entre 0 e 1.")

        folder = os.path.join(self.path, attribute)
        files = sorted(os.path.join(folder, file)
                       for file in os.listdir(folder)
                       if file.endswith('.jpg'))

        # One cut index for both slices. Slicing the tail with a negative
        # count would return the whole list whenever that count rounds to 0.
        cut = int(len(files) * train_fraction)
        training = files[:cut]
        test = files[cut:]
        return training, test