#Projeto 1
#Objetivo:

Expor a ideia do projeto e como serão estruturadas as funcionalidades.

In [44]:
#GLOBAIS

import os
import glob
import cv2
import numpy as np
from sklearn.cluster import MiniBatchKMeans

# Global parameters: dataset locations and the image file extension.
# image_format is appended to the glob pattern used when scanning class folders.
train_folder = 'dataset/dataset_updated/training_set/'
test_folder = 'dataset/dataset_updated/validation_set/'
image_format = '.jpg'

In [45]:
#AUXILIARES

def getFolders(data_base):
    """Return the names of the immediate sub-directories of ``data_base``.

    Plain files are ignored. The resulting list is also printed, as before.

    Args:
        data_base: Path of the directory to scan, with or without a
            trailing path separator.

    Returns:
        list[str]: Sub-directory names (not full paths), in the order
        reported by ``os.listdir``.
    """
    # os.path.join instead of string concatenation, so a base path without
    # a trailing separator no longer makes every isdir() check fail.
    data_folders = [
        name
        for name in os.listdir(data_base)
        if os.path.isdir(os.path.join(data_base, name))
    ]
    print(data_folders)

    return data_folders

def load_images(folder):
    """Load every readable image located directly inside ``folder``.

    Args:
        folder: Directory whose entries are passed to ``cv2.imread``.

    Returns:
        list: The loaded images. Entries for which ``cv2.imread`` returns
        ``None`` are left out.
    """
    entry_paths = (os.path.join(folder, entry) for entry in os.listdir(folder))
    loaded = (cv2.imread(path) for path in entry_paths)
    return [img for img in loaded if img is not None]

def resize_all_images(images):
    """Resize every image to the mean width and height of the collection.

    Args:
        images: Sequence of image arrays whose ``shape`` starts with
            ``(height, width)``. Both 2-D (grayscale) and 3-D (color)
            arrays are accepted.

    Returns:
        list: One resized image per input, all with the same size. An empty
        input yields an empty list.
    """
    count = len(images)
    if count == 0:
        # Nothing to average; avoids a ZeroDivisionError below.
        return []

    # Only the first two shape entries are read, so grayscale images
    # (no channel axis) are handled as well.
    height = sum(im.shape[0] for im in images) // count
    width = sum(im.shape[1] for im in images) // count

    # cv2.resize takes the target size as (width, height).
    return [cv2.resize(im, (width, height)) for im in images]
        

In [46]:
#Tarefa 1: Pré-processamento

In [66]:
def choosePreProcess(im, param):
    """Apply the pre-processing step named by ``param['pre_process']``.

    Args:
        im: Image forwarded unchanged to the selected step.
        param: Options dictionary. ``'pre_process'`` selects the step and
            the whole dictionary is forwarded to it.

    Returns:
        The result of the selected step, or ``None`` when the name is not
        one of the known options.

    Raises:
        KeyError: If ``param`` has no ``'pre_process'`` entry.
    """
    # The lambdas delay the lookup of each helper until it is really
    # selected, so an unknown option never touches any of them.
    steps = {
        'Clahe': lambda: doClahe(im, param),
        'Eq_Hist': lambda: doEqualizazaoHistograma(im, param),
        'Quant_Linear': lambda: doQuantizacaoLinear(im, param),
        'Median': lambda: doMedian(im, param),
        'Gaussian': lambda: doGaussian(im, param),
        'Blur': lambda: doBlur(im, param),
        'Bilateral': lambda: doBilateral(im, param),
        'BrilhoContraste': lambda: doBrilhoContraste(im, param),
    }
    selected = steps.get(param['pre_process'])
    if selected is None:
        return None
    return selected()

        
def doBlur(im, params):
    """Smooth ``im`` with ``cv2.blur`` using a 5x5 kernel.

    ``params`` is part of the common pre-processing signature and is not read.
    """
    kernel_size = (5, 5)
    return cv2.blur(im, kernel_size)

def doMedian(im, params):
    """Smooth ``im`` with ``cv2.medianBlur`` using an aperture size of 5.

    ``params`` is part of the common pre-processing signature and is not read.
    """
    aperture = 5
    return cv2.medianBlur(im, aperture)

def doGaussian(im, params):
    """Smooth ``im`` with ``cv2.GaussianBlur`` using a 5x5 kernel.

    ``params`` is part of the common pre-processing signature and is not read.
    """
    kernel_size = (5, 5)
    sigma_x = 0  # presumably lets OpenCV derive sigma from the kernel size; see its docs
    return cv2.GaussianBlur(im, kernel_size, sigma_x)

def doBilateral(im, params):
    """Filter ``im`` with ``cv2.bilateralFilter`` using the arguments 9, 75, 75.

    ``params`` is part of the common pre-processing signature and is not read.
    """
    # Positional arguments of cv2.bilateralFilter after the image.
    diameter, sigma_color, sigma_space = 9, 75, 75
    return cv2.bilateralFilter(im, diameter, sigma_color, sigma_space)

def doClahe(im, params):
    """Apply CLAHE to the V channel of a BGR image.

    The image is converted to HSV, the V channel is equalized with a CLAHE
    object (clip limit 2.0, 7x7 tile grid) and the result is converted
    back to BGR.

    Args:
        im: BGR image.
        params: Part of the common pre-processing signature; not read.

    Returns:
        The equalized image in BGR order.
    """
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(7, 7))
    hsv = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
    hsv[:, :, 2] = clahe.apply(hsv[:, :, 2])
    # Return the converted result; the previous version returned the
    # untouched input and silently discarded the equalization.
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2BGR)

def doEqualizazaoHistograma(im, params):
    """Equalize the histogram of the V channel of a BGR image.

    Args:
        im: BGR image.
        params: Part of the common pre-processing signature; not read.

    Returns:
        The image converted back from HSV to BGR after ``cv2.equalizeHist``
        was applied to its V channel.
    """
    hsv_image = cv2.cvtColor(im, cv2.COLOR_BGR2HSV)
    value_channel = hsv_image[:, :, 2]
    hsv_image[:, :, 2] = cv2.equalizeHist(value_channel)
    return cv2.cvtColor(hsv_image, cv2.COLOR_HSV2BGR)

def doQuantizacaoLinear(im, params):
    """Quantize the colors of a BGR image with mini-batch k-means.

    The pixels are clustered in L*a*b* space and every pixel is replaced
    by the center of its cluster.

    Args:
        im: BGR image.
        params: Options dictionary; ``params['K_Value']`` is the number of
            clusters (output colors).

    Returns:
        The quantized image in BGR order, with the same height and width
        as ``im``.

    Raises:
        KeyError: If ``params`` has no ``'K_Value'`` entry.
    """
    h, w = im.shape[:2]

    lab = cv2.cvtColor(im, cv2.COLOR_BGR2LAB)
    # Flatten to one row per pixel using this image's own size; the previous
    # version read the shape of an unrelated module-level variable `image`.
    pixels = lab.reshape((h * w, 3))

    clt = MiniBatchKMeans(n_clusters=params['K_Value'])
    labels = clt.fit_predict(pixels)
    # Replace each pixel by the center of the cluster it was assigned to.
    quant = clt.cluster_centers_.astype("uint8")[labels]

    quant = quant.reshape((h, w, 3))
    return cv2.cvtColor(quant, cv2.COLOR_LAB2BGR)

def doBrilhoContraste(im, params):
    """Adjust brightness and then contrast of ``im`` via ``cv2.addWeighted``.

    Args:
        im: Input image.
        params: Options dictionary. ``params['brilho']`` is the brightness
            offset and ``params['contraste']`` the contrast amount; a value
            of 0 skips the corresponding stage.

    Returns:
        The adjusted image. When both values are 0 this is a copy of ``im``.

    Raises:
        KeyError: If ``'brilho'`` or ``'contraste'`` is missing.
    """
    brightness = params['brilho']
    if brightness == 0:
        buf = im.copy()
    else:
        # A positive offset raises the lower bound, a negative one lowers
        # the upper bound of the output range.
        shadow = brightness if brightness > 0 else 0
        highlight = 255 if brightness > 0 else 255 + brightness
        gain = (highlight - shadow) / 255
        buf = cv2.addWeighted(im, gain, im, 0, shadow)

    contrast = params['contraste']
    if contrast != 0:
        factor = 131 * (contrast + 127) / (127 * (131 - contrast))
        offset = 127 * (1 - factor)
        buf = cv2.addWeighted(buf, factor, buf, 0, offset)

    return buf



In [67]:
#TESTE
# Smoke test of the pre-processing stage: load the images of one folder,
# bring them to a common size, run the step selected in params_teste on each
# of them and display the first result in a window until a key is pressed.
images = load_images('dataset/dataset_updated/training_set/teste/')
images = resize_all_images(images)


# Options read by choosePreProcess and by the selected step.
params_teste = {
    'pre_process':'BrilhoContraste',
    'K_Value' : 10,
    'brilho': 64,
    'contraste' : 64
    
}
images_pre_processed = []
# NOTE(review): the loop variable `image` stays defined at module level after
# this loop; check for functions that read it as a global before renaming it.
for image in images:
    images_pre_processed.append(choosePreProcess(image,params_teste))
    
cv2.imshow("test", images_pre_processed[0])
cv2.waitKey(0)
cv2.destroyAllWindows()


In [14]:
#Tarefa 2: Extração de Característica 

In [None]:
from skimage.feature import local_binary_pattern

# At this point the dataset must already have gone through step 1
# (pre-processing).
def chooseFeats(im, params):
    """Extract the feature vector selected by ``params['feats']``.

    Args:
        im: Pre-processed image.
        params: Options dictionary. ``'feats'`` selects the extractor and
            the whole dictionary is forwarded to it.

    Returns:
        The feature vector produced by the selected extractor, or ``None``
        when the name is not a known option.

    Raises:
        KeyError: If ``params`` has no ``'feats'`` entry.
    """
    # Fixed: the condition was missing its colon and read the undefined
    # name `param` instead of the `params` argument.
    if params['feats'] == 'LBP':
        return doLBP(im, params)
    return None


def doLBP(im, params):
    """Compute a normalized Local Binary Pattern histogram of an image.

    The LBP settings are read from ``params`` with defaults, replacing the
    undefined names ``n_points``, ``radius`` and ``METHOD`` that the previous
    version relied on.

    Args:
        im: Image array. A 3-D array is treated as BGR and converted to
            grayscale first, because ``local_binary_pattern`` works on 2-D
            input.
        params: Options dictionary. Optional keys: ``'lbp_points'``
            (default 8), ``'lbp_radius'`` (default 1) and ``'lbp_method'``
            (default ``'default'``).

    Returns:
        numpy.ndarray: Float histogram with a fixed length for a given
        setting, scaled to sum to 1. The bin layout assumes integer pattern
        codes, so it is not meaningful for the ``'var'`` method.
    """
    n_points = params.get('lbp_points', 8)
    radius = params.get('lbp_radius', 1)
    method = params.get('lbp_method', 'default')

    if im.ndim == 3:
        im = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)

    lbp = local_binary_pattern(im, n_points, radius, method)

    # One bin per possible code, so every image yields a vector of the
    # same length and no code is dropped.
    if method == 'uniform':
        n_bins = n_points + 2
    elif method == 'nri_uniform':
        n_bins = n_points * (n_points - 1) + 3
    else:
        n_bins = 2 ** n_points

    hist, _ = np.histogram(lbp.ravel(), bins=n_bins, range=(0, n_bins))
    hist = hist.astype("float")
    total = hist.sum()
    if total > 0:  # guard against an empty image
        hist /= total

    return hist

#Executa em conjunto as duas primeiras etapas

In [None]:
def getPreProcessAndExtractFeatures(path_folder, params):
    """Build the feature matrix and label vector for a dataset folder.

    Every sub-directory of ``path_folder`` is treated as one class. Each
    file in it whose name ends with the module-level ``image_format`` is
    read, pre-processed with ``choosePreProcess`` and turned into a
    feature vector with ``chooseFeats``.

    Args:
        path_folder: Root directory holding one sub-directory per class.
        params: Options dictionary forwarded to ``choosePreProcess`` and
            ``chooseFeats``.

    Returns:
        tuple: ``(data, labels)`` as NumPy arrays. ``data`` holds one
        feature vector per image and ``labels`` the matching class folder
        names.
    """
    classes_folders = getFolders(path_folder)

    data = []
    labels = []
    for class_name in classes_folders:
        # os.path.join keeps the pattern valid whether or not path_folder
        # ends with a separator.
        pattern = os.path.join(path_folder, class_name, "*" + image_format)
        for image_path in glob.glob(pattern):
            im = cv2.imread(image_path)
            if im is None:
                # Unreadable file: skip it, as load_images does, instead
                # of crashing inside the pre-processing step.
                continue

            im = choosePreProcess(im, params)
            feats = chooseFeats(im, params)

            data.append(feats)
            labels.append(class_name)
    return np.asarray(data), np.asarray(labels)

# Build the training base. The previous call targeted `getBase`, which is not
# defined in this notebook; the function defined above is the one that
# performs steps 1 and 2 together.
# NOTE(review): reuses params_teste plus the feature selector -- confirm that
# these are the intended settings for the real experiment.
params_base = dict(params_teste, feats='LBP')
X_base, y_base = getPreProcessAndExtractFeatures(train_folder, params_base)
print(X_base.shape)
print(y_base.shape)



['2', '1', '0']


#Tarefa 3: Seleção de Característica

In [None]:
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LogisticRegression


# At this point the dataset must already have gone through steps 1 and 2.
def chooseBestFeats(params):
    """Run the feature-selection technique named by ``params['selection']``.

    Args:
        params: Options dictionary; ``'selection'`` names the technique.

    Returns:
        Whatever the selected technique returns, or ``None`` when the name
        is not a known option.

    Raises:
        KeyError: If ``params`` has no ``'selection'`` entry.
    """
    # Fixed: the condition was missing its colon and read the undefined
    # name `param` instead of the `params` argument.
    if params['selection'] == 'rfe':
        return doRFE()
    return None

def evalBestFeats():
    """Evaluate one feature-selection configuration (not implemented yet).

    Planned behavior:
      1. train with cross-validation;
      2. return the resulting score.

    Raises:
        NotImplementedError: Always, until the evaluation is written. The
            previous version had no body at all, which is a syntax error.
    """
    raise NotImplementedError("evalBestFeats is still a placeholder")

# Open design problem: the pre-processing has to be applied to the whole
# dataset, but until the best option has been chosen the changes must not be
# made permanent -- the original dataset has to stay untouched.
def doRFE(X_train=None, y_train=None, step=1, cv=5, min_features_to_select=100):
    """Select features with cross-validated recursive feature elimination.

    Args:
        X_train: Feature matrix. When omitted, a module-level ``X_train``
            is used if one exists (the previous version always read it
            from there).
        y_train: Labels; same fallback rule as ``X_train``.
        step: Forwarded to ``RFECV``.
        cv: Forwarded to ``RFECV``.
        min_features_to_select: Forwarded to ``RFECV``.

    Returns:
        ``X_train`` reduced to the selected features
        (``rfecv.transform(X_train)``).

    Raises:
        ValueError: If no training data is available.
    """
    # Backward-compatible fallback to notebook-level variables.
    if X_train is None:
        X_train = globals().get('X_train')
    if y_train is None:
        y_train = globals().get('y_train')
    if X_train is None or y_train is None:
        raise ValueError("doRFE needs X_train and y_train")

    lr = LogisticRegression(random_state=42, solver='liblinear')
    rfecv = RFECV(estimator=lr,
                  step=step,
                  cv=cv,
                  min_features_to_select=min_features_to_select,
                  scoring='accuracy')
    rfecv.fit(X_train, y_train)

    # TODO: search over estimator / step / min_features_to_select.
    #   - With the sklearn search tools, an estimator has to be written
    #     that takes the place of the eval function (evalBestFeats).
    #   - Alternatively write our own grid loop, receiving the eval
    #     function and the parameter dictionary as arguments.
    #   - RandomSearch or GridSearch?

    # Feature vectors filtered by the fitted selector.
    return rfecv.transform(X_train)

#Tarefa 4: Seleção de classificador

In [None]:
import cv2
from sklearn.feature_selection import RFECV
from sklearn.linear_model import LogisticRegression


# At this point the dataset must already have gone through steps 1, 2 and 3.
def chooseBestClassifier(params):
    """Run the classifier search named by ``params['classifier']``.

    Args:
        params: Options dictionary; ``'classifier'`` names the classifier.

    Returns:
        Whatever the selected search returns, or ``None`` when the name is
        not a known option.

    Raises:
        KeyError: If ``params`` has no ``'classifier'`` entry.
    """
    # Fixed: the condition was missing its colon and read the undefined
    # name `param` instead of the `params` argument.
    if params['classifier'] == 'randomforest':
        return doRandomForest()
    return None


def doRandomForest(train_features=None, train_labels=None):
    """Tune a random forest with a randomized hyper-parameter search.

    Args:
        train_features: Feature matrix. When omitted, a module-level
            ``train_features`` is used if one exists (the previous version
            always read it from there).
        train_labels: Labels; same fallback rule as ``train_features``.

    Returns:
        The fitted ``RandomizedSearchCV`` object; its ``best_params_`` and
        ``best_score_`` attributes hold the outcome of the search.

    Raises:
        ValueError: If no training data is available.
    """
    # Backward-compatible fallback to notebook-level variables.
    if train_features is None:
        train_features = globals().get('train_features')
    if train_labels is None:
        train_labels = globals().get('train_labels')
    if train_features is None or train_labels is None:
        raise ValueError("doRandomForest needs train_features and train_labels")

    # Imported here because the notebook cell never imported these names.
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.model_selection import RandomizedSearchCV

    # Grid taken from the documentation example.
    # Number of trees in the forest.
    n_estimators = [int(x) for x in np.linspace(start=200, stop=2000, num=10)]
    # Number of features considered at every split. 'auto' is no longer
    # accepted by RandomForestClassifier, so 'log2' is offered instead.
    max_features = ['sqrt', 'log2']
    # Maximum number of levels in a tree (None = unlimited).
    max_depth = [int(x) for x in np.linspace(10, 110, num=11)]
    max_depth.append(None)
    # Minimum number of samples required to split a node.
    min_samples_split = [2, 5, 10]
    # Minimum number of samples required at each leaf node.
    min_samples_leaf = [1, 2, 4]
    # Whether each tree is trained on a bootstrap sample.
    bootstrap = [True, False]

    random_grid = {'n_estimators': n_estimators,
                   'max_features': max_features,
                   'max_depth': max_depth,
                   'min_samples_split': min_samples_split,
                   'min_samples_leaf': min_samples_leaf,
                   'bootstrap': bootstrap}

    rf = RandomForestClassifier()
    rf_random = RandomizedSearchCV(estimator=rf,
                                   param_distributions=random_grid,
                                   n_iter=100,
                                   cv=3,
                                   verbose=2,
                                   random_state=42,
                                   n_jobs=-1)
    rf_random.fit(train_features, train_labels)

    # TODO: run the final experiment and report its metrics.
    return rf_random




#Em comum: otimizador

In [None]:
from hyperopt import fmin, tpe, hp, STATUS_OK, Trials
from hyperopt import hp
import random

# Search space for the optimizer: one of three classifier configurations is
# picked per trial, each carrying its own hyper-parameters. The first string
# argument of every hp.* call is the label hyperopt uses for that parameter.
param_space = hp.choice('classifier_type', [
    # Naive Bayes: no hyper-parameters.
    {
        'type': 'naive_bayes',
    },
    # SVM: C plus a nested choice of kernel; only the RBF entry has a width.
    {
        'type': 'svm',
        'C': hp.lognormal('svm_C', 0, 1),
        'kernel': hp.choice('svm_kernel', [
            {'ktype': 'linear'},
            {'ktype': 'RBF', 'width': hp.lognormal('svm_rbf_width', 0, 1)},
            ]),
    },
    # Decision tree: split criterion, optional depth limit, minimum split size.
    # NOTE(review): the sampled min_samples_split values appear as floats
    # (e.g. 6.0) in the captured output; they will presumably need to be cast
    # to int and clamped before reaching a real estimator -- confirm.
    {
        'type': 'dtree',
        'criterion': hp.choice('dtree_criterion', ['gini', 'entropy']),
        'max_depth': hp.choice('dtree_max_depth',
            [None, hp.qlognormal('dtree_max_depth_int', 3, 1, 1)]),
        'min_samples_split': hp.qlognormal('dtree_min_samples_split', 2, 1, 1),
    },
    ])


def acc_model(params):
    """Placeholder evaluation: print ``params`` and return a random score.

    Returns:
        float: A value drawn with ``random.uniform(0.8, 1.0)``; no model is
        trained.
    """
    print(params)
    simulated_accuracy = random.uniform(0.8, 1.0)
    return simulated_accuracy

def fitness(params):
    """Objective passed to ``fmin``: the negated score of ``acc_model``.

    Returns:
        dict: ``'loss'`` holds minus the accuracy, so minimizing the loss
        maximizes the accuracy; ``'status'`` is ``STATUS_OK``.
    """
    accuracy = acc_model(params)
    outcome = {'loss': -accuracy, 'status': STATUS_OK}
    return outcome

# Run the optimizer: 10 evaluations of `fitness` over `param_space` using the
# TPE suggestion algorithm. `trials` is passed in so fmin can record the
# evaluations; `best` receives fmin's return value.
trials = Trials()
best = fmin(fitness, 
            param_space, 
            algo=tpe.suggest, 
            max_evals=10, 
            trials=trials)

#Predict
#...

{'criterion': 'gini', 'max_depth': None, 'min_samples_split': 6.0, 'type': 'dtree'}
{'criterion': 'entropy', 'max_depth': None, 'min_samples_split': 3.0, 'type': 'dtree'}
{'C': 0.3860507163883026, 'kernel': {'ktype': 'linear'}, 'type': 'svm'}
{'C': 7.611796230284283, 'kernel': {'ktype': 'linear'}, 'type': 'svm'}
{'type': 'naive_bayes'}
{'type': 'naive_bayes'}
{'criterion': 'entropy', 'max_depth': None, 'min_samples_split': 9.0, 'type': 'dtree'}
{'C': 2.127415403926089, 'kernel': {'ktype': 'linear'}, 'type': 'svm'}
{'C': 0.7091905442547609, 'kernel': {'ktype': 'RBF', 'width': 0.5511719314784943}, 'type': 'svm'}
{'criterion': 'gini', 'max_depth': None, 'min_samples_split': 3.0, 'type': 'dtree'}
100%|██████████| 10/10 [00:00<00:00, 51.44it/s, best loss: -0.987312099434029]
