# Extração de features e organização em tabela - by Lucas de Jesus

Aqui extraímos as features de imagens de múltiplos datasets utilizando a ResNet50: <br>

### Fruitnet

Este dataset é composto por 6 tipos de frutas. Assim, de modo a deixar nosso algoritmo mais genérico (poder tentar identificar outros tipos de fruta com base nas características adquiridas a partir destas 6), uma 7ª categoria foi criada:
<ul>
    <li>Maçã</li>
    <li>Banana</li>
    <li>Goiaba</li>
    <li>Lima</li>
    <li>Laranja</li>
    <li>Romã</li>
    <li>Outras</li>
</ul>

Outra observação é que este dataset já é formado por imagens às quais foi aplicado <b> data augmentation </b>, o que, portanto, torna esta operação desnecessária.

### Importação das bibliotecas necessárias

In [1]:
import argparse, os, logging
import time
import numpy as np
import pandas as pd
import cv2
from PIL import Image
from pathlib import Path


from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing import image


# Print iterations progress - auxiliar function
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█', printEnd = "\r"):
    """
    Print a one-line terminal progress bar; call it once per loop iteration.

    @params:
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
        printEnd    - Optional  : end character (e.g. "\r", "\r\n") (Str)

    A total of 0 means there is nothing to process, so the bar is drawn as
    100% complete instead of dividing by zero.
    """
    if total:
        fraction = iteration / float(total)
        filledLength = int(length * iteration // total)
    else:
        # Nothing to iterate over: show a full bar rather than raising ZeroDivisionError
        fraction = 1.0
        filledLength = length

    percent = ("{0:." + str(decimals) + "f}").format(100 * fraction)
    bar = fill * filledLength + '-' * (length - filledLength)
    # The leading "\r" plus end=printEnd makes successive calls redraw the same line
    print(f'\r{prefix} |{bar}| {percent}% {suffix}', end = printEnd)
    # Print New Line on Complete
    if iteration == total:
        print()


### Obtém a lista dos diretórios contendo imagens para o dataset Fruitnet

In [2]:
import os, argparse

# Validation helper: accept a path only if it points to an existing directory
def dir_path(path):
    """Return ``path`` unchanged if it is a directory.

    Raises:
        argparse.ArgumentTypeError: if ``path`` is not an existing directory.
    """
    if not os.path.isdir(path):
        raise argparse.ArgumentTypeError(f"readable_dir:{path} is not a valid path")
    return path

In [3]:
# Locations of the Fruitnet images on disk
diretorio_raiz = dir_path("C:/Users/lucas/OneDrive/Documentos/F&V/fruitnet/Processed Images_Fruits/")
subdiretorios = ["Good Quality_Fruits/", "Bad Quality_Fruits/", "Mixed Quality_Fruits/"]

nome_frutas = ["Apple", "Banana", "Guava", "Lime", "Orange", "Pomegranate"]
# Suffix appended to the fruit name inside each quality sub-directory (same order as subdiretorios)
qualidade_frutas = ["_Good", "_Bad", ""]

# All Fruitnet directories, ordered by quality sub-directory and then by fruit
lista_diretorios_fruitnet = []
for diretorio, sufixo in zip(subdiretorios, qualidade_frutas):
    for fruta in nome_frutas:
        diretorio_atual = f"{diretorio_raiz}{diretorio}{fruta}{sufixo}"
        lista_diretorios_fruitnet.append(diretorio_atual)
        print(diretorio_atual)

C:/Users/lucas/OneDrive/Documentos/F&V/fruitnet/Processed Images_Fruits/Good Quality_Fruits/Apple_Good
C:/Users/lucas/OneDrive/Documentos/F&V/fruitnet/Processed Images_Fruits/Good Quality_Fruits/Banana_Good
C:/Users/lucas/OneDrive/Documentos/F&V/fruitnet/Processed Images_Fruits/Good Quality_Fruits/Guava_Good
C:/Users/lucas/OneDrive/Documentos/F&V/fruitnet/Processed Images_Fruits/Good Quality_Fruits/Lime_Good
C:/Users/lucas/OneDrive/Documentos/F&V/fruitnet/Processed Images_Fruits/Good Quality_Fruits/Orange_Good
C:/Users/lucas/OneDrive/Documentos/F&V/fruitnet/Processed Images_Fruits/Good Quality_Fruits/Pomegranate_Good
C:/Users/lucas/OneDrive/Documentos/F&V/fruitnet/Processed Images_Fruits/Bad Quality_Fruits/Apple_Bad
C:/Users/lucas/OneDrive/Documentos/F&V/fruitnet/Processed Images_Fruits/Bad Quality_Fruits/Banana_Bad
C:/Users/lucas/OneDrive/Documentos/F&V/fruitnet/Processed Images_Fruits/Bad Quality_Fruits/Guava_Bad
C:/Users/lucas/OneDrive/Documentos/F&V/fruitnet/Processed Images_Fruits

## Extrai as features das imagens contidas no dataset

Configura o modelo

In [5]:
# Base network: ResNet50 loaded with the 'imagenet' weights
base_model = ResNet50(weights='imagenet')
    # Alternative backbones, currently not used:
    # InceptionV3(weights='imagenet')
    # InceptionResNetV2(weights='imagenet')

# Feature extractor: takes the base network's input and outputs its 'avg_pool' layer
# (get_features reshapes each prediction of this model to 2048 values)
model = Model(inputs=base_model.input, outputs=base_model.get_layer('avg_pool').output)


In [6]:
def img_load(diretorio):
    """Load every readable image in ``diretorio``, resized to 224x224.

    Entries that cv2.imread cannot decode (it returns None for them) are
    skipped instead of aborting the whole load.

    Args:
        diretorio: path of the directory whose entries are read.

    Returns:
        (img_array, id_array): a numpy array with the resized images and a
        numpy array with the matching ids, each id being
        ``diretorio + "/" + file name``.

    NOTE(review): cv2.imread yields BGR channel order, while the Keras
    ResNet50 preprocess_input used in get_features is documented as taking
    RGB input - confirm whether a BGR->RGB conversion is wanted here.
    """
    lsize = 224 # ResNet50 input size
    img_array = []
    id_array = []
    lista_dir = os.listdir(diretorio)
    total = len(lista_dir)

    # feedback messages
    print("\nCarregando imagens... ")

    # initial progress-bar call (0%); skipped for an empty directory, where
    # there is no progress to report and the bar would divide by zero
    if total:
        printProgressBar(0, total, prefix = 'Progresso:', suffix = 'Completo', length = 50)

    for index, nome in enumerate(lista_dir, start=1):
        img = cv2.imread(os.path.join(diretorio, nome))

        # check BEFORE resizing: cv2.resize raises on a None input
        if img is not None:
            img_array.append(cv2.resize(img, (lsize, lsize)))
            id_array.append(diretorio + "/"+nome)

        # progress-bar update
        printProgressBar(index, total, prefix = 'Progresso:', suffix = 'Completo', length = 50)
    print("Done\n")
    return (np.array(img_array), np.array(id_array))

# Extract the features of each image and store them row by row in a feature matrix
def get_features(img_array):
    """Run every image through the module-level ``model`` and collect the outputs.

    Args:
        img_array: numpy array of images, indexed by image along its first axis.

    Returns:
        numpy array of shape (number of images, 2048); row i holds the
        prediction for image i reshaped to 2048 values.

    NOTE(review): img_load reads images with cv2.imread (BGR order), while the
    Keras ResNet50 preprocess_input is documented as taking RGB input -
    confirm the channel order is intended.
    """
    total = len(img_array)
    features_array = np.zeros((img_array.shape[0],2048))

    # feedback messages
    print("Extraindo features... ")

    # initial progress-bar call (0%); skipped for empty input, where the bar
    # would divide by zero
    if total:
        printProgressBar(0, total, prefix = 'Prog:', suffix = 'Comp', length = 25)

    for i, img in enumerate(img_array):
        x = image.img_to_array(img)
        x = np.expand_dims(x, axis=0)  # add the batch axis expected by predict
        x = preprocess_input(x)
        # verbose=0: keep Keras' own per-call progress output from clobbering our bar
        features = model.predict(x, verbose=0)
        features_array[i,:] = features.reshape(2048,)

        # progress-bar update
        printProgressBar(i+1, total, prefix = 'Prog:', suffix = 'Comp', length = 25)
    print("Done.")
    return features_array


In [8]:
# Build the DataFrame holding the features of every image.
# Per-directory tables are collected first and concatenated once at the end,
# so the accumulated data is not re-copied on every iteration.
tabelas = []

for index, diretorio in enumerate(lista_diretorios_fruitnet):

    # load the images and their ids, then extract the features
    (img_array, img_id) = img_load(diretorio)
    data_features = get_features(img_array)

    # put the features and the matching ids side by side in one DataFrame
    tmp = pd.DataFrame(data_features)
    tmp["ImageId"] = img_id
    # the directory list holds one entry per fruit for each quality folder,
    # in order, so integer division gives: 0 = Good, 1 = Bad, 2 = Mixed
    tmp["Qualidade"] = index // len(nome_frutas)

    tabelas.append(tmp)

# NOTE: each per-directory table keeps its own 0..n-1 row labels, so labels
# repeat across directories in final_data.
final_data = pd.concat(tabelas) if tabelas else pd.DataFrame()


Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 








Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 








Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 








Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 








Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 








Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 






































Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 








Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 








Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 








Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 








Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 








Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 








Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 


Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 


Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 


Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 


Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 
Prog: |█████████████████████████| 100.0% Comp
Done.

Carregando imagens... 
Progresso: |██████████████████████████████████████████████████| 100.0% Completo
Done

Extraindo features... 


Prog: |█████████████████████████| 100.0% Comp
Done.


In [9]:
# Display the "Qualidade" label column of the combined table
final_data["Qualidade"]

0      0
1      0
2      0
3      0
4      0
      ..
120    2
121    2
122    2
123    2
124    2
Name: Qualidade, Length: 19526, dtype: int64

In [10]:
# Write the combined table to "data.csv" inside diretorio_raiz.
# index = None is passed to to_csv - presumably so the row labels are left out of the file; confirm.
final_data.to_csv(diretorio_raiz+"data.csv", index = None)

In [15]:
# Display the last rows of the combined table
final_data.tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2040,2041,2042,2043,2044,2045,2046,2047,ImageId,Qualidade
120,3.514952,0.589922,0.420647,0.710394,0.189993,0.018829,0.0,0.148587,0.411278,0.213874,...,0.098636,0.008464,0.702275,0.0,0.014291,2.631745,0.61461,0.15249,C:/Users/lucas/OneDrive/Documentos/F&V/fruitne...,2
121,3.706188,0.360693,0.225129,0.61353,0.241168,0.465065,0.0,0.131877,0.321608,0.095309,...,0.369143,0.022112,0.706179,0.0,0.1768,1.7303,0.626712,0.291197,C:/Users/lucas/OneDrive/Documentos/F&V/fruitne...,2
122,2.763747,0.467841,0.090997,1.221117,0.240596,0.448695,0.078005,0.093327,0.262737,0.143238,...,0.132788,0.00293,0.441962,0.0,0.008939,1.877034,0.191286,0.126135,C:/Users/lucas/OneDrive/Documentos/F&V/fruitne...,2
123,3.854888,0.298927,0.209296,0.975982,0.480537,0.632171,0.463008,0.218467,0.321193,0.059494,...,0.150068,0.117594,0.767495,0.0,0.000228,2.594018,1.467491,0.077223,C:/Users/lucas/OneDrive/Documentos/F&V/fruitne...,2
124,4.78939,0.265865,0.152963,0.907478,0.362052,0.090793,0.008525,0.035152,0.595448,0.0,...,0.888887,0.0,0.212851,0.001986,0.134471,2.27156,0.382053,0.422186,C:/Users/lucas/OneDrive/Documentos/F&V/fruitne...,2
