# Eigenlimones

## Primera parte

Generar todas las gráficas de eigenvalores para las imágenes de la dataset

In [1]:
# Importar las librerías necesarias
import cv2 as cv
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import KNeighborsClassifier
import os
import glob
import pandas as pd

In [None]:
# Establecemos el tamaño que tendrán las gráficas
plt.rcParams['figure.figsize'] = (10,8)

Para todas las imágenes de la base de datos se deben obtener los eigenvalores y su gráfica.

Estos son los pasos a seguir

1. Leer todas las imágenes
2. Hacer cuadradas las imágenes y reducir el tamaño
3. Convertir a escala de grises
4. Estandarizar todos los valores
5. Obtener los eigenvalores y descartar las partes imaginarias
6. Generar la gráfica
7. Guardar la gráfica

Primero se escriben todas las funciones para obtener los datos, es decir, las funciones para leer, convertir color, cambiar tamaño, entre otras.

In [2]:
def estandarizar(matriz):
    """
    Devuelve los valores estandarizados de una imagen a escala de grises
    El valor para estadarizar es de 255 (8-bits)
    """
    return(matriz/255)

In [3]:
def imresize(image, size=(200,200)):
    """
    Cambia el tamaño de una imagen
    """
    
    return(cv.resize(image,size))

In [4]:
def togray(image):
    i = cv.cvtColor(image, cv.COLOR_RGB2GRAY) # Convertir de color a RGB
    
    return(i)

Agregar una parte que guarde los eigenvalores en una lista, para después añadirlos a la dataset.

In [5]:
def geteigenvals(image):
    """
    Obtiene los eigenvalores y eigenvectores de una imagen (matriz)
    SOLO regresa los eigenvalores
    """
    
    evals, evecs = np.linalg.eig(image)
    
    return(evals)

In [6]:
def getreal(eigenvalues):
    """
    Toma solo la parte de real de un conjunto de números complejos
    """
    
    a = eigenvalues
    
    return(a.real)

In [7]:
def make_graph(val1,name):
    """
    Ingresa una imagen y el nombre. Devuelve la gráfica 
    de eigenvalores de una imagen a una escala de grises
    """
    plt.clf() # Para borrar la gráfica anterior
    plt.plot(val1,'g')
    plt.xlabel("# of eigenvalues")
    plt.ylabel('Value')
    plt.title(name)

A partir de aquí inicia el código principal del programa

In [None]:
# Declarar la dirección donde se encuentran las imágenes de limones
path = r'C:\Users\theki\Documents\Groundtruth\manually segmented'

In [None]:
### Proceso para una sola imagen

img = cv.imread(path + '/si0.png')
img = imresize(img)
gray = togray(img)
standarized = estandarizar(gray)
ev = geteigenvals(standarized)
ev = getreal(ev)

#Graficar
make_graph(ev,'grafica 1')
# plt.savefig('prueba2.jpg')
plt.show()

In [None]:
# save_path = r'C:\Users\theki\Documents\imgs\eigengraphs/'

test_path = r'C:\Users\theki\Documents\miniset' # Para hacer pruebas en un set pequeño

In [None]:
# for file in glob.glob(test_path + '/*.png'): # para cuando se hagan pruebas con el miniset
for file in glob.glob(path + '/*.png'): # Para cuando se quieren obtener todas las gráficas
    
#     graph_name = file[33:] # Nombre que llevará la gráfica, el 33 es para separar la dirección de la carpeta del nombre
    graph_name = file[56:] # Nombre que llevará la gráfica, el 56 es para separar la dirección de la carpeta del nombre
    #Paso 1
    img = cv.imread(file)  
    #Paso 2
    img = imresize(img)
    #Paso 3
    gray = togray(img)
    #Paso 4
    standarized = estandarizar(gray)
    #Paso 5
    ev = geteigenvals(standarized)
    ev = getreal(ev)
    #Paso 6
    make_graph(ev,graph_name)
    #Paso 7
    plt.savefig(graph_name + '.jpg')
    

## Segunda parte

Ya que se tienen los eigenvalores y sus gráficas, la información resultante se puede usar para generar una base de datos. Dicha base de datos nueva se puede usar para crear modelos clasificadores como kNN, mapas auto organizados o MLP.

La utilidad de esta información debe evaluarse dependiendo los resultados de cada modelo clasificador. Dicho esto, si los resultados son prometedores se deben encontrar las áreas de oportunidad del método.

También se debe examinar por qué en ciertas imágenes resultan en una linea recta (los eigenvalores). Esta parte puede deberse a la forma de obtener los valores o por la herramienta (Matplotlib) usada para graficar.

In [8]:
# Para generar las etiquetas de las imágenes
path_healthy = r'C:\Users\theki\Documents\defect_detection\dataset1\healthy'
path_faulty = r'C:\Users\theki\Documents\defect_detection\dataset1\faulty'

In [25]:
# file_name = [] #lista vacía para guardar el nombre de las imágenes

h_eigens = [] # Lista vacía para guardar los eigenvalores

def gen_labels(path):
    file_name = [] # Lista vacía para guardar los nombres de las imágenes
#     h_eigens = [] # Lista vacía para guardar los eigenvalores
    
    ns = len(path) + 1 # Se toma la longitud total (en caraceteres) de la dirección y se le suma 1 para incluir la /
    
    for file in glob.glob(path+'/*.png'):
        
        img = cv.imread(file)  
        #Paso 2
        img = imresize(img)
        #Paso 3
        gray = togray(img)
        #Paso 4
        standarized = estandarizar(gray)
        #Paso 5
        ev = geteigenvals(standarized)
        ev = getreal(ev)
        
        h_eigens.append(ev)
        file_name.append(file[ns:])
        
    return(file_name)


In [30]:
h_eigens = []

x = gen_labels(path_healthy) # Genrar la lista de nombres de los limones sanos

# Generar las etiquetas para con el texto 'healthy'
labels = []
for i in range(len(x)):
    labels.append('healthy')

In [33]:
# Ligero procesamiento de los datos

label_array = np.asarray(labels)
eig_array = np.asarray(h_eigens) # convertir de lista a array de numpy
file_array = np.asarray(x) # convertir de lista a array de numpy
file_array.shape = (460,1) # reasignar las dimensiones del array

In [35]:
df_names = pd.DataFrame(file_array,columns=['imagen'])
df_eigen = pd.DataFrame(eig_array)
df_label = pd.DataFrame(label_array)

df_healthy = pd.concat([df_names,df_eigen,df_label], axis=1)

In [36]:
df_healthy.head(-1)

Unnamed: 0,imagen,0,1,2,3,4,5,6,7,8,...,191,192,193,194,195,196,197,198,199,0.1
0,si0.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,healthy
1,si1.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,healthy
2,si100.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,healthy
3,si1005.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,healthy
4,si1006.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,healthy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454,si963.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,healthy
455,si966.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,healthy
456,si97.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,healthy
457,si979.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,healthy


Generamos dos dataframes. El primero tiene solo los nombres de las imágenes, el segundo tiene todos los eigenvalores de dicha imagen.

Al final, se combinan estos dataframes para tener un conjunto de datos con el nombre de la imagen y sus valores.

Este proceso se repite para el set de limones sanos y para los limones defectuosos.

In [38]:
# Código viejo, no prestar mucha atención

# frame1_healthy = pd.DataFrame(file_array,columns=['imagen']) # primer dataframe donde se guarda el nombre de la imagen
# frame1.head(-1) # por si quieren compobrar la información
# frame2_healthy = pd.DataFrame(eig_array) # segundo dataframe donde se guardan los eigenvalores
# frame2.head(-1) # por si quieren comprobar la información
# frame3_healthy = pd.concat([frame1_healthy,frame2_healthy],axis=1) # "fusionamos" los dataframes de forma column-wise
# frame3_healthy.head(-1) # desplegamos el dataframe resultante

In [39]:
# frame3_healthy.to_csv('healthy_evals.csv') # por si se desea guardar el dataframe como archivo .csv

In [40]:
# Ahora generamos las etiquetas para los limones defectuosos
h_eigens = []

y = gen_labels(path_faulty)

labels2 = [] # Lista vacía para guardar etiquetas

for i in range(len(y)):
    labels2.append('faulty')

In [41]:
# Mismo proceso que se hizo para los limones sanos
label_array = np.asarray(labels2)
eig_array = np.asarray(h_eigens) # de lista a np.array
file_array = np.asarray(y) # de lista a np.array
file_array.shape = (453,1) # cambio de dimensiones


df_names = pd.DataFrame(file_array,columns=['imagen'])
df_eigen = pd.DataFrame(eig_array)
df_label = pd.DataFrame(label_array)

df_faulty = pd.concat([df_names,df_eigen,df_label], axis=1)



# Código viejo, no prestar tanta atención
# frame1_faulty = pd.DataFrame(file_array,columns=['imagen']) # primer dataframe
# frame1.head(-1)
# frame2_faulty = pd.DataFrame(eig_array) # segundo dataframe
# frame2.head(-1)
# frame3_faulty = pd.concat([frame1_faulty,frame2_faulty],axis=1) # fusión de dataframes
# frame3_faulty.head(-1) # desplegar resultado


In [42]:
df_faulty.head(-1)

Unnamed: 0,imagen,0,1,2,3,4,5,6,7,8,...,191,192,193,194,195,196,197,198,199,0.1
0,si1000.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,faulty
1,si1001.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,faulty
2,si1002.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,faulty
3,si1003.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,faulty
4,si1004.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,faulty
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
447,si994.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,faulty
448,si995.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,faulty
449,si996.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,faulty
450,si997.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,faulty


In [142]:
# Para guardar el dataframe como archivo .csv
# frame3_faulty.to_csv('faulty_evals.csv')

Para fusionar todo en un solo dataframe

In [43]:
lemon_frame = pd.concat(
    [df_healthy,df_faulty],
    axis=0
)

lemon_frame.head(-1)

Unnamed: 0,imagen,0,1,2,3,4,5,6,7,8,...,191,192,193,194,195,196,197,198,199,0.1
0,si0.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,healthy
1,si1.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,healthy
2,si100.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,healthy
3,si1005.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,healthy
4,si1006.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,healthy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
447,si994.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,faulty
448,si995.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,faulty
449,si996.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,faulty
450,si997.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,faulty


In [143]:
# final_frame = pd.concat(
#     [frame3_healthy,frame3_faulty],
#     axis=0 # para fusionar de forma row-wise
# )

# final_frame.head(-1)

Unnamed: 0,imagen,0,1,2,3,4,5,6,7,8,...,190,191,192,193,194,195,196,197,198,199
0,si0.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,si1.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,si100.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,si1005.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,si1006.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
447,si994.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
448,si995.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
449,si996.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
450,si997.png,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [44]:
lemon_frame.to_csv('eigenvalues_lemons.csv') # Para guardar en .csv

In [45]:
lemon_frame.to_excel('eigenlimones.xlsx') # Para guardar todo en .xlsx o archivo de excel

Falta agregar las etiquetas generadas a los dataframes, de forma que el dataframe tenga la siguiente estructura:

|Imagen|Eigenvalores(1-200)|Etiqueta|
---|---|---
|Imagen1|Eigenvalores|healthy|
|Imagen2|Eigenvalores|faulty|

## Tercera parte

Usar la información para crear modelos clasificadores

Igual y esta parte la hago en Google Colab