In [1]:
# import the necessary packages
import cv2
import numpy as np
import os
import re
import matplotlib.pyplot as plt
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import confusion_matrix
from sklearn.preprocessing import LabelEncoder

IMG_H_SIZE = 32
IMG_W_SIZE = 32



Función para leer imágenes
- En la función read_images, normalizamos cada imagen debido a que se encontró varias imágenes con un nivel de intensidad alto. Se utilizó esta ténica para obtener un mejor contraste en las imágenes con poco contraste debido al brillo.

In [2]:
def read_images(dirname):
    imgpath = dirname + os.sep
    images = []
    directories = []
    dircount = []
    prevRoot=''
    cant=0
    print("leyendo imagenes de ",imgpath)

    for root, dirnames, filenames in os.walk(imgpath):
        for filename in filenames:
            if re.search("\.(jpg|jpeg|png|bmp|tiff)$", filename):
                cant+=1
                filepath = os.path.join(root, filename)
                # image = plt.imread(filepath)
                image = cv2.imread(filepath)
                image = cv2.resize(image, (IMG_H_SIZE, IMG_W_SIZE))
                image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
                #Función de Normalización
                image = cv2.normalize(image, None, alpha=0,beta=200, norm_type=cv2.NORM_MINMAX)
                images.append(image)
                if prevRoot !=root:
                    prevRoot=root
                    directories.append(root)
                    dircount.append(cant)
                    cant=0
    dircount.append(cant)

    dircount = dircount[1:]
    dircount[0]=dircount[0]+1
    print('Directorios leidos:',len(directories))
    print("Imagenes en cada directorio", dircount)
    print('Suma Total de imagenes en subdirs:',sum(dircount))

    tipos=[]
    indice=0
    for directorio in directories:
        name = directorio.split(os.sep)
        print(indice , name[len(name)-1])
        tipos.append(name[len(name)-1])
        indice=indice+1

    labels=[]
    indice=0
    for cantidad in dircount:
        for i in range(cantidad):
            labels.append(tipos[indice])
        indice=indice+1

    X = np.array(images, dtype=np.uint8) #convierto de lista a numpy
    y = np.array(labels)
    return X, y

Creamos Sets de Entrenamiento y Test

In [3]:
X_train,y_train = read_images(os.path.join(os.getcwd(), 'CarneDataset/train'))
X_test,y_test = read_images(os.path.join(os.getcwd(), 'CarneDataset/test'))

leyendo imagenes de  /home/jhonatan/Documentos/UIDE/TRATAMIENT/Clasificacion_Imagenes/CarneDataset/train/
Directorios leidos: 7
Imagenes en cada directorio [64, 213, 62, 204, 105, 37, 948]
Suma Total de imagenes en subdirs: 1633
0 CLASS_08
1 CLASS_03
2 CLASS_02
3 CLASS_07
4 CLASS_04
5 CLASS_06
6 CLASS_05
leyendo imagenes de  /home/jhonatan/Documentos/UIDE/TRATAMIENT/Clasificacion_Imagenes/CarneDataset/test/
Directorios leidos: 8
Imagenes en cada directorio [28, 97, 48, 114, 45, 1, 19, 458]
Suma Total de imagenes en subdirs: 810
0 CLASS_08
1 CLASS_03
2 CLASS_02
3 CLASS_07
4 CLASS_04
5 CLASS_01
6 CLASS_06
7 CLASS_05


Reshape Imagenes

In [4]:
X_test = X_test.reshape((X_test.shape[0], IMG_H_SIZE*IMG_W_SIZE*3))
X_train = X_train.reshape(X_train.shape[0],IMG_H_SIZE*IMG_W_SIZE*3).astype( 'float32' )
X_train = X_train / 255.0

Codificamos las etiquetas como enteros

In [5]:
le = LabelEncoder()
y_train = le.fit_transform(y_train)
y_test = le.fit_transform(y_test)


Creamos el modelo de Decision Classifier y lo entrenamos

In [6]:
model = DecisionTreeClassifier()
model.fit(X_train, y_train)
predicted_test = model.predict(X_test)
predicted_train = model.predict(X_train)
correct_test = np.where(predicted_test==y_test)[0]
correct_train = np.where(predicted_train==y_train)[0]

Predicted Data

In [7]:
predicted_test = model.predict(X_test)
predicted_train = model.predict(X_train)
correct_test = np.where(predicted_test==y_test)[0]
correct_train = np.where(predicted_train==y_train)[0]

Matriz de Confusión con Test data

In [9]:
print ('Correct test values %s from %s'%(len(correct_test),len(y_test)))
print ('Porcentaje valores correctos: %.2f'%(len(correct_test)/len(y_test)*100))
confusion_matrix(y_test, predicted_test)

Correct test values 95 from 810
Porcentaje valores correctos: 11.73


array([[  0,   0,   1,   0,   0,   0,   0,   0],
       [  0,   0,  48,   0,   0,   0,   0,   0],
       [  0,   1,  95,   1,   0,   0,   0,   0],
       [  0,   0,  45,   0,   0,   0,   0,   0],
       [  0,   0, 455,   1,   0,   2,   0,   0],
       [  0,   0,  19,   0,   0,   0,   0,   0],
       [  0,   0, 114,   0,   0,   0,   0,   0],
       [  0,   0,  28,   0,   0,   0,   0,   0]])

Matriz de Confusión con Train data

In [10]:
print ('Correct train values %s from %s'%(len(correct_train),len(y_train)))
print ('Porcentaje valores correctos: %.2f'%(len(correct_train)/len(y_train)*100))
confusion_matrix(y_train, predicted_train)

Correct train values 1633 from 1633
Porcentaje valores correctos: 100.00


array([[ 62,   0,   0,   0,   0,   0,   0],
       [  0, 213,   0,   0,   0,   0,   0],
       [  0,   0, 105,   0,   0,   0,   0],
       [  0,   0,   0, 948,   0,   0,   0],
       [  0,   0,   0,   0,  37,   0,   0],
       [  0,   0,   0,   0,   0, 204,   0],
       [  0,   0,   0,   0,   0,   0,  64]])