## Clasificacion de las imagenes

In [None]:
import os
import glob
import datetime
import tarfile
import urllib.request

train_dir = "dataset"

if not os.path.exists(train_dir):
    os.makedirs(train_dir)

# get the class label limit
class_limit = 70

# take all the images from the dataset
image_paths = glob.glob(train_dir + "\\train\\*.jpg")

# variables to keep track
label = 0
i = 0
j = 70

# flower17 class names
class_names = ['apple', 'bat', 'beetle', 'bell', 'bird', 'Bone', 'bottle', 'brick', 
               'butterfly', 'camel', 'car', 'carriage', 'cattle', 'cellular_phone', 
               'chicken', 'children', 'chopper', 'classic', 'Comma', 'crown', 'cup', 
               'deer', 'device0', 'device1', 'device2', 'device3', 'device4', 'device5', 
               'device6', 'device7', 'device8', 'device9', 'dog', 'elephant', 'face', 
               'fish', 'flatfish', 'fly', 'fork', 'fountain', 'frog', 'Glas', 'guitar', 
               'hammer', 'hat', 'HCircle', 'Heart', 'horse', 'horseshoe', 'jar', 'key', 
               'lizzard', 'lmfish', 'Misk', 'octopus', 'pencil', 'personal_car', 'pocket', 
               'rat', 'ray', 'sea_snake', 'shoe', 'spoon', 'spring', 'stef', 'teddy', 'tree', 
               'truck', 'turtle', 'watch']

# loop over the class labels
for x in range(1, class_limit+1):
    # create a folder for that class
    os.makedirs(train_dir + "\\train\\" + class_names[label])

    # get the current path
    cur_path = train_dir + "\\train\\" + class_names[label] + "\\"

    # loop over the images in the dataset
    for index, image_path in enumerate(image_paths[i:j], start=1):
        original_path   = image_path
        image_path      = image_path.split("\\")
        image_file_name = str(index) + ".png"
        os.rename(original_path, cur_path + image_file_name)

    i += 16
    j += 16
    label += 1

In [5]:
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import mahotas
import cv2
import os
import h5py


images_per_class = 16
fixed_size = tuple((500, 500))
train_path = "dataset/train"
h5_data = 'output/data.h5'
h5_labels = 'output/labels.h5'
bins = 8

# feature-descriptor-1: Hu Moments
def fd_hu_moments(image):
    image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    feature = cv2.HuMoments(cv2.moments(image)).flatten()
    return feature

# feature-descriptor-2: Haralick Texture
def fd_haralick(image):
    # convert the image to grayscale
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # compute the haralick texture feature vector
    haralick = mahotas.features.haralick(gray).mean(axis=0)
    # return the result
    return haralick

# feature-descriptor-3: Color Histogram
def fd_histogram(image, mask=None):
    # convert the image to HSV color-space
    image = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
    # compute the color histogram
    hist  = cv2.calcHist([image], [0, 1, 2], None, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    # normalize the histogram
    cv2.normalize(hist, hist)
    # return the histogram
    return hist.flatten()

# get the training labels
train_labels = os.listdir(train_path)

# sort the training labels
train_labels.sort()
print(len(train_labels))

# empty lists to hold feature vectors and labels
global_features = []
labels          = []

# loop over the training data sub-folders
for training_name in train_labels:
    # join the training data path and each species training folder
    dir = os.path.join(train_path, training_name)

    # get the current training label
    current_label = training_name

    # loop over the images in each sub-folder
    for x in range(1,images_per_class+1):
        # get the image file name
        file = dir + "/" + str(x) + ".jpg"

        # read the image and resize it to a fixed-size
        image = cv2.imread(file)
        image = cv2.resize(image, fixed_size)

        ####################################
        # Global Feature extraction
        ####################################
        fv_hu_moments = fd_hu_moments(image)
        fv_haralick   = fd_haralick(image)
        fv_histogram  = fd_histogram(image)

        ###################################
        # Concatenate global features
        ###################################
        global_feature = np.hstack([fv_histogram, fv_haralick, fv_hu_moments])

        # update the list of labels and feature vectors
        labels.append(current_label)
        global_features.append(global_feature)

    print("[STATUS] processed folder: {}".format(current_label))

print("[STATUS] completed Global Feature Extraction...")

# get the overall feature vector size
print("[STATUS] feature vector size {}".format(np.array(global_features).shape))

# get the overall training label size
print("[STATUS] training Labels {}".format(np.array(labels).shape))

# encode the target labels
targetNames = np.unique(labels)
le          = LabelEncoder()
target      = le.fit_transform(labels)
print("[STATUS] training labels encoded...")

# scale features in the range (0-1)
scaler            = MinMaxScaler(feature_range=(0, 1))
rescaled_features = scaler.fit_transform(global_features)
print("[STATUS] feature vector normalized...")

print("[STATUS] target labels: {}".format(target))
print("[STATUS] target labels shape: {}".format(target.shape))

# save the feature vector using HDF5
h5f_data = h5py.File(h5_data, 'w')
h5f_data.create_dataset('dataset_1', data=np.array(rescaled_features))

h5f_label = h5py.File(h5_labels, 'w')
h5f_label.create_dataset('dataset_1', data=np.array(target))

h5f_data.close()
h5f_label.close()

print("[STATUS] end of training..")

70


error: OpenCV(3.4.2) C:\Miniconda3\conda-bld\opencv-suite_1533764349611\work\modules\imgproc\src\resize.cpp:4044: error: (-215:Assertion failed) !ssize.empty() in function 'cv::resize'
