In [1]:
import os
import cv2
import numpy as np
import imutils
import pickle
from skimage.feature import hog

In [2]:
# Source image folders, one per object class handled by this notebook.
airplane_datapath = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/trainDataAirplane/"
cat_datapath = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/trainDataCat/"

# The folders are visited in this order when features are extracted.
ImageDir = [
    airplane_datapath,
    cat_datapath,
]

The process below creates the dataset for the following features:

1. Standardized (normalized) image
2. Image with edge-activation pixels
3. Image with HOG features (one filter type)
4. To do: HOG with multiple filters
    
The code below is generalized to store each feature set as an ndarray where:

--> The number of rows is the number of images,
--> The number of columns is the length of the flattened feature set.

Each dataset is serialized into a pickle file and stored in its respective directory.


In [5]:
def featureStandarize(image_pxlvals):
    """Centre 8-bit pixel intensities on zero and scale them by the 255 range.

    An input of 0 maps to -0.5 and 255 maps to 0.5. The arithmetic is applied
    element-wise, so scalars and numpy arrays are both accepted.
    """
    midpoint = 255.0 / 2
    centred = image_pxlvals - midpoint
    return centred / 255.0


# We choose 18 orientation for the object recognition task
class HOG:
    """Thin wrapper around ``skimage.feature.hog`` bound to one parameter set.

    ``featureParams`` is a dict that must provide the keys ``orientations``,
    ``pixelsPerCell``, ``cellsPerBlock``, ``block_norm``, ``visualise`` and
    ``transform_sqrt``.
    """

    def __init__(self, featureParams):
        # NOTE: the attribute name ``orienations`` (sic) is kept as-is so that
        # any code reading it from an instance keeps working.
        self.orienations = featureParams['orientations']
        self.pixelsPerCell = featureParams['pixelsPerCell']
        self.cellsPerBlock = featureParams['cellsPerBlock']
        self.block_norm = featureParams['block_norm']
        self.visualise = featureParams['visualise']
        self.transform_sqrt = featureParams['transform_sqrt']

    def describe(self, image):
        """Compute the HOG descriptor of ``image``.

        Returns the feature vector, or the pair ``(feature vector, HOG image)``
        when the object was configured with a truthy ``visualise``.
        """
        # transform_sqrt applies power-law compression before processing.
        hog_kwargs = dict(
            orientations=self.orienations,
            pixels_per_cell=self.pixelsPerCell,
            cells_per_block=self.cellsPerBlock,
            # Forward the configured block normalisation; it used to be stored
            # but never passed on, so the library default was silently used.
            block_norm=self.block_norm,
            transform_sqrt=self.transform_sqrt,
        )

        if self.visualise:
            # NOTE(review): newer scikit-image releases spell this keyword
            # ``visualize`` - confirm against the installed version.
            hist, hog_image = hog(image, visualise=self.visualise, **hog_kwargs)
            return hist, hog_image

        return hog(image, **hog_kwargs)


def featureExtraction(pathTo_images, filenameArr, imageSize=32, mimNumImage=None, numChannels=3):
    """Build the STD, EDG and HOG feature matrices for a list of image files.

    Parameters
    ----------
    pathTo_images : str
        Directory that contains the image files.
    filenameArr : sequence of str
        File names, relative to ``pathTo_images``, to process.
    imageSize : int
        Side length of the (square) images; sizes the STD and EDG arrays.
    mimNumImage, numChannels
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    tuple of five ndarrays
        ``(STD, EDG, HOG p1, HOG p2, HOG p3)`` with one row per image that
        could be read. STD and EDG have ``imageSize*imageSize`` columns.
        Files that cannot be read are skipped and produce no row in any of
        the five arrays, so the rows stay aligned across feature sets.
    """
    datasetSTD = np.ndarray(shape=(len(filenameArr), imageSize, imageSize), dtype=np.float32)
    datasetEDG = np.ndarray(shape=(len(filenameArr), imageSize, imageSize), dtype=np.float32)
    datasetHOGp1 = []
    datasetHOGp2 = []
    datasetHOGp3 = []

    featureParams1 = dict(orientations = 18, pixelsPerCell = (9, 9), cellsPerBlock = (1, 1), block_norm = 'L1', visualise = False, transform_sqrt = True)
    featureParams2 = dict(orientations = 18, pixelsPerCell = (4, 7), cellsPerBlock = (1, 1), block_norm = 'L1', visualise = False, transform_sqrt = True)

    featureParams3_1 = dict(orientations = 18, pixelsPerCell = (3, 6), cellsPerBlock = (1, 1), block_norm = 'L1', visualise = False, transform_sqrt = True)
    featureParams3_2 = dict(orientations = 18, pixelsPerCell = (6, 3), cellsPerBlock = (1, 1), block_norm = 'L1', visualise = False, transform_sqrt = True)

    obj_HOG_p1 = HOG(featureParams1)
    obj_HOG_p2 = HOG(featureParams2)
    obj_HOG_p3_1 = HOG(featureParams3_1)
    obj_HOG_p3_2 = HOG(featureParams3_2)

    # Number of images actually stored. Rows are written at this index (not at
    # the enumerate index) so a skipped file never leaves an uninitialised row.
    numStored = 0

    for numImage, image in enumerate(filenameArr):
        imagePath = os.path.join(pathTo_images, image)
        try:
            # Get the image. cv2.imread reports an unreadable or non-image
            # file by returning None instead of raising an exception.
            img = cv2.imread(imagePath)
            if img is None:
                print('Could not read:', image, '- hence skipping.')
                continue

            # Convert the image to gray scale
            imgGS = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

            # Get the normalized image
            imgSTD = featureStandarize(imgGS)

            # Blur the gray-scale image with a 3x3 Gaussian kernel; a sigma of 0
            # lets OpenCV derive the standard deviation from the kernel size
            imgBLR = cv2.GaussianBlur(imgGS, (3,3), 0)

            # Detect edges with the Canny filter (lower threshold 30, upper threshold 150)
            imgEDG = cv2.Canny(imgBLR, 30, 150)

            # HOG features for parameter setting 1
            imgHOGp1 = obj_HOG_p1.describe(imgGS)

            # HOG features for parameter setting 2
            imgHOGp2 = obj_HOG_p2.describe(imgGS)

            # HOG features for the two kernels of parameter setting 3
            imgHOGp3_1 = obj_HOG_p3_1.describe(imgGS)
            imgHOGp3_2 = obj_HOG_p3_2.describe(imgGS)

            datasetSTD[numStored, :] = imgSTD
            datasetEDG[numStored, :] = imgEDG
            datasetHOGp1.append(imgHOGp1)
            datasetHOGp2.append(imgHOGp2)
            # NOTE: the two p3 descriptors are added element-wise (not
            # concatenated), so they must have the same length.
            datasetHOGp3.append(imgHOGp3_1+imgHOGp3_2)
            numStored += 1

            if numImage%1000 == 0:
                print ('For image number: ', numImage)
                print ('Count of Non Zero pixels entries in img are: ', np.count_nonzero(img))
                print ('Count of Non Zero pixels entries in imgGS are: ', np.count_nonzero(imgGS))
                print ('Count of Non Zero pixels entries in imgBLR are: ', np.count_nonzero(imgBLR))
                print ('Count of Non Zero pixels entries in imgEDG are: ', np.count_nonzero(imgEDG))
                print ('Count of Non Zero pixels entries in imgHOGp1 are: ', np.count_nonzero(imgHOGp1))
                print ('Count of Non Zero pixels entries in imgHOGp2 are: ', np.count_nonzero(imgHOGp2))
                print ('Count of Non Zero pixels entries in imgHOGp3_1 are: ', np.count_nonzero(imgHOGp3_1))
                print ('Count of Non Zero pixels entries in imgHOGp3_2 are: ', np.count_nonzero(imgHOGp3_2))

                print ('')

        except IOError as e:
            print('Could not read:', image, ':', e, '- hence skipping.')

    # Drop the unused tail rows so STD/EDG line up with the HOG lists.
    return (datasetSTD[:numStored].reshape((numStored, imageSize*imageSize)),
            datasetEDG[:numStored].reshape((numStored, imageSize*imageSize)),
            np.array(datasetHOGp1),
            np.array(datasetHOGp2),
            np.array(datasetHOGp3))

In [7]:
# Output folders, one per feature type. The last path component of each folder
# (STD, EDG, HOGp1, ...) identifies the feature set that is stored in it.
featureSTDPath = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/STD/"
featureEDGPath = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/EDG/"
featureHOGp1Path = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/HOGp1/"
featureHOGp2Path = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/HOGp2/"
featureHOGp3Path = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/HOGp3/"

DataDir = [
    featureSTDPath,
    featureEDGPath,
    featureHOGp1Path,
    featureHOGp2Path,
    featureHOGp3Path,
]

def main(forceDump=None):
    """Extract every feature set for each image directory and pickle it.

    For each directory in ``ImageDir`` the features are computed with
    ``featureExtraction`` and each feature matrix is written to
    ``<data_dir>/<image directory name>.pickle`` for the matching entry of
    ``DataDir``. Missing output directories are created.

    Parameters
    ----------
    forceDump : object
        When falsy (the default), an existing pickle file is left untouched;
        when truthy, it is overwritten.
    """
    for image_dir in ImageDir:
        objectName = os.path.basename(os.path.normpath(image_dir))
        filenameArr =  os.listdir(image_dir)
        print ('')
        print ('The current image directory is: ', image_dir)
        (datasetSTD, datasetEDG, datasetHOGp1, datasetHOGp2, datasetHOGp3) = featureExtraction(image_dir, filenameArr)
        print ('Standarized Feature DataSet: shape = ', datasetSTD.shape)
        print ('Edge Feature DataSet: shape = ', datasetEDG.shape)
        print ('HOG param1 Feature DataSet: shape = ', datasetHOGp1.shape)
        print ('HOG param2 Feature DataSet: shape = ', datasetHOGp2.shape)
        print ('HOG param3 Feature DataSet: shape = ', datasetHOGp3.shape)

        # Output folder name -> (label used in the log line, dataset to store).
        datasetFor = {
            'STD': ('STD', datasetSTD),
            'EDG': ('EDGE', datasetEDG),
            'HOGp1': ('HOG p1', datasetHOGp1),
            'HOGp2': ('HOG p2', datasetHOGp2),
            'HOGp3': ('HOG p3', datasetHOGp3),
        }

        for data_dir in DataDir:
            if not os.path.exists(data_dir):
                os.makedirs(data_dir)

            featureType = os.path.basename(os.path.normpath(data_dir))
            fileName = os.path.join(data_dir, objectName+".pickle")

            # DUMP PICKLE FILES
            if os.path.exists(fileName) and not forceDump:
                print ('The path already exists, you should force the dump')
                continue

            if featureType not in datasetFor:
                # Avoid leaving an empty pickle file behind for a folder that
                # does not correspond to any feature set.
                print ('Unknown feature type', featureType, '- nothing to store.')
                continue

            label, dataset = datasetFor[featureType]
            try:
                with open(fileName, 'wb') as f:
                    print ('Storing data for', label, 'Feature set')
                    pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
            except Exception as e:
                # Report the failure and carry on with the remaining feature sets.
                print('Unable to save data to', fileName, ':', e)
                    
    
# Run the extraction and overwrite any pickle files that already exist.
main(forceDump=True)


The current image directory is:  /Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/trainDataAirplane/
For image number:  0
Count of Non Zero pixels entries in img are:  3072
Count of Non Zero pixels entries in imgGS are:  1024
Count of Non Zero pixels entries in imgBLR are:  1024
Count of Non Zero pixels entries in imgEDG are:  95
Count of Non Zero pixels entries in imgHOGp1 are:  106
Count of Non Zero pixels entries in imgHOGp2 are:  249
Count of Non Zero pixels entries in imgHOGp3_1 are:  261
Count of Non Zero pixels entries in imgHOGp3_2 are:  269

For image number:  1000
Count of Non Zero pixels entries in img are:  3068
Count of Non Zero pixels entries in imgGS are:  1023
Count of Non Zero pixels entries in imgBLR are:  1024
Count of Non Zero pixels entries in imgEDG are:  215
Count of Non Zero pixels entries in imgHOGp1 are:  133
Count of Non Zero pixels entries in imgHOGp2 are:  328
Count of Non Zero pixels entries in imgHOGp3_1 are:  356
Count of Non Zero pixels en

--> The code below calls the class CreateBatches, which:

1. Loads the pickled feature data from the directory,
2. Randomizes the data,
3. Divides it into train and test datasets (in this case we don't create test data, as the test data comes from a separate test file),
4. Finally, splits the training data into 10 folds and stores them in the respective directory.

In [8]:
import sys,os

# Put the parent of the working directory on the import path so that the
# project import that follows can resolve (presumably the module lives there).
cwd = os.getcwd()
mydir = os.path.abspath(os.path.join(cwd, os.pardir))
sys.path.append(mydir)

from DataPreparation import CreateBatches

In [9]:
###########################
# Standardized Feature Set
###########################

# Create 10 batches of the standardized image features and store them in the batch directory.
imageDim = 32*32    # flattened 32x32 image, i.e. 1024 columns per row
numBatches =10
maxNumImage = 5000  # passed as max_num_images; the recorded run produced 10000 training rows
test_percntg = 0    # no held-out split; the recorded run reports an empty test set

# root_dir is where the pickled features are read from; batch_dir receives the batches.
root_dir = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/STD/"
batch_dir = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/STD/batchData/"

obj_STD = CreateBatches(dimensions=imageDim)
# The two discarded return values are presumably the test data and test labels - TODO confirm.
trainData, trainLabels, _, _, labelDict = obj_STD.gen_TrainTestData(max_num_images=maxNumImage, dir_to_pickle_files=root_dir, test_percntg=test_percntg)

# print (labelDict)
# Dump each generated batch, numbered from 0, into batch_dir.
for batchNum, (trnBatchData, trnBatchLabel) in enumerate(obj_STD.generateBatches(dataset=trainData, labels=trainLabels, numBatches=numBatches)):
    obj_STD.dumpBatches(batch_dir, trnBatchData, trnBatchLabel, batchNum=batchNum, labelDict=labelDict)


seed use for randomness is :  8653
The training Data set size is :  (10000, 1024)
The training Labels size is :  (10000,)
The test Data set size is :  (0, 1024)
The test Labels size is :  (0,)
Batch No:  0  : Training Batch Data Shape: (1000, 1024)
Batch No:  0  : Training Batch Labels Shape : (1000,)
Batch No:  1  : Training Batch Data Shape: (1000, 1024)
Batch No:  1  : Training Batch Labels Shape : (1000,)
Batch No:  2  : Training Batch Data Shape: (1000, 1024)
Batch No:  2  : Training Batch Labels Shape : (1000,)
Batch No:  3  : Training Batch Data Shape: (1000, 1024)
Batch No:  3  : Training Batch Labels Shape : (1000,)
Batch No:  4  : Training Batch Data Shape: (1000, 1024)
Batch No:  4  : Training Batch Labels Shape : (1000,)
Batch No:  5  : Training Batch Data Shape: (1000, 1024)
Batch No:  5  : Training Batch Labels Shape : (1000,)
Batch No:  6  : Training Batch Data Shape: (1000, 1024)
Batch No:  6  : Training Batch Labels Shape : (1000,)
Batch No:  7  : Training Batch Data S

In [10]:
#########################
# Edge Feature Set
#########################

# Create 10 batches of the edge image features and store them in the batch directory.
imageDim=32*32      # flattened 32x32 edge map, i.e. 1024 columns per row
numBatches =10
maxNumImage = 5000  # passed as max_num_images; the recorded run produced 10000 training rows
test_percntg=0      # no held-out split; the recorded run reports an empty test set

# root_dir is where the pickled features are read from; batch_dir receives the batches.
root_dir = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/EDG/"
batch_dir = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/EDG/batchData/"

obj_EDG = CreateBatches(dimensions=imageDim)
# The two discarded return values are presumably the test data and test labels - TODO confirm.
trainData, trainLabels, _, _, labelDict = obj_EDG.gen_TrainTestData(max_num_images=maxNumImage, dir_to_pickle_files=root_dir, test_percntg=test_percntg)

# Dump each generated batch, numbered from 0, into batch_dir.
for batchNum, (trnBatchData, trnBatchLabel) in enumerate(obj_EDG.generateBatches(dataset=trainData, labels=trainLabels, numBatches=numBatches)):
    obj_EDG.dumpBatches(batch_dir, trnBatchData, trnBatchLabel, batchNum=batchNum, labelDict=labelDict)

seed use for randomness is :  8653
The training Data set size is :  (10000, 1024)
The training Labels size is :  (10000,)
The test Data set size is :  (0, 1024)
The test Labels size is :  (0,)
Batch No:  0  : Training Batch Data Shape: (1000, 1024)
Batch No:  0  : Training Batch Labels Shape : (1000,)
Batch No:  1  : Training Batch Data Shape: (1000, 1024)
Batch No:  1  : Training Batch Labels Shape : (1000,)
Batch No:  2  : Training Batch Data Shape: (1000, 1024)
Batch No:  2  : Training Batch Labels Shape : (1000,)
Batch No:  3  : Training Batch Data Shape: (1000, 1024)
Batch No:  3  : Training Batch Labels Shape : (1000,)
Batch No:  4  : Training Batch Data Shape: (1000, 1024)
Batch No:  4  : Training Batch Labels Shape : (1000,)
Batch No:  5  : Training Batch Data Shape: (1000, 1024)
Batch No:  5  : Training Batch Labels Shape : (1000,)
Batch No:  6  : Training Batch Data Shape: (1000, 1024)
Batch No:  6  : Training Batch Labels Shape : (1000,)
Batch No:  7  : Training Batch Data S

In [12]:
#########################
# HOG p1 Feature Set
#########################

# Create 10 batches of the HOG features (first parameter setting) and store them in the batch directory.
imageDim=162        # HOG p1 descriptor length; the recorded run shows 162 columns
numBatches =10
maxNumImage = 5000  # passed as max_num_images; the recorded run produced 10000 training rows
test_percntg=0      # no held-out split; the recorded run reports an empty test set

# root_dir is where the pickled features are read from; batch_dir receives the batches.
root_dir = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/HOGp1/"
batch_dir = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/HOGp1/batchData/"

obj_HOGp1 = CreateBatches(dimensions=imageDim)
# The two discarded return values are presumably the test data and test labels - TODO confirm.
trainData, trainLabels, _, _, labelDict = obj_HOGp1.gen_TrainTestData(max_num_images=maxNumImage, dir_to_pickle_files=root_dir, test_percntg=test_percntg)

# Dump each generated batch, numbered from 0, into batch_dir.
for batchNum, (trnBatchData, trnBatchLabel) in enumerate(obj_HOGp1.generateBatches(dataset=trainData, labels=trainLabels, numBatches=numBatches)):
    obj_HOGp1.dumpBatches(batch_dir, trnBatchData, trnBatchLabel, batchNum=batchNum, labelDict=labelDict)

seed use for randomness is :  8653
The training Data set size is :  (10000, 162)
The training Labels size is :  (10000,)
The test Data set size is :  (0, 162)
The test Labels size is :  (0,)
Batch No:  0  : Training Batch Data Shape: (1000, 162)
Batch No:  0  : Training Batch Labels Shape : (1000,)
Batch No:  1  : Training Batch Data Shape: (1000, 162)
Batch No:  1  : Training Batch Labels Shape : (1000,)
Batch No:  2  : Training Batch Data Shape: (1000, 162)
Batch No:  2  : Training Batch Labels Shape : (1000,)
Batch No:  3  : Training Batch Data Shape: (1000, 162)
Batch No:  3  : Training Batch Labels Shape : (1000,)
Batch No:  4  : Training Batch Data Shape: (1000, 162)
Batch No:  4  : Training Batch Labels Shape : (1000,)
Batch No:  5  : Training Batch Data Shape: (1000, 162)
Batch No:  5  : Training Batch Labels Shape : (1000,)
Batch No:  6  : Training Batch Data Shape: (1000, 162)
Batch No:  6  : Training Batch Labels Shape : (1000,)
Batch No:  7  : Training Batch Data Shape: (10

In [13]:
#########################
# HOG p2 Feature Set
#########################

# Create 10 batches of the HOG features (second parameter setting) and store them in the batch directory.
imageDim=576        # HOG p2 descriptor length; the recorded run shows 576 columns
numBatches =10
maxNumImage = 5000  # passed as max_num_images; the recorded run produced 10000 training rows
test_percntg=0      # no held-out split; the recorded run reports an empty test set

# root_dir is where the pickled features are read from; batch_dir receives the batches.
root_dir = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/HOGp2/"
batch_dir = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/HOGp2/batchData/"

obj_HOGp2 = CreateBatches(dimensions=imageDim)
# The two discarded return values are presumably the test data and test labels - TODO confirm.
trainData, trainLabels, _, _, labelDict = obj_HOGp2.gen_TrainTestData(max_num_images=maxNumImage, dir_to_pickle_files=root_dir, test_percntg=test_percntg)

# Dump each generated batch, numbered from 0, into batch_dir.
for batchNum, (trnBatchData, trnBatchLabel) in enumerate(obj_HOGp2.generateBatches(dataset=trainData, labels=trainLabels, numBatches=numBatches)):
    obj_HOGp2.dumpBatches(batch_dir, trnBatchData, trnBatchLabel, batchNum=batchNum, labelDict=labelDict)

seed use for randomness is :  8653
The training Data set size is :  (10000, 576)
The training Labels size is :  (10000,)
The test Data set size is :  (0, 576)
The test Labels size is :  (0,)
Batch No:  0  : Training Batch Data Shape: (1000, 576)
Batch No:  0  : Training Batch Labels Shape : (1000,)
Batch No:  1  : Training Batch Data Shape: (1000, 576)
Batch No:  1  : Training Batch Labels Shape : (1000,)
Batch No:  2  : Training Batch Data Shape: (1000, 576)
Batch No:  2  : Training Batch Labels Shape : (1000,)
Batch No:  3  : Training Batch Data Shape: (1000, 576)
Batch No:  3  : Training Batch Labels Shape : (1000,)
Batch No:  4  : Training Batch Data Shape: (1000, 576)
Batch No:  4  : Training Batch Labels Shape : (1000,)
Batch No:  5  : Training Batch Data Shape: (1000, 576)
Batch No:  5  : Training Batch Labels Shape : (1000,)
Batch No:  6  : Training Batch Data Shape: (1000, 576)
Batch No:  6  : Training Batch Labels Shape : (1000,)
Batch No:  7  : Training Batch Data Shape: (10

In [14]:
#########################
# HOG p3 Feature Set
#########################

# Create 10 batches of the HOG features (third parameter setting) and store them in the batch directory.
# NOTE(review): the recorded run of this cell failed with FileNotFoundError for root_dir,
# so the HOGp3 pickle directory has to exist before this cell is executed.
imageDim=900        # presumably the HOG p3 descriptor length - TODO confirm against the pickled data
numBatches =10
maxNumImage = 5000  # passed as max_num_images
test_percntg=0      # presumably disables the held-out test split - TODO confirm

# root_dir is where the pickled features are read from; batch_dir receives the batches.
root_dir = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/HOGp3/"
batch_dir = "/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/HOGp3/batchData/"

obj_HOGp3 = CreateBatches(dimensions=imageDim)
# The two discarded return values are presumably the test data and test labels - TODO confirm.
trainData, trainLabels, _, _, labelDict = obj_HOGp3.gen_TrainTestData(max_num_images=maxNumImage, dir_to_pickle_files=root_dir, test_percntg=test_percntg)

# Dump each generated batch, numbered from 0, into batch_dir.
for batchNum, (trnBatchData, trnBatchLabel) in enumerate(obj_HOGp3.generateBatches(dataset=trainData, labels=trainLabels, numBatches=numBatches)):
    obj_HOGp3.dumpBatches(batch_dir, trnBatchData, trnBatchLabel, batchNum=batchNum, labelDict=labelDict)

FileNotFoundError: [Errno 2] No such file or directory: '/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/Model-Sardhendu/HOGp3/'