In [5]:
from __future__ import division
from __future__ import print_function
import os
import cv2
import numpy as np
import imutils
import pickle
import random
from skimage.feature import hog
from sklearn.decomposition import PCA

# Preprocessing and Feature Extraction: 
---------
a> Mean Normalized image: For an object recognition task, or for other preprocessing such as PCA to work well, we require that (i) the features are approximately zero mean, and (ii) the different features have similar variances to each other. With natural images like CIFAR-10, (ii) is already satisfied even without variance normalization, so we do not perform any variance normalization. We do, however, perform mean normalization. The function that does it is "featureaMeanNorm".
   
b> PCA: Principal component analysis (PCA) is a dimensionality reduction technique that uses concepts from the singular value decomposition of rectangular matrices. It finds the eigenvectors (a basis for the original matrix) that capture the highest variance in the data. The resulting eigenvector matrix (U) is an orthogonal matrix whose column vectors are orthogonal to each other. The transformed matrix ("x_rot") is obtained by computing (U_transpose.x). Dimensionality reduction is obtained by keeping only the first few dimensions of "x_rot". For an image recognition task, a common heuristic is to keep enough eigenvectors to explain 99% of the variance. This can be determined from the eigenvalues.

c> Whitening with PCA: Whitening is a preprocessing step that reduces redundancy in the input. Adjacent pixels in an image are highly correlated, so for an image recognition task, providing the raw image as input to the learning model is redundant. The goal of whitening is to make the input less redundant. It has two objectives: 1) the features are less correlated, and 2) all the features have the same variance.

   1. The input features are made uncorrelated using PCA.
   2. The features are made to have unit variance by rescaling: each element of a feature is divided by the square root of the corresponding eigenvalue. Intuitively, the larger the eigenvalue (i.e. the more variance is captured by that eigenvector), the larger the denominator and the stronger the down-scaling.
   
   The formula looks like : x_PCAwhite(i) = x_rot(i) / square_root(λ(i)), where λ(i) is the eigen value for the ith vector in the new matrix (x_rot)
   

d> ZCA whitening: Unlike PCA whitening, here we keep all n dimensions of the data; we do not reduce the dimension. The ZCA transformation, sometimes also called the "Mahalanobis transformation", results in whitened data (x_ZCAwhite) that is as close as possible to the original data. The steps are:

   1. Get the new transformed matrix:  x_rot =  U_transpose.x, where U is the eigen vector matrix.
   2. Get the PCA whitened matrix: x_PCAwhite(i) = x_rot(i) / square_root(λ(i) + e), where e is epsilon, a small regularization constant that avoids dividing by eigenvalues close to zero and slightly smooths the image.
   3. Get the ZCA whitened matrix by rotating x_PCAwhite back: x_ZCAwhite = U.x_PCAwhite, where U is the same eigenvector matrix.
 
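e> Image Blur and Edges: The grayscale image is smoothed with a 3x3 Gaussian blur, and edges are then detected on the blurred image with a Canny filter (thresholds 30 and 150).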

f> HOG: Histogram of oriented gradients captures the gradient magnitudes and their orientations. Given a cell size, the cell window slides through the image and accumulates the gradient magnitudes (edges in the image mostly have a high gradient) into bins for the given number of orientations. The summed magnitudes per orientation and per cell form the new HOG feature space. Here we use several cell sizes, (2,2), (4,4), (6,6) and (8,8), and stack the resulting descriptors side by side to build a redundant feature space.

g> RGB: Used for convolutional features (Implemented using Tensorflow)

source: http://ufldl.stanford.edu/tutorial/unsupervised/PCAWhitening/


In [6]:
# Smoothing term added to every eigenvalue inside ZCA_whiteness before the square
# root is taken, so that near-zero eigenvalues do not blow up the whitening scale.
epsilon_noise = 0.1


def featureStandarize(image_pxlvals):
    """Shift pixel intensities by the 8-bit mid-point (127.5) and divide by 255.

    For inputs in [0, 255] the result lies in [-0.5, 0.5]. The original author
    notes this scaling as preferable for non-natural images.
    """
    midpoint = 255.0 / 2
    centered = image_pxlvals - midpoint
    return centered / 255.0

def featureaMeanNorm(image):
    """Remove the per-column mean intensity from a 2-D image.

    Overall brightness is irrelevant when recognising an object, so the mean of
    every column (np.mean over axis 0) is subtracted from that column's pixels.
    Noted by the author as the preferred option for CIFAR-10 object recognition.

    Returns an array of the same shape as `image` whose columns have zero mean.
    """
    column_means = np.mean(image, axis=0)
    return image - column_means



def ZCA_whiteness(img):
    """Return the ZCA-whitened version of a 2-D image.

    Steps: mean-normalise the columns, estimate the row covariance, rotate the
    data into the eigenbasis obtained from its SVD, rescale every component by
    1 / sqrt(eigenvalue + epsilon_noise), and rotate back into the original basis.
    The result has the same shape as `img`.
    """
    centered = featureaMeanNorm(img)

    # Covariance between rows, normalised by the number of columns.
    covariance = np.dot(centered, centered.T) / centered.shape[1]
    eig_vecs, eig_vals, _ = np.linalg.svd(covariance)

    # PCA whitening: project onto the eigenvectors, then equalise the variances.
    rotated = np.dot(eig_vecs.T, centered)
    scale = np.diag(1 / (eig_vals + epsilon_noise) ** 0.5)
    pca_white = np.dot(scale, rotated)

    # ZCA whitening: rotate the PCA-whitened data back with the same eigenvectors.
    return np.dot(eig_vecs, pca_white)


# We choose 18 orientation for the object recognition task
class HOG:
    """Holds one HOG parameter configuration and applies skimage's `hog` with it.

    `featureParams` is a dict with the keys 'orientations', 'pixelsPerCell',
    'cellsPerBlock', 'block_norm', 'visualise' and 'transform_sqrt'.
    """

    def __init__(self, featureParams):
        # The attribute keeps its historical spelling ("orienations") so that any
        # code reading it from outside this class keeps working.
        self.orienations = featureParams['orientations']
        self.pixelsPerCell = featureParams['pixelsPerCell']
        self.cellsPerBlock = featureParams['cellsPerBlock']
        self.block_norm = featureParams['block_norm']
        self.visualise = featureParams['visualise']
        self.transform_sqrt = featureParams['transform_sqrt']

    def describe(self, image):
        """Compute the HOG descriptor of `image`.

        Returns the feature vector, or the pair (feature vector, HOG image) when
        the configuration was created with visualise=True.
        """
        # transform_sqrt applies power-law compression before processing.
        # block_norm is forwarded explicitly: it used to be stored but never passed
        # on, so the configured normalisation was silently replaced by the library
        # default (which has differed between scikit-image releases).
        hogArgs = dict(orientations=self.orienations,
                       pixels_per_cell=self.pixelsPerCell,
                       cells_per_block=self.cellsPerBlock,
                       block_norm=self.block_norm,
                       transform_sqrt=self.transform_sqrt)

        if self.visualise:
            # NOTE(review): newer scikit-image releases spell this keyword
            # `visualize`; confirm against the installed version.
            hist, hog_image = hog(image, visualise=self.visualise, **hogArgs)
            return hist, hog_image

        return hog(image, **hogArgs)

In [7]:
class FeatureExtraction():
    """Extracts several feature representations from image files.

    For every image it produces the RGB image, the grayscale image, a ZCA-whitened
    and a standardised grayscale image, a blurred image, a Canny edge map and six
    HOG descriptors. `featureMatrixBuilder` stacks these per image into one matrix
    per feature type.

    augmentBY: when truthy, every image is additionally processed after a
    horizontal flip and the flipped features are appended to all feature
    matrices except RGB.
    """

    def __init__(self, augmentBY=None):
        # All HOG configurations share these settings; only the number of
        # orientation bins and the (square) cell size differ.
        def hogParams(orientations, cellSize):
            return dict(orientations=orientations,
                        pixelsPerCell=(cellSize, cellSize),
                        cellsPerBlock=(1, 1),
                        block_norm='L1',
                        visualise=False,
                        transform_sqrt=True)

        self.obj_HOG_p1 = HOG(hogParams(18, 6))
        self.obj_HOG_p2 = HOG(hogParams(9, 4))

        # Multi-scale set: four cell sizes whose descriptors get stacked later.
        self.obj_HOG_p3_1 = HOG(hogParams(9, 2))
        self.obj_HOG_p3_2 = HOG(hogParams(9, 4))
        self.obj_HOG_p3_3 = HOG(hogParams(9, 6))
        self.obj_HOG_p3_4 = HOG(hogParams(9, 8))

        self.augmentBY = augmentBY

    def featureExtractor(self, imagePath, flip=None):
        """Read one image and compute every feature representation for it.

        imagePath: path of the image file.
        flip: when truthy, the image is mirrored horizontally before processing.

        Returns the tuple (imgRGB, imgGRAY, imgZCA, imgSTD, imgBLR, imgEDG,
        imgHOGp1, imgHOGp2, imgHOGp3_1, imgHOGp3_2, imgHOGp3_3, imgHOGp3_4).

        Raises IOError when the file cannot be decoded as an image.
        """
        img = cv2.imread(imagePath)

        # cv2.imread reports failure by returning None instead of raising, which
        # previously surfaced later as an OpenCV error that the caller's
        # `except IOError` could not catch.
        if img is None:
            raise IOError('cv2.imread could not read an image from %s' % imagePath)

        if flip:
            img = cv2.flip(img, 1)    # flip code 1 = horizontal flip

        # OpenCV loads images in BGR channel order; convert to RGB.
        imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        imgGRAY = cv2.cvtColor(imgRGB, cv2.COLOR_RGB2GRAY)

        imgZCA = ZCA_whiteness(imgGRAY)
        imgSTD = featureStandarize(imgGRAY)

        # 3x3 Gaussian blur; sigma 0 lets OpenCV derive sigma from the kernel size.
        imgBLR = cv2.GaussianBlur(imgGRAY, (3,3), 0)

        # Canny edges on the blurred image, lower threshold 30 and upper threshold 150.
        imgEDG = cv2.Canny(imgBLR, 30, 150)

        # Single-scale HOG descriptors (parameter settings 1 and 2).
        imgHOGp1 = self.obj_HOG_p1.describe(imgGRAY)
        imgHOGp2 = self.obj_HOG_p2.describe(imgGRAY)

        # Multi-scale HOG descriptors (parameter settings 3_1 .. 3_4).
        imgHOGp3_1 = self.obj_HOG_p3_1.describe(imgGRAY)
        imgHOGp3_2 = self.obj_HOG_p3_2.describe(imgGRAY)
        imgHOGp3_3 = self.obj_HOG_p3_3.describe(imgGRAY)
        imgHOGp3_4 = self.obj_HOG_p3_4.describe(imgGRAY)

        return (imgRGB, imgGRAY, imgZCA, imgSTD, imgBLR, imgEDG, imgHOGp1, imgHOGp2, imgHOGp3_1, imgHOGp3_2, imgHOGp3_3, imgHOGp3_4)

    def _storeFeatures(self, datasets, features, numPixels):
        """Append one image's flattened features to the per-feature lists in `datasets`."""
        (_, imgGRAY, imgZCA, imgSTD, _, imgEDG, imgHOGp1, imgHOGp2,
         imgHOGp3_1, imgHOGp3_2, imgHOGp3_3, imgHOGp3_4) = features

        datasets['ZCA'].append(imgZCA.reshape(numPixels))
        datasets['STD'].append(imgSTD.reshape(numPixels))
        datasets['GRAY'].append(imgGRAY.reshape(numPixels))
        datasets['EDG'].append(imgEDG.reshape(numPixels))
        datasets['HOGp1'].append(imgHOGp1)
        datasets['HOGp2'].append(imgHOGp2)
        datasets['HOGp3'].append(np.hstack((imgHOGp3_1, imgHOGp3_2, imgHOGp3_3)))
        datasets['HOGp4'].append(np.hstack((imgHOGp3_1, imgHOGp3_2, imgHOGp3_3, imgHOGp3_4)))

    def featureMatrixBuilder(self, pathTo_images, filenameArr, imageSize=32, mimNumImage=None, numChannels=3):
        """Build one feature matrix per feature type for all files of a directory.

        pathTo_images: directory that contains the images.
        filenameArr:   file names inside that directory.
        imageSize:     side length of the (square) images.
        mimNumImage:   unused; kept so existing callers keep working.
        numChannels:   number of colour channels of the RGB image.

        Returns the tuple (RGB, ZCA, STD, GRAY, EDG, HOGp1, HOGp2, HOGp3, HOGp4)
        of 2-D arrays with one row per stored image. Files that raise IOError are
        reported and skipped. With augmentation enabled, every matrix except RGB
        also receives one row per flipped image.
        """
        numPixels = imageSize * imageSize

        # Allocated for the worst case and trimmed to the stored rows on return, so
        # a skipped file can never leave an uninitialised row in the RGB matrix.
        datasetRGB = np.ndarray(shape=(len(filenameArr), imageSize, imageSize, numChannels), dtype='uint8')
        datasets = dict((name, []) for name in
                        ('ZCA', 'STD', 'GRAY', 'EDG', 'HOGp1', 'HOGp2', 'HOGp3', 'HOGp4'))

        numStored = 0     # images whose features were stored (next free RGB row)
        numFlipped = 0    # flipped copies added by augmentation
        for image in filenameArr:
            imagePath = os.path.join(pathTo_images, image)
            try:
                features = self.featureExtractor(imagePath)
                datasetRGB[numStored, :] = features[0]
                self._storeFeatures(datasets, features, numPixels)
                numStored += 1

                if self.augmentBY:
                    flipped = self.featureExtractor(imagePath, flip=True)
                    self._storeFeatures(datasets, flipped, numPixels)
                    numFlipped += 1
            except IOError as e:
                print('Could not read:', image, ':', e, '- hence skipping.')

        print('Augmented the datase by %s flipped images ' % numFlipped)
        return (datasetRGB[:numStored].reshape((-1, numPixels*numChannels)),
                np.array(datasets['ZCA']),
                np.array(datasets['STD']),
                np.array(datasets['GRAY']),
                np.array(datasets['EDG']),
                np.array(datasets['HOGp1']),
                np.array(datasets['HOGp2']),
                np.array(datasets['HOGp3']),
                np.array(datasets['HOGp4']))

# Feature Matrix Generator:
-------

1. Loops through all the images, augmenting the data if needed.
2. Calls class "FeatureExtraction", generates feature matrix
3. Dumps the feature matrix as compressed pickle files in the disk.

In [8]:
# Capture the dimensions of the features

def main(ImageDir, DataDir, augmentBY=None, forceDump=None, reduceDimension=None):
    """Extract all feature matrices per image directory and pickle them.

    ImageDir:        iterable of directories, one per object class; the last path
                     component becomes the pickle file name.
    DataDir:         iterable of output directories; the last path component of
                     each one (RGB, ZCA, STD, GRAY, EDG, HOGp1..HOGp4) selects
                     which feature matrix is written into it.
    augmentBY:       forwarded to FeatureExtraction.
    forceDump:       when truthy, existing pickle files are overwritten.
    reduceDimension: when truthy, the HOGp4 matrix is reduced with PCA to this
                     many components. NOTE(review): the PCA is fitted separately
                     for every image directory - confirm that this is intended.

    Returns the feature dimensions (rgb, zca, std, gray, edg, hogp1, hogp2, hogp3,
    hogp4) of the last processed directory, or a tuple of None values when
    ImageDir is empty.
    """
    obj_FeatureExtraction = FeatureExtraction(augmentBY=augmentBY)

    # Pre-set so that an empty ImageDir yields None values instead of an
    # UnboundLocalError at the return statement.
    rgbFeatureDim = zcaFeatureDim = stdFeatureDim = grayFeatureDim = None
    edgFeatureDim = hogp1FeatureDim = hogp2FeatureDim = None
    hogp3FeatureDim = hogp4FeatureDim = None

    for image_dir in ImageDir:
        objectName = os.path.basename(os.path.normpath(image_dir))
        filenameArr =  os.listdir(image_dir)
        print ('')
        print ('The current image directory is: ', image_dir)
        print ('The count of images in the directory is: ', len(filenameArr))

        (datasetRGB, datasetZCA,
         datasetSTD, datasetGRAY,
         datasetEDG, datasetHOGp1,
         datasetHOGp2, datasetHOGp3, datasetHOGp4) = obj_FeatureExtraction.featureMatrixBuilder(image_dir, filenameArr)

        if reduceDimension:
            decompose = PCA(n_components=reduceDimension)
            datasetHOGp4 = decompose.fit_transform(datasetHOGp4)

        print ('RGB Feature DataSet: shape = ', datasetRGB.shape)
        print ('ZCA whitened Feature DataSet: shape = ', datasetZCA.shape)
        print ('Standarized Feature DataSet: shape = ', datasetSTD.shape)
        print ('GrayScale Feature DataSet: shape = ', datasetGRAY.shape)
        print ('Edge Feature DataSet: shape = ', datasetEDG.shape)
        print ('HOG param1 Feature DataSet: shape = ', datasetHOGp1.shape)
        print ('HOG param2 Feature DataSet: shape = ', datasetHOGp2.shape)
        print ('HOG param3 Feature DataSet: shape = ', datasetHOGp3.shape)
        print ('HOG param4 Feature DataSet: shape = ', datasetHOGp4.shape)

        # Store feature dimensions
        rgbFeatureDim = datasetRGB.shape[1]
        zcaFeatureDim = datasetZCA.shape[1]
        stdFeatureDim = datasetSTD.shape[1]
        grayFeatureDim = datasetGRAY.shape[1]
        edgFeatureDim = datasetEDG.shape[1]
        hogp1FeatureDim = datasetHOGp1.shape[1]
        hogp2FeatureDim = datasetHOGp2.shape[1]
        hogp3FeatureDim = datasetHOGp3.shape[1]
        hogp4FeatureDim = datasetHOGp4.shape[1]

        # Output directory name -> (label used in the log message, matrix to dump).
        dumpTable = {
            'RGB': ('RGB', datasetRGB),
            'ZCA': ('ZCA', datasetZCA),
            'STD': ('STD', datasetSTD),
            'GRAY': ('GRAY', datasetGRAY),
            'EDG': ('EDGE', datasetEDG),
            'HOGp1': ('HOG p1', datasetHOGp1),
            'HOGp2': ('HOG p2', datasetHOGp2),
            'HOGp3': ('HOG p3', datasetHOGp3),
            'HOGp4': ('HOG p4', datasetHOGp4),
        }

        for data_dir in DataDir:
            if not os.path.exists(data_dir):
                os.makedirs(data_dir)

            featureType = os.path.basename(os.path.normpath(data_dir))
            # os.path.join gives the same path as plain concatenation when data_dir
            # ends with a separator, and stays correct when it does not.
            fileName = os.path.join(data_dir, objectName + ".pickle")

            if os.path.exists(fileName) and not forceDump:
                print ('The path already exists, you should force the dump')
                continue

            # Checked before the file is opened so that an unrecognised directory
            # name no longer leaves an empty pickle file behind.
            if featureType not in dumpTable:
                print ('Unknown feature type', featureType, '- nothing stored in', data_dir)
                continue

            label, dataset = dumpTable[featureType]
            try:
                with open(fileName, 'wb') as f:
                    print ('Storing data for %s Feature set' % label)
                    pickle.dump(dataset, f, pickle.HIGHEST_PROTOCOL)
            except Exception as e:
                # Best effort: report the failure and continue with the next directory.
                print('Unable to save data to', fileName, ':', e)

    return (rgbFeatureDim, zcaFeatureDim, stdFeatureDim, grayFeatureDim,
            edgFeatureDim, hogp1FeatureDim, hogp2FeatureDim, hogp3FeatureDim, hogp4FeatureDim)

# MAIN - Choose your Features (2-Class or 10-Class)
--------

In [10]:
# Number of object classes to process: 10 selects every CIFAR-10 class directory,
# any other value selects the three-class subset (airplane, auto, bird).
numClasses = 3
# False -> regular features only; True -> also add horizontally flipped images.
augmentBY = False
# NOTE(review): absolute, machine-specific dataset root - adjust before running elsewhere.
base_path = '/Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/'

# One training-image directory per class. All ten are defined (they are only
# strings) so that the numClasses == 10 branch below no longer raises NameError.
airplaneDatapath = base_path + "trainDataAirplane/"
autoDatapath = base_path + "trainDataAuto/"
birdDatapath = base_path + "trainDataBird/"
catDatapath = base_path + "trainDataCat/"
deerDatapath = base_path + "trainDataDeer/"
dogDatapath = base_path + "trainDataDog/"
frogDatapath = base_path + "trainDataFrog/"
horseDatapath = base_path + "trainDataHorse/"
shipDatapath = base_path + "trainDataShip/"
truckDatapath = base_path + "trainDataTruck/"

if numClasses == 10:
    ImageDir = [airplaneDatapath, autoDatapath, birdDatapath, catDatapath, deerDatapath,
                dogDatapath, frogDatapath, horseDatapath, shipDatapath, truckDatapath]
else:
    ImageDir = [airplaneDatapath, autoDatapath, birdDatapath]

In [11]:
feature_names = ['RGB', 'ZCA', 'GRAY', 'STD', 'EDG', 'HOGp1', 'HOGp2', 'HOGp3', 'HOGp4']
feature_paths = {}
feature_batch_paths = {}

# Choose the per-class image budget and the output sub-tree from the augmentation
# flag and the class count. Anything other than a 2-class run is stored under
# the "10-Class" tree.
if augmentBY==False:
    maxNumImage = 5000
    featureRootDir = ("featureModels/2-Class/regularFeatures/" if numClasses == 2
                      else "featureModels/10-Class/regularFeatures/")
elif augmentBY==True:
    maxNumImage = 10000   # When all the images are augmented
    featureRootDir = ("featureModels/2-Class/augmentedFeatures/" if numClasses == 2
                      else "featureModels/10-Class/augmentedFeatures/")
else:
    raise ValueError('None type provided for dataset required for Feature Extraction--> Augment or Not')

# Every feature type gets its own directory plus a batch_data sub-directory.
for feature_name in feature_names:
    feature_paths[feature_name] = base_path + featureRootDir + feature_name + '/'
    feature_batch_paths[feature_name] = base_path + featureRootDir + feature_name + '/batch_data/'

DataDir = [feature_paths[feature_name] for feature_name in feature_names]

# Extract and pickle every feature set; keep the feature dimensions for the
# batch-generation cells that follow.
(rgbFeatureDim, zcaFeatureDim, stdFeatureDim, grayFeatureDim,
 edgFeatureDim, hogp1FeatureDim, hogp2FeatureDim, hogp3FeatureDim,
 hogp4FeatureDim) = main(ImageDir,
                         DataDir=DataDir,
                         augmentBY=augmentBY,
                         forceDump=True,
                         reduceDimension=None)


The current image directory is:  /Users/sam/All-Program/App-DataSet/Kaggle-Challenges/CIFAR-10/trainDataAirplane/
The count of images in the directory is:  5000
Augmented the datase by %s flipped images  0
RGB Feature DataSet: shape =  (5000, 3072)
ZCA whitened Feature DataSet: shape =  (5000, 1024)
Standarized Feature DataSet: shape =  (5000, 1024)
GrayScale Feature DataSet: shape =  (5000, 1024)
Edge Feature DataSet: shape =  (5000, 1024)
HOG param1 Feature DataSet: shape =  (5000, 450)
HOG param2 Feature DataSet: shape =  (5000, 576)
HOG param3 Feature DataSet: shape =  (5000, 3105)
HOG param3 Feature DataSet: shape =  (5000, 3249)
Storing data for RGB Feature set
Storing data for ZCA Feature set
Storing data for GRAY Feature set
Storing data for STD Feature set
Storing data for EDGE Feature set
Storing data for HOG p1 Feature set
Storing data for HOG p2 Feature set
Storing data for HOG p3 Feature set
Storing data for HOG p4 Feature set

The current image directory is:  /Users/sam/

# n-Fold Batch Generator: 
---------
### The Below code calls the class CreateBatches 

1. Gets the pickled feature data from the directory, 
2. Randomizes the data.
3. Divides the data into train and test datasets. (In this case we don't create test data, as the test data is available in a separate test file.)
4. Finally, the training data is converted into 10 folds and stored into the respective directory.

In [12]:
import sys,os

# Get the current working directory
# cwd = os.getcwd()
# mydir = os.path.abspath(os.path.join(cwd, ".."))
# sys.path.append(mydir)

from DataPreparation import CreateBatches

## RGB Feature :
-------

In [13]:
#########################
# RGB Feature Set
#########################

# Split the pickled RGB features into 10 batches and dump them into the RGB
# batch directory. This cell only runs for the non-augmented dataset.
if augmentBY == False:
    featureDim = rgbFeatureDim
    numBatches = 10
    test_percntg = 0

    obj_RGB = CreateBatches(dimensions=featureDim)
    trainData, trainLabels, _, _, labelDict = obj_RGB.gen_TrainTestData(
        max_num_images=maxNumImage,
        dir_to_pickle_files=feature_paths['RGB'],
        test_percntg=test_percntg)

    rgbBatches = obj_RGB.generateBatches(dataset=trainData,
                                         labels=trainLabels,
                                         numBatches=numBatches)
    for batchNum, (trnBatchData, trnBatchLabel) in enumerate(rgbBatches):
        obj_RGB.dumpBatches(feature_batch_paths['RGB'],
                            trnBatchData,
                            trnBatchLabel,
                            batchNum=batchNum,
                            labelDict=labelDict)

seed use for randomness is :  8653
The training Data set size is :  (15000, 3072)
The training Labels size is :  (15000,)
The test Data set size is :  (0, 3072)
The test Labels size is :  (0,)
Batch No:  0  : Training Batch Data Shape: (1500, 3072)
Batch No:  0  : Training Batch Labels Shape : (1500,)
Batch No:  1  : Training Batch Data Shape: (1500, 3072)
Batch No:  1  : Training Batch Labels Shape : (1500,)
Batch No:  2  : Training Batch Data Shape: (1500, 3072)
Batch No:  2  : Training Batch Labels Shape : (1500,)
Batch No:  3  : Training Batch Data Shape: (1500, 3072)
Batch No:  3  : Training Batch Labels Shape : (1500,)
Batch No:  4  : Training Batch Data Shape: (1500, 3072)
Batch No:  4  : Training Batch Labels Shape : (1500,)
Batch No:  5  : Training Batch Data Shape: (1500, 3072)
Batch No:  5  : Training Batch Labels Shape : (1500,)
Batch No:  6  : Training Batch Data Shape: (1500, 3072)
Batch No:  6  : Training Batch Labels Shape : (1500,)
Batch No:  7  : Training Batch Data S

## ZCA Whitened Feature:
----

In [14]:
#########################
# ZCA whitened Feature Set
#########################

# Split the pickled ZCA-whitened features into 10 batches and dump them into the
# ZCA batch directory.
featureDim = zcaFeatureDim
numBatches = 10
test_percntg = 0

obj_ZCA = CreateBatches(dimensions=featureDim)
trainData, trainLabels, _, _, labelDict = obj_ZCA.gen_TrainTestData(
    max_num_images=maxNumImage,
    dir_to_pickle_files=feature_paths['ZCA'],
    test_percntg=test_percntg)

zcaBatches = obj_ZCA.generateBatches(dataset=trainData,
                                     labels=trainLabels,
                                     numBatches=numBatches)
for batchNum, (trnBatchData, trnBatchLabel) in enumerate(zcaBatches):
    obj_ZCA.dumpBatches(feature_batch_paths['ZCA'],
                        trnBatchData,
                        trnBatchLabel,
                        batchNum=batchNum,
                        labelDict=labelDict)

seed use for randomness is :  8653
The training Data set size is :  (15000, 1024)
The training Labels size is :  (15000,)
The test Data set size is :  (0, 1024)
The test Labels size is :  (0,)
Batch No:  0  : Training Batch Data Shape: (1500, 1024)
Batch No:  0  : Training Batch Labels Shape : (1500,)
Batch No:  1  : Training Batch Data Shape: (1500, 1024)
Batch No:  1  : Training Batch Labels Shape : (1500,)
Batch No:  2  : Training Batch Data Shape: (1500, 1024)
Batch No:  2  : Training Batch Labels Shape : (1500,)
Batch No:  3  : Training Batch Data Shape: (1500, 1024)
Batch No:  3  : Training Batch Labels Shape : (1500,)
Batch No:  4  : Training Batch Data Shape: (1500, 1024)
Batch No:  4  : Training Batch Labels Shape : (1500,)
Batch No:  5  : Training Batch Data Shape: (1500, 1024)
Batch No:  5  : Training Batch Labels Shape : (1500,)
Batch No:  6  : Training Batch Data Shape: (1500, 1024)
Batch No:  6  : Training Batch Labels Shape : (1500,)
Batch No:  7  : Training Batch Data S

## GRAY SCALE:
----

In [15]:
#########################
# GRAY SCALE Feature Set
#########################

# Create 10 batches and store them in the batch directory for the GRAY SCALE
# feature set of images.
# The dimension comes from main() like in every sibling cell, instead of a
# hard-coded 1024 that would silently go stale if the image size changed.
featureDim = grayFeatureDim
numBatches =10
test_percntg = 0

obj_GRAY = CreateBatches(dimensions=featureDim)
trainData, trainLabels, _, _, labelDict = obj_GRAY.gen_TrainTestData(max_num_images=maxNumImage, dir_to_pickle_files=feature_paths['GRAY'], test_percntg=test_percntg)

for batchNum, (trnBatchData, trnBatchLabel) in enumerate(obj_GRAY.generateBatches(dataset=trainData, labels=trainLabels, numBatches=numBatches)):
    obj_GRAY.dumpBatches(feature_batch_paths['GRAY'], trnBatchData, trnBatchLabel, batchNum=batchNum, labelDict=labelDict)

seed use for randomness is :  8653
The training Data set size is :  (15000, 1024)
The training Labels size is :  (15000,)
The test Data set size is :  (0, 1024)
The test Labels size is :  (0,)
Batch No:  0  : Training Batch Data Shape: (1500, 1024)
Batch No:  0  : Training Batch Labels Shape : (1500,)
Batch No:  1  : Training Batch Data Shape: (1500, 1024)
Batch No:  1  : Training Batch Labels Shape : (1500,)
Batch No:  2  : Training Batch Data Shape: (1500, 1024)
Batch No:  2  : Training Batch Labels Shape : (1500,)
Batch No:  3  : Training Batch Data Shape: (1500, 1024)
Batch No:  3  : Training Batch Labels Shape : (1500,)
Batch No:  4  : Training Batch Data Shape: (1500, 1024)
Batch No:  4  : Training Batch Labels Shape : (1500,)
Batch No:  5  : Training Batch Data Shape: (1500, 1024)
Batch No:  5  : Training Batch Labels Shape : (1500,)
Batch No:  6  : Training Batch Data Shape: (1500, 1024)
Batch No:  6  : Training Batch Labels Shape : (1500,)
Batch No:  7  : Training Batch Data S

## STD - Standarized Feature:
-----

In [16]:
#########################
# Standarized Feature Set
#########################

# Split the pickled standardized features into 10 batches and dump them into the
# STD batch directory.
featureDim = stdFeatureDim
numBatches = 10
test_percntg = 0

obj_STD = CreateBatches(dimensions=featureDim)
trainData, trainLabels, _, _, labelDict = obj_STD.gen_TrainTestData(
    max_num_images=maxNumImage,
    dir_to_pickle_files=feature_paths['STD'],
    test_percntg=test_percntg)

stdBatches = obj_STD.generateBatches(dataset=trainData,
                                     labels=trainLabels,
                                     numBatches=numBatches)
for batchNum, (trnBatchData, trnBatchLabel) in enumerate(stdBatches):
    obj_STD.dumpBatches(feature_batch_paths['STD'],
                        trnBatchData,
                        trnBatchLabel,
                        batchNum=batchNum,
                        labelDict=labelDict)

seed use for randomness is :  8653
The training Data set size is :  (15000, 1024)
The training Labels size is :  (15000,)
The test Data set size is :  (0, 1024)
The test Labels size is :  (0,)
Batch No:  0  : Training Batch Data Shape: (1500, 1024)
Batch No:  0  : Training Batch Labels Shape : (1500,)
Batch No:  1  : Training Batch Data Shape: (1500, 1024)
Batch No:  1  : Training Batch Labels Shape : (1500,)
Batch No:  2  : Training Batch Data Shape: (1500, 1024)
Batch No:  2  : Training Batch Labels Shape : (1500,)
Batch No:  3  : Training Batch Data Shape: (1500, 1024)
Batch No:  3  : Training Batch Labels Shape : (1500,)
Batch No:  4  : Training Batch Data Shape: (1500, 1024)
Batch No:  4  : Training Batch Labels Shape : (1500,)
Batch No:  5  : Training Batch Data Shape: (1500, 1024)
Batch No:  5  : Training Batch Labels Shape : (1500,)
Batch No:  6  : Training Batch Data Shape: (1500, 1024)
Batch No:  6  : Training Batch Labels Shape : (1500,)
Batch No:  7  : Training Batch Data S

## EDGE Feature:
------

In [17]:
#########################
# Edge Feature Set
#########################

# Split the pickled edge features into 10 batches and dump them into the EDG
# batch directory.
featureDim = edgFeatureDim
numBatches = 10
test_percntg = 0

obj_EDG = CreateBatches(dimensions=featureDim)
trainData, trainLabels, _, _, labelDict = obj_EDG.gen_TrainTestData(
    max_num_images=maxNumImage,
    dir_to_pickle_files=feature_paths['EDG'],
    test_percntg=test_percntg)

edgBatches = obj_EDG.generateBatches(dataset=trainData,
                                     labels=trainLabels,
                                     numBatches=numBatches)
for batchNum, (trnBatchData, trnBatchLabel) in enumerate(edgBatches):
    obj_EDG.dumpBatches(feature_batch_paths['EDG'],
                        trnBatchData,
                        trnBatchLabel,
                        batchNum=batchNum,
                        labelDict=labelDict)

seed use for randomness is :  8653
The training Data set size is :  (15000, 1024)
The training Labels size is :  (15000,)
The test Data set size is :  (0, 1024)
The test Labels size is :  (0,)
Batch No:  0  : Training Batch Data Shape: (1500, 1024)
Batch No:  0  : Training Batch Labels Shape : (1500,)
Batch No:  1  : Training Batch Data Shape: (1500, 1024)
Batch No:  1  : Training Batch Labels Shape : (1500,)
Batch No:  2  : Training Batch Data Shape: (1500, 1024)
Batch No:  2  : Training Batch Labels Shape : (1500,)
Batch No:  3  : Training Batch Data Shape: (1500, 1024)
Batch No:  3  : Training Batch Labels Shape : (1500,)
Batch No:  4  : Training Batch Data Shape: (1500, 1024)
Batch No:  4  : Training Batch Labels Shape : (1500,)
Batch No:  5  : Training Batch Data Shape: (1500, 1024)
Batch No:  5  : Training Batch Labels Shape : (1500,)
Batch No:  6  : Training Batch Data Shape: (1500, 1024)
Batch No:  6  : Training Batch Labels Shape : (1500,)
Batch No:  7  : Training Batch Data S

## HOG Feature :- One Kernel (6,6)  --> 18 Bins
------

In [18]:
# ----------------------------------------------------------------------
# HOG p1 feature set -- build the training batches
# ----------------------------------------------------------------------
# Reads the pickle files found under feature_paths['HOGp1'], keeps every sample
# for training (test_percntg is 0), and hands numBatches batches one by one to
# dumpBatches together with the batch location feature_batch_paths['HOGp1'].
featureDim = hogp1FeatureDim
numBatches = 10
test_percntg = 0

obj_HOGp1 = CreateBatches(dimensions=featureDim)

# Five values come back; the 3rd and 4th (presumably the test data/labels,
# unused because no test share is requested) are discarded.
trainData, trainLabels, _, _, labelDict = obj_HOGp1.gen_TrainTestData(
    max_num_images=maxNumImage,
    dir_to_pickle_files=feature_paths['HOGp1'],
    test_percntg=test_percntg
)

# enumerate supplies the running batch index that dumpBatches receives as batchNum.
for batchNum, (trnBatchData, trnBatchLabel) in enumerate(
        obj_HOGp1.generateBatches(dataset=trainData,
                                  labels=trainLabels,
                                  numBatches=numBatches)):
    obj_HOGp1.dumpBatches(
        feature_batch_paths['HOGp1'],
        trnBatchData,
        trnBatchLabel,
        batchNum=batchNum,
        labelDict=labelDict
    )

seed use for randomness is :  8653
The training Data set size is :  (15000, 450)
The training Labels size is :  (15000,)
The test Data set size is :  (0, 450)
The test Labels size is :  (0,)
Batch No:  0  : Training Batch Data Shape: (1500, 450)
Batch No:  0  : Training Batch Labels Shape : (1500,)
Batch No:  1  : Training Batch Data Shape: (1500, 450)
Batch No:  1  : Training Batch Labels Shape : (1500,)
Batch No:  2  : Training Batch Data Shape: (1500, 450)
Batch No:  2  : Training Batch Labels Shape : (1500,)
Batch No:  3  : Training Batch Data Shape: (1500, 450)
Batch No:  3  : Training Batch Labels Shape : (1500,)
Batch No:  4  : Training Batch Data Shape: (1500, 450)
Batch No:  4  : Training Batch Labels Shape : (1500,)
Batch No:  5  : Training Batch Data Shape: (1500, 450)
Batch No:  5  : Training Batch Labels Shape : (1500,)
Batch No:  6  : Training Batch Data Shape: (1500, 450)
Batch No:  6  : Training Batch Labels Shape : (1500,)
Batch No:  7  : Training Batch Data Shape: (15

## HOG Feature :- One Window (4,4) --> 18 Bins
------

In [19]:
# ----------------------------------------------------------------------
# HOG p2 feature set -- build the training batches
# ----------------------------------------------------------------------
# Reads the pickle files found under feature_paths['HOGp2'], keeps every sample
# for training (test_percntg is 0), and hands numBatches batches one by one to
# dumpBatches together with the batch location feature_batch_paths['HOGp2'].
featureDim = hogp2FeatureDim
numBatches = 10
test_percntg = 0

obj_HOGp2 = CreateBatches(dimensions=featureDim)

# Five values come back; the 3rd and 4th (presumably the test data/labels,
# unused because no test share is requested) are discarded.
trainData, trainLabels, _, _, labelDict = obj_HOGp2.gen_TrainTestData(
    max_num_images=maxNumImage,
    dir_to_pickle_files=feature_paths['HOGp2'],
    test_percntg=test_percntg
)

# enumerate supplies the running batch index that dumpBatches receives as batchNum.
for batchNum, (trnBatchData, trnBatchLabel) in enumerate(
        obj_HOGp2.generateBatches(dataset=trainData,
                                  labels=trainLabels,
                                  numBatches=numBatches)):
    obj_HOGp2.dumpBatches(
        feature_batch_paths['HOGp2'],
        trnBatchData,
        trnBatchLabel,
        batchNum=batchNum,
        labelDict=labelDict
    )

seed use for randomness is :  8653
The training Data set size is :  (15000, 576)
The training Labels size is :  (15000,)
The test Data set size is :  (0, 576)
The test Labels size is :  (0,)
Batch No:  0  : Training Batch Data Shape: (1500, 576)
Batch No:  0  : Training Batch Labels Shape : (1500,)
Batch No:  1  : Training Batch Data Shape: (1500, 576)
Batch No:  1  : Training Batch Labels Shape : (1500,)
Batch No:  2  : Training Batch Data Shape: (1500, 576)
Batch No:  2  : Training Batch Labels Shape : (1500,)
Batch No:  3  : Training Batch Data Shape: (1500, 576)
Batch No:  3  : Training Batch Labels Shape : (1500,)
Batch No:  4  : Training Batch Data Shape: (1500, 576)
Batch No:  4  : Training Batch Labels Shape : (1500,)
Batch No:  5  : Training Batch Data Shape: (1500, 576)
Batch No:  5  : Training Batch Labels Shape : (1500,)
Batch No:  6  : Training Batch Data Shape: (1500, 576)
Batch No:  6  : Training Batch Labels Shape : (1500,)
Batch No:  7  : Training Batch Data Shape: (15

## HOG Feature :- Three kernels (2,2),(4,4),(6,6) --> 9 Bins
------

In [20]:
# ----------------------------------------------------------------------
# HOG p3 feature set -- build the training batches
# ----------------------------------------------------------------------
# Reads the pickle files found under feature_paths['HOGp3'], keeps every sample
# for training (test_percntg is 0), and hands numBatches batches one by one to
# dumpBatches together with the batch location feature_batch_paths['HOGp3'].
featureDim = hogp3FeatureDim
numBatches = 10
test_percntg = 0

obj_HOGp3 = CreateBatches(dimensions=featureDim)

# Five values come back; the 3rd and 4th (presumably the test data/labels,
# unused because no test share is requested) are discarded.
trainData, trainLabels, _, _, labelDict = obj_HOGp3.gen_TrainTestData(
    max_num_images=maxNumImage,
    dir_to_pickle_files=feature_paths['HOGp3'],
    test_percntg=test_percntg
)

# enumerate supplies the running batch index that dumpBatches receives as batchNum.
for batchNum, (trnBatchData, trnBatchLabel) in enumerate(
        obj_HOGp3.generateBatches(dataset=trainData,
                                  labels=trainLabels,
                                  numBatches=numBatches)):
    obj_HOGp3.dumpBatches(
        feature_batch_paths['HOGp3'],
        trnBatchData,
        trnBatchLabel,
        batchNum=batchNum,
        labelDict=labelDict
    )

seed use for randomness is :  8653
The training Data set size is :  (15000, 3105)
The training Labels size is :  (15000,)
The test Data set size is :  (0, 3105)
The test Labels size is :  (0,)
Batch No:  0  : Training Batch Data Shape: (1500, 3105)
Batch No:  0  : Training Batch Labels Shape : (1500,)
Batch No:  1  : Training Batch Data Shape: (1500, 3105)
Batch No:  1  : Training Batch Labels Shape : (1500,)
Batch No:  2  : Training Batch Data Shape: (1500, 3105)
Batch No:  2  : Training Batch Labels Shape : (1500,)
Batch No:  3  : Training Batch Data Shape: (1500, 3105)
Batch No:  3  : Training Batch Labels Shape : (1500,)
Batch No:  4  : Training Batch Data Shape: (1500, 3105)
Batch No:  4  : Training Batch Labels Shape : (1500,)
Batch No:  5  : Training Batch Data Shape: (1500, 3105)
Batch No:  5  : Training Batch Labels Shape : (1500,)
Batch No:  6  : Training Batch Data Shape: (1500, 3105)
Batch No:  6  : Training Batch Labels Shape : (1500,)
Batch No:  7  : Training Batch Data S

## HOG Feature :- Four Kernels (2,2),(4,4),(6,6),(8,8) --> 9 Bins
------

In [21]:
# ----------------------------------------------------------------------
# HOG p4 feature set -- build the training batches
# ----------------------------------------------------------------------
# Reads the pickle files found under feature_paths['HOGp4'], keeps every sample
# for training (test_percntg is 0), and hands numBatches batches one by one to
# dumpBatches together with the batch location feature_batch_paths['HOGp4'].
featureDim = hogp4FeatureDim
numBatches = 10
test_percntg = 0

obj_HOGp4 = CreateBatches(dimensions=featureDim)

# Five values come back; the 3rd and 4th (presumably the test data/labels,
# unused because no test share is requested) are discarded.
trainData, trainLabels, _, _, labelDict = obj_HOGp4.gen_TrainTestData(
    max_num_images=maxNumImage,
    dir_to_pickle_files=feature_paths['HOGp4'],
    test_percntg=test_percntg
)

# enumerate supplies the running batch index that dumpBatches receives as batchNum.
for batchNum, (trnBatchData, trnBatchLabel) in enumerate(
        obj_HOGp4.generateBatches(dataset=trainData,
                                  labels=trainLabels,
                                  numBatches=numBatches)):
    obj_HOGp4.dumpBatches(
        feature_batch_paths['HOGp4'],
        trnBatchData,
        trnBatchLabel,
        batchNum=batchNum,
        labelDict=labelDict
    )

seed use for randomness is :  8653
The training Data set size is :  (15000, 3249)
The training Labels size is :  (15000,)
The test Data set size is :  (0, 3249)
The test Labels size is :  (0,)
Batch No:  0  : Training Batch Data Shape: (1500, 3249)
Batch No:  0  : Training Batch Labels Shape : (1500,)
Batch No:  1  : Training Batch Data Shape: (1500, 3249)
Batch No:  1  : Training Batch Labels Shape : (1500,)
Batch No:  2  : Training Batch Data Shape: (1500, 3249)
Batch No:  2  : Training Batch Labels Shape : (1500,)
Batch No:  3  : Training Batch Data Shape: (1500, 3249)
Batch No:  3  : Training Batch Labels Shape : (1500,)
Batch No:  4  : Training Batch Data Shape: (1500, 3249)
Batch No:  4  : Training Batch Labels Shape : (1500,)
Batch No:  5  : Training Batch Data Shape: (1500, 3249)
Batch No:  5  : Training Batch Labels Shape : (1500,)
Batch No:  6  : Training Batch Data Shape: (1500, 3249)
Batch No:  6  : Training Batch Labels Shape : (1500,)
Batch No:  7  : Training Batch Data S