In [1]:
import pandas as pd
import numpy as np
from os import listdir
import cv2 as cv
import mahotas

In [2]:
def hu_moments(image):
    """Return the Hu invariant moments of a BGR image as a flat 1-D array.

    The image is first reduced to a single grayscale channel, its image
    moments are computed with ``cv.moments`` and those are converted to Hu
    moments with ``cv.HuMoments``.
    """
    gray = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    raw_moments = cv.moments(gray)
    # HuMoments returns a column vector; flatten it so it can be stacked
    # next to the other feature vectors.
    return cv.HuMoments(raw_moments).flatten()

In [3]:
# Target (width, height) passed to cv.resize for every image.
size = (500, 500)
# Number of histogram bins per HSV channel.
bins = 8

In [4]:
def histogram(image, mask=None):
    """Return a flattened, normalised 3-D HSV colour histogram of a BGR image.

    Parameters
    ----------
    image : BGR image array (e.g. as returned by ``cv.imread``).
    mask : optional mask forwarded to ``cv.calcHist``; when given, only the
        pixels selected by the mask are counted. ``None`` (the default)
        counts every pixel, which is what the existing callers rely on.

    Returns
    -------
    1-D array with ``bins ** 3`` entries, where ``bins`` is the module-level
    bin count.
    """
    # convert the image to HSV color-space
    hsv = cv.cvtColor(image, cv.COLOR_BGR2HSV)
    # compute the color histogram; `mask` is forwarded here (it used to be
    # accepted by the signature but silently replaced with None).
    # NOTE(review): OpenCV stores 8-bit hue in [0, 180), so with a [0, 256)
    # range the upper hue bins stay empty. The range is left untouched to keep
    # the features compatible with data that was already exported -- confirm
    # before changing.
    hist = cv.calcHist([hsv], [0, 1, 2], mask, [bins, bins, bins], [0, 256, 0, 256, 0, 256])
    # normalize the histogram in place using OpenCV's default normalisation
    cv.normalize(hist, hist)
    # return the histogram as a flat feature vector
    return hist.flatten()

In [5]:
def haralick(image):
    """Return the Haralick texture features of a BGR image.

    The image is converted to grayscale, ``mahotas.features.haralick`` is
    evaluated on it, and the result is averaged along axis 0 so a single
    feature vector is returned.
    """
    grayscale = cv.cvtColor(image, cv.COLOR_BGR2GRAY)
    texture = mahotas.features.haralick(grayscale)
    # Collapse the first axis of the mahotas result into one mean vector.
    return texture.mean(axis=0)

In [6]:
# Forward slashes work on Windows as well as POSIX and avoid the invalid
# '\i' escape sequence that the original '.\images' literal contained.
# os.listdir returns entries in arbitrary order, so sort them to make the
# row order of the extracted features reproducible between runs.
files = sorted(listdir('./images'))
len(files)

58674

In [7]:
%%time

ids = []
features = []
# Files OpenCV could not decode; collected so they can be inspected afterwards
# instead of aborting the (long-running) extraction part-way through.
unreadable = []

for file in files:
    # The image id is the part of the file name before the first dot.
    i = file.split('.')[0]

    # Forward slashes are accepted on Windows too and avoid the invalid
    # '\i' and '\{' escape sequences of the original path literal.
    path = './images/{}'.format(file)
    image = cv.imread(path)
    if image is None:
        # cv.imread signals an unreadable/non-image file by returning None
        # rather than raising; cv.resize would then fail on it.
        unreadable.append(file)
        continue
    image = cv.resize(image, size)

    hu = hu_moments(image)
    hist = histogram(image)
    hara = haralick(image)
    # Feature layout per image: colour histogram, Haralick texture, Hu moments.
    f = np.hstack([hist, hara, hu])

    # Append id and feature vector together so both lists stay aligned.
    ids.append(i)
    features.append(f)

if unreadable:
    print('Skipped {} unreadable file(s): {}'.format(len(unreadable), unreadable[:10]))
    

Wall time: 29min 39s


In [9]:
# One row per processed image; each column is a position in the stacked
# feature vector collected in `features`.
image_features = pd.DataFrame(features)

In [10]:
# Attach the image ids as an 'id' column and promote it to the row index.
image_features = image_features.assign(id=ids).set_index('id')

In [13]:
# Display the feature table, indexed by image id.
image_features

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,522,523,524,525,526,527,528,529,530,531
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
10706426,0.957029,0.288230,0.011170,0.008057,0.012096,0.014959,0.018216,0.011005,0.000027,0.000623,...,1.834687,-0.574360,0.987342,0.002907,1.441842e-06,4.737565e-11,1.497445e-11,3.846839e-22,1.502156e-14,1.053346e-22
10863511,0.963263,0.012621,0.048110,0.056599,0.069344,0.107403,0.211890,0.072149,0.000000,0.000000,...,3.010600,-0.396199,0.976560,0.001029,2.948345e-07,9.484893e-12,9.297789e-12,8.614734e-23,5.047015e-15,1.422804e-23
11033684,0.000421,0.000859,0.000991,0.002222,0.005560,0.012863,0.295580,0.058473,0.000372,0.000760,...,2.900890,-0.538350,0.999579,0.001084,2.649234e-09,8.989013e-13,2.325992e-12,-2.839506e-24,-9.623192e-17,-1.802536e-24
12058471,0.970883,0.006486,0.009539,0.015721,0.024059,0.052220,0.174772,0.090319,0.000000,0.000025,...,2.836400,-0.403529,0.974306,0.000980,3.504771e-07,1.907410e-13,1.499406e-14,-3.786906e-28,3.661071e-18,-7.068093e-28
12173580,0.992922,0.002807,0.003575,0.008456,0.009727,0.007960,0.006452,0.011703,0.000649,0.001173,...,2.875850,-0.436744,0.986489,0.002249,2.122829e-07,2.855990e-09,4.020739e-10,-7.075602e-20,-4.439564e-14,4.250114e-19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20299845,0.001330,0.003890,0.006787,0.008596,0.013565,0.018868,0.018731,0.999317,0.000000,0.000000,...,2.052563,-0.405417,0.935408,0.000744,6.643839e-11,4.130001e-15,9.703422e-15,6.129200e-29,5.375354e-20,4.077063e-30
20299849,0.117906,0.073931,0.018120,0.015890,0.014584,0.015668,0.021821,0.989469,0.000555,0.000026,...,1.947205,-0.495653,0.964876,0.000904,9.417040e-10,2.870216e-14,7.312821e-14,-2.293503e-27,-1.966874e-18,-2.442214e-27
20299854,0.060384,0.005408,0.007458,0.009939,0.011757,0.017573,0.020179,0.997624,0.000019,0.000046,...,1.313943,-0.539461,0.920285,0.000777,8.781647e-11,4.039044e-15,2.653519e-14,-1.831924e-28,1.675712e-19,-2.047083e-28
20299872,0.001365,0.041610,0.028700,0.005162,0.005727,0.006580,0.005950,0.998652,0.000000,0.000000,...,1.363753,-0.464009,0.860065,0.000719,1.374298e-10,9.327737e-14,1.145232e-13,-9.168439e-27,-1.209842e-18,7.486368e-27


In [12]:
# Write the feature table to 'image_features.csv' (relative to the current
# working directory).
image_features.to_csv('image_features.csv')