In [1]:
import glob
import os

import numpy as np
from PIL import Image
from scipy.ndimage import laplace, sobel
from scipy.stats import kurtosis, skew

def get_img_feat(img):
    """Compute a flat list of 20 intensity statistics for one image channel.

    Features, in order:
      0-4    mean, standard deviation, median, max, min
      5      variance of the Laplacian response (used here as a blur measure)
      6-7    variance of the Sobel response along axis 0 and along axis 1
      8-9    kurtosis and skewness of the pixel values
      10-19  counts of a 10-bin histogram over the range [0, 255]

    Parameters
    ----------
    img : array_like
        Single-channel image. It is converted to float64 before any
        statistic is computed.

    Returns
    -------
    list
        The 20 feature values listed above.
    """
    # scipy.ndimage filters write their result in the input dtype, so an
    # unsigned-integer image would wrap around on negative filter responses.
    # Casting here makes the function safe regardless of what the caller passes.
    img = np.asarray(img, dtype=np.float64)
    pixels = img.ravel()

    feats = [np.mean(img), np.std(img), np.median(img), np.max(img), np.min(img)]
    # Laplacian, Sobel, kurtosis and skewness
    feats.append(laplace(img, mode='reflect').ravel().var())  # blur
    feats.extend(sobel(img, axis=ax, mode='reflect').ravel().var() for ax in (0, 1))
    feats += [kurtosis(pixels), skew(pixels)]
    # bins = 10, range = 0 to 255
    feats += list(np.histogram(img, 10, (0.0, 255.0))[0])
    return feats

# Input locations. The training pattern matches files one directory level
# below train_dir; the test pattern matches entries directly inside test_dir.
test_dir = '../input/test'
train_dir = '../input/crop_train'

# Sorted so the file order does not depend on the order glob happens to return.
train_files = sorted(glob.glob('{}/*/*'.format(train_dir)))
test_files = sorted(glob.glob('{}/*'.format(test_dir)))

def generate_img_feat(img_p):
    """Build the feature vector for one image file.

    The vector starts with a 0/1 flag telling whether the file name contains
    'manip', followed by the `get_img_feat` statistics of channel 0, channel 1
    and channel 2 of the image.

    Parameters
    ----------
    img_p : str
        Path of the image file.

    Returns
    -------
    list
        ``[manip_flag]`` followed by the per-channel feature lists.
    """
    # Look at the file name only, so a directory whose name happens to contain
    # 'manip' cannot flip the flag for every file beneath it.
    feat = [1 if 'manip' in os.path.basename(img_p) else 0]

    # The context manager closes the file handle promptly; convert('RGB')
    # guarantees three channels even for greyscale, palette or RGBA files.
    with Image.open(img_p) as img:
        im_array = np.asarray(img.convert('RGB'), dtype=np.float64)

    for ch in range(3):
        feat += get_img_feat(im_array[:, :, ch])
    return feat

# Sanity check: print the first training file and the first test file, each
# followed by the feature vector computed for it.
for file_list in (train_files, test_files):
    first_path = file_list[0]
    print(first_path)
    print(generate_img_feat(first_path))

../input/crop_train/HTC-1-M7/(HTC-1-M7)100_10_manip.jpg
[1, 103.81816482543945, 59.525573987335754, 128.0, 255.0, 2.0, 43.831771850585938, 499.73059263732284, 1742.2497387817129, -0.26458231943014887, 0.004070778072698835, 54769, 14046, 24463, 3262, 34287, 113615, 6383, 402, 338, 10579, 95.132198333740234, 60.339420225828981, 118.0, 255.0, 0.0, 43.771568298339844, 507.69457779824734, 1816.408475741744, 0.024907945458827907, 0.13222835619670814, 63521, 7689, 23825, 2467, 83149, 69602, 656, 387, 313, 10535, 86.562847137451172, 58.411725161631026, 103.0, 255.0, 0.0, 42.140853881835938, 437.86382149066776, 1474.06302498281, 0.7786400814366612, 0.4790466300984756, 60350, 26889, 4876, 29577, 109603, 17759, 907, 714, 457, 11012]
../input/test/img_0002a04_manip.tif
[1, 187.38053131103516, 34.44172614719637, 205.0, 216.0, 35.0, 55.478790283203125, 1606.8642077660188, 718.40000813361257, 2.598499739004817, -1.8962484620320865, 0, 49, 3061, 10007, 13280, 12280, 21067, 57615, 144785, 0, 181.926029

In [2]:
# Class names; the position of a name in this list is its integer label.
list_classes = [
    'Sony-NEX-7',
    'Motorola-X',
    'HTC-1-M7',
    'Samsung-Galaxy-Note3',
    'Motorola-Droid-Maxx',
    'iPhone-4s',
    'iPhone-6',
    'LG-Nexus-5x',
    'Samsung-Galaxy-S4',
    'Motorola-Nexus-6',
]

# Lookup table: class name -> integer label.
dict_cls = {name: label for label, name in enumerate(list_classes)}
print(dict_cls)
print('----------------------------------------------')
# Build the training feature rows (x_data) and their integer labels (y_data).
x_data, y_data = [], []
for n_done, img_p in enumerate(train_files, start=1):
    if n_done % 1000 == 0:
        print('processing', n_done)
    x_data.append(generate_img_feat(img_p))
    # The class name is the directory that contains the image. os.path is used
    # instead of splitting on '/' so that paths using the platform's own
    # separator (as glob may return them) are handled as well.
    class_name = os.path.basename(os.path.dirname(img_p))
    y_data.append(dict_cls[class_name])

print('train_x done')
print(x_data[0])
print(y_data[0])

# Build the feature rows for the test files, in the order of test_files.
test_x_data = []
for n_seen, test_path in enumerate(test_files, start=1):
    # Report progress once every 1000 files.
    if n_seen % 1000 == 0:
        print('processing', n_seen)
    test_x_data.append(generate_img_feat(test_path))

print('test x done')
print(test_x_data[0])

{'Sony-NEX-7': 0, 'iPhone-6': 6, 'LG-Nexus-5x': 7, 'Motorola-Droid-Maxx': 4, 'Motorola-Nexus-6': 9, 'HTC-1-M7': 2, 'Samsung-Galaxy-S4': 8, 'Motorola-X': 1, 'Samsung-Galaxy-Note3': 3, 'iPhone-4s': 5}
----------------------------------------------
processing 1000
processing 2000
processing 3000
processing 4000
processing 5000
processing 6000
processing 7000
processing 8000
processing 9000
processing 10000
processing 11000
processing 12000
processing 13000
processing 14000
processing 15000
processing 16000
processing 17000
processing 18000
processing 19000
processing 20000
processing 21000
processing 22000
processing 23000
processing 24000
processing 25000
processing 26000
processing 27000
processing 28000
processing 29000
processing 30000
processing 31000
processing 32000
processing 33000
processing 34000
processing 35000
processing 36000
processing 37000
processing 38000
processing 39000
processing 40000
processing 41000
processing 42000
processing 43000
train_x done
[1, 103.81816482543

In [3]:
import pickle

# Write [train features, train labels, test features] to a single pickle file.
feat_path = '../features/other_feat.pkl'
# open() does not create missing directories; create the target directory
# first so the feature extraction above is not lost to a FileNotFoundError.
os.makedirs(os.path.dirname(feat_path), exist_ok=True)
with open(feat_path, 'wb') as fout:
    pickle.dump([x_data, y_data, test_x_data], fout)
print('save done')

save done
