In [1]:
import numpy as np
import pandas as pd
from random import choice, sample, shuffle, uniform, seed
from math import exp, expm1, log1p, log10, log2, sqrt, ceil, floor, isfinite, isnan
from itertools import combinations
#import for image processing
import cv2
from scipy.stats import kurtosis, skew
from scipy.ndimage import laplace, sobel

In [2]:
def read_jason(file='', loc='../input/'):
    """Load a JSON data file and turn its two band columns into an image stack.

    Parameters
    ----------
    file : str
        File name, appended directly to ``loc`` (no separator is inserted).
    loc : str
        Directory prefix; expected to end with a path separator.

    Returns
    -------
    tuple
        ``(df, bands, ida)`` where ``df`` is the frame without the
        ``band_1``/``band_2`` columns and with ``inc_angle`` as float
        (``'na'`` entries become ``-1``), ``bands`` is a float32 array of
        shape ``(n_rows, 75, 75, 3)`` holding band 1, band 2 and their mean,
        and ``ida`` is the ``id`` column.
    """
    frame = pd.read_json('{}{}'.format(loc, file))
    # 'na' marks a missing incidence angle; encode it as -1 so the column is numeric.
    frame['inc_angle'] = frame['inc_angle'].replace('na', -1).astype(float)

    def _as_images(column):
        # Each row stores one flattened 75x75 image as a list of numbers.
        return np.array([np.array(flat).astype(np.float32).reshape(75, 75)
                         for flat in frame[column]])

    first = _as_images('band_1')
    second = _as_images('band_2')
    frame = frame.drop(['band_1', 'band_2'], axis=1)
    ids = frame['id']
    # Third channel is the per-pixel average of the two bands.
    stacked = np.stack((first, second, 0.5 * (first + second)), axis=-1)

    return frame, stacked, ids

In [3]:
# Load the training set: metadata frame, image stack and id column.
# Note that loc overrides read_jason's default of '../input/'.
train, train_bands, id_train = read_jason(file='train.json', loc='../../input/')

In [4]:
train.head()

Unnamed: 0,id,inc_angle,is_iceberg
0,dfd5f913,43.9239,0
1,e25388fd,38.1562,0
2,58b2aaa0,45.2859,1
3,4cfc3a18,43.8306,0
4,271f93f4,35.6256,0


In [5]:
train_bands.shape

(1604, 75, 75, 3)

In [6]:
# Pair every image id with its image array: one (id, image) tuple per row.
paths = list(zip(train['id'].tolist(), train_bands))

In [9]:
len(paths[0])

2

In [10]:
def img_to_stats(paths, bins=20, scl_min=-50, scl_max=50, opt_poly=True, nan_value=-999):
    """Build a flat list of hand-crafted statistics for one multi-channel image.

    Parameters
    ----------
    paths : sequence
        Despite the name, a single ``(img_id, img)`` pair. ``img`` is indexed
        as ``img[:, :, channel]`` and must expose ``.shape``.
    bins : int
        Number of histogram bins per channel.
    scl_min, scl_max : number
        Value range handed to ``np.histogram``.
    opt_poly : bool
        When true, add pairwise interaction features (see below).
    nan_value : number
        Replacement written over every NaN feature.

    Returns
    -------
    list
        ``[img_id, features]``. For each channel, in order:

        * 11 basics: mean, std, max, median, min, (max - median),
          (max - min), (median - min), and those three spreads divided by std;
        * if ``opt_poly``: products, sums and differences of every pair drawn
          from [var(laplace), var(sobel axis 0), var(sobel axis 1), kurtosis,
          skew] (10 values each). The five raw values are not emitted;
        * the ``bins`` histogram counts;
        * index of the first fullest bin, then std, max, median and
          (max - median) of the counts.

        If ``opt_poly``, then for every pair of channels: the element-wise
        product of their 11 basics, followed by the same 5-value summary of
        the element-wise product of their histograms.

        With the defaults a 3-channel image gives 246 features.

    Notes
    -----
    Errors are reported via ``print`` and the features collected so far are
    returned (best effort), so a failed image yields a shorter list. Infinite
    values (for example a spread divided by a zero std) are left untouched;
    only NaN is replaced.
    """
    img_id, img = paths[0], paths[1]

    def _hist_summary(counts):
        # list.index returns the lowest index among tied maxima.
        peak, mid = np.max(counts), np.median(counts)
        return [counts.index(max(counts)), np.std(counts), peak, mid, peak - mid]

    st = []
    try:
        # Scope the suppression of divide/invalid warnings to this call rather
        # than changing NumPy's error handling for the rest of the session.
        with np.errstate(divide='ignore', invalid='ignore'):
            st_interv = []    # per-channel basics, for cross-channel products
            hist_interv = []  # per-channel histograms, for cross-channel products
            for i in range(img.shape[2]):
                img_sub = img[:, :, i]

                # location / spread statistics
                mean, std = np.mean(img_sub), np.std(img_sub)
                top, mid, low = np.max(img_sub), np.median(img_sub), np.min(img_sub)
                spreads = [top - mid, top - low, mid - low]
                sub_st = [mean, std, top, mid, low] + spreads
                sub_st += [s / std for s in spreads]  # normalized by stdev
                st += sub_st

                if opt_poly:
                    # Laplacian, Sobel, kurtosis and skewness; only their
                    # pairwise combinations are used as features.
                    flat = img_sub.ravel()
                    st_trans = [
                        laplace(img_sub, mode='reflect', cval=0.0).ravel().var(),
                        sobel(img_sub, axis=0, mode='reflect', cval=0.0).ravel().var(),
                        sobel(img_sub, axis=1, mode='reflect', cval=0.0).ravel().var(),
                        kurtosis(flat),
                        skew(flat),
                    ]
                    st_interv.append(sub_st)
                    pairs = list(combinations(st_trans, 2))
                    st += [x * y for x, y in pairs]
                    st += [x + y for x, y in pairs]
                    st += [x - y for x, y in pairs]

                # histogram of pixel values plus a summary of the bin counts
                hist = list(np.histogram(img_sub, bins=bins, range=(scl_min, scl_max))[0])
                hist_interv.append(hist)
                st += hist
                st += _hist_summary(hist)

            if opt_poly:
                for x, y in combinations(st_interv, 2):
                    st += [float(a) * float(b) for a, b in zip(x, y)]

                for x, y in combinations(hist_interv, 2):
                    hist_prod = [a * b for a, b in zip(x, y)]
                    st += _hist_summary(hist_prod)

        # correction: NaN (e.g. 0/0 on a constant channel) -> sentinel
        st = [nan_value if isnan(v) else v for v in st]

    except Exception as err:
        # Best effort: say which image failed and why, then return what exists.
        print('except: ', img_id, repr(err))

    return [img_id, st]

In [12]:
a = img_to_stats(paths[0])

In [15]:
len(a[1])


246

In [16]:
img = paths[0][1]

In [17]:
img.shape

(75, 75, 3)

In [18]:
# Scratch setup for stepping through the body of img_to_stats by hand on `img`.
# np.seterr changes NumPy's floating-point error handling beyond this cell.
np.seterr(divide='ignore', invalid='ignore')
bins = 20  # number of histogram bins used by the np.histogram cell below
scl_min, scl_max = -50, 50  # histogram value range
opt_poly = True  # same value as the flag inside img_to_stats
st = []  # flat feature accumulator, extended in place by the following cells
st_interv = []  # collects each channel's basic statistics
hist_interv = []  # collects each channel's histogram


In [19]:
            # Manual walk-through of img_to_stats for channel 0 only.
            img_sub = np.squeeze(img[:, :, 0])

            # mean, std, max, median and min of the channel
            sub_st = []
            sub_st += [np.mean(img_sub), np.std(img_sub), np.max(img_sub), np.median(img_sub), np.min(img_sub)]
            # spreads: max - median, max - min, median - min
            sub_st += [(sub_st[2] - sub_st[3]), (sub_st[2] - sub_st[4]), (sub_st[3] - sub_st[4])]
            sub_st += [(sub_st[-3] / sub_st[1]), (sub_st[-2] / sub_st[1]), (sub_st[-1] / sub_st[1])] # the three spreads normalized by stdev
            # Re-running this cell appends the same values to st again.
            st += sub_st

In [22]:
            # Start the list of transform-based statistics for the channel.
            st_trans = []
            st_trans += [laplace(img_sub, mode='reflect', cval=0.0).ravel().var()] # variance of the Laplacian response (original note: blur)

In [24]:
            # Variance of the Sobel response along each image axis.
            sobel0 = sobel(img_sub, axis=0, mode='reflect', cval=0.0).ravel().var()
            sobel1 = sobel(img_sub, axis=1, mode='reflect', cval=0.0).ravel().var()
            st_trans += [sobel0, sobel1]

In [26]:
            # Kurtosis and skewness of the flattened channel complete st_trans (5 values).
            st_trans += [kurtosis(img_sub.ravel()), skew(img_sub.ravel())]

In [28]:
                # Keep the channel's basic statistics for later cross-channel features.
                st_interv.append(sub_st)
                # Pairwise products of the transform statistics (10 pairs from 5 values).
                st += [x * y for x, y in combinations(st_trans, 2)]


In [31]:
                # Pairwise sums, then pairwise differences, of the transform statistics.
                st += [x + y for x, y in combinations(st_trans, 2)]
                st += [x - y for x, y in combinations(st_trans, 2)]

In [37]:
# Show the pairs that combinations() yields from st_trans, in order.
for x, y in combinations(st_trans,2):
    print(x,y)

34.2444 95.1907
34.2444 105.088
34.2444 15.62562594263381
34.2444 1.9487619400024414
95.1907 105.088
95.1907 15.62562594263381
95.1907 1.9487619400024414
105.088 15.62562594263381
105.088 1.9487619400024414
15.62562594263381 1.9487619400024414


In [38]:
            # Bin counts of the channel's pixel values over [scl_min, scl_max]; [0] drops the bin edges.
            hist = list(np.histogram(img_sub, bins=bins, range=(scl_min, scl_max))[0])
            hist_interv.append(hist)

In [39]:
hist

[0, 0, 37, 1143, 4016, 381, 19, 11, 10, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]

In [40]:
            # Append the raw bin counts, then a summary of them.
            st += hist
            st += [hist.index(max(hist))] # list.index returns only the lowest index holding the max value
            # std, max, median and (max - median) of the bin counts
            st += [np.std(hist), np.max(hist), np.median(hist), (np.max(hist) - np.median(hist))]


In [41]:
st

[-27.911045,
 2.7645373,
 -0.213149,
 -27.879921,
 -38.211376,
 27.666773,
 37.998226,
 10.331455,
 10.007741,
 13.744877,
 3.7371371,
 3259.7532,
 3598.6792,
 535.09090069580498,
 66.734272633038927,
 10003.398,
 1487.4144585358688,
 185.50403653880494,
 1642.0652621473155,
 204.79143028387625,
 30.45062512571954,
 129.43515,
 139.33241,
 49.87007174341506,
 36.193207740783691,
 200.27869,
 110.81633791773146,
 97.139473915100098,
 120.71359286157912,
 107.03672885894775,
 17.574387882636252,
 -60.946266,
 -70.843521,
 18.61881985814744,
 32.295683860778809,
 -9.8972549,
 79.565086032463853,
 93.241950035095215,
 89.462340976311509,
 103.13920497894287,
 13.676864002631369,
 0,
 0,
 37,
 1143,
 4016,
 381,
 19,
 11,
 10,
 8,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 4,
 894.42299137488635,
 4016,
 0.0,
 4016.0]

In [42]:
img_sub.shape


(75, 75)

In [44]:
st_trans

[34.244446, 95.190712, 105.08797, 15.62562594263381, 1.9487619400024414]

In [45]:
laplace(img_sub, mode='reflect', cval=0.0).ravel().var()

34.244446