In [1]:
import pandas as pd
# Load the train and test sets from their JSON files (train first, then test)
# and show the first rows of the training frame as a quick sanity check.
train_df, test_df = (
    pd.read_json(json_path)
    for json_path in ('../input/train.json', '../input/test.json')
)
print(train_df.head())

import numpy as np
import cv2
from itertools import combinations
from scipy.ndimage import laplace, sobel
from scipy.stats import kurtosis, skew

                                              band_1  \
0  [-27.878360999999998, -27.15416, -28.668615, -...   
1  [-12.242375, -14.920304999999999, -14.920363, ...   
2  [-24.603676, -24.603714, -24.871029, -23.15277...   
3  [-22.454607, -23.082819, -23.998013, -23.99805...   
4  [-26.006956, -23.164886, -23.164886, -26.89116...   

                                              band_2        id inc_angle  \
0  [-27.154118, -29.537888, -31.0306, -32.190483,...  dfd5f913   43.9239   
1  [-31.506321, -27.984554, -26.645678, -23.76760...  e25388fd   38.1562   
2  [-24.870956, -24.092632, -20.653963, -19.41104...  58b2aaa0   45.2859   
3  [-27.889421, -27.519794, -27.165262, -29.10350...  4cfc3a18   43.8306   
4  [-27.206915, -30.259186, -30.259186, -23.16495...  271f93f4   35.6256   

   is_iceberg  
0           0  
1           0  
2           1  
3           0  
4           0  


In [4]:


def get_img_feat(img):
    """Return five summary statistics of ``img`` as a list.

    The statistics are computed by NumPy over all elements of ``img`` and are
    returned in a fixed order: mean, standard deviation, median, maximum,
    minimum.
    """
    reducers = (np.mean, np.std, np.median, np.max, np.min)
    return [reduce_fn(img) for reduce_fn in reducers]
    
def _parse_inc_angle(raw):
    """Convert a raw ``inc_angle`` cell to a number; missing values map to 0."""
    if raw is None or raw == 'na':
        return 0
    ang = float(raw)
    # A NaN angle would turn img3/img4 and every angle-derived feature into
    # NaN, so it is treated exactly like the 'na' marker.
    return 0 if np.isnan(ang) else ang


def _hist_feat(img, n_bins):
    """Return the histogram bin counts of ``img`` plus four summary values.

    The summary values are, in order: std, max, median and (max - median) of
    the bin counts.
    """
    hist = list(np.histogram(img, bins=n_bins)[0])
    hist_max = np.max(hist)
    hist_median = np.median(hist)
    return hist + [np.std(hist), hist_max, hist_median, hist_max - hist_median]


def get_other_feat(df, img_shape=(75, 75), n_bins=20):
    """Build per-row band images and a hand-crafted feature vector.

    Parameters
    ----------
    df : DataFrame-like
        Iterated with ``iterrows()``. Every row must provide ``'band_1'`` and
        ``'band_2'`` (flat sequences reshaped to ``img_shape``) and
        ``'inc_angle'`` (a number, or ``'na'`` / ``None`` / NaN when missing;
        missing angles are replaced by 0).
    img_shape : tuple of int, optional
        Shape each band is reshaped to. Defaults to ``(75, 75)``.
    n_bins : int, optional
        Number of histogram bins per band. Defaults to 20.

    Returns
    -------
    band1, band2, band3, band4 : list of 1-D float32 arrays
        Flattened images per row: the two input bands, then
        ``(band_1 + band_2) * ang / 2`` and ``(band_1 - band_2) * ang / 2``.
    feats : list of list
        One feature list per row with ``224 + 2 * n_bins`` entries (264 with
        the default), in this order: base statistics of both bands plus the
        angle, their pairwise products / sums / differences, Laplacian
        statistics, Sobel statistics, histogram features of both bands, and
        statistics of the two combined images.

    Raises
    ------
    ValueError
        If ``inc_angle`` is a string other than ``'na'`` that cannot be parsed
        as a float, or if a band cannot be reshaped to ``img_shape``.
    """
    feats = []
    band1, band2, band3, band4 = [], [], [], []
    for _, row in df.iterrows():
        img1 = np.array(row['band_1']).reshape(img_shape).astype('float32')
        img2 = np.array(row['band_2']).reshape(img_shape).astype('float32')
        ang = _parse_inc_angle(row['inc_angle'])

        # Angle-scaled half-sum and half-difference of the two bands.
        img3 = (img1 + img2) * ang / 2.0
        img4 = (img1 - img2) * ang / 2.0
        band1.append(img1.ravel())
        band2.append(img2.ravel())
        band3.append(img3.ravel())
        band4.append(img4.ravel())

        tmp_feat = []

        # base: 11 statistics followed by every pairwise product, sum and
        # difference of them.
        st_trans = get_img_feat(img1) + get_img_feat(img2) + [ang]
        tmp_feat += st_trans
        tmp_feat += [x * y for x, y in combinations(st_trans, 2)]
        tmp_feat += [x + y for x, y in combinations(st_trans, 2)]
        tmp_feat += [x - y for x, y in combinations(st_trans, 2)]

        # lap: statistics of the Laplacian of each band.
        for img in (img1, img2):
            tmp_feat += get_img_feat(laplace(img, mode='reflect'))

        # sob: statistics of the Sobel response along both axes of each band
        # (band 1 axis 0, band 1 axis 1, band 2 axis 0, band 2 axis 1).
        for img in (img1, img2):
            for axis in (0, 1):
                tmp_feat += get_img_feat(sobel(img, axis=axis, mode='reflect'))

        # hist: bin counts and their summary values for each band.
        tmp_feat += _hist_feat(img1, n_bins)
        tmp_feat += _hist_feat(img2, n_bins)

        tmp_feat += get_img_feat(img3) + get_img_feat(img4)
        feats.append(tmp_feat)
    return band1, band2, band3, band4, feats

# Extract the flattened band images and the hand-crafted features for both
# splits. NOTE: despite their names, a_angs / b_angs receive the full per-row
# feature lists returned last by get_other_feat, not only the incidence angle.
(a_band1, a_band2, a_band3, a_band4, a_angs) = get_other_feat(train_df)
(b_band1, b_band2, b_band3, b_band4, b_angs) = get_other_feat(test_df)
print('raw feats')

raw feats


In [5]:
from sklearn import decomposition
# One whitened PCA per band, each with its own fixed seed (15..18).
comp = 50
pca_b1, pca_b2, pca_b3, pca_b4 = (
    decomposition.PCA(n_components=comp, whiten=True, random_state=seed)
    for seed in (15, 16, 17, 18)
)

# Fit each PCA on the training images of its band and project them.
a_band1_feat, a_band2_feat, a_band3_feat, a_band4_feat = (
    pca.fit_transform(np.array(band))
    for pca, band in zip(
        (pca_b1, pca_b2, pca_b3, pca_b4),
        (a_band1, a_band2, a_band3, a_band4),
    )
)

# Project the test images with the PCAs fitted on the training data only.
b_band1_feat, b_band2_feat, b_band3_feat, b_band4_feat = (
    pca.transform(np.array(band))
    for pca, band in zip(
        (pca_b1, pca_b2, pca_b3, pca_b4),
        (b_band1, b_band2, b_band3, b_band4),
    )
)

print('pca done', a_band1_feat.shape)

pca done (1604, 50)


In [6]:
# Joint PCA: concatenate the four flattened bands of every sample side by
# side, fit on the training matrix and reuse the fitted model for test.
pca_bx = decomposition.PCA(n_components=60, whiten=True, random_state=15)

a_bands = np.hstack([a_band1, a_band2, a_band3, a_band4])
a_band_feat = pca_bx.fit_transform(np.array(a_bands))

b_bands = np.hstack([b_band1, b_band2, b_band3, b_band4])
b_band_feat = pca_bx.transform(np.array(b_bands))

print('pca done', a_band_feat.shape)

pca done (1604, 60)


In [7]:
# Final design matrices: per-band PCA features, joint PCA features and the
# hand-crafted features, stacked column-wise in that order.
train_feat = np.hstack([
    a_band1_feat,
    a_band2_feat,
    a_band3_feat,
    a_band4_feat,
    a_band_feat,
    np.array(a_angs),
])
test_feat = np.hstack([
    b_band1_feat,
    b_band2_feat,
    b_band3_feat,
    b_band4_feat,
    b_band_feat,
    np.array(b_angs),
])

print(train_feat.shape, test_feat.shape)

(1604, 524) (8424, 524)


In [8]:
import pickle
# Persist both feature matrices as a two-element list [train_feat, test_feat].
with open('other_feat_v2.pkl', 'wb') as pkl_out:
    pickle.dump([train_feat, test_feat], pkl_out)
print('done')

done
