# Incidence Angle Prediction
+ [Predicting Missing Incidence Angles](https://www.kaggle.com/reppic/predicting-missing-incidence-angles)

In [1]:
import os
from os.path import join as opj

import numpy as np # linear algebra
import pandas as pd
import pylab
from matplotlib import pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.ndimage.filters import uniform_filter
from sklearn.metrics import log_loss, mean_absolute_error
from sklearn.model_selection import StratifiedKFold, StratifiedShuffleSplit
from sklearn.model_selection import train_test_split
plt.rcParams['figure.figsize'] = 10, 10
%matplotlib inline

'\n# train = pd.read_json("./Data/train.json")\n\ntraining_examples = train.shape[0]\nmissing_angles = len(train[train[\'inc_angle\'] == \'na\'])\npercent_missing = (missing_angles/training_examples)*100\n\nprint("{0}/{1} ({2:.2f}%) of examples are missing inc_angle".format(\n    missing_angles, training_examples, percent_missing))\n    '

In [2]:
# Read the labelled training set and keep the 'is_iceberg' column as its own Series.
train = pd.read_json("../ShipIceberg/Data/train.json")
target_train = train["is_iceberg"]

# Loading the test set here keeps failing with "cannot resolve memory block".
# test = pd.read_json("../ShipIceberg/Data/test.json") 

'\n# Include the test data in our calculations: \ntest = pd.read_json("./Data/test.json")\ntrain_no_ib = train.drop([\'is_iceberg\'],axis=1)\nexamples = pd.concat([train_no_ib,test])\n\ninc_angles = examples[examples[\'inc_angle\'] != \'na\'][\'inc_angle\']\n\nmean = inc_angles.mean()\nmedian = inc_angles.median()\nmode = inc_angles.astype(np.double).round(1).mode()[0] # round to the nearest tenth for mode\nprint("Mean: {0}\nMedian: {1}\nMode: {2}".format(mean,median,mode))\n'

'\ninc_angles_train, inc_angles_valid = train_test_split(inc_angles, random_state=1, train_size=0.8, test_size=0.2)\n\nones = np.ones(inc_angles_valid.shape[0])\nmean_mae = mean_absolute_error(ones*inc_angles_train.mean(), inc_angles_valid)\nmedian_mae = mean_absolute_error(ones*inc_angles_train.median(), inc_angles_valid)\nmode_mae = mean_absolute_error(ones*inc_angles_train.astype(np.double).round(1).mode()[0], inc_angles_valid)\n\nprint("Mean Error: {0}\nMedian Error: {1}\nMode Error: {2}".format(mean_mae,median_mae,mode_mae))\n'

In [5]:
from keras.models import Input,Model
from keras.layers import Conv3D, MaxPooling3D, Conv2D, MaxPooling2D, Reshape, Dense, Dropout, Flatten, Activation, BatchNormalization
from keras.regularizers import l2
from keras import initializers
from keras.optimizers import Adam
from keras.callbacks import ModelCheckpoint, Callback

def model(dropout=0.1, regularization=0.00005):
    """Build and compile the CNN that regresses a single value from a two-band image.

    The network takes a (75, 75, 2, 1) input, applies one Conv3D stage whose
    kernel spans both bands, reshapes the result to a 2-D feature map, runs
    three Conv2D stages and two dense stages, and ends in a single linear unit.

    Args:
        dropout: Base dropout rate. The third conv stage and both dense stages
            use ``dropout + 0.1``; every other stage uses ``dropout``.
        regularization: L2 factor applied to every conv and hidden dense kernel.

    Returns:
        A compiled Keras ``Model`` (mean-squared-error loss, Adam optimizer,
        mean-absolute-error metric).
    """

    def conv2d_stage(tensor, filters, drop_rate):
        # 3x3 ReLU convolution -> batch norm -> 2x2 max-pool -> dropout.
        tensor = Conv2D(filters, kernel_size=(3, 3), activation='relu',
                        kernel_regularizer=l2(regularization))(tensor)
        tensor = BatchNormalization()(tensor)
        tensor = MaxPooling2D(pool_size=(2, 2), strides=(2, 2))(tensor)
        return Dropout(drop_rate)(tensor)

    def dense_stage(tensor, units, drop_rate):
        # Linear dense -> batch norm -> ReLU -> dropout.
        tensor = Dense(units, kernel_regularizer=l2(regularization))(tensor)
        tensor = BatchNormalization()(tensor)
        tensor = Activation('relu')(tensor)
        return Dropout(drop_rate)(tensor)

    x_input = Input(shape=(75, 75, 2, 1,))

    # Stage 1: the (5, 5, 2) kernel covers both bands at once, so the band
    # axis collapses to length 1 (71 x 71 x 1 x 96 before pooling).
    x = Conv3D(96, kernel_size=(5, 5, 2), activation='relu',
               input_shape=(75, 75, 2, 1),
               kernel_regularizer=l2(regularization))(x_input)
    x = BatchNormalization()(x)
    x = MaxPooling3D(pool_size=(2, 2, 1), strides=(2, 2, 1))(x)
    x = Dropout(dropout)(x)

    # Drop the singleton band axis so the remaining stages can be 2-D.
    x = Reshape((35, 35, 96))(x)

    # Stages 2-4: 2-D convolutions.
    x = conv2d_stage(x, 128, dropout)
    x = conv2d_stage(x, 256, dropout + 0.1)
    x = conv2d_stage(x, 128, dropout)

    x = Flatten()(x)

    # Stages 5-6: fully connected.
    x = dense_stage(x, 768, dropout + 0.1)
    x = dense_stage(x, 384, dropout + 0.1)

    # Linear output unit (no activation): plain regression head.
    y_ = Dense(1)(x)

    net = Model(inputs=x_input, outputs=y_)
    optimizer = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
    net.compile(loss='mean_squared_error', optimizer=optimizer, metrics=['mae'])

#     net.summary()
    return net

Using TensorFlow backend.


In [7]:
# train = pd.read_json("./Data/train.json")
# test = pd.read_json("./Data/test.json")

def load_train_data(train_path="./Data/train.json", test_path="./Data/test.json"):
    """Load every example with a known incidence angle and split it for training.

    The train and test JSON files are combined (the 'is_iceberg' label is
    dropped from the train frame first), rows whose 'inc_angle' equals the
    string 'na' are removed, and the two bands are packed into one array.

    Args:
        train_path: Path to the training JSON file.
        test_path: Path to the test JSON file.

    Returns:
        The 4-element result of ``train_test_split``:
        ``(x_train, x_valid, y_train, y_valid)`` with an 80/20 split and
        ``random_state=1``. ``x_*`` are float32 arrays of shape
        (n, 75, 75, 2, 1); ``y_*`` are float64 pandas Series of angles.
    """
    train = pd.read_json(train_path)
    test = pd.read_json(test_path)

    # The label is not needed here: the target of this model is the angle.
    examples = pd.concat([train.drop(['is_iceberg'], axis=1), test])
    examples = examples[examples['inc_angle'] != 'na']

    band_1 = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in examples["band_1"]])
    band_2 = np.array([np.array(band).astype(np.float32).reshape(75, 75) for band in examples["band_2"]])
    # Stack the bands on a new last axis, then add the trailing channel axis
    # expected by the Conv3D input: (n, 75, 75, 2, 1).
    bands = np.stack([band_1, band_2], axis=-1)[..., np.newaxis]

    # Filtering out the 'na' strings does not change the column's dtype, so a
    # column that mixed strings and numbers would still be object-typed here.
    # Cast explicitly so the regression target is a real numeric array.
    angles = examples["inc_angle"].astype(np.float64)

    return train_test_split(bands, angles, random_state=1, train_size=0.8, test_size=0.2)

In [13]:
# Build the network and load the 80/20 train/validation split.
m = model()
x_train, x_valid, y_train, y_valid = load_train_data()
weights_file = './model/inc_angle_weights_pretrained.hdf5'

# Set to False to skip training and reuse the weights saved in `weights_file`.
TRAIN_FROM_SCRATCH = True

if TRAIN_FROM_SCRATCH:
    # The checkpoint directory may not exist on a fresh checkout; create it up
    # front so saving the weights cannot fail on a missing path.
    os.makedirs(os.path.dirname(weights_file), exist_ok=True)
    checkpoint = ModelCheckpoint(weights_file, save_best_only=True)
    m.fit(x_train, y_train, batch_size=32, epochs=25, verbose=1,
              validation_data=(x_valid, y_valid),
              callbacks=[checkpoint])
    # After fit() the model holds the last epoch's weights, which need not be
    # the best ones. Reload the checkpoint so both branches leave `m` with the
    # weights stored in `weights_file`.
    m.load_weights(filepath=weights_file)
else:
    m.load_weights(filepath=weights_file)


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_2 (InputLayer)         (None, 75, 75, 2, 1)      0         
_________________________________________________________________
conv3d_2 (Conv3D)            (None, 71, 71, 1, 96)     4896      
_________________________________________________________________
batch_normalization_7 (Batch (None, 71, 71, 1, 96)     384       
_________________________________________________________________
max_pooling3d_2 (MaxPooling3 (None, 35, 35, 1, 96)     0         
_________________________________________________________________
dropout_7 (Dropout)          (None, 35, 35, 1, 96)     0         
_________________________________________________________________
reshape_2 (Reshape)          (None, 35, 35, 96)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 33, 33, 128)       110720    
__________

AttributeError: 'function' object has no attribute 'fit'

In [9]:
# Score the model on the held-out split.
predicted_angles = m.predict(x_valid, verbose=1)
# mean_absolute_error comes from sklearn.metrics (imported at the top of the
# notebook); its signature is (y_true, y_pred).
model_mae = mean_absolute_error(y_valid, predicted_angles)
print('Model Error: {0}'.format(model_mae))

Model Error: 2.9078589958686725


In [None]:
def predict_inc_angle(ex, model):
    """Predict the incidence angle for one example.

    Args:
        ex: Mapping-like record with "band_1" and "band_2" entries, each
            holding 75 * 75 values.
        model: Object exposing ``predict``; it is called once with a float32
            array of shape (1, 75, 75, 2, 1).

    Returns:
        The single scalar obtained from the model's prediction.
    """
    # One 75x75 float32 image per band, stacked on a trailing band axis.
    channels = [
        np.array(ex[key]).astype(np.float32).reshape(75, 75)
        for key in ("band_1", "band_2")
    ]
    batch = np.stack(channels, axis=-1).reshape((1, 75, 75, 2, 1))
    prediction = model.predict(batch)
    return prediction.reshape(1)[0]
    
# Fill every missing ('na') incidence angle in the training set with the
# model's prediction and write the completed frame back to disk.
train = pd.read_json("./Data/train.json")
train_out_model = train.copy()

filled_angles = []
for _, row in train_out_model.iterrows():
    angle = row['inc_angle']
    if angle == 'na':
        angle = predict_inc_angle(row, m)
    filled_angles.append(angle)
train_out_model['inc_angle'] = filled_angles

# train_out_model.to_json('./Data/train_model_fill.json')
train_out_model.to_json('./Data/train_model_fill_train.json')

In [12]:
# Show the angle column after the missing values were filled in.
print(train_out_model.loc[:, 'inc_angle'])

0       43.923900
1       38.156200
2       45.285900
3       43.830600
4       35.625600
5       36.903400
6       34.475100
7       41.176900
8       35.782900
9       43.300700
10      44.624000
11      39.506700
12      41.854400
13      45.290900
14      34.771500
15      43.782000
16      45.356800
17      38.781200
18      42.514500
19      37.280200
20      41.797300
21      38.066900
22      39.663600
23      37.686600
24      40.296000
25      39.234000
26      40.390400
27      43.789500
28      42.589100
29      41.030300
          ...    
1574    35.008137
1575    36.545788
1576    45.019547
1577    36.795406
1578    38.855888
1579    39.111088
1580    39.463596
1581    36.814934
1582    44.539196
1583    40.987122
1584    33.329502
1585    39.193367
1586    38.154198
1587    37.046684
1588    39.450466
1589    44.487198
1590    33.458179
1591    34.658039
1592    36.298862
1593    40.810936
1594    43.235126
1595    42.645870
1596    36.700695
1597    33.396420
1598    37