<a href="https://colab.research.google.com/github/AshwinDeshpande96/TGS-Salt-Identification/blob/master/tgs_salt_identification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Libraries

In [0]:
#1
from google.colab import drive
from google.colab import files
#1.2.
import pandas as pd
#1.3.1, 1.3.2
import numpy as np
#1.3.2
import os
import cv2
from keras.preprocessing.image import array_to_img, img_to_array, load_img
from skimage.transform import resize
import h5py
#2.1
from keras.models import Model, load_model
from keras.layers import Input
from keras.layers.core import Lambda, RepeatVector, Reshape
from keras.layers.convolutional import Conv2D, Conv2DTranspose
from keras.layers.pooling import MaxPooling2D
from keras.layers.merge import concatenate
#2.2.
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# 1. Data Import

In [0]:
#Link up to Google Drive to store Extracted Feature Data
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
files.upload()
#Make /root/.kaggle/ directory. So that kaggle command can download the data
!mkdir /root/.kaggle/
#Move downloaded 'kaggle.json' to previously created directory
!mv /content/kaggle.json /root/.kaggle/
#change permissions of 'kaggle.json' so that it is only accessible by you
!chmod 600 /root/.kaggle/kaggle.json
!ls /content/

mkdir: cannot create directory ‘/root/.kaggle/’: File exists
mv: cannot stat '/content/kaggle.json': No such file or directory
depths.csv  images  sample_data		   test.zip   train.zip
gdrive	    masks   sample_submission.csv  train.csv


## 1.2. Download Files

In [0]:
!kaggle competitions download -c tgs-salt-identification-challenge

In [0]:
!unzip train.zip

Archive:  train.zip
replace images/8d08955cdf.png? [y]es, [n]o, [A]ll, [N]one, [r]ename: N


## 1.2 Read Files

In [0]:
train = pd.read_csv('train.csv')
depth = pd.read_csv('./depths.csv')

In [0]:
train.head()

Unnamed: 0,id,rle_mask
0,575d24d81d,
1,a266a2a9df,5051 5151
2,75efad62c1,9 93 109 94 210 94 310 95 411 95 511 96 612 96...
3,34e51dba6a,48 54 149 54 251 53 353 52 455 51 557 50 659 4...
4,4875705fb0,1111 1 1212 1 1313 1 1414 1 1514 2 1615 2 1716...


In [0]:
depth.head()

Unnamed: 0,id,z
0,4ac19fb269,306
1,1825fadf99,157
2,f59821d067,305
3,5b435fad9d,503
4,e340e7bfca,783


In [0]:
file_list = list(train['id'].values)

## 1.3. Data Preprocessing

### 1.3.1. Run Lenght Encoding to Images

In [0]:
def get_mask(rle, rows, cols):
    img = np.zeros(rows*cols, dtype=np.uint8)
    try:
        
        rle = np.asarray(rle.split(' '), dtype=np.uint8).reshape(-1,2)
        
        for index, length in rle:
            index -=1
            img[index:index+length] = 255
        img = img.reshape(cols, rows)
        img = img.T
    except:
        img= np.zeros((cols, rows))
        
    return img

In [0]:
train['mask'] = train['rle_mask'].apply(lambda x: get_mask(x,101, 101))
train = train.merge(depth)
train.head()

Unnamed: 0,id,rle_mask,mask,z
0,575d24d81d,,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...",843
1,a266a2a9df,5051 5151,"[[0, 0, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",794
2,75efad62c1,9 93 109 94 210 94 310 95 411 95 511 96 612 96...,"[[0, 255, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0,...",468
3,34e51dba6a,48 54 149 54 251 53 353 52 455 51 557 50 659 4...,"[[0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...",727
4,4875705fb0,1111 1 1212 1 1313 1 1414 1 1514 2 1615 2 1716...,"[[0, 255, 255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0...",797


### 1.3.2. Creating Dataset

In [0]:
train_ids = next(os.walk("/content/images"))[2]
im_width = 128
im_height = 128
im_depth = 2


n_features = 1 


X = np.zeros((len(train_ids), im_height, im_width, im_depth), dtype=np.float32)
y = np.zeros((len(train_ids), im_height, im_width, 1), dtype=np.float32)
X_feat = np.zeros((len(train_ids), n_features), dtype=np.float32)

for n, ids in enumerate(train_ids):

    im = cv2.imread('/content/images/'+ids, 0).astype(np.float32)
    im = resize(im, (128, 128, 1), mode='constant', preserve_range=True)
    
    mask = cv2.imread('/content/masks/'+ids, 0).astype(np.float32)/255.
    mask = resize(mask, (128, 128, 1), mode='constant', preserve_range=True)
    
    
    mid_mean = im[5:-5, 5:-5].mean()
    zero_centered = im-mid_mean
    cum = (zero_centered).cumsum(axis=0)
    mean = cum.mean()
    std_dev = max(1e-3, cum[5:-5, 5:-5].std())
    
    feature2 = cum - mean
    feature2 = feature2/std_dev

    
    X[n, ..., 0] = im.squeeze() /255.
    X[n, ..., 1] = feature2.squeeze()
    y[n] = mask

Check Dimension and save to HDF5 files

In [0]:
print X.shape, y.shape, X_feat.shape
Xfile = h5py.File("/content/gdrive/My Drive/Projects/Kaggle Challenges/TGS Salt Identification/X.hdf5", "w")
yfile =  h5py.File("/content/gdrive/My Drive/Projects/Kaggle Challenges/TGS Salt Identification/y.hdf5", "w")
Xfeat_file =  h5py.File("/content/gdrive/My Drive/Projects/Kaggle Challenges/TGS Salt Identification/X_feat.hdf5", "w")

Xfile.create_dataset('dataset_left', data=X)
yfile.create_dataset('dataset_right', data=y)
Xfeat_file.create_dataset('dataset_out', data=X_feat)

Xfile.close()
yfile.close()
Xfeat_file.close()

(4000, 128, 128, 2) (4000, 128, 128, 1) (4000, 1)


Data split for training and testing

In [0]:
from sklearn.model_selection import train_test_split

X_train, X_valid, X_feat_train, X_feat_valid, y_train, y_valid = train_test_split(X, X_feat, y, test_size=0.25, random_state=42)

# 2. Network Architecture

## 2.1. Network Definiton

In [0]:
input_img = Input((im_height, im_width, im_depth), name='img')
input_features = Input((n_features, ), name='feat')

c1 = Conv2D(8, (3, 3), activation='relu', padding='same', name='c11') (input_img)
c1 = Conv2D(8, (3, 3), activation='relu', padding='same', name='c12') (c1)
max1 = MaxPooling2D((2, 2), name='max1') (c1)

c2 = Conv2D(16, (3, 3), activation='relu', padding='same', name='c21') (max1)
c2 = Conv2D(16, (3, 3), activation='relu', padding='same', name='c22') (c2)
max2 = MaxPooling2D((2, 2), name='max2') (c2)

c3 = Conv2D(32, (3, 3), activation='relu', padding='same', name='c31') (max2)
c3 = Conv2D(32, (3, 3), activation='relu', padding='same', name='c32') (c3)
max3 = MaxPooling2D((2, 2), name='max3') (c3)

c4 = Conv2D(64, (3, 3), activation='relu', padding='same', name='c41') (max3)
c4 = Conv2D(64, (3, 3), activation='relu', padding='same', name='c42') (c4)
max4 = MaxPooling2D(pool_size=(2, 2), name='max4') (c4)


f_repeat = RepeatVector(8*8, name='repeat')(input_features)
f_conv = Reshape((8, 8, n_features), name='reshape')(f_repeat)
p4_feat = concatenate([max4, f_conv], -1)

c5 = Conv2D(128, (3, 3), activation='relu', padding='same', name='c51') (p4_feat)
c5 = Conv2D(128, (3, 3), activation='relu', padding='same', name='c52') (c5)

u6 = Conv2DTranspose(64, (2, 2), strides=(2, 2), padding='same', name='ctrans1') (c5)
u6 = concatenate([u6, c4], name='skip1')
c6 = Conv2D(64, (3, 3), activation='relu', padding='same', name='c61') (u6)
c6 = Conv2D(64, (3, 3), activation='relu', padding='same', name='c62') (c6)

u7 = Conv2DTranspose(32, (2, 2), strides=(2, 2), padding='same', name='ctrans2') (c6)
u7 = concatenate([u7, c3], name='skip2')
c7 = Conv2D(32, (3, 3), activation='relu', padding='same', name='c71') (u7)
c7 = Conv2D(32, (3, 3), activation='relu', padding='same', name='c72') (c7)

u8 = Conv2DTranspose(16, (2, 2), strides=(2, 2), padding='same', name='ctrans3') (c7)
u8 = concatenate([u8, c2], name='skip3')
c8 = Conv2D(16, (3, 3), activation='relu', padding='same', name='c81') (u8)
c8 = Conv2D(16, (3, 3), activation='relu', padding='same', name='c82') (c8)

u9 = Conv2DTranspose(8, (2, 2), strides=(2, 2), padding='same', name='ctrans4') (c8)
u9 = concatenate([u9, c1], axis=3, name='skip4')
c9 = Conv2D(8, (3, 3), activation='relu', padding='same', name='c91') (u9)
c9 = Conv2D(8, (3, 3), activation='relu', padding='same', name='92') (c9)

outputs = Conv2D(1, (1, 1), activation='sigmoid') (c9)

model = Model(inputs=[input_img, input_features], outputs=[outputs])
model.compile(optimizer='adam', loss='binary_crossentropy')
model.summary()

W0618 08:48:57.193907 140698898798464 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:517: The name tf.placeholder is deprecated. Please use tf.compat.v1.placeholder instead.

W0618 08:48:57.238414 140698898798464 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:4138: The name tf.random_uniform is deprecated. Please use tf.random.uniform instead.

W0618 08:48:57.288265 140698898798464 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:3976: The name tf.nn.max_pool is deprecated. Please use tf.nn.max_pool2d instead.

W0618 08:48:57.433732 140698898798464 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:74: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.

W0618 08:48:57.989689 140698898798464 deprecation_wrapp

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
img (InputLayer)                (None, 128, 128, 2)  0                                            
__________________________________________________________________________________________________
c11 (Conv2D)                    (None, 128, 128, 8)  152         img[0][0]                        
__________________________________________________________________________________________________
c12 (Conv2D)                    (None, 128, 128, 8)  584         c11[0][0]                        
__________________________________________________________________________________________________
max1 (MaxPooling2D)             (None, 64, 64, 8)    0           c12[0][0]                        
__________________________________________________________________________________________________
c21 (Conv2

## 2.2. Hyperparameters

In [0]:
callbacks = [
    EarlyStopping(patience=3, verbose=1),
    ReduceLROnPlateau(patience=3, verbose=1),
    ModelCheckpoint('/content/gdrive/My Drive/Projects/Kaggle Challenges/TGS Salt Identification/weights.h5', verbose=1, save_best_only=True, save_weights_only=True)
]


## 2.3. Training

In [0]:
results = model.fit({'img': X_train, 'feat': X_feat_train}, y_train, batch_size=16, epochs=30, callbacks=callbacks,
                    validation_data=({'img': X_valid, 'feat': X_feat_valid}, y_valid))
model.save('/content/gdrive/My Drive/Projects/Kaggle Challenges/TGS Salt Identification/model.h5')

W0618 08:49:26.615636 140698898798464 deprecation_wrapper.py:119] From /usr/local/lib/python2.7/dist-packages/keras/backend/tensorflow_backend.py:986: The name tf.assign_add is deprecated. Please use tf.compat.v1.assign_add instead.



Train on 3000 samples, validate on 1000 samples
Epoch 1/30
