In [1]:
import glob
import numpy as np
from PIL import Image

list_classes = [
 'Sony-NEX-7',
 'Motorola-X',
 'HTC-1-M7',
 'Samsung-Galaxy-Note3',
 'Motorola-Droid-Maxx',
 'iPhone-4s',
 'iPhone-6',
 'LG-Nexus-5x',
 'Samsung-Galaxy-S4',
 'Motorola-Nexus-6']

list_dict = {}
for i in range(10):
    key = list_classes[i]
    v = [0,0,0,0,0,0,0,0,0,0]
    v[i] = 1
    list_dict[key] = v
print(list_dict)

train_dir = '../input/train'
test_dir = '../input/test'
test_files = sorted(glob.glob(test_dir+'/*'))
train_files = sorted(glob.glob(train_dir+'/*/*'))
train_data_cnt = len(train_files)


def get_img(img_path,crop=224,train_flag = True):
    # read and resize
    im_array = np.array(Image.open((img_path)), dtype="uint8")
    pil_im = Image.fromarray(im_array)
    # new_array = np.array(pil_im.resize((512, 512)))
    
    # center crop
    center_x = im_array.shape[1] // 2
    half_crop = crop // 2
    if train_flag:
        rnd1 = np.random.randint(112) - 56
        rnd2 = np.random.randint(112) - 56
    else:
        rnd1 = 0
        rnd2 = 0
    a,b = center_x + rnd1 - half_crop, center_x + rnd1 + half_crop
    c,d = center_x + rnd2 - half_crop, center_x + rnd2 + half_crop
    final_img = im_array[a:b,c:d,:]
    return final_img/255.0

def get_y(img_path):
    n = img_path.split('/')[-2]
    return list_dict[n]

{'Motorola-Nexus-6': [0, 0, 0, 0, 0, 0, 0, 0, 0, 1], 'iPhone-6': [0, 0, 0, 0, 0, 0, 1, 0, 0, 0], 'Samsung-Galaxy-Note3': [0, 0, 0, 1, 0, 0, 0, 0, 0, 0], 'Motorola-X': [0, 1, 0, 0, 0, 0, 0, 0, 0, 0], 'LG-Nexus-5x': [0, 0, 0, 0, 0, 0, 0, 1, 0, 0], 'HTC-1-M7': [0, 0, 1, 0, 0, 0, 0, 0, 0, 0], 'Sony-NEX-7': [1, 0, 0, 0, 0, 0, 0, 0, 0, 0], 'iPhone-4s': [0, 0, 0, 0, 0, 1, 0, 0, 0, 0], 'Samsung-Galaxy-S4': [0, 0, 0, 0, 0, 0, 0, 0, 1, 0], 'Motorola-Droid-Maxx': [0, 0, 0, 0, 1, 0, 0, 0, 0, 0]}


In [2]:
BATCH_SIZE = 48

def data_gen(file_list, batch_size=BATCH_SIZE, train_flag = True):
    curr_idx = 0
    data_cnt = len(file_list)
    while True:
        if curr_idx + batch_size > data_cnt:
            start_idx = data_cnt-batch_size
            end_idx = data_cnt
            curr_idx = 0
        else:
            start_idx = curr_idx
            end_idx = curr_idx + batch_size
            curr_idx += batch_size
        curr_fl = file_list[start_idx:end_idx]
        curr_x = np.array([get_img(p,224,train_flag) for p in curr_fl],dtype='float32')
        curr_y = np.array([get_y(p) for p in curr_fl])
        yield curr_x,curr_y

from sklearn.utils import shuffle
train_files = shuffle(train_files,random_state=42)


train_gen = data_gen(train_files, BATCH_SIZE, True)
valid_gen = data_gen(train_files, BATCH_SIZE, False)
train_step = train_data_cnt//BATCH_SIZE
valid_step = train_step

# test
for x,y in train_gen:
    print(x.shape,y.shape)
    print(x.dtype)
    print(y[:3])
    break
    

(48, 224, 224, 3) (48, 10)
float32
[[0 0 0 1 0 0 0 0 0 0]
 [0 0 0 0 0 0 0 0 0 1]
 [1 0 0 0 0 0 0 0 0 0]]


In [3]:
# def model
from keras.models import Sequential,load_model
from keras.layers import Dense, Dropout, Flatten, Lambda, Activation
from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.callbacks import ModelCheckpoint,ReduceLROnPlateau
from keras.optimizers import Adam


def create_model():
    '''Create the FCN and return a keras model.'''

    model = Sequential()
    model.add(Conv2D(32, (3, 3),input_shape=(224, 224, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    
    model.add(Conv2D(64, (3, 3), activation='relu', strides=1))
    model.add(Conv2D(64, (3, 3), activation='relu', strides=2))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    
    model.add(Conv2D(96, (3, 3), activation='relu', strides=1))
    model.add(Conv2D(96, (3, 3), activation='relu', strides=2))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    
    model.add(Conv2D(128, (3, 3), activation='relu', strides=1))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))
    
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.3))

    model.add(Dense(10, activation='softmax'))
    return model
print('model model')
tmp_m = create_model()
tmp_m.summary()

Using TensorFlow backend.


model model
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 222, 222, 32)      896       
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 111, 111, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 109, 109, 64)      18496     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 54, 54, 64)        36928     
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 27, 27, 64)        0         
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 25, 25, 96)        55392     
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 12, 12, 96)        83040    

In [4]:
model = create_model()
model.compile(loss='categorical_crossentropy', optimizer=Adam(0.001), metrics=['accuracy'])
model_p = 'best_cnn2_m.h5'
model_chk = ModelCheckpoint(filepath=model_p, monitor='val_acc', save_best_only=True, verbose=1)
reduce_lr = ReduceLROnPlateau(monitor='val_acc', factor=0.5,
                              patience=3, min_lr=0.00001)
model.fit_generator(train_gen,
          steps_per_epoch = train_step,
          epochs=150,
          validation_data = valid_gen,
          validation_steps = valid_step,
          callbacks=[model_chk,reduce_lr]
         )

Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150


Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/150
Epoch 78/150
Epoch 79/150
Epoch 80/150
Epoch 81/150
Epoch 82/150
Epoch 83/150
Epoch 84/150
Epoch 85/150
Epoch 86/150
Epoch 87/150
Epoch 88/150
Epoch 89/150


Epoch 90/150
Epoch 91/150
Epoch 92/150
Epoch 93/150
Epoch 94/150
Epoch 95/150
Epoch 96/150
Epoch 97/150
Epoch 98/150
Epoch 99/150
Epoch 100/150
Epoch 101/150
Epoch 102/150
Epoch 103/150
Epoch 104/150
Epoch 105/150
Epoch 106/150
Epoch 107/150
Epoch 108/150
Epoch 109/150
Epoch 110/150
Epoch 111/150
Epoch 112/150
Epoch 113/150
Epoch 114/150
Epoch 115/150
Epoch 116/150
Epoch 117/150
Epoch 118/150
Epoch 119/150
Epoch 120/150


Epoch 121/150
Epoch 122/150
Epoch 123/150
Epoch 124/150
Epoch 125/150
Epoch 126/150
Epoch 127/150
Epoch 128/150
Epoch 129/150
Epoch 130/150
Epoch 131/150
Epoch 132/150
Epoch 133/150
Epoch 134/150
Epoch 135/150
Epoch 136/150
Epoch 137/150
Epoch 138/150
Epoch 139/150
Epoch 140/150
Epoch 141/150
Epoch 142/150
Epoch 143/150
Epoch 144/150
Epoch 145/150
Epoch 146/150
Epoch 147/150
Epoch 148/150
Epoch 149/150
Epoch 150/150


<keras.callbacks.History at 0x7fccccb96ba8>

In [5]:
best_model = load_model(model_p)
test_y = []
for img_p in test_files:
    tmp_x = get_img(img_p,224,False)
    tmp_y = best_model.predict(np.array([tmp_x]))[0]
    test_y.append(tmp_y)

test_y = np.array(test_y)
print(test_y[:5])

[[  9.56657469e-01   2.55366522e-05   1.38590876e-02   8.37864936e-04
    3.37969488e-03   7.07230720e-06   6.01768716e-06   1.31101499e-03
    1.08352059e-03   2.28328146e-02]
 [  3.62497729e-16   9.37924779e-06   7.54788789e-06   4.08824814e-11
    7.89072874e-05   9.99583423e-01   1.86383782e-04   1.34003145e-04
    2.45667126e-10   3.46559688e-07]
 [  1.52506053e-27   4.50045452e-11   6.66459800e-06   1.29530777e-15
    3.20453243e-17   3.62353103e-06   2.37218134e-09   9.99989748e-01
    7.80898600e-17   1.55445989e-09]
 [  1.50473945e-18   4.02927911e-03   2.77140799e-07   5.26122050e-03
    2.73179691e-11   8.38490507e-07   2.76731566e-06   1.52581606e-06
    9.90696073e-01   8.08921413e-06]
 [  0.00000000e+00   8.91116541e-03   5.22025940e-15   5.79860881e-02
    4.34272914e-23   6.05720904e-20   4.75697362e-26   8.32157373e-01
    9.64871737e-13   1.00945406e-01]]


In [6]:
import pandas as pd
y_res = np.argmax(test_y,axis=1)
y_res = [list_classes[i] for i in y_res]
df = pd.read_csv('../input/sample_submission.csv')
print(df.head())
f_name = [p.split('/')[-1] for p in test_files]
df['fname'] = f_name
df['camera'] = y_res
print(df.head())
df.to_csv('../results/s_cnn_2_224.csv',index=False)

                   fname    camera
0  img_0002a04_manip.tif  iPhone-6
1  img_001e31c_unalt.tif  iPhone-6
2  img_00275cf_manip.tif  iPhone-6
3  img_0034113_unalt.tif  iPhone-6
4  img_00344b7_unalt.tif  iPhone-6
                   fname             camera
0  img_0002a04_manip.tif         Sony-NEX-7
1  img_001e31c_unalt.tif          iPhone-4s
2  img_00275cf_manip.tif        LG-Nexus-5x
3  img_0034113_unalt.tif  Samsung-Galaxy-S4
4  img_00344b7_unalt.tif        LG-Nexus-5x
