In [4]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from multiprocessing import Pool, cpu_count
import itertools
%matplotlib inline

rootpath = '/media/share/data/kaggle/ieee-camera/'

In [None]:
## MultiGPU Code
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.allow_growth=True
set_session(tf.Session(config=config))


import keras.utils.training_utils
from keras.utils import multi_gpu_model
gdev = keras.utils.training_utils._get_available_devices()
print(gdev)
gdev_count = 0
for i, n in enumerate(gdev):
    if 'device:GPU' in n:
        gdev_count+=1
if gdev_count > 0:
    print('Found {} GPUs, will attempt to use all of them.'.format(gdev_count))
else:
    gdev_count=1
    print('Did not find any GPUs, this will be SLOW!')

# Preprocess

In [None]:
# creat a flickr_val
urlpath = os.path.join(rootpath, 'val_images')
subdir = os.listdir(urlpath)
# subdir.remove('good_jpgs')
temp_df = []
for idx in subdir:
    temp_df.append((idx, pd.read_csv(os.path.join(urlpath, idx) + '/' + os.listdir(os.path.join(urlpath, idx))[0], header=None)))

df_flickr = pd.DataFrame(temp_df, columns=['class', 'fname'])

df_flickr['class'] = ['Motorola-X', 'iPhone-6', 'Motorola-Nexus-6', 'LG-Nexus-5x', 'Motorola-Droid-Maxx', 'HTC-1-M7', 'iPhone-4s', 'Sony-NEX-7', 'Samsung-Galaxy-S4', 'Samsung-Galaxy-Note3']

df = pd.DataFrame([])
for idx in range(len(df_flickr['class'])):
    target = df_flickr['fname'][idx]
    target.columns = ['fname']
    target = pd.DataFrame(target)
    n_class = pd.DataFrame(np.repeat(df_flickr['class'][idx], len(target)))
    df_temp = pd.concat([n_class.reset_index(drop=True), target.reset_index(drop=True)], 1)
    df = pd.concat([df, df_temp], 0)
    
df.columns = ['class', 'fname']
df.to_csv('/media/share/data/kaggle/ieee-camera/val_flickr.csv', index=None)

In [None]:
# creat a flickr_train
urlpath = os.path.join(rootpath, 'flickr_images')
subdir = os.listdir(urlpath)
subdir.remove('good_jpgs')
temp_df = []
for idx in subdir:
    temp_df.append((idx, pd.read_csv(os.path.join(urlpath, idx) + '/urls_final', header=None)))

df_flickr = pd.DataFrame(temp_df, columns=['class', 'fname'])

df_flickr['class'] = ['Motorola-X', 'iPhone-6', 'Motorola-Nexus-6', 'LG-Nexus-5x', 'Motorola-Droid-Maxx', 'HTC-1-M7', 'iPhone-4s', 'Sony-NEX-7', 'Samsung-Galaxy-S4', 'Samsung-Galaxy-Note3']

df = pd.DataFrame([])
for idx in range(len(df_flickr['class'])):
    target = df_flickr['fname'][idx]
    target.columns = ['fname']
    target = pd.DataFrame(target)
    n_class = pd.DataFrame(np.repeat(df_flickr['class'][idx], len(target)))
    df_temp = pd.concat([n_class.reset_index(drop=True), target.reset_index(drop=True)], 1)
    df = pd.concat([df, df_temp], 0)
    
df.columns = ['class', 'fname']
df.to_csv('/media/share/data/kaggle/ieee-camera/train_flickr.csv', index=None)

In [9]:
train_df = pd.read_csv('/media/share/data/kaggle/ieee-camera/train_flickr.csv')
train_df.head()

Unnamed: 0,class,fname
0,Motorola-X,https://c1.staticflickr.com/9/8340/28442295042...
1,Motorola-X,https://c2.staticflickr.com/8/7645/27350291351...
2,Motorola-X,https://c2.staticflickr.com/8/7533/27387770246...
3,Motorola-X,https://c2.staticflickr.com/2/1555/24073174483...
4,Motorola-X,https://c2.staticflickr.com/2/1636/24073199913...


In [10]:
val_df = pd.read_csv('/media/share/data/kaggle/ieee-camera/val_flickr.csv')
val_df.head()

Unnamed: 0,class,fname
0,Motorola-X,https://3.img-dpreview.com/files/p/sample_gall...
1,Motorola-X,https://3.img-dpreview.com/files/p/sample_gall...
2,Motorola-X,https://2.img-dpreview.com/files/p/sample_gall...
3,Motorola-X,https://2.img-dpreview.com/files/p/sample_gall...
4,Motorola-X,https://1.img-dpreview.com/files/p/sample_gall...


In [None]:
def mp_idx(idx):
    # multiple processing with cpus
    from skimage import io

    img_file = []
    label = []
    crop_size = 512
    
    if idx == 'train_idx':
        df = train_df
    else:
        df = val_df
        
    print(df['fname'][idx])
    img = io.imread(df['fname'][idx])
    if len(img.shape) == 3:
        h1, w1, _ = img.shape

        for i in range(4):
            hr = np.random.randint(0, h1-crop_size)
            wr = np.random.randint(0, w1-crop_size)
            tem_img = img[hr:hr+crop_size, wr:wr+crop_size, :]
            img_file.append(tem_img)
            label.append(df['class'][idx])
            
    return img_file, label


def image_crop(indexlist):
    from keras.applications.densenet import preprocess_input
    
    with Pool(cpu_count()) as pool:
        img_file, label = zip(*pool.map(mp_idx, indexlist))
    
    img_file = list(itertools.chain(*img_file))
    label = list(itertools.chain(*label))
        
    from sklearn.preprocessing import LabelEncoder
    from keras.utils import to_categorical
    le = LabelEncoder().fit(n_class)
    labels = to_categorical(le.transform(label), num_classes=len(n_class))
    
    img_file = preprocess_input(np.array(img_file))
    return img_file, labels

In [7]:
def image_crop(indexlist):
    from keras.applications.densenet import preprocess_input
    from skimage import io

    img_file = []
    label = []
    crop_size = 512
    
    if indexlist == 'train_idx':
        df = train_df
    else:
        df = val_df
        
    for idx in range(len(df['fname'])):
        print(df['fname'][idx])
        img = io.imread(df['fname'][idx])
        
        if len(img.shape) == 3:
            h1, w1, _ = img.shape

            for i in range(4):
                hr = np.random.randint(0, h1-crop_size)
                wr = np.random.randint(0, w1-crop_size)
                tem_img = img[hr:hr+crop_size, wr:wr+crop_size, :]
                img_file.append(tem_img)
                label.append(df['class'][idx])

        
    from sklearn.preprocessing import LabelEncoder
    from keras.utils import to_categorical
    le = LabelEncoder().fit(n_class)
    labels = to_categorical(le.transform(label), num_classes=len(n_class))
    
    img_file = preprocess_input(np.array(img_file))
    return img_file, labels

In [None]:
# from sklearn.model_selection import StratifiedKFold
# n_fold = 5
# skf = StratifiedKFold(n_fold, shuffle=True, random_state=np.random)
# for train_idx, val_idx in skf.split(train_df['fname'], train_df['class']):
#     break

In [None]:
from sklearn.utils import class_weight

class_weight = class_weight.compute_class_weight('balanced', np.unique(train_df['class']), train_df['class'])

# CNN model

## DenseNet-201

In [None]:
import keras.backend as K
from keras.applications import densenet
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.layers import Dense, Flatten, GlobalAveragePooling2D
from keras.callbacks import Callback, EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.optimizers import Adam
import datetime

K.clear_session()

base_model = densenet.DenseNet201(include_top=False, weights='imagenet', input_shape=(512, 512, 3))
x = GlobalAveragePooling2D()(base_model.output)
x = Dense(10, activation='softmax')(x)

smodel = Model(inputs=base_model.input, outputs=x)
smodel.summary()

# Multimodel
if gdev_count > 1:
    model = multi_gpu_model(smodel, gdev_count)
    model.summary()
else:
    model = smodel
    
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Trainning

In [11]:
import random
train_idx = np.arange(len(train_df))
random.shuffle(train_idx)
val_idx = np.arange(len(val_df))
random.shuffle(val_idx)

X_train, y_train = image_crop(train_idx)
X_val, y_val = image_crop(val_idx)

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.
  if __name__ == '__main__':


https://3.img-dpreview.com/files/p/sample_galleries/5521548249/0593558516.jpg
https://3.img-dpreview.com/files/p/sample_galleries/5521548249/3675963866.jpg
https://2.img-dpreview.com/files/p/sample_galleries/5521548249/0312125868.jpg
https://2.img-dpreview.com/files/p/sample_galleries/5521548249/9025633101.jpg
https://1.img-dpreview.com/files/p/sample_galleries/5521548249/4163184428.jpg
https://1.img-dpreview.com/files/p/sample_galleries/5521548249/3813083218.jpg
https://2.img-dpreview.com/files/p/sample_galleries/5521548249/3928181352.jpg
https://3.img-dpreview.com/files/p/sample_galleries/5521548249/4122247355.jpg
https://1.img-dpreview.com/files/p/sample_galleries/5521548249/5752873473.jpg
https://2.img-dpreview.com/files/p/sample_galleries/5521548249/2725668255.jpg
https://4.img-dpreview.com/files/p/sample_galleries/5521548249/5123862399.jpg
https://3.img-dpreview.com/files/p/sample_galleries/5521548249/6078095745.jpg
https://1.img-dpreview.com/files/p/sample_galleries/5521548249/7

https://s.blogcdn.com/slideshows/images/slides/366/800/4/S3668004/slug/l/img-20151017-122730-1.jpg
https://s.blogcdn.com/slideshows/images/slides/366/800/5/S3668005/slug/l/img-20151017-122828-1.jpg
https://s.blogcdn.com/slideshows/images/slides/366/800/6/S3668006/slug/l/img-20151017-122846-1.jpg
https://s.blogcdn.com/slideshows/images/slides/366/800/7/S3668007/slug/l/img-20151017-122907-1.jpg
https://s.blogcdn.com/slideshows/images/slides/366/800/8/S3668008/slug/l/img-20151017-122922-1.jpg
https://s.blogcdn.com/slideshows/images/slides/366/800/9/S3668009/slug/l/img-20151017-123434-1.jpg
https://s.blogcdn.com/slideshows/images/slides/366/801/0/S3668010/slug/l/img-20151017-124105-1.jpg
https://s.blogcdn.com/slideshows/images/slides/366/801/1/S3668011/slug/l/img-20151017-125059-1.jpg
https://s.blogcdn.com/slideshows/images/slides/366/801/2/S3668012/slug/l/img-20151017-133233-1.jpg
https://s.blogcdn.com/slideshows/images/slides/366/801/3/S3668013/slug/l/img-20151017-133239-1.jpg
https://s.

https://images.anandtech.com/galleries/1495/23-4S-LL.jpg
https://images.anandtech.com/galleries/1495/24-4S-LL.jpg
https://images.anandtech.com/galleries/1495/25-4S.jpg
https://images.anandtech.com/galleries/1495/26-4S.jpg
https://images.anandtech.com/galleries/1495/27-4S.jpg
https://images.anandtech.com/galleries/1495/28-4S.jpg
https://images.anandtech.com/galleries/1495/29-4S.jpg
https://images.anandtech.com/galleries/1495/30-4S.jpg
https://images.anandtech.com/galleries/1495/iPhone%204S.JPG
https://2.img-dpreview.com/files/p/sample_galleries/5563556166/3958537665.jpg
https://1.img-dpreview.com/files/p/sample_galleries/5563556166/7040276174.jpg
https://4.img-dpreview.com/files/p/sample_galleries/5563556166/7899519935.jpg
https://1.img-dpreview.com/files/p/sample_galleries/5563556166/7421691743.jpg
https://4.img-dpreview.com/files/p/sample_galleries/5563556166/8671492354.jpg
https://3.img-dpreview.com/files/p/sample_galleries/5563556166/9140533128.jpg
https://3.img-dpreview.com/files/p

KeyboardInterrupt: 

In [None]:
train_gen = ImageDataGenerator(horizontal_flip=True, 
                               vertical_flip=True)

In [None]:
model.load_weights('ieeev3-irv2-36-0.0800.hdf5')

model_checkpoint = ModelCheckpoint('ieeev3-irv2cw-{epoch:02d}-{val_loss:.4f}.hdf5',
                                   monitor='val_loss', save_best_only=True, save_weights_only=True)

model_earlystop = EarlyStopping(patience=32, monitor='val_loss')

batch_size = 64 * gdev_count

train_history = model.fit_generator(train_gen.flow(X_train, y_train, batch_size), 
                                    epochs=2**8, steps_per_epoch=2**11, 
                                    validation_data=train_gen.flow(X_val, y_val, batch_size), 
                                    validation_steps=len(),
                                    verbose=2, callbacks=[model_checkpoint, model_earlystop])

# Prediction

In [None]:
testpath = os.path.join(rootpath, 'test')
submitpath = '/media/share/jiaxin_cmu/kaggle/ieee_camera/submition/'

submit_df = pd.read_csv(rootpath + '/sample_submission.csv')
    
model.load_weights('ieeev3-irv2-39-0.0715.hdf5')

img_file = []
for idx in submit_df['fname']:
    img = plt.imread(testpath + '/' + idx)

prediction = model.predict(np.array(img_file))

In [None]:
# prediction = [0]

# for ii in range(4):
#     prediction = np.add(model.predict(np.array(img_file[ii*len(submit_df):(ii+1)*len(submit_df)])), prediction)

In [None]:
# def mp_predic():
#     img_file = []
#     for idx in submit_df['fname']:
#         img = plt.imread(testpath + '/' + idx)

#         hr = np.random.randint(0, 256)
#         wr = np.random.randint(0, 256)
#         tem_img = img[hr:hr+256, wr:wr+256, :]
#         img_file.append(tem_img / 255.)
        
#     predPoolion = model.predict(np.array(img_file))
        
#     return prediction

# with Pool(cpu_count()) as pool:
#     prediction = pool.map(mp_idx, range(128))
    
# prediction = np.add(model.predict(np.array(img_file)), prediction)

In [None]:
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder().fit(n_class)
predict_class = le.inverse_transform(prediction.argmax(axis=-1))

In [None]:
submit_df['camera'] = predict_class
submit_df.to_csv(submitpath + '/' + 'submit07.csv', index=False)

In [None]:
urlpath = '/media/share/data/kaggle/ieee-camera/flickr_images/iphone_4s/'
urls_final = pd.read_csv(urlpath + 'urls_final', header=None)
urls_final.columns = ['url']

In [None]:
from skimage import io
image = io.imread(urls_final['url'].iloc[1])

In [None]:
urls_final['url'].iloc[1]

In [None]:
plt.imshow(image)