In [2]:
import sys
from inception_v3 import InceptionV3
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D
from keras import backend as K

In [3]:
from read_data import Reader
class Config():
    batch_size = 16
    
    # path
    root_path = "../planet/"
    imgs_path = root_path + "train-jpg/"
    labels_file = root_path + "train_validation_v2_bin.csv"
    
    # iterations config
    max_iteration = 500
    summary_iters = 50
    valid_iters = 250
    usecols = range(1,18)

def random_batch_generator(config):
    reader = Reader(config)
    while True:
        batch_features, batch_labels = reader.random_batch()
        yield batch_features, batch_labels
def batch_generator(config):
    reader = Reader(config)
    while True:
        batch_features, batch_labels = reader.batch()
        yield batch_features, batch_labels

In [3]:

# create the base pre-trained model
base_model = InceptionV3(weights='imagenet', include_top=False)

# add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)
# let's add a fully-connected layer
x = Dense(1024, activation='relu')(x)
# and a logistic layer -- 17 classes
predictions = Dense(17, activation='softmax')(x)

# this is the model we will train
print "init model"
model = Model(inputs=base_model.input, outputs=predictions)

# first: train only the top layers (which were randomly initialized)
# i.e. freeze all convolutional InceptionV3 layers
for layer in base_model.layers:
    layer.trainable = False

# compile the model (should be done *after* setting layers to non-trainable)
print "compile model"
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

print "fit model's new layer"
train_config = Config()
valid_config = Config()
valid_config.labels_file = valid_config.root_path +  "validation_train_v2_bin.csv"
# train the model on the new data for a few epochs
model.fit_generator(generator=random_batch_generator(train_config), steps_per_epoch=50, epochs=50)

# at this point, the top layers are well trained and we can start fine-tuning
# convolutional layers from inception V3. We will freeze the bottom N layers
# and train the remaining top layers.

# let's visualize layer names and layer indices to see how many layers
# we should freeze:
for i, layer in enumerate(base_model.layers):
    print(i, layer.name)

# we chose to train the top 2 inception blocks, i.e. we will freeze
# the first 172 layers and unfreeze the rest:
for layer in model.layers[:172]:
    layer.trainable = False
for layer in model.layers[172:]:
    layer.trainable = True

# we need to recompile the model for these modifications to take effect
# we use SGD with a low learning rate
from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy')

# we train our model again (this time fine-tuning the top 2 inception blocks
# alongside the top Dense layers
model.fit_generator(generator=random_batch_generator(train_config), steps_per_epoch=50, epochs=20, 
                    validation_data=batch_generator(valid_config), validation_steps=np.int32(np.ceil(4048/float(valid_config.batch_size))))

model.save("./model/inception_1.h5")

init model
compile model
fit model's new layer
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50

KeyboardInterrupt: 

In [4]:
#train again
from keras.models import load_model
model = load_model("./model/inception_1.h5")

# only train full connection layer
for layer in model.layers:
    layer.trainable = False
for layer in model.layers[-2:]:
    layer.trainalbe = True

train_config = Config()
valid_config = Config()
valid_config.labels_file = valid_config.root_path +  "validation_train_v2_bin.csv"
# train the model on the new data for a few epochs
model.fit_generator(generator=random_batch_generator(train_config), steps_per_epoch=50, epochs=100)

for i, layer in enumerate(base_model.layers):
    print(i, layer.name)

for layer in model.layers[:172]:
    layer.trainable = False
for layer in model.layers[172:]:
    layer.trainable = True

from keras.optimizers import SGD
model.compile(optimizer=SGD(lr=0.0001, momentum=0.9), loss='categorical_crossentropy')

model.fit_generator(generator=random_batch_generator(train_config), steps_per_epoch=50, epochs=50, 
                    validation_data=batch_generator(valid_config), validation_steps=np.int32(np.ceil(4048/float(valid_config.batch_size))))

model.save("./model/inception_1.h5")

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
 4/50 [=>............................] - ETA: 7s - loss: 4.2770

KeyboardInterrupt: 

In [4]:
from keras.models import load_model
import numpy as np
from fbeta_score import f2_score, optimise_f2_thresholds
from sklearn.metrics import accuracy_score
from read_data import Reader


# f2 score
model = load_model("./model/inception_1.h5")
valid_config = Config()
valid_config.labels_file = valid_config.root_path +  "validation_train_v2_bin.csv"
valid_config.batch_size = 32
valid_reader = Reader(valid_config)

labels = ['tags','agriculture', 'artisinal_mine', 'bare_ground', 'blooming', 'blow_down', 'clear',
              'cloudy', 'conventional_mine', 'cultivation', 'habitation', 'haze', 'partly_cloudy',
              'primary', 'road', 'selective_logging', 'slash_burn', 'water']
labels = np.array(labels)[valid_config.usecols].tolist()

valid_pred = []
valid_true_out = []
print "preding..."
for x in  xrange(np.int32(np.ceil(4048/float(valid_config.batch_size)))):
    valid_img, valid_label = valid_reader.batch()
    valid_prob = model.predict(valid_img)
    valid_pred = np.append(valid_pred, valid_prob)
    valid_true_out = np.append(valid_true_out, valid_label)

valid_pred = np.reshape(valid_pred,[-1, len(valid_config.usecols)])
valid_true_out = np.reshape(valid_true_out, [-1, len(valid_config.usecols)])

thres = optimise_f2_thresholds(valid_true_out, valid_pred)
valid_pred_out = np.zeros_like(valid_pred)
for i in range(len(valid_config.usecols)):
    valid_pred_out[:, i] = (valid_pred[:, i] > thres[i]).astype(np.int)
valid_f2_score = f2_score(valid_true_out, valid_pred_out)
print "valid f2 score:", valid_f2_score

for i in xrange(len(valid_config.usecols)):
    acy_score = accuracy_score(valid_true_out[:, i], valid_pred_out[:, i])
    print "acy_score:\t", labels[i], "\t", acy_score

preding...
(0, 0.0, 0.40716542182510396)
(1, 0.0, 0.40716542182510396)
(2, 0.0, 0.40716542182510396)
(3, 0.0, 0.40716542182510396)
(4, 0.0, 0.40716542182510396)
(5, 0.0, 0.6535971770292277)
(6, 0.0, 0.6535971770292277)
(7, 0.0, 0.6535971770292277)
(8, 0.0, 0.6535971770292277)
(9, 0.0, 0.6535971770292277)
(10, 0.0, 0.6535971770292277)
(11, 0.0, 0.6535971770292277)
(12, 0.0, 0.6535971770292277)
(13, 0.0, 0.6535971770292277)
(14, 0.0, 0.6535971770292277)
(15, 0.0, 0.6535971770292277)
(16, 0.0, 0.6535971770292277)
valid f2 score: 0.653597177029
acy_score:	agriculture 	0.693428853755
acy_score:	artisinal_mine 	0.990859683794
acy_score:	bare_ground 	0.978507905138
acy_score:	blooming 	0.993083003953
acy_score:	blow_down 	0.995800395257
acy_score:	clear 	0.711462450593
acy_score:	cloudy 	0.95652173913
acy_score:	conventional_mine 	0.996541501976
acy_score:	cultivation 	0.892786561265
acy_score:	habitation 	0.909584980237
acy_score:	haze 	0.937005928854
acy_score:	partly_cloudy 	0.817934782609

In [4]:
#predict
from keras.models import load_model
import numpy as np
from fbeta_score import f2_score, optimise_f2_thresholds
from sklearn.metrics import accuracy_score
from read_data import Reader
import csv

class Config():
    batch_size = 32
    # path
    root_path = "../planet/"
    imgs_path = root_path + "test-jpg/"
    labels_file = root_path + "sample_submission_v2.csv"
    usecols = range(1,18)
    number = 61191
# reload fine-tune model
model = load_model("./model/inception.h5")
# set config
config = Config()
reader = Reader(config)

labels = ['tags','agriculture', 'artisinal_mine', 'bare_ground', 'blooming', 'blow_down', 'clear',
              'cloudy', 'conventional_mine', 'cultivation', 'habitation', 'haze', 'partly_cloudy',
              'primary', 'road', 'selective_logging', 'slash_burn', 'water']
labels = np.array(labels)[config.usecols].tolist()

# load validation set
pred = []
valid_true_out = []
print "preding..."
iter_num = np.int32(np.ceil(config.number/float(config.batch_size)))
for x in  xrange(iter_num):
    img, label = reader.batch()
    # predict with preprogressing in reader
    prob = model.predict(img)
    pred = np.append(pred, prob)
    true_out = np.append(true_out, label)
    print x , '/' , iter_num 

pred = np.reshape(pred,[-1, len(config.usecols)])
true_out = np.reshape(true_out, [-1, len(config.usecols)])


thres = [0.04, 0.06, 0.03, 0.13, 0.01, 0.03, 0.06, 0.01, 0.05, 0.03, 0.08, 0.07, 0.05, 0.05, 0.09, 0.03, 0.06]
pred_out = np.zeros_like(pred)
# predict output
for i in range(len(config.usecols)):
    pred_out[:, i] = (pred[:, i] > thres[i]).astype(np.int)
    
print "writing data"
# writting data
with open('./result.csv', 'wb') as f:
    writer = csv.writer(f)
    writer.writerow(['image_name', 'tags'])
    for i in xrange(config.number):
        tag = ""
        for j in xrange(len(config.usecols)):
            if pred_out[i,j] == 1:
                tag = tag + synset[j] + ' '
                #if j == 6:
                    #break
        row = [imgnames[i][0]] + [tag[:-1]]
        writer.writerow(row)


array([[1, 2, 3]])