In [1]:
import numpy as np
import pandas as pd
import os

import keras as k
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization

import cv2
from tqdm import tqdm

Using TensorFlow backend.


In [2]:
x_train = []
x_test = []
y_train = []

In [3]:
df_train = pd.read_csv('dataset/train_v2.csv')

flatten = lambda l: [item for sublist in l for item in sublist]
labels = list(set(flatten([l.split(' ') for l in df_train['tags'].values])))

label_map = {l: i for i, l in enumerate(labels)}
inv_label_map = {i: l for l, i in label_map.items()}

In [4]:
print label_map

{'selective_logging': 16, 'cultivation': 8, 'clear': 1, 'habitation': 11, 'conventional_mine': 5, 'cloudy': 4, 'primary': 3, 'water': 6, 'haze': 7, 'slash_burn': 0, 'partly_cloudy': 9, 'artisinal_mine': 10, 'blooming': 2, 'bare_ground': 12, 'blow_down': 13, 'agriculture': 14, 'road': 15}


In [5]:
for f, tags in tqdm(df_train.values):
    img = cv2.imread('dataset/train-jpg/{}.jpg'.format(f))
    targets = np.zeros(17)
    for t in tags.split(' '):
        targets[label_map[t]] = 1 
    x_train.append(cv2.resize(img, (64, 64)))
    y_train.append(targets)
    
y_train = np.array(y_train, np.uint8)
x_train = np.array(x_train, np.float16)

100%|██████████| 40479/40479 [01:45<00:00, 381.99it/s]


In [6]:
print x_train.shape
print y_train.shape

(40479, 64, 64, 3)
(40479, 17)


In [7]:
split = 35000
x_train, x_test, y_train, y_test = x_train[:split], x_train[split:], y_train[:split], y_train[split:]

In [8]:
print x_train.shape
print x_test.shape
print y_train.shape
print y_test.shape

(35000, 64, 64, 3)
(5479, 64, 64, 3)
(35000, 17)
(5479, 17)


In [9]:
model = Sequential()
model.add(BatchNormalization(input_shape=(64, 64, 3)))
model.add(Conv2D(32, kernel_size=(3, 3),padding='same', activation='relu'))
model.add(Conv2D(32, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

model.add(Conv2D(64, kernel_size=(3, 3),padding='same', activation='relu'))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
        
model.add(Conv2D(128, kernel_size=(3, 3),padding='same', activation='relu'))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
        
model.add(Conv2D(256, kernel_size=(3, 3),padding='same', activation='relu'))
model.add(Conv2D(256, (3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
        
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(BatchNormalization())
model.add(Dropout(0.5))

model.add(Dense(17, activation='sigmoid'))

In [10]:
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [11]:
model.fit(x_train, y_train,
          batch_size=128,
          epochs=4,
          verbose=1)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


<keras.callbacks.History at 0x113f16b10>

In [12]:
from sklearn.metrics import fbeta_score

predict = model.predict(x_test, batch_size=128)
print(fbeta_score(y_test, np.array(predict) > 0.2, beta=2, average='samples'))

0.715048060412
