In [1]:
from keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers
from tensorflow.keras import optimizers
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gc

In [2]:
# Run a full garbage-collection pass before the data pipeline is built.
# As the last expression of the cell, the value returned by gc.collect() is displayed.
gc.collect()

In [3]:
def append_ext(fn):
    """Return ``fn`` with the ``.jpg`` suffix appended.

    The suffix is added unconditionally with ``+``; ``fn`` is expected to be
    a string (it is applied to the ``image_name`` column below).
    """
    jpg_suffix = ".jpg"
    return fn + jpg_suffix

# Read the training labels and the sample submission; every column is kept as str.
traindf = pd.read_csv(
    '../input/planet-understanding-the-amazon-from-space/train_v2.csv/train_v2.csv',
    dtype=str,
)
testdf = pd.read_csv(
    "../input/planet-understanding-the-amazon-from-space/sample_submission_v2.csv/sample_submission_v2.csv",
    dtype=str,
)

# Turn the image_name entries into file names by appending the .jpg suffix.
for frame in (traindf, testdf):
    frame["image_name"] = frame["image_name"].apply(append_ext)

# Random augmentations applied on top of the 1/255 rescaling.
augmentation_options = dict(
    rotation_range = 10,
    width_shift_range = 0.2,
    height_shift_range = 0.2,
    zoom_range = 0.2,
    horizontal_flip = True,
    brightness_range = [0.2,1.2],
)

# validation_split reserves 25% of the rows for the 'validation' subset.
datagen = ImageDataGenerator(
    rescale = 1./255.,
    validation_split = 0.25,
    **augmentation_options,
)

In [4]:
# Options for the training iterator: image file names come from 'image_name',
# labels from 'tags', images are resized to 32x32 and served in batches of 340.
train_flow_options = dict(
    dataframe = traindf,
    directory = '../input/amazonsatelliteimages/train-jpg/train-jpg',
    x_col = 'image_name',
    y_col = 'tags',
    target_size = (32, 32),
    class_mode = 'categorical',
    batch_size = 340,
    shuffle = True,
    seed = 42,
)

# 'training' selects the part of traindf not reserved by datagen's validation_split.
train_generator = datagen.flow_from_dataframe(subset = 'training', **train_flow_options)

# Validation images must not be randomly augmented: metrics measured on
# rotated/shifted/darkened images do not reflect performance on real inputs.
# This generator therefore only rescales. Its validation_split MUST stay equal
# to the one used by `datagen` so the 'validation' subset remains disjoint
# from the 'training' subset.
valid_datagen = ImageDataGenerator(rescale = 1./255.,
                                   validation_split = 0.25)

valid_generator = valid_datagen.flow_from_dataframe(
    dataframe = traindf,
    directory = '../input/amazonsatelliteimages/train-jpg/train-jpg',
    x_col = 'image_name',
    y_col = 'tags',
    subset = 'validation',
    batch_size = 340,
    seed = 42,
    shuffle = True,
    class_mode = 'categorical',
    target_size = (32, 32))

# Inference must be deterministic: the test images are only rescaled, never
# randomly rotated, shifted, zoomed, flipped or brightness-shifted.
test_datagen = ImageDataGenerator(rescale = 1./255.)

# class_mode=None / y_col=None: the iterator yields image batches without labels.
# shuffle=False keeps the batches in the same order as test_generator.filenames.
test_generator = test_datagen.flow_from_dataframe(
    dataframe = testdf,
    directory = '../input/amazonsatelliteimages/test-jpg/test-jpg',
    x_col = 'image_name',
    y_col = None,
    batch_size = 340,
    seed = 42,
    shuffle = False,
    class_mode = None,
    target_size = (32, 32))

In [None]:
# Re-creates the test iterator. Random augmentation must not be applied at
# inference time, so a rescale-only generator is used here instead of `datagen`.
test_datagen = ImageDataGenerator(rescale = 1./255.)

# Unlabelled, unshuffled iterator: batch order matches test_generator.filenames.
test_generator = test_datagen.flow_from_dataframe(
    dataframe = testdf,
    directory = '../input/amazonsatelliteimages/test-jpg/test-jpg',
    x_col = 'image_name',
    y_col = None,
    batch_size = 340,
    seed = 42,
    shuffle = False,
    class_mode = None,
    target_size = (32, 32))

In [None]:
# VGG-style CNN for 32x32 RGB inputs.
#
# Every Conv2D / hidden Dense layer carries its own ReLU. Without a
# non-linearity between them, consecutive convolutions (or dense layers)
# compose into a single linear map, so the extra layers add parameters but no
# representational power.

# Width of the softmax head, taken from the training iterator so it always
# matches the one-hot labels the generator produces.
NUM_CLASSES = len(train_generator.class_indices)

# (filters, number of stacked 3x3 convolutions) for each block; each block ends
# with a 2x2 max-pool, taking the feature map from 32x32 down to 1x1.
CONV_BLOCKS = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

model = Sequential()

# Normalise the raw input channels before the first convolution.
model.add(BatchNormalization(input_shape=(32, 32, 3)))

for filters, depth in CONV_BLOCKS:
    for _ in range(depth):
        model.add(Conv2D(filters, (3, 3), padding='same', activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))

model.add(Flatten())
model.add(Dense(4096, activation='relu'))
model.add(Dense(4096, activation='relu'))

# NOTE(review): with class_mode='categorical' on a plain string column, every
# distinct value of 'tags' is one class. If a 'tags' value holds several labels
# for one image, a multi-label head (sigmoid + binary_crossentropy) would be
# the better fit -- confirm against the data.
model.add(Dense(NUM_CLASSES, activation='softmax'))

opt = optimizers.RMSprop(learning_rate=0.0001)
model.compile(optimizer=opt,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [6]:
# Number of whole batches each iterator can supply (floor division, so a
# trailing partial batch is not counted).
STEP_SIZE_TRAIN, STEP_SIZE_VALID, STEP_SIZE_TEST = (
    gen.n // gen.batch_size
    for gen in (train_generator, valid_generator, test_generator)
)

# Train for 50 epochs; the returned History object is kept for the plots below.
train = model.fit(
    train_generator,
    epochs=50,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
)

In [8]:
# Per-epoch metrics recorded by model.fit.
history = train.history

# Figure 1: training vs. validation loss.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(history['loss'], 'blue')
loss_ax.plot(history['val_loss'], 'red')
loss_ax.legend(['Training Loss', 'Validation Loss'])
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss')

# Figure 2: training vs. validation accuracy.
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(history['accuracy'], 'blue')
acc_ax.plot(history['val_accuracy'], 'red')
acc_ax.legend(['Training Accuracy', 'Validation Accuracy'])
acc_ax.set_xlabel('Epochs')
acc_ax.set_ylabel('Accuracy')

In [12]:
# Evaluate on the validation iterator using the validation step count.
# (The test-set step count used previously made Keras cycle through the
# validation data for as many batches as the test set has.)
model.evaluate(valid_generator, steps=STEP_SIZE_VALID)

#### Making predictions

In [64]:
test_generator.reset()
pred = model.predict(test_generator, steps=STEP_SIZE_TEST, verbose=1)

mapping the predicted class indices/labels with the filenames

In [71]:
predicted_class_indices = np.argmax(pred, axis=1)

labels = (train_generator.class_indices)
labels = dict((v, k) for k, v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

filenames = test_generator.filenames
predict = pd.DataFrame(predictions, columns=['tags'])
filename = pd.DataFrame(filenames, columns=['image_name'])[:40460]

In [86]:
# Put file names and predicted tags side by side (aligned on the row index)
# and write them to results.csv without the index column.
results = pd.concat(objs=[filename, predict], axis="columns")
results.to_csv(path_or_buf="./results.csv", index=False)

In [87]:
# Count missing values per column; anything non-zero means the file names and
# predictions did not line up row for row.
results.isna().sum()