In [1]:
!pip install natsort

Collecting natsort
  Downloading natsort-7.0.1-py3-none-any.whl (33 kB)
Installing collected packages: natsort
Successfully installed natsort-7.0.1
You should consider upgrading via the '/opt/conda/bin/python3.7 -m pip install --upgrade pip' command.[0m


In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow_hub as hub
import os
import matplotlib.pyplot as plt
import glob
import cv2
from natsort import natsorted

%matplotlib inline

In [3]:
# List the dataset folders available under /kaggle/input/.
!ls /kaggle/input/

planet-understanding-the-amazon-from-space  planets-dataset


In [4]:
# List the contents of the planet dataset folder (CSV files and image directories).
!ls /kaggle/input/planets-dataset/planet/planet/

sample_submission.csv  test-jpg  train-jpg  train_classes.csv


In [5]:
# Load the training labels (one row per image, space-separated tags) and preview the first rows.
train_csv_path = '/kaggle/input/planets-dataset/planet/planet/train_classes.csv'
train_df = pd.read_csv(train_csv_path)
train_df.head(10)

Unnamed: 0,image_name,tags
0,train_0,haze primary
1,train_1,agriculture clear primary water
2,train_2,clear primary
3,train_3,clear primary
4,train_4,agriculture clear habitation primary road
5,train_5,haze primary water
6,train_6,agriculture clear cultivation primary water
7,train_7,haze primary
8,train_8,agriculture clear cultivation primary
9,train_9,agriculture clear cultivation primary road


In [6]:
# train_df is already loaded from train_classes.csv; it is not re-read here so
# that re-running this cell can never discard columns added to train_df later.
train_df.tail(10)

Unnamed: 0,image_name,tags
40469,train_40469,haze primary water
40470,train_40470,clear habitation primary road
40471,train_40471,clear primary
40472,train_40472,agriculture clear primary
40473,train_40473,agriculture clear habitation primary road
40474,train_40474,clear primary
40475,train_40475,cloudy
40476,train_40476,agriculture clear primary
40477,train_40477,agriculture clear primary road
40478,train_40478,agriculture cultivation partly_cloudy primary


In [7]:
# Load the sample submission to see the expected output format (image_name, tags).
sample_submission_path = '/kaggle/input/planets-dataset/planet/planet/sample_submission.csv'
sample_submission_df = pd.read_csv(sample_submission_path)
sample_submission_df.head(10)

Unnamed: 0,image_name,tags
0,test_0,primary clear agriculture road water
1,test_1,primary clear agriculture road water
2,test_2,primary clear agriculture road water
3,test_3,primary clear agriculture road water
4,test_4,primary clear agriculture road water
5,test_5,primary clear agriculture road water
6,test_6,primary clear agriculture road water
7,test_7,primary clear agriculture road water
8,test_8,primary clear agriculture road water
9,test_9,primary clear agriculture road water


In [8]:
# sample_submission_df is already loaded; just inspect the last rows
# (these show the `file_*` image names in addition to the `test_*` ones).
sample_submission_df.tail(10)

Unnamed: 0,image_name,tags
61181,file_9990,primary clear agriculture road water
61182,file_9991,primary clear agriculture road water
61183,file_9992,primary clear agriculture road water
61184,file_9993,primary clear agriculture road water
61185,file_9994,primary clear agriculture road water
61186,file_9995,primary clear agriculture road water
61187,file_9996,primary clear agriculture road water
61188,file_9997,primary clear agriculture road water
61189,file_9998,primary clear agriculture road water
61190,file_9999,primary clear agriculture road water


In [9]:
# List the files shipped with the original competition dataset folder.
!ls /kaggle/input/planet-understanding-the-amazon-from-space/

Kaggle-planet-test-tif.torrent	 sample_submission_v2.csv  train_v2.csv
Kaggle-planet-train-tif.torrent  test_v2_file_mapping.csv


In [10]:
# Collect every distinct tag in first-seen order.
# dict.fromkeys de-duplicates while preserving insertion order.
label_list = list(dict.fromkeys(
    label
    for tag_str in train_df.tags.values
    for label in tag_str.split(' ')
))

In [11]:
# Add one 0/1 indicator column per label. The tag strings are split once up
# front and each label is then tested for membership in the resulting lists.
tag_lists = train_df['tags'].str.split(' ')
for label in label_list:
    train_df[label] = tag_lists.apply(lambda tags, wanted=label: int(wanted in tags))

train_df.sample(10)

Unnamed: 0,image_name,tags,haze,primary,agriculture,clear,water,habitation,road,cultivation,slash_burn,cloudy,partly_cloudy,conventional_mine,bare_ground,artisinal_mine,blooming,selective_logging,blow_down
17825,train_17825,clear primary,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
6918,train_6918,agriculture clear cultivation primary water,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0
25031,train_25031,partly_cloudy primary,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
10447,train_10447,clear primary,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
35341,train_35341,agriculture clear primary,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
38242,train_38242,clear primary,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
20798,train_20798,clear primary,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
22680,train_22680,agriculture clear primary water,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0
5263,train_5263,clear habitation primary,0,1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0
31876,train_31876,haze primary,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0


In [12]:
# The model targets are exactly the indicator columns created from label_list,
# in the same order. Deriving the list from label_list (instead of "all columns
# except image_name/tags") keeps it correct even if this cell is re-run after
# further columns such as image_file have been added to train_df.
target_list = list(label_list)
target_list

['haze',
 'primary',
 'agriculture',
 'clear',
 'water',
 'habitation',
 'road',
 'cultivation',
 'slash_burn',
 'cloudy',
 'partly_cloudy',
 'conventional_mine',
 'bare_ground',
 'artisinal_mine',
 'blooming',
 'selective_logging',
 'blow_down']

In [13]:
# File name of each training image: the image name with the JPEG extension appended.
image_suffix = '.jpg'
train_df['image_file'] = train_df['image_name'].add(image_suffix)
train_df.head(10)

Unnamed: 0,image_name,tags,haze,primary,agriculture,clear,water,habitation,road,cultivation,slash_burn,cloudy,partly_cloudy,conventional_mine,bare_ground,artisinal_mine,blooming,selective_logging,blow_down,image_file
0,train_0,haze primary,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,train_0.jpg
1,train_1,agriculture clear primary water,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,train_1.jpg
2,train_2,clear primary,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,train_2.jpg
3,train_3,clear primary,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,train_3.jpg
4,train_4,agriculture clear habitation primary road,0,1,1,1,0,1,1,0,0,0,0,0,0,0,0,0,0,train_4.jpg
5,train_5,haze primary water,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,train_5.jpg
6,train_6,agriculture clear cultivation primary water,0,1,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,train_6.jpg
7,train_7,haze primary,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,train_7.jpg
8,train_8,agriculture clear cultivation primary,0,1,1,1,0,0,0,1,0,0,0,0,0,0,0,0,0,train_8.jpg
9,train_9,agriculture clear cultivation primary road,0,1,1,1,0,0,1,1,0,0,0,0,0,0,0,0,0,train_9.jpg


In [14]:
# Summarise the columns of train_df (non-null counts and dtypes).
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40479 entries, 0 to 40478
Data columns (total 20 columns):
 #   Column             Non-Null Count  Dtype 
---  ------             --------------  ----- 
 0   image_name         40479 non-null  object
 1   tags               40479 non-null  object
 2   haze               40479 non-null  int64 
 3   primary            40479 non-null  int64 
 4   agriculture        40479 non-null  int64 
 5   clear              40479 non-null  int64 
 6   water              40479 non-null  int64 
 7   habitation         40479 non-null  int64 
 8   road               40479 non-null  int64 
 9   cultivation        40479 non-null  int64 
 10  slash_burn         40479 non-null  int64 
 11  cloudy             40479 non-null  int64 
 12  partly_cloudy      40479 non-null  int64 
 13  conventional_mine  40479 non-null  int64 
 14  bare_ground        40479 non-null  int64 
 15  artisinal_mine     40479 non-null  int64 
 16  blooming           40479 non-null  int64

In [15]:
# Sequential (unshuffled) 70 % / 20 % / 10 % split of train_df into training,
# validation and a held-out "synthetic test" set.
n_rows = len(train_df)
train_end = int(n_rows * 0.7)
val_end = int(n_rows * 0.9)

training_df = train_df.iloc[:train_end]
validation_df = train_df.iloc[train_end:val_end]
synth_test_df = train_df.iloc[val_end:]

In [16]:
# Row counts of the three splits.
len(training_df), len(validation_df), len(synth_test_df)

(28335, 8096, 4048)

In [17]:
# Sanity check: the split sizes should add up to the number of rows in train_df.
len(train_df), (len(training_df) + len(validation_df) + len(synth_test_df))

(40479, 40479)

In [18]:
batch_size = 64

# Training images are rescaled to [0, 1] and randomly rotated, shifted,
# flipped and zoomed on the fly.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=45,
    width_shift_range=.15,
    height_shift_range=.15,
    horizontal_flip=True,
    vertical_flip=True,
    zoom_range=0.35,
)
image_gen_train = ImageDataGenerator(**augmentation_options)

# class_mode='raw' passes the 0/1 indicator columns through unchanged as targets.
train_data_gen = image_gen_train.flow_from_dataframe(
    dataframe=training_df,
    directory='/kaggle/input/planets-dataset/planet/planet/train-jpg',
    x_col='image_file',
    y_col=target_list,
    class_mode='raw',
    target_size=(256, 256),
    batch_size=batch_size,
)

Found 28335 validated image filenames.


In [19]:
# Validation images are only rescaled (no augmentation) and served in file order.
image_gen_val = ImageDataGenerator(rescale=1./255)

val_data_gen = image_gen_val.flow_from_dataframe(
    dataframe=validation_df,
    directory='/kaggle/input/planets-dataset/planet/planet/train-jpg',
    x_col='image_file',
    y_col=target_list,
    class_mode='raw',
    shuffle=False,
    target_size=(256, 256),
    batch_size=batch_size,
)

Found 8096 validated image filenames.


In [20]:
# Held-out split used as a stand-in test set: images only (class_mode=None),
# unshuffled, one image per batch.
image_gen_synth_test = ImageDataGenerator(rescale=1./255)

synth_test_data_gen = image_gen_synth_test.flow_from_dataframe(
    dataframe=synth_test_df,
    directory='/kaggle/input/planets-dataset/planet/planet/train-jpg',
    x_col='image_file',
    class_mode=None,
    shuffle=False,
    target_size=(256, 256),
    batch_size=1,
)

Found 4048 validated image filenames.


# Augmentation pipeline expressed as Keras preprocessing layers.
# NOTE(review): this layer stack is not referenced by the model defined in this
# notebook; training augmentation currently comes from ImageDataGenerator.
data_augmentation_layer = Sequential(
[
    tf.keras.layers.experimental.preprocessing.Rescaling(scale=1./255),
    tf.keras.layers.experimental.preprocessing.RandomFlip(),
    # Shift the image content by up to 15 % while keeping the tensor shape.
    # RandomWidth / RandomHeight would instead resize the tensor itself, which
    # does not match a backbone built with a fixed (256, 256, 3) input.
    tf.keras.layers.experimental.preprocessing.RandomTranslation(height_factor=.15,
                                                                  width_factor=.15),
    tf.keras.layers.experimental.preprocessing.RandomZoom(.35),
    # The factor is a fraction of a full turn per the Keras docs (0.2 -> +/-72 degrees).
    tf.keras.layers.experimental.preprocessing.RandomRotation(0.2)
])

In [21]:
# VGG16 convolutional base with ImageNet weights and without its classifier head,
# configured for 256x256 RGB inputs.
vgg16_options = dict(
    include_top=False,
    weights='imagenet',
    input_shape=(256, 256, 3),
)
transfer_learning_layer = tf.keras.applications.VGG16(**vgg16_options)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


In [36]:
# Alternative backbone: MobileNetV2 feature-vector module from TF Hub.
# NOTE(review): the model built in this notebook uses transfer_learning_layer,
# not this layer.
URL = "https://tfhub.dev/google/tf2-preview/mobilenet_v2/feature_vector/2"
mobile_net_layer = hub.KerasLayer(handle=URL, input_shape=(256, 256, 3))

In [22]:
# Freeze the pretrained backbone so that only the layers added on top of it are trained.
transfer_learning_layer.trainable = False

In [40]:
# Rebind `model` to an empty string before the model is (re)built in the next cell.
# NOTE(review): presumably a scratch reset to drop a previous model object;
# the value itself is never read — confirm and consider removing this cell.
model = ''

In [41]:
# Classifier: frozen backbone -> flatten -> dropout -> 17 independent sigmoid
# outputs (one per label, multi-label setup).
model = Sequential()
model.add(transfer_learning_layer)
model.add(Flatten())
model.add(Dropout(0.2))
model.add(Dense(17, activation='sigmoid'))

In [42]:
# Binary cross-entropy on probabilities: the output layer already applies a
# sigmoid, hence from_logits=False.
multi_label_loss = tf.keras.losses.BinaryCrossentropy(from_logits=False)
model.compile(
    optimizer='Adam',
    loss=multi_label_loss,
    metrics=['accuracy'],
)

In [None]:
epochs = 2

# One epoch covers every batch of the respective split (last batch may be partial).
train_steps = int(np.ceil(len(training_df) / batch_size))
val_steps = int(np.ceil(len(validation_df) / batch_size))

history = model.fit(
    train_data_gen,
    steps_per_epoch=train_steps,
    epochs=epochs,
    validation_data=val_data_gen,
    validation_steps=val_steps,
)

Epoch 1/2
Epoch 2/2

In [28]:
# Print the layer-by-layer summary of the current model.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 256, 256, 32)      896       
_________________________________________________________________
activation (Activation)      (None, 256, 256, 32)      0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 254, 254, 32)      9248      
_________________________________________________________________
activation_1 (Activation)    (None, 254, 254, 32)      0         
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 127, 127, 32)      0         
_________________________________________________________________
dropout (Dropout)            (None, 127, 127, 32)      0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 127, 127, 64)      1

In [33]:
# Rewind the (unshuffled) generator so that the prediction rows are in the same
# order as synth_test_data_gen.filenames.
synth_test_data_gen.reset()
# Model.predict accepts generators directly; predict_generator is deprecated.
synth_test_pred = model.predict(synth_test_data_gen, verbose=1)



In [34]:
# A label counts as predicted when its sigmoid output exceeds 0.5.
synth_test_pred_bool = np.greater(synth_test_pred, 0.5)

In [35]:
synth_test_predictions = synth_test_pred_bool.astype(int)

# Column names must follow the order of y_col used for training.
columns = target_list
ordered_cols = ["image_name"] + columns

# One row per image: file name first, then the 0/1 prediction per label.
synth_test_results = (
    pd.DataFrame(synth_test_predictions, columns=columns)
    .assign(image_name=synth_test_data_gen.filenames)
    .loc[:, ordered_cols]
)
synth_test_results

Unnamed: 0,Filenames,haze,primary,agriculture,clear,water,habitation,road,cultivation,slash_burn,cloudy,partly_cloudy,conventional_mine,bare_ground,artisinal_mine,blooming,selective_logging,blow_down
0,train_36431.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,train_36432.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,train_36433.jpg,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,train_36434.jpg,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0
4,train_36435.jpg,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4043,train_40474.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4044,train_40475.jpg,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4045,train_40476.jpg,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0
4046,train_40477.jpg,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0


In [36]:
from sklearn.metrics import accuracy_score

In [46]:
# Ground-truth labels of the held-out split. Reuse synth_test_df instead of
# repeating the 0.9 split boundary, and take an explicit copy so that columns
# added to synth_test_labels later never write through to train_df.
synth_test_labels = synth_test_df[target_list].copy()
synth_test_labels

Unnamed: 0,haze,primary,agriculture,clear,water,habitation,road,cultivation,slash_burn,cloudy,partly_cloudy,conventional_mine,bare_ground,artisinal_mine,blooming,selective_logging,blow_down
36431,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
36432,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
36433,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0
36434,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0
36435,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40474,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
40475,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0
40476,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
40477,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0


In [47]:
# Predicted 0/1 label columns only (image_name is left out), for comparison with the true labels.
synth_test_results[target_list]

Unnamed: 0,haze,primary,agriculture,clear,water,habitation,road,cultivation,slash_burn,cloudy,partly_cloudy,conventional_mine,bare_ground,artisinal_mine,blooming,selective_logging,blow_down
0,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0
3,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0
4,0,1,1,1,1,0,1,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4043,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4044,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
4045,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0
4046,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0


In [48]:
# Compare true and predicted label matrices row by row (rows are matched by position).
# NOTE(review): for 2-D multilabel input scikit-learn documents accuracy_score as
# subset accuracy, i.e. a row only counts when all 17 labels match — confirm this
# is the intended metric.
accuracy_score(synth_test_labels, synth_test_results[target_list])

0.4523221343873518

In [70]:
# Work on an independent copy so the new column is not written onto a slice of train_df.
synth_test_labels = synth_test_labels.copy()

# Rebuild the space-separated tag string of every row from its 0/1 label columns.
# Only the label columns in target_list are considered (not the 'tags' column
# itself), and joining avoids the leading space that repeated `' ' + name`
# concatenation produces.
label_flags = synth_test_labels[target_list].to_numpy() == 1
synth_test_labels['tags'] = [
    ' '.join(name for name, is_set in zip(target_list, row_flags) if is_set)
    for row_flags in label_flags
]
synth_test_labels

Unnamed: 0,haze,primary,agriculture,clear,water,habitation,road,cultivation,slash_burn,cloudy,partly_cloudy,conventional_mine,bare_ground,artisinal_mine,blooming,selective_logging,blow_down,tags
36431,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,primary clear
36432,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,primary clear
36433,0,1,1,0,0,0,1,0,0,0,1,0,0,0,0,0,0,primary agriculture road partly_cloudy
36434,0,1,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,primary clear water habitation
36435,0,1,0,1,1,0,1,0,0,0,0,0,0,0,0,0,0,primary clear water road
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40474,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,primary clear
40475,0,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,cloudy
40476,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,primary agriculture clear
40477,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,primary agriculture clear road


In [71]:
# Inspect the reconstructed tag strings on their own.
synth_test_labels['tags']

36431                                     primary clear
36432                                     primary clear
36433            primary agriculture road partly_cloudy
36434                    primary clear water habitation
36435                          primary clear water road
                              ...                      
40474                                     primary clear
40475                                            cloudy
40476                         primary agriculture clear
40477                    primary agriculture clear road
40478     primary agriculture cultivation partly_cloudy
Name: tags, Length: 4048, dtype: object

In [72]:
import glob

In [73]:
# File names (without directory) of all JPEGs in the main test folder,
# in whatever order glob returns them.
test_images_list = [
    os.path.basename(filename)
    for filename in glob.glob('../input/planets-dataset/planet/planet/test-jpg'+'/*.jpg')
]

test_df = pd.DataFrame(test_images_list, columns=['filename'])
test_df

Unnamed: 0,filename
0,test_29545.jpg
1,test_28912.jpg
2,test_28187.jpg
3,test_17633.jpg
4,test_894.jpg
...,...
40664,test_36590.jpg
40665,test_17506.jpg
40666,test_4928.jpg
40667,test_15479.jpg


In [74]:
# File names (without directory) of all JPEGs in the additional test folder,
# in whatever order glob returns them.
additional_test_images = [
    os.path.basename(filename)
    for filename in glob.glob('../input/planets-dataset/test-jpg-additional/test-jpg-additional'+'/*.jpg')
]

add_test_df = pd.DataFrame(additional_test_images, columns=['filename'])
add_test_df 

Unnamed: 0,filename
0,file_10472.jpg
1,file_19653.jpg
2,file_19865.jpg
3,file_2043.jpg
4,file_3113.jpg
...,...
20517,file_6512.jpg
20518,file_3874.jpg
20519,file_9618.jpg
20520,file_20149.jpg


In [75]:
# Stack both test file lists into one frame with a fresh 0..n-1 index.
# pd.concat replaces DataFrame.append, which is deprecated and removed in pandas 2.
test_df_full = pd.concat([test_df, add_test_df], ignore_index=True)
test_df_full

Unnamed: 0,filename
0,test_29545.jpg
1,test_28912.jpg
2,test_28187.jpg
3,test_17633.jpg
4,test_894.jpg
...,...
61186,file_6512.jpg
61187,file_3874.jpg
61188,file_9618.jpg
61189,file_20149.jpg


In [76]:
image_gen_test = ImageDataGenerator(rescale=1./255)

# Both test generators share the same settings: images only, file order
# preserved, one image per batch.
shared_flow_options = dict(
    x_col='filename',
    class_mode=None,
    shuffle=False,
    target_size=(256, 256),
    batch_size=1,
)

test_data_gen_1 = image_gen_test.flow_from_dataframe(
    test_df,
    directory='../input/planets-dataset/planet/planet/test-jpg',
    **shared_flow_options,
)

test_data_gen_2 = image_gen_test.flow_from_dataframe(
    add_test_df,
    directory='../input/planets-dataset/test-jpg-additional/test-jpg-additional',
    **shared_flow_options,
)

Found 40669 validated image filenames.
Found 20522 validated image filenames.


In [77]:
# Rewind the (unshuffled) generator so that the prediction rows are in the same
# order as test_data_gen_1.filenames.
test_data_gen_1.reset()
# Model.predict accepts generators directly; predict_generator is deprecated.
pred_1 = model.predict(test_data_gen_1, verbose=1)



In [92]:
# A label counts as predicted when its sigmoid output exceeds 0.5.
pred_bool = np.greater(pred_1, 0.5)

In [94]:
predictions = pred_bool.astype(int)

# Column names must follow the order of y_col used for training.
columns = target_list
ordered_cols = ["image_name"] + columns

# One row per test image: file name first, then the 0/1 prediction per label.
results = (
    pd.DataFrame(predictions, columns=columns)
    .assign(image_name=test_data_gen_1.filenames)
    .loc[:, ordered_cols]
)
results

Unnamed: 0,image_name,haze,primary,agriculture,clear,water,habitation,road,cultivation,slash_burn,cloudy,partly_cloudy,conventional_mine,bare_ground,artisinal_mine,blooming,selective_logging,blow_down
0,test_29545.jpg,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0
1,test_28912.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,test_28187.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,test_17633.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,test_894.jpg,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40664,test_36590.jpg,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
40665,test_17506.jpg,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
40666,test_4928.jpg,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
40667,test_15479.jpg,0,1,0,1,1,0,0,0,0,0,0,0,0,0,0,0,0


In [133]:
# Convert the file names to a categorical so a custom (natural) order can be attached to them.
results['image_name'] = results['image_name'].astype('category')

In [135]:
# Give the categories a natural order (test_2 before test_10). The result is
# assigned back because the `inplace` argument of reorder_categories is
# deprecated and no longer available in recent pandas releases.
results['image_name'] = results['image_name'].cat.reorder_categories(
    natsorted(results['image_name']), ordered=True
)

In [138]:
# Sort rows by image_name and renumber the index from 0.
results = results.sort_values(by='image_name').reset_index(drop=True)
results

Unnamed: 0,image_name,haze,primary,agriculture,clear,water,habitation,road,cultivation,slash_burn,cloudy,partly_cloudy,conventional_mine,bare_ground,artisinal_mine,blooming,selective_logging,blow_down
0,test_0.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,test_1.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,test_2.jpg,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3,test_3.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,test_4.jpg,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40664,test_40664.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
40665,test_40665.jpg,0,1,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0
40666,test_40666.jpg,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0
40667,test_40667.jpg,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0


In [80]:
# Rewind the (unshuffled) generator so that the prediction rows are in the same
# order as test_data_gen_2.filenames.
test_data_gen_2.reset()
# Model.predict accepts generators directly; predict_generator is deprecated.
pred_2 = model.predict(test_data_gen_2, verbose=1)



In [143]:
# Threshold the sigmoid outputs at 0.5 and convert to 0/1 integers.
pred_bool_2 = np.greater(pred_2, 0.5)
predictions_2 = pred_bool_2.astype(int)

# Same layout as `results`; `columns` and `ordered_cols` come from the cell that built it.
results_2 = (
    pd.DataFrame(predictions_2, columns=columns)
    .assign(image_name=test_data_gen_2.filenames)
    .loc[:, ordered_cols]
)
results_2

Unnamed: 0,image_name,haze,primary,agriculture,clear,water,habitation,road,cultivation,slash_burn,cloudy,partly_cloudy,conventional_mine,bare_ground,artisinal_mine,blooming,selective_logging,blow_down
0,file_10472.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,file_19653.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,file_19865.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,file_2043.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,file_3113.jpg,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20517,file_6512.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
20518,file_3874.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
20519,file_9618.jpg,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0
20520,file_20149.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [144]:
# Convert the file names to a categorical so a custom (natural) order can be attached to them.
results_2['image_name'] = results_2['image_name'].astype('category')

In [145]:
# Give the categories a natural order (file_2 before file_10). The result is
# assigned back because the `inplace` argument of reorder_categories is
# deprecated and no longer available in recent pandas releases.
results_2['image_name'] = results_2['image_name'].cat.reorder_categories(
    natsorted(results_2['image_name']), ordered=True
)

In [146]:
# Sort rows by image_name and renumber the index from 0.
results_2 = results_2.sort_values(by='image_name').reset_index(drop=True)
results_2

Unnamed: 0,image_name,haze,primary,agriculture,clear,water,habitation,road,cultivation,slash_burn,cloudy,partly_cloudy,conventional_mine,bare_ground,artisinal_mine,blooming,selective_logging,blow_down
0,file_0.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,file_1.jpg,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0
2,file_2.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
3,file_3.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,file_4.jpg,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
20517,file_20517.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
20518,file_20518.jpg,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
20519,file_20519.jpg,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0
20520,file_20520.jpg,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [147]:
# Stack the predictions for the main test set and the additional test set,
# main set first, with a fresh 0..n-1 index.
prediction_frames = [results, results_2]
submission_df = pd.concat(prediction_frames, ignore_index=True, copy=False)
submission_df

Unnamed: 0,image_name,haze,primary,agriculture,clear,water,habitation,road,cultivation,slash_burn,cloudy,partly_cloudy,conventional_mine,bare_ground,artisinal_mine,blooming,selective_logging,blow_down
0,test_0.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
1,test_1.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
2,test_2.jpg,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
3,test_3.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
4,test_4.jpg,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61186,file_20517.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0
61187,file_20518.jpg,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0
61188,file_20519.jpg,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0
61189,file_20520.jpg,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0


In [148]:
# Turn the 0/1 label columns of every row into the space-separated tag string
# expected in the submission. Joining the selected names avoids the leading
# space that repeated `' ' + name` concatenation produces, and needs a single
# pass over the rows instead of one masked assignment per label.
label_flags = submission_df[target_list].to_numpy() == 1
submission_df['tags'] = [
    ' '.join(name for name, is_set in zip(target_list, row_flags) if is_set)
    for row_flags in label_flags
]
submission_df

Unnamed: 0,image_name,haze,primary,agriculture,clear,water,habitation,road,cultivation,slash_burn,cloudy,partly_cloudy,conventional_mine,bare_ground,artisinal_mine,blooming,selective_logging,blow_down,tags
0,test_0.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,primary clear
1,test_1.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,primary clear
2,test_2.jpg,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,primary partly_cloudy
3,test_3.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,primary clear
4,test_4.jpg,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,primary partly_cloudy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
61186,file_20517.jpg,0,1,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,primary clear
61187,file_20518.jpg,0,1,0,0,0,0,0,0,0,0,1,0,0,0,0,0,0,primary partly_cloudy
61188,file_20519.jpg,0,1,1,1,0,0,1,0,0,0,0,0,0,0,0,0,0,primary agriculture clear road
61189,file_20520.jpg,0,1,1,1,0,0,0,0,0,0,0,0,0,0,0,0,0,primary agriculture clear


In [149]:
# The indicator columns are no longer needed once the tag string exists.
submission_df = submission_df.drop(target_list, axis=1)
submission_df

Unnamed: 0,image_name,tags
0,test_0.jpg,primary clear
1,test_1.jpg,primary clear
2,test_2.jpg,primary partly_cloudy
3,test_3.jpg,primary clear
4,test_4.jpg,primary partly_cloudy
...,...,...
61186,file_20517.jpg,primary clear
61187,file_20518.jpg,primary partly_cloudy
61188,file_20519.jpg,primary agriculture clear road
61189,file_20520.jpg,primary agriculture clear


In [150]:
# Remove the '.jpg' extension from the image names. str.strip('.jpg') is not
# used because it strips any of the characters '.', 'j', 'p', 'g' from BOTH
# ends of the string rather than removing the suffix; slicing off the suffix
# only when it is present is exact and also safe to re-run.
jpg_suffix = '.jpg'
submission_df['image_name'] = submission_df['image_name'].apply(
    lambda x: x[:-len(jpg_suffix)] if x.endswith(jpg_suffix) else x
)
submission_df

Unnamed: 0,image_name,tags
0,test_0,primary clear
1,test_1,primary clear
2,test_2,primary partly_cloudy
3,test_3,primary clear
4,test_4,primary partly_cloudy
...,...,...
61186,file_20517,primary clear
61187,file_20518,primary partly_cloudy
61188,file_20519,primary agriculture clear road
61189,file_20520,primary agriculture clear


In [128]:
!rm submission.csv

In [129]:
# Write the submission (image_name, tags) to the Kaggle working directory without the index column.
submission_df.to_csv('/kaggle/working/submission.csv', index=False)