In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import cv2
import os
import sys
import gc
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# Read the per-image tag table and preview its first rows.
train_csv_path = '../input/planets-dataset/planet/planet/train_classes.csv'
train_classes = pd.read_csv(train_csv_path)
train_classes.head()

In [None]:
# Trigger a garbage-collection pass; the cell displays the value gc.collect() returns.
gc.collect()

In [None]:
!pip install tensorflow-gpu==1.15

In [None]:
from tensorflow import keras
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array

In [None]:
# Collect every distinct tag that appears in the space-separated 'tags' column.
class_labels = {
    tag
    for tag_string in train_classes['tags'].values
    for tag in tag_string.split(' ')
}

In [None]:
# Display the set of distinct tags found in train_classes['tags'].
class_labels

In [None]:
# Split each tag string once instead of once per label.
tag_lists = train_classes['tags'].str.split(' ')

# Add one 0/1 indicator column per tag. Iterating in sorted order keeps the
# column order (and therefore the model's output order) the same on every run;
# plain set iteration order can differ between kernel sessions.
for label in sorted(class_labels):
    train_classes[label] = tag_lists.apply(lambda tags, label=label: int(label in tags))

# Append the file extension only where it is missing, so re-running this cell
# does not produce names such as "train_0.jpg.jpg".
has_extension = train_classes["image_name"].str.endswith(".jpg")
train_classes["image_name"] = train_classes["image_name"].where(
    has_extension, train_classes["image_name"] + ".jpg")

train_classes.head()

In [None]:
from sklearn.metrics import fbeta_score
from tensorflow.keras import backend
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.optimizers import SGD
from tensorflow.keras import backend
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dropout

In [None]:
# Generator that only rescales pixel values (no augmentation options are set).
image_gen = ImageDataGenerator(rescale=1./255)

# loading images from dataframe; every column from index 2 onward is a target
X = image_gen.flow_from_dataframe(
    dataframe=train_classes,
    directory='/kaggle/input/planets-dataset/planet/planet/train-jpg/',
    x_col='image_name',
    y_col=list(train_classes.columns[2:]),
    target_size=(128, 128),
    class_mode='raw',
    seed=1,
    batch_size=128,
)

In [None]:
# Augmenting generator: rescaling, rotations up to 180 degrees and horizontal
# flips, with 10% of the rows reserved as the validation subset.
train_image_generator = ImageDataGenerator(
    rescale=1/255,
    validation_split=0.1,
    rotation_range=180,
    horizontal_flip=True,
)

# Training subset; targets are all columns from index 2 onward.
train_generator = train_image_generator.flow_from_dataframe(
    dataframe=train_classes,
    directory='../input/planets-dataset/planet/planet/train-jpg/',
    x_col='image_name',
    y_col=list(train_classes.columns[2:]),
    target_size=(128, 128),
    class_mode='raw',
    seed=1,
    batch_size=128,
    subset='training',
)

In [None]:
# Validation images should only be rescaled, not randomly rotated/flipped,
# otherwise val_fbeta (used for checkpointing and early stopping) is measured
# on a different random distortion of the images every epoch.
# validation_split must match the value used by train_image_generator (0.1):
# per the Keras docs the subset is selected by row position, so an equal split
# keeps the training and validation rows disjoint.
validation_image_generator = ImageDataGenerator(rescale=1/255, validation_split=0.1)

validation_generator = validation_image_generator.flow_from_dataframe(dataframe=train_classes,
        directory='../input/planets-dataset/planet/planet/train-jpg/', x_col='image_name', y_col=list(train_classes.columns[2:]),
       target_size=(128, 128), class_mode='raw', seed=0, batch_size=128, subset='validation')

In [None]:
# Batches per epoch: sample count divided by batch size, rounded up so the
# final partial batch is included.
train_batches_per_epoch = np.ceil(train_generator.samples / train_generator.batch_size)
train_step_size = int(train_batches_per_epoch)

In [None]:
from tensorflow import keras

# Using Resnet50 model
in_shape = (128, 128, 3)
# One sigmoid unit per label column. Derived from the same columns that the
# data generators receive as y_col, so the output width cannot drift from the
# targets (the original hard-coded 17).
out_shape = len(train_classes.columns[2:])

# ImageNet-pretrained convolutional base without its classification head.
resnet50 = keras.applications.resnet50
conv_model = resnet50.ResNet50(weights='imagenet', include_top=False, input_shape=in_shape)

# add new classifier layers on top of the base model's output
class0 = Flatten()(conv_model.output)
class1 = Dense(128, activation='relu', kernel_initializer='he_uniform')(class0)
class2 = Dense(128, activation='relu', kernel_initializer='he_uniform')(class1)
# Sigmoid (not softmax): each label gets its own independent output unit.
predictions = Dense(out_shape, activation='sigmoid')(class2)

full_model = keras.models.Model(inputs=conv_model.input, outputs=predictions)
full_model.summary()

In [None]:
def fbeta(y_true, y_pred, beta=2, epsilon=1e-4, threshold=0.2):
    """Per-row F-beta score of thresholded predictions.

    Args:
        y_true: ground-truth 0/1 indicators; cast to float32.
        y_pred: predicted scores; values strictly greater than ``threshold``
            count as positive predictions.
        beta: weight of recall relative to precision (default 2).
        epsilon: small constant added to every denominator to avoid 0/0.
        threshold: decision threshold applied to ``y_pred`` (default 0.2).

    Returns:
        A tensor holding one F-beta value per row; sums are taken over axis 1.
    """
    # Imported locally because no cell in this notebook binds the name `tf`
    # (only `keras` is imported from tensorflow); without this the metric
    # raises NameError the first time it is called.
    import tensorflow as tf

    squared_beta = beta ** 2

    y_true = tf.cast(y_true, tf.float32)
    # Binarise the predictions at the decision threshold.
    y_pred = tf.cast(tf.greater(tf.cast(y_pred, tf.float32), tf.constant(threshold)), tf.float32)

    tp = tf.reduce_sum(y_true * y_pred, axis=1)   # true positives per row
    fp = tf.reduce_sum(y_pred, axis=1) - tp       # false positives per row
    fn = tf.reduce_sum(y_true, axis=1) - tp       # false negatives per row

    p = tp / (tp + fp + epsilon)   # precision
    r = tp / (tp + fn + epsilon)   # recall

    fb = (1 + squared_beta) * p * r / (squared_beta * p + r + epsilon)
    return fb

In [None]:
# Adam optimiser with a learning rate of 1e-4.
opt = keras.optimizers.Adam(lr=0.0001)

# Binary cross-entropy loss; the custom fbeta function is tracked as a metric.
full_model.compile(
    loss='binary_crossentropy',
    optimizer=opt,
    metrics=[fbeta],
)

In [None]:
# Use the public tf.keras API, like every other Keras import in this notebook;
# tensorflow.python.* is a private namespace.
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

# Write the weights to resnet50_model.hdf5 whenever val_fbeta reaches a new maximum.
checkpoint = ModelCheckpoint(filepath='resnet50_model.hdf5', monitor='val_fbeta',
                             save_best_only=True, save_weights_only=True, mode='max', verbose=1)
# Stop training after 10 consecutive epochs without an improvement in val_fbeta.
early = EarlyStopping(monitor='val_fbeta', min_delta=0, patience=10, verbose=1, mode='max')

In [None]:
# Size the validation pass from the validation generator itself. Using
# train_step_size here made every epoch draw as many validation batches as
# there are training batches, cycling the much smaller validation subset
# several times per epoch.
validation_step_size = int(np.ceil(validation_generator.samples / validation_generator.batch_size))

train_history = full_model.fit(train_generator, steps_per_epoch=train_step_size,
                               validation_data=validation_generator, validation_steps=validation_step_size,
                               epochs=20, callbacks=[checkpoint, early])

In [None]:
# Load the sample submission and turn each image name into a '<name>.jpg' file name.
sample_sub = pd.read_csv('../input/planets-dataset/planet/planet/sample_submission.csv')
sample_sub['image_name'] = sample_sub['image_name'].map('{}.jpg'.format)
sample_sub.head()

In [None]:
# First 40669 rows of the submission table, kept as a one-column frame with a fresh 0-based index.
test1 = sample_sub.iloc[:40669][['image_name']].reset_index(drop=True)
test1.head()

In [None]:
# Rescale-only generator for the first test folder.
test_image_generator1 = ImageDataGenerator(rescale=1./255)

# No targets (y_col/class_mode are None) and shuffle is off, so batches are
# yielded in dataframe order.
test_generator1 = test_image_generator1.flow_from_dataframe(
    dataframe=test1,
    directory='../input/planets-dataset/planet/planet/test-jpg',
    x_col='image_name',
    y_col=None,
    batch_size=128,
    shuffle=False,
    class_mode=None,
    target_size=(128, 128),
)

# Batches needed to cover every test image once (rounded up).
test_batches1 = np.ceil(test_generator1.samples / test_generator1.batch_size)
test_step_size1 = int(test_batches1)

In [None]:
# Rewind the generator before predicting so the output rows start at the first file.
test_generator1.reset()
# `model2` is never defined in this notebook; the trained network is `full_model`.
prediction1 = full_model.predict(test_generator1, steps=test_step_size1, verbose=1)

In [None]:
filenames1 = test_generator1.filenames

# `label_list` was never defined. The model's outputs follow the column order
# that was passed as y_col to the training generator, i.e. every column of
# train_classes from index 2 onward.
label_list = list(train_classes.columns[2:])
label_array = np.array(label_list)

# converting the predictions of the first 40669 to tag names:
# keep every label whose predicted score exceeds 0.2 and join them with spaces
predict_tags1 = pd.DataFrame(prediction1)
predict_tags1 = predict_tags1.apply(lambda x: ' '.join(label_array[(x > 0.2).values]), axis=1)

# converting the predictions of the first 40669 to a dataframe
result_df1 = pd.DataFrame({'image_name': filenames1, 'tags': predict_tags1})
result_df1.head()

In [None]:
# Remaining rows (from position 40669 on) as a one-column frame with a fresh 0-based index.
test2 = sample_sub.iloc[40669:][['image_name']].reset_index(drop=True)
test2.head()

In [None]:
# Rescale-only generator for the additional test folder.
test_image_generator2 = ImageDataGenerator(rescale=1./255)

# No targets (y_col/class_mode are None) and shuffle is off, so batches are
# yielded in dataframe order.
test_generator2 = test_image_generator2.flow_from_dataframe(
    dataframe=test2,
    directory='../input/planets-dataset/test-jpg-additional/test-jpg-additional',
    x_col='image_name',
    y_col=None,
    batch_size=128,
    shuffle=False,
    class_mode=None,
    target_size=(128, 128),
)

# Batches needed to cover every image once (rounded up).
test_batches2 = np.ceil(test_generator2.samples / test_generator2.batch_size)
test_step_size2 = int(test_batches2)

In [None]:
# Rewind the generator before predicting so the output rows start at the first file.
test_generator2.reset()
# `model2` is never defined in this notebook; the trained network is `full_model`.
prediction2 = full_model.predict(test_generator2, steps=test_step_size2, verbose=1)

In [None]:
filenames2 = test_generator2.filenames

# `label_list` was never defined. The model's outputs follow the column order
# that was passed as y_col to the training generator, i.e. every column of
# train_classes from index 2 onward.
label_list = list(train_classes.columns[2:])
label_array = np.array(label_list)

# converting the predictions of the remaining images to tag names:
# keep every label whose predicted score exceeds 0.2 and join them with spaces
predict_tags2 = pd.DataFrame(prediction2)
predict_tags2 = predict_tags2.apply(lambda x: ' '.join(label_array[(x > 0.2).values]), axis=1)

# converting the predictions of the remaining to a dataframe
result_df2 = pd.DataFrame({'image_name': filenames2, 'tags': predict_tags2})

In [None]:
# Stack both prediction tables and renumber the rows 0..n-1 in one step.
final_result = pd.concat([result_df1, result_df2], ignore_index=True)

final_result.head()

In [None]:
# Drop the '.jpg' suffix that was added for the image generators. Checking for
# the suffix first keeps the cell idempotent: a blind x[:-4] would cut four
# more characters off every name each time the cell is re-run.
final_result['image_name'] = final_result['image_name'].apply(
    lambda name: name[:-4] if name.endswith('.jpg') else name)

In [None]:
# Write the submission table to best_submission.csv without the index column.
final_result.to_csv('best_submission.csv', index=False)