## Importing Data and Libraries

In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

# You can write up to 5GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import gc
from matplotlib import pyplot
from matplotlib.image import imread
import matplotlib.pyplot as plt
import cv2
from tensorflow import keras
from tqdm import tqdm
from sklearn.model_selection import train_test_split

from keras import optimizers
from keras.models import Sequential , Model
from keras.layers import Input , Dense , Dropout , Flatten
from keras.layers import Conv2D,MaxPooling2D , BatchNormalization
from keras.callbacks import EarlyStopping,ModelCheckpoint 
from keras.preprocessing.image import ImageDataGenerator
from keras import backend as K


In [3]:
# Defining the fbeta metric
def fbeta(y_true, y_pred, threshold_shift=0, beta=2):
    """F-beta score computed with Keras backend ops, usable as a Keras metric.

    Args:
        y_true: ground-truth multi-hot labels (0/1 values).
        y_pred: predicted scores; clipped to [0, 1] before thresholding.
        threshold_shift: value added to the predictions before rounding, so a
            positive shift lowers the effective 0.5 decision threshold.
        beta: weight of recall relative to precision (2 gives the F2 score).

    Returns:
        A scalar F-beta value aggregated over every element of the batch.
    """
    # Keep the predictions inside [0, 1] whatever the final activation was.
    y_pred = K.clip(y_pred, 0, 1)

    # Binarise the predictions around the (possibly shifted) threshold.
    y_pred_bin = K.round(y_pred + threshold_shift)

    # epsilon keeps precision/recall finite when there are no positives at all.
    tp = K.sum(K.round(y_true * y_pred_bin)) + K.epsilon()
    fp = K.sum(K.round(K.clip(y_pred_bin - y_true, 0, 1)))
    # False negatives must be counted on the *binarised* predictions, exactly
    # like tp and fp; using the raw scores here would ignore threshold_shift.
    fn = K.sum(K.round(K.clip(y_true - y_pred_bin, 0, 1)))

    precision = tp / (tp + fp)
    recall = tp / (tp + fn)

    beta_squared = beta ** 2
    return (beta_squared + 1) * (precision * recall) / (beta_squared * precision + recall + K.epsilon())

## Exploratory data analysis

In [4]:
# Preview the first nine training images (train_0 ... train_8) in a 3x3 grid.
plt.figure(figsize=(20, 20))

folder = '../input/planets-dataset/planet/planet/train-jpg/'

for i in range(9):
    # Subplot positions are 1-based, so image i goes into cell i + 1.
    plt.subplot(3, 3, i + 1)
    filename = folder + 'train_' + str(i) + '.jpg'
    image = imread(filename)
    plt.imshow(image)
plt.show()

In [5]:
# Load the training labels and the sample submission; the submission file
# supplies the list of test image names used further down.
df_train_data = pd.read_csv("../input/planets-dataset/planet/planet/train_classes.csv")
df_test_data = pd.read_csv('../input/planets-dataset/planet/planet/sample_submission.csv')

# Preview only the first rows instead of echoing the whole frame.
df_train_data.head()


In [6]:
# Collect the distinct tags that occur in the space-separated `tags` column.
def flatten(l):
    """Flatten one level of nesting: a list of lists becomes a flat list."""
    return [item for sublist in l for item in sublist]


# sorted() fixes the label order. Iterating a plain set of strings yields a
# different order in every interpreter session (string hash randomisation),
# which would silently reshuffle the label -> column mapping on each restart.
labels = sorted(set(flatten([l.split(' ') for l in df_train_data['tags'].values])))

In [7]:
# Map every training label to its column index, and build the reverse
# lookup (index -> label) as well.
label_map = dict(zip(labels, range(len(labels))))
inv_label_map = {index: name for name, index in label_map.items()}
label_map

In [8]:
# Explicit garbage-collection pass (presumably to keep kernel memory down
# between heavy cells); the integer it returns becomes the cell output.
gc.collect()

In [9]:
# Read every training image, resize it to 64x64 and build the matching
# multi-hot label vector.
x_train = []
y_train = []
for img, label in tqdm(df_train_data.values, miniters=1000):
    # One slot per known label, sized from label_map rather than hard-coded.
    target = np.zeros(len(label_map))
    for tag in label.split(' '):
        target[label_map[tag]] = 1

    path = '../input/planets-dataset/planet/planet/train-jpg/{}.jpg'.format(img)
    image = cv2.imread(path)
    # cv2.imread returns None (it does not raise) when a file is missing or
    # unreadable; fail here with the offending path instead of inside resize.
    if image is None:
        raise FileNotFoundError('Could not read training image: {}'.format(path))

    x_train.append(cv2.resize(image, (64, 64)))
    y_train.append(target)

In [10]:
# checking the length of the training set
len(x_train)

In [11]:
# Explicit garbage-collection pass (presumably to keep kernel memory down
# between heavy cells); the integer it returns becomes the cell output.
gc.collect()

In [12]:
# Read the test images. The first N_TEST_JPG rows of the sample submission are
# read from test-jpg, the remaining rows from test-jpg-additional; both parts
# are merged into a single list in submission order.
N_TEST_JPG = 40669


def load_test_images(rows, path_template, size=(64, 64)):
    """Read and resize the image named in the first column of each row.

    Args:
        rows: iterable of (image_name, tags) pairs.
        path_template: path with one `{}` placeholder for the image name.
        size: (width, height) passed to cv2.resize.

    Returns:
        A list with one resized image array per row, in row order.

    Raises:
        FileNotFoundError: if an image cannot be read.
    """
    images = []
    for img, _ in tqdm(rows, miniters=1000):
        path = path_template.format(img)
        image = cv2.imread(path)
        # cv2.imread signals failure by returning None rather than raising.
        if image is None:
            raise FileNotFoundError('Could not read test image: {}'.format(path))
        images.append(cv2.resize(image, size))
    return images


x_test = load_test_images(
    df_test_data[0:N_TEST_JPG].values,
    '../input/planets-dataset/planet/planet/test-jpg/{}.jpg')
x_test += load_test_images(
    df_test_data[N_TEST_JPG:].values,
    '../input/planets-dataset/test-jpg-additional/test-jpg-additional/{}.jpg')

In [13]:
# checking the length of the test set
len(x_test)

In [14]:
# Explicit garbage-collection pass (presumably to keep kernel memory down
# between heavy cells); the integer it returns becomes the cell output.
gc.collect()

In [15]:
# Turn the Python lists into arrays: images become float16 divided by 255,
# labels become uint8.
# NOTE(review): this cell rebinds its own inputs, so running it a second time
# divides the images by 255 again - run it exactly once per kernel session.
x_train = np.array(x_train, dtype=np.float16) / 255.
y_train = np.array(y_train, dtype=np.uint8)
x_test = np.array(x_test, dtype=np.float16) / 255.



In [16]:
# Hold out 20 % of the training data for validation (shuffled, fixed seed).
split_parts = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    shuffle=True,
    random_state=1,
)
x_train, x_val, y_train, y_val = split_parts

# Shapes in the order: x_train, x_val, y_train, y_val -> printed as
# x_train, y_train, x_val, y_val.
print(x_train.shape, y_train.shape, x_val.shape, y_val.shape)

In [17]:
# Explicit garbage-collection pass (presumably to keep kernel memory down
# between heavy cells); the integer it returns becomes the cell output.
gc.collect()

## Building Model Architecture

In [18]:
input_size = 64
input_channels = 3

# Plain CNN: input batch-norm, four conv stages with growing filter counts,
# then a dense head with 17 sigmoid outputs (one per label).
# NOTE(review): `model` is rebound to the VGG16-based network in the next
# model cell, so the network built here is not the one that gets trained.
model = Sequential()

model.add(BatchNormalization(input_shape=(input_size, input_size, input_channels)))

# Each stage: 'same'-padded conv -> unpadded conv -> 2x2 max-pool -> dropout.
for n_filters in (32, 64, 128, 256):
    model.add(Conv2D(n_filters, kernel_size=(3, 3), padding='same', activation='relu'))
    model.add(Conv2D(n_filters, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

# Classifier head.
for head_layer in (
    Flatten(),
    Dense(512, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    # Output layer
    Dense(17, activation='sigmoid'),
):
    model.add(head_layer)

## Loading Pre-trained CNN Architectures

In [19]:
import tensorflow.keras as keras
from tensorflow.keras.applications.vgg16 import VGG16


# Pre-trained VGG16 convolutional base: ImageNet weights, original
# classifier head left out (include_top=False).
base_model = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(input_size, input_size, 3),
)
base_model.summary()

# New head on top of the base: flatten -> 512-unit ReLU -> 17 sigmoid outputs.
flat_features = Flatten()(base_model.output)
x = Dense(512, activation='relu')(flat_features)
predictions = Dense(17, activation='sigmoid')(x)

# From here on `model` refers to this VGG16-based network.
model = Model(inputs=base_model.input, outputs=predictions)

In [20]:
# Explicit garbage-collection pass (presumably to keep kernel memory down
# between heavy cells); the integer it returns becomes the cell output.
gc.collect()

## Model Training

In [21]:
# Augmentation settings: random horizontal/vertical flips, zoom_range of 0.2,
# rotation_range of 90, with pixels outside the image filled by reflection.
augmentation_options = dict(
    horizontal_flip=True,
    vertical_flip=True,
    zoom_range=0.2,
    rotation_range=90,
    fill_mode='reflect',
)
datagen = ImageDataGenerator(**augmentation_options)

In [22]:
# Training hyper-parameters.
epochs = 20
opt = keras.optimizers.Adam(learning_rate=0.0001)

# Multi-label setup: binary cross-entropy loss, F-beta reported as the metric.
model.compile(
    optimizer=opt,
    loss='binary_crossentropy',
    metrics=[fbeta],
)

# Stop once val_loss has failed to improve for two consecutive epochs.
early_stopping = EarlyStopping(monitor='val_loss', patience=2, verbose=0)
callbacks = [early_stopping]

In [23]:
# Explicit garbage-collection pass (presumably to keep kernel memory down
# between heavy cells); the integer it returns becomes the cell output.
gc.collect()

In [25]:
# Fit the model on augmented batches that the ImageDataGenerator produces on
# the fly.
batch_size = 24

train_flow = datagen.flow(x_train, y_train, batch_size=batch_size)
# Validate on the untouched validation images: random augmentation would make
# val_loss (which EarlyStopping monitors) noisy from one epoch to the next.
val_flow = ImageDataGenerator().flow(x_val, y_val, batch_size=batch_size, shuffle=False)

# len(flow) is the number of batches needed to cover the data once, so the
# step counts are whole numbers that always match the batch size in use.
# Model.fit accepts generators directly; fit_generator is deprecated.
model.fit(train_flow,
          steps_per_epoch=len(train_flow),
          validation_data=val_flow,
          validation_steps=len(val_flow),
          epochs=epochs,
          callbacks=callbacks,
          verbose=1)

In [26]:
# Explicit garbage-collection pass (presumably to keep kernel memory down
# between heavy cells); the integer it returns becomes the cell output.
gc.collect()

In [27]:
# Predict label scores for the test images; the array is kept in a
# one-element list, which is the shape the aggregation cell below expects.
test_predictions = model.predict(x_test, batch_size=128, verbose=2)
test_1 = [test_predictions]

In [28]:
# Explicit garbage-collection pass (presumably to keep kernel memory down
# between heavy cells); the integer it returns becomes the cell output.
gc.collect()

In [29]:
# Combine the prediction arrays in test_1 into one DataFrame with a column per
# label. The arrays are averaged element-wise so the scores stay on the same
# scale as a single prediction; with only one array in the list the result is
# exactly that array. (The earlier loop added the whole stacked list on every
# iteration instead of test_1[i], which fails as soon as there is a second
# entry.)
result = np.mean(test_1, axis=0)
result = pd.DataFrame(result, columns=labels)

In [30]:
# Display the DataFrame of per-label prediction scores.
result

In [31]:
# Turn each row of scores into a space-separated tag string: a label is kept
# when its score is strictly greater than the threshold.
threshold = 0.2

# One boolean mask over the whole score matrix replaces the per-row
# .loc / apply / transpose round-trip, which is far slower and gives the same
# tags in the same (column) order.
above_threshold = result.values > threshold
label_names = np.array(result.columns)

preds = [' '.join(label_names[row_mask])
         for row_mask in tqdm(above_threshold, miniters=1000)]

In [32]:
# Pair every test image name with its predicted tag string.
submission_columns = {
    'image_name': df_test_data['image_name'],
    'tags': preds,
}
planet_result_csv = pd.DataFrame(submission_columns)
planet_result_csv

In [33]:
# Saving result to a csv
planet_result_csv.to_csv('planet_result.csv', index = False)