In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory recursively and print the full path of
# every file found.
for folder, _, files_here in os.walk('/kaggle/input'):
    for entry in files_here:
        print(os.path.join(folder, entry))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
# Load the training label table; later cells read its 'image_name' and
# 'tags' columns.
data = pd.read_csv(
    '../input/planets-dataset/planet/planet/train_classes.csv'
)
data.head()

In [3]:
# Accumulator for the distinct tag names; split_tags() adds to it in place.
labels = set()

In [4]:
def split_tags(t):
    """Add every whitespace-separated tag found in ``t`` to the global ``labels``.

    Args:
        t: A string of tags separated by whitespace.

    Returns:
        None. ``labels`` (which must still be a set when this is called) is
        updated in place.
    """
    # A single bulk update replaces the former list comprehension, which was
    # evaluated only for its side effects and built a throwaway list of None.
    labels.update(t.split())

In [5]:
# Work on an independent deep copy so the frame loaded into `data` is left
# unmodified by the column edits that follow.
data2 = data.copy(deep=True)

In [6]:
# Collect the distinct tags directly from the 'tags' column and sort them.
# - Sorting fixes the label order: converting a set to a list yields an order
#   that depends on string hashing, which can differ between interpreter
#   sessions, so the indicator-column order (and therefore the meaning of each
#   model output) would not be reproducible across kernel restarts.
# - Building the result from the column itself makes the cell safe to re-run;
#   the earlier version failed on a second run because `labels` had already
#   been rebound from a set to a list.
# Assumes every entry of data2['tags'] is a string.
labels = sorted({tag for tags in data2['tags'] for tag in tags.split()})
labels

In [7]:
# Split every row's tag string once and reuse the result for all labels,
# instead of re-splitting the same string once per label.
tag_sets = data2['tags'].apply(lambda tags: set(tags.split()))
for tag in labels:
    # One 0/1 indicator column per label: 1 when the row carries that tag.
    data2[tag] = tag_sets.apply(lambda row_tags: 1 if tag in row_tags else 0)

In [8]:
# Build the file names from the untouched `data` frame rather than from
# data2 itself, so re-running this cell cannot append '.jpg' a second time
# (the in-place version produced 'name.jpg.jpg' on a re-run).
# data2 was created as a copy of `data`, so the two share the same index.
data2['image_name'] = data['image_name'].map('{}.jpg'.format)
data2.head()

In [9]:
# Display the column labels from position 2 onward.
# NOTE(review): presumably these are exactly the indicator columns added
# above, i.e. the CSV contributes only the first two columns — confirm.
data2.columns[2:]

In [10]:
# Column labels from position 2 onward as a plain Python list; this list is
# passed as `y_col` to the data generators further down.
tgs = data2.columns[2:].tolist()
tgs

In [11]:
import tensorflow as tf
from tensorflow.keras.layers import Dense,BatchNormalization,Conv2D,MaxPooling2D,Dropout,Flatten
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam,SGD
from tensorflow.keras.callbacks import TensorBoard,EarlyStopping,ModelCheckpoint
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [12]:
def fscore_beta(y_true,y_pred,beta=2,epsilon=1e-4):
    """Row-wise F-beta score of thresholded predictions.

    Predictions are binarised at 0.5 (strictly greater counts as positive).
    True positives, false positives and false negatives are then summed
    along axis 1, giving one score per row.

    Args:
        y_true: Ground-truth indicator values; cast to float32. Assumed to be
            2-D (rows x labels) because every sum runs over axis 1.
        y_pred: Predicted scores with the same layout as ``y_true``.
        beta: Weight of recall relative to precision.
        epsilon: Small constant added to each denominator so that empty
            rows do not divide by zero.

    Returns:
        A tensor holding one F-beta value per row.
    """
    truth = tf.cast(y_true, tf.float32)
    # Hard 0/1 decisions: 1.0 where the score is strictly above 0.5.
    scores = tf.cast(y_pred, tf.float32)
    hard_pred = tf.cast(tf.greater(scores, tf.constant(0.5)), tf.float32)

    # Per-row confusion counts.
    true_pos = tf.reduce_sum(truth * hard_pred, axis=1)
    false_pos = tf.reduce_sum(hard_pred, axis=1) - true_pos
    false_neg = tf.reduce_sum(truth, axis=1) - true_pos

    precision = true_pos / (true_pos + false_pos + epsilon)
    recall = true_pos / (true_pos + false_neg + epsilon)

    weight = beta ** 2
    return ((1 + weight) * precision * recall) / (weight * precision + recall + epsilon)

In [13]:
def accur(y_true,y_pred,epsilon=1e-4):
    """Row-wise label accuracy of thresholded predictions.

    Predictions are binarised at 0.5 (strictly greater counts as positive).
    For each row the result is (TP + TN) / (TP + TN + FP + FN + epsilon),
    with all counts taken along axis 1.

    Args:
        y_true: Ground-truth indicator values; cast to float32. Assumed to be
            2-D (rows x labels) because every sum runs over axis 1.
        y_pred: Predicted scores with the same layout as ``y_true``.
        epsilon: Small constant added to the denominator.

    Returns:
        A tensor holding one accuracy value per row.
    """
    truth = tf.cast(y_true, tf.float32)
    # Hard 0/1 decisions: 1.0 where the score is strictly above 0.5.
    scores = tf.cast(y_pred, tf.float32)
    hard_pred = tf.cast(tf.greater(scores, tf.constant(0.5)), tf.float32)

    true_pos = tf.reduce_sum(truth * hard_pred, axis=1)
    false_pos = tf.reduce_sum(hard_pred, axis=1) - true_pos
    false_neg = tf.reduce_sum(truth, axis=1) - true_pos

    # True negatives: positions where both truth and prediction are "off".
    # The negation goes through a boolean view of each tensor.
    truth_off = tf.cast(tf.logical_not(tf.cast(truth, tf.bool)), tf.float32)
    pred_off = tf.cast(tf.logical_not(tf.cast(hard_pred, tf.bool)), tf.float32)
    true_neg = tf.reduce_sum(truth_off * pred_off, axis=1)

    correct = true_pos + true_neg
    return correct / (correct + false_pos + false_neg + epsilon)

In [14]:
def model_build(input_shape=(128,128,3), n_classes=17, learning_rate=1e-4):
    """Build and compile the multi-label CNN classifier.

    Architecture: BatchNormalization on the input, then four stages of
    [Conv2D 'same' -> Conv2D -> MaxPooling2D -> Dropout(0.2)] with 32, 64,
    128 and 256 filters, followed by Flatten -> Dense(512) -> Dropout(0.5)
    -> Dense(n_classes, sigmoid). With the default arguments the network is
    layer-for-layer the same as the previous hard-coded version.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
        n_classes: Number of sigmoid output units (one per label).
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        A compiled Sequential model using binary cross-entropy loss and the
        ``accur`` and ``fscore_beta`` metrics.
    """
    model=Sequential()

    model.add(BatchNormalization(input_shape=input_shape))

    # Four identical convolutional stages that differ only in filter count.
    for filters in (32, 64, 128, 256):
        model.add(Conv2D(filters,kernel_size=(3,3),padding='same',activation='relu'))
        model.add(Conv2D(filters,kernel_size=(3,3),activation='relu'))
        model.add(MaxPooling2D(pool_size=(2,2)))
        model.add(Dropout(0.2))

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    # Sigmoid (not softmax): each label is predicted independently.
    model.add(Dense(n_classes, activation='sigmoid'))

    # `learning_rate` is the documented keyword; the older `lr` alias is
    # deprecated and is rejected by newer Keras releases.
    opt=Adam(learning_rate=learning_rate)

    model.compile(loss='binary_crossentropy',
                 optimizer=opt,
                 metrics=[accur,fscore_beta])

    return model

In [15]:
# Checkpoint callback: keep only the weights of the epoch with the highest
# validation F-beta score.
# Keras logs a function metric under the function's own name, so the
# validation value of `fscore_beta` appears as 'val_fscore_beta'. The previous
# key 'val_fbeta' never exists in the logs, which makes the callback skip
# every save and leaves no 'best_model.hdf5' for the later load_weights call.
mcp=ModelCheckpoint(filepath = 'best_model.hdf5',
                   monitor = 'val_fscore_beta',
                   mode = 'max',
                   save_best_only = True,
                   save_weights_only = True)

In [16]:
# One generator configuration for both subsets: pixel values are scaled by
# 1/255 and 20% of the rows are reserved for validation.
image_gen = ImageDataGenerator(rescale=1/255, validation_split=0.2)

# Settings shared by the training and validation iterators; only `subset`
# differs between the two calls below.
flow_settings = dict(
    dataframe=data2,
    directory="../input/planets-dataset/planet/planet/train-jpg",
    x_col="image_name",
    y_col=tgs,
    batch_size=16,
    seed=2021,
    shuffle=True,
    class_mode="raw",
    target_size=(128,128),
)

train_generator = image_gen.flow_from_dataframe(subset="training", **flow_settings)

val_generator = image_gen.flow_from_dataframe(subset="validation", **flow_settings)

In [17]:
# Number of batches needed to see every sample once, rounded up so a final
# partial batch is still counted.
step_train_size, step_val_size = (
    int(np.ceil(gen.samples / gen.batch_size))
    for gen in (train_generator, val_generator)
)

In [18]:
# Build a freshly compiled network and print its layer summary.
model1 = model_build()
model1.summary()

In [19]:
# Train for 30 epochs on the training iterator, validating on the held-out
# subset after each epoch; `mcp` is the checkpoint callback defined earlier.
model1.fit(
    x=train_generator,
    validation_data=val_generator,
    epochs=30,
    steps_per_epoch=step_train_size,
    validation_steps=step_val_size,
    callbacks=[mcp],
)

In [20]:
# Shell escape: list the contents of the current working directory.
! ls ./

In [21]:
# Re-create the same architecture and load the weights stored in
# 'best_model.hdf5' into it.
model2 = model_build()
model2.load_weights('best_model.hdf5')

In [22]:
sample_sub = pd.read_csv('../input/planets-dataset/planet/planet/sample_submission.csv')

# Work on a copy whose image names carry the '.jpg' extension; `sample_sub`
# itself keeps the names exactly as read from the CSV.
sample_sub1 = sample_sub.copy()
sample_sub1['image_name'] = sample_sub1['image_name'].map('{}.jpg'.format)
sample_sub1.head()

In [23]:
import gc

# Force a garbage-collection pass before the test pipeline is set up.
gc.collect()

In [24]:
# Single-column frame with the first 40669 image names, re-indexed from 0.
# NOTE(review): 40669 is presumably the number of images in the test-jpg
# directory used below — confirm.
test1_df = sample_sub1.iloc[:40669][['image_name']].reset_index(drop=True)
test1_df.head()

In [27]:
# Test-time generator: only the 1/255 rescaling, no validation split.
test_image_gen = ImageDataGenerator(rescale=1/255)

# No targets (y_col / class_mode are None). shuffle=False keeps the batches
# in dataframe order, which the later pairing of `filenames` with the
# prediction rows relies on.
test_generator1 = test_image_gen.flow_from_dataframe(
    dataframe=test1_df,
    directory="../input/planets-dataset/planet/planet/test-jpg",
    x_col="image_name",
    y_col=None,
    class_mode=None,
    target_size=(128,128),
    batch_size=16,
    shuffle=False,
)

# Batches needed to cover every test sample once (rounded up).
test_batches = test_generator1.samples / test_generator1.batch_size
step_test_size1 = int(np.ceil(test_batches))

In [28]:
# Rewind the iterator to its first batch, then predict over the whole test
# set with a progress bar.
test_generator1.reset()
pred1 = model2.predict(
    test_generator1,
    verbose=1,
    steps=step_test_size1,
)

In [29]:
file_names1 = test_generator1.filenames

# Decode each prediction row into a space-separated tag string, keeping the
# labels whose score is above 0.5.
# - The network was trained with `y_col=tgs`, so its output columns follow
#   the order of `tgs`; decoding with that same list ties the two together.
# - The label array is built once here instead of once per row.
tag_names = np.array(tgs)
pred_tags1 = pd.Series(
    [' '.join(tag_names[row > 0.5]) for row in pred1],
    dtype=object,
)

In [30]:
# Pair every test file name with its predicted tag string.
# NOTE(review): the names still carry the '.jpg' suffix added earlier —
# confirm whether the final submission expects it.
result1 = pd.DataFrame(dict(image_name=file_names1, tags=pred_tags1))
result1.head()