In [1]:
import numpy as np
import pandas as pd 
import tensorflow as tf

In [2]:
from numba import cuda 
device = cuda.get_current_device()
device.reset()

In [3]:
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'

In [4]:
gpu_devices = tf.config.list_physical_devices('GPU')
print(gpu_devices)
if gpu_devices:
  print(tf.config.experimental.get_memory_usage('GPU:0'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
Instructions for updating:
Use tf.config.experimental.get_memory_info(device)['current'] instead.
0


In [5]:
trainDF = pd.read_csv('train_v2.csv')
trainDF.head()

Unnamed: 0,image_name,tags
0,train_0,haze primary
1,train_1,agriculture clear primary water
2,train_2,clear primary
3,train_3,clear primary
4,train_4,agriculture clear habitation primary road


In [53]:
submission= pd.read_csv('sample_submission_v2.csv')
submission['image_name'] = submission['image_name']+".jpg"
submission.head()

Unnamed: 0,image_name,tags
0,test_0.jpg,primary clear agriculture road water
1,test_1.jpg,primary clear agriculture road water
2,test_2.jpg,primary clear agriculture road water
3,test_3.jpg,primary clear agriculture road water
4,test_4.jpg,primary clear agriculture road water


In [7]:
parameters = set()

tags = trainDF['tags']
for i in tags:
    #print(i)
    sep = i.split()
    for j in sep:
        #print(j)
        parameters.add(j)
        
parameters = list(parameters)
parameters

['artisinal_mine',
 'blooming',
 'primary',
 'cultivation',
 'habitation',
 'cloudy',
 'road',
 'conventional_mine',
 'selective_logging',
 'partly_cloudy',
 'haze',
 'agriculture',
 'slash_burn',
 'clear',
 'blow_down',
 'water',
 'bare_ground']

In [8]:
for tag in parameters:
    trainDF[tag] = trainDF['tags'].apply(lambda x: 1 if tag in x.split() else 0)
    
trainDF['image_name'] = trainDF['image_name']+".jpg"
trainDF.head()

Unnamed: 0,image_name,tags,artisinal_mine,blooming,primary,cultivation,habitation,cloudy,road,conventional_mine,selective_logging,partly_cloudy,haze,agriculture,slash_burn,clear,blow_down,water,bare_ground
0,train_0.jpg,haze primary,0,0,1,0,0,0,0,0,0,0,1,0,0,0,0,0,0
1,train_1.jpg,agriculture clear primary water,0,0,1,0,0,0,0,0,0,0,0,1,0,1,0,1,0
2,train_2.jpg,clear primary,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0
3,train_3.jpg,clear primary,0,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0
4,train_4.jpg,agriculture clear habitation primary road,0,0,1,0,1,0,1,0,0,0,0,1,0,1,0,0,0


In [9]:
tagList = list(trainDF.columns[2:])

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Conv2D, MaxPool2D , Flatten, BatchNormalization, Dropout
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint

In [11]:
def fbeta(ytrue , ypred, beta=2, epsilon=1e-4):
    beta_squared = beta**2

    ytrue = tf.cast(ytrue, tf.float32)
    ypred = tf.cast(tf.greater(tf.cast(ypred, tf.float32), tf.constant(0.5)), tf.float32)
        
    tp = tf.reduce_sum(ytrue * ypred, axis=1)
    fp = tf.reduce_sum(ypred, axis=1) - tp
    fn = tf.reduce_sum(ytrue, axis=1) - tp
    
    precision = tp/(tp+fp+epsilon)
    recall = tp/(tp+fn+epsilon)
    
    fb = (1+beta_squared)*precision*recall / (beta_squared*precision + recall + epsilon)
    return fb

In [12]:
def multi_label_acc(ytrue , ypred, epsilon=1e-4):
    
    ytrue = tf.cast(ytrue, tf.float32)
    ypred = tf.cast(tf.greater(tf.cast(ypred, tf.float32), tf.constant(0.5)), tf.float32)
    
    tp = tf.reduce_sum(ytrue * ypred, axis=1)
    fp = tf.reduce_sum(ypred, axis=1) - tp
    fn = tf.reduce_sum(ytrue, axis=1) - tp
    
    ytrue = tf.cast(ytrue, tf.bool)
    ypred = tf.cast(ypred, tf.bool)
    
    tn = tf.reduce_sum(tf.cast(tf.logical_not(ytrue), tf.float32) * tf.cast(tf.logical_not(ypred), tf.float32),\
                       axis=1)
    
    return (tp+tn)/(tp+tn+fp+fn+epsilon)

In [13]:
def build_model():
    model = Sequential()
    model.add(Conv2D(filters=128, kernel_size=3, input_shape=(64, 64, 3), padding="same", activation="relu"))
    model.add(BatchNormalization())
    model.add(Conv2D(filters= 64,kernel_size= 3,activation='relu',padding='same'))
    model.add(MaxPool2D(pool_size=(2,2),strides=(2,2),padding='same'))
    model.add(Dropout(0.2))
    model.add(Flatten())
    model.add(Dense(units = 128,activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(units = 64,activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(17, activation='sigmoid'))
    opt = Adam()
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=[multi_label_acc, fbeta])
    
    return model

In [14]:
modelCP = ModelCheckpoint(filepath='model.hdf5',
                          monitor='val_fbeta',
                          mode='max', 
                          save_best_only=True, 
                          save_weights_only=True)

In [15]:
imageGenerator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1/255,validation_split=0.25)

train_img = imageGenerator.flow_from_dataframe(dataframe=trainDF,
                                               directory='train-jpg/', 
                                               x_col='image_name', 
                                               y_col=tagList,
                                               target_size=(64, 64), 
                                               class_mode='raw', 
                                               seed=0, 
                                               batch_size=32, 
                                               subset='training')

val_img = imageGenerator.flow_from_dataframe(dataframe=trainDF, 
                                             directory='train-jpg/', 
                                             x_col='image_name', 
                                             y_col=tagList,
                                             target_size=(64, 64), 
                                             class_mode='raw', 
                                             seed=0, 
                                             batch_size=32, 
                                             subset='validation')

Found 30360 validated image filenames.
Found 10119 validated image filenames.


In [16]:
step_train_size = int(np.ceil(train_img.samples / train_img.batch_size))
step_val_size = int(np.ceil(val_img.samples / train_img.batch_size))

In [17]:
model = build_model() 

model.fit(x=train_img, 
          steps_per_epoch=step_train_size, 
          validation_data=val_img, 
          validation_steps=step_val_size,
          epochs=10, 
          callbacks=[modelCP])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2239d471fd0>

In [54]:
test_df = submission.iloc[:40669]['image_name'].reset_index().drop('index', axis=1)
test_df.head()

Unnamed: 0,image_name
0,test_0.jpg
1,test_1.jpg
2,test_2.jpg
3,test_3.jpg
4,test_4.jpg


In [44]:
testGenerator = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1/255)

testImages = testGenerator.flow_from_dataframe(dataframe=test_df,
                                               directory='test-jpg/', 
                                               x_col='image_name',
                                               y_col = None,
                                               batch_size=32, 
                                               class_mode=None, 
                                               target_size=(64, 64))

testStepSize = int(np.ceil(testImages.samples / testImages.batch_size))

Found 40669 validated image filenames.


In [45]:
pred_Test = model.predict(testImages, steps=testStepSize, verbose=1)



In [69]:
testNames = testImages.filenames 
        
res1 = pd.DataFrame(pred_Test)
res1 = res1.apply(lambda x: ' '.join(np.array(parameters)[x > 0.4]), axis=1)

ans1 = pd.DataFrame({'image_name': testNames, 'tags': res1})
ans1.head()

Unnamed: 0,image_name,tags
0,test_0.jpg,primary partly_cloudy agriculture
1,test_1.jpg,primary clear
2,test_2.jpg,primary partly_cloudy
3,test_3.jpg,primary clear
4,test_4.jpg,primary clear


In [70]:
additional_df = submission.iloc[40669:]['image_name'].reset_index().drop('index', axis=1)
additional_df.head()

Unnamed: 0,image_name
0,file_0.jpg
1,file_1.jpg
2,file_10.jpg
3,file_100.jpg
4,file_1000.jpg


In [71]:
testAdditional = testGenerator.flow_from_dataframe(dataframe=additional_df,
                                                   directory='test-jpg-additional/', 
                                                   x_col='image_name',
                                                   y_col = None,
                                                   batch_size=32,
                                                   class_mode=None, 
                                                   target_size=(64, 64))

additionalStepSize = int(np.ceil(testAdditional.samples / testAdditional.batch_size))

Found 20522 validated image filenames.


In [58]:
pred_Add = model.predict(testAdditional, steps=additionalStepSize, verbose=1)



In [72]:
additionalNames = testAdditional.filenames 
        
res2 = pd.DataFrame(pred_Add)
res2 = res2.apply(lambda x: ' '.join(np.array(parameters)[x > 0.4]), axis=1)

ans2 = pd.DataFrame({'image_name': additionalNames, 'tags': res2})
ans2.head()

Unnamed: 0,image_name,tags
0,file_0.jpg,primary clear
1,file_1.jpg,primary road agriculture clear
2,file_10.jpg,primary clear
3,file_100.jpg,primary agriculture clear
4,file_1000.jpg,primary clear


In [73]:
df = pd.concat([ans1, ans2])
df = df.reset_index().drop('index', axis=1) 

print(df.shape)
df.head()

(61191, 2)


Unnamed: 0,image_name,tags
0,test_0.jpg,primary partly_cloudy agriculture
1,test_1.jpg,primary clear
2,test_2.jpg,primary partly_cloudy
3,test_3.jpg,primary clear
4,test_4.jpg,primary clear


In [74]:
df['image_name'] = df['image_name'].str.rstrip('.jpg')

In [75]:
df.to_csv('submission.csv', index=False)