In [35]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list every directory under the input directory

import os
# Walk the input tree and print each directory path once; individual file
# names are intentionally not printed.
for dirname, _, filenames in os.walk('/kaggle/input'):
    print(dirname)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [36]:
import numpy as np
import pandas as pd
from tensorflow import keras
from keras.preprocessing.image import load_img
from keras.preprocessing.image import img_to_array
import keras.models as models
import keras.layers as layers
from keras import backend

In [37]:
# Training labels: the code below reads the 'image_name' and 'tags' columns.
train_df = pd.read_csv(
    '/kaggle/input/planet-understanding-the-amazon-from-space/train_v2.csv/train_v2.csv'
)
train_df.head()

In [38]:
# Count of missing values per column of the training labels.
print(train_df.isnull().sum())

In [39]:
# File-mapping table shipped with the competition's test set.
test_df = pd.read_csv(
    '/kaggle/input/planet-understanding-the-amazon-from-space/test_v2_file_mapping.csv/test_v2_file_mapping.csv'
)

In [40]:
def tag_mapping(data):
    """Build tag <-> integer lookups from the space-separated 'tags' column.

    Args:
        data: Table-like object whose ``data['tags']`` yields one string of
            space-separated tags per row (e.g. a pandas DataFrame).

    Returns:
        A ``(labels_dict, inv_labels)`` tuple. ``labels_dict`` maps each
        distinct tag to its position in the alphabetically sorted tag list;
        ``inv_labels`` is the reverse mapping (position -> tag).
    """
    labels = set()
    # Iterate over the values themselves instead of looking rows up with
    # data['tags'][i]: that form is label-based, so it breaks (KeyError or the
    # wrong row) whenever the index is not exactly 0..n-1, e.g. after
    # filtering or shuffling the frame.
    for tags in data['tags']:
        labels.update(tags.split(' '))
    labels = sorted(labels)
    labels_dict = {label: position for position, label in enumerate(labels)}
    inv_labels = dict(enumerate(labels))
    return labels_dict, inv_labels

In [41]:
# Forward (tag -> index) and inverse (index -> tag) lookups for the training tags.
label_map, invmap = tag_mapping(train_df)

In [42]:
def file_mapping(data):
    """Map each image name to its list of tags.

    Args:
        data: Table-like object with parallel 'image_name' and 'tags'
            columns; each 'tags' entry is a space-separated string.

    Returns:
        Dict from image name to the list of tag strings for that image.
    """
    # Read from the `data` argument (the original reached for the global
    # `train_df`, silently ignoring whatever frame the caller passed), and
    # pair the columns positionally so a non-default index cannot break it.
    return {
        name: tags.split(' ')
        for name, tags in zip(data['image_name'], data['tags'])
    }

In [43]:
def one_hot_encode(tags, mapping):
    """Return a uint8 vector of length ``len(mapping)`` with ones at the tags' slots.

    ``mapping`` translates each tag to its position in the vector; a tag that
    is missing from ``mapping`` raises ``KeyError``.
    """
    encoding = np.zeros(len(mapping), dtype='uint8')
    positions = [mapping[tag] for tag in tags]
    # Set all positions at once; an empty list leaves the vector all-zero.
    encoding[positions] = 1
    return encoding

In [44]:
def load_dataset(path, file_mapping, tag_mapping):
    """Load every image in ``path`` together with its one-hot tag vector.

    Note that the ``file_mapping`` and ``tag_mapping`` parameters are dicts
    here; inside this function they shadow the notebook functions of the same
    names.

    Args:
        path: Directory holding the image files (with or without a trailing
            separator).
        file_mapping: Dict from image name (file name without its extension)
            to that image's list of tags.
        tag_mapping: Dict from tag to its position in the encoded vector.

    Returns:
        ``(x_train, y_train)``: two uint8 arrays, the images resized to
        32x32 and the matching one-hot targets, in sorted file-name order.

    Raises:
        KeyError: If a file in ``path`` has no entry in ``file_mapping`` or
            carries a tag missing from ``tag_mapping``.
    """
    photos, targets = [], []
    # os.listdir returns entries in arbitrary order; sorting makes the sample
    # order (and any positional train/validation split made afterwards)
    # reproducible from run to run.
    for filename in sorted(os.listdir(path)):
        # os.path.join tolerates a `path` without a trailing separator.
        photo = load_img(os.path.join(path, filename), target_size=(32, 32))
        photo = img_to_array(photo, dtype='uint8')
        # splitext strips whatever extension is present rather than assuming
        # exactly four trailing characters such as '.jpg'.
        image_name = os.path.splitext(filename)[0]
        tags = file_mapping[image_name]
        target = one_hot_encode(tags, tag_mapping)
        photos.append(photo)
        targets.append(target)
    x_train = np.asarray(photos, dtype='uint8')
    y_train = np.asarray(targets, dtype='uint8')
    return x_train, y_train

In [45]:
# Only the forward (tag -> index) lookup is needed to encode the targets.
tags_mapping, _ = tag_mapping(train_df)
files_mapping = file_mapping(train_df)

# Directory of training JPEGs; every file in it is loaded and encoded.
path = '/kaggle/input/planets-dataset/planet/planet/train-jpg/'
x_train, y_train = load_dataset(path, files_mapping, tags_mapping)

In [46]:
# Shapes of the full image and target arrays, one per line.
for arr in (x_train, y_train):
    print(arr.shape)

In [47]:
# Positional hold-out: the first `split` samples are kept for training and the
# remainder becomes the validation set.
# NOTE: this rebinds x_train / y_train, so re-running the cell slices the
# already-truncated arrays; re-run the loading cell first.
split = 35000
x_train, x_valid = x_train[:split], x_train[split:]
y_train, y_valid = y_train[:split], y_train[split:]

In [48]:
# Shapes after the split: training images/targets, then validation images/targets.
for arr in (x_train, y_train, x_valid, y_valid):
    print(arr.shape)

In [49]:
import keras as k
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D

In [52]:
# Small convolutional network over 32x32 RGB inputs. The final layer has 17
# independent sigmoid outputs (presumably one per tag in tags_mapping -
# confirm that len(tags_mapping) == 17), trained with binary cross-entropy.
model = Sequential([
    Conv2D(32, kernel_size=(3, 3), activation='relu', input_shape=(32, 32, 3)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Dropout(0.25),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(17, activation='sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# The arrays are passed as loaded (uint8 pixels, no rescaling is applied here).
model.fit(
    x_train,
    y_train,
    batch_size=128,
    epochs=3,
    verbose=1,
    validation_data=(x_valid, y_valid),
)

In [53]:
from sklearn.metrics import fbeta_score

# Per-tag scores for the held-out images.
p_valid = model.predict(x_valid, batch_size=128)
print(y_valid)
print(p_valid)

# A tag counts as predicted when its score exceeds 0.2; F-beta (beta=2) is
# averaged per sample.
valid_f2 = fbeta_score(y_valid, np.array(p_valid) > 0.2, beta=2, average='samples')
print('fbeta_score: {}'.format(valid_f2))

In [54]:
# Sample submission; its 'image_name' column drives the order in which test
# images are loaded below.
submission_df = pd.read_csv(
    '/kaggle/input/planet-understanding-the-amazon-from-space/sample_submission_v2.csv/sample_submission_v2.csv'
)

In [56]:
# Two directories hold the test JPEGs; the loading loop picks one per image
# from the first letter of the image name.
test_path_1 = '/kaggle/input/planets-dataset/planet/planet/test-jpg/'
test_path_2 = '/kaggle/input/planets-dataset/test-jpg-additional/test-jpg-additional/'

In [57]:
# Load the test images in submission order so predictions line up row-for-row
# with submission_df.
photo_test = []
for filename in submission_df['image_name']:
    # Names starting with 't' live in test_path_1 and names starting with 'f'
    # in test_path_2.
    if filename.startswith('t'):
        test_dir = test_path_1
    elif filename.startswith('f'):
        test_dir = test_path_2
    else:
        # Previously an unmatched name silently reused the image left over
        # from the prior iteration (or raised NameError on the first row),
        # which would misalign predictions; fail loudly instead.
        raise ValueError(
            'Unexpected image name {!r}: cannot choose a test directory'.format(filename)
        )
    img = load_img(test_dir + filename + '.jpg', target_size=(32, 32))
    photo_test.append(img_to_array(img, dtype='uint8'))

In [59]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

test_x = np.asarray(photo_test, dtype='uint8')

# The model was fit and validated on raw 0-255 pixel values (the training cell
# applies no rescaling), so the test images must be fed on the same scale.
# Rescaling by 1/255 only at inference time, as this cell did before, hands
# the network inputs far outside the range it was trained on. If input
# normalisation is ever added to training, mirror it here.
image_gen_test = ImageDataGenerator()
# shuffle=False keeps predictions in the same order as test_x.
test_data_gen = image_gen_test.flow(test_x, shuffle=False, batch_size=64)

In [60]:
# One row of per-tag scores per test image.
result = model.predict(test_data_gen)

# Column order follows tags_mapping's key order, i.e. the tag index order used
# when the training targets were encoded.
new_df = pd.DataFrame(result, columns=list(tags_mapping))

In [61]:
# Use the same 0.2 cut-off that the validation cell applies when computing the
# F-beta score; thresholding the submission at 0.5 would mean the reported
# validation score describes a different decision rule than the one submitted.
PRED_THRESHOLD = 0.2

tags = new_df.columns
# For each image, join the names of all tags whose score exceeds the cut-off.
pred_tags = new_df.apply(lambda row: ' '.join(tags[row > PRED_THRESHOLD]), axis=1)

pred_tag = pd.DataFrame(pred_tags, columns=['tags'])

In [63]:
# Overwrite the placeholder tags with the predictions (rows are matched on
# index) and write the file without the index column.
submission_df['tags'] = pred_tag['tags']
submission_df.to_csv('submission.csv', index=False)

submission_df.head()