In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [2]:
import matplotlib.pyplot as plt
import cv2
import tensorflow as tf
from sklearn.preprocessing import MultiLabelBinarizer
from tqdm import tqdm
import imageio
import gc

In [3]:
# Read sample_submission.csv and train_classes.csv from the planet dataset folder.
sample_sub, train_classes = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/planets-dataset/planet/planet/sample_submission.csv',
        '../input/planets-dataset/planet/planet/train_classes.csv',
    )
)

sample_sub.head()

In [4]:
# Preview the first rows of train_classes (loaded from train_classes.csv).
train_classes.head()

In [5]:
# Break each space-separated `tags` string into a list of individual tags and
# store the lists in a new `split_tags` column.
train_classes['split_tags'] = pd.Series(
    [tag_string.split(' ') for tag_string in train_classes['tags']],
    index=train_classes.index,
)

train_classes.head(3)

In [6]:
# Learn the tag vocabulary and encode every image's tag list as a 0/1 indicator
# row in a single fit_transform call.
multibzr = MultiLabelBinarizer()
y_train = multibzr.fit_transform(train_classes['split_tags'])

y_train[:3]

In [7]:
# Count the files in each image directory; these counts are the upper bounds of
# the last chunk in the chunked loading cells.
train_len, test_len, test_add_len = (
    len(os.listdir(image_dir))
    for image_dir in (
        '../input/planets-dataset/planet/planet/train-jpg',
        '../input/planets-dataset/planet/planet/test-jpg',
        '../input/planets-dataset/test-jpg-additional/test-jpg-additional',
    )
)

print(f'Train files {train_len}')
print(f'Test files {test_len}')
print(f'Test_add files {test_add_len}')

In [8]:
# gc.collect()
# First training chunk: train_0.jpg to train_9999.jpg, each resized to 224x224.
# The inner generator is lazy, so the tqdm bar advances as files are actually read.
train_images_10k = np.array([
    cv2.resize(imageio.imread(jpg_path), (224, 224))
    for jpg_path in (
        f'../input/planets-dataset/planet/planet/train-jpg/train_{idx}.jpg'
        for idx in tqdm(range(10000), miniters=1000)
    )
])

# gc.collect()

# im = imageio.imread(f'../input/planets-dataset/planet/planet/train-jpg/train_{1}.jpg')

# plt.imshow(im)
# plt.show()

In [9]:
# Sanity-check the shape of the first image chunk.
print(train_images_10k.shape)

In [10]:
# Shape of the label indicator matrix produced by multibzr.transform.
y_train.shape

In [11]:
# gc.collect()
from tensorflow_addons.metrics import FBetaScore

# F-beta metric with beta=2 and weighted averaging, sized to one class per
# column of y_train; 0.2 is the threshold handed to FBetaScore.
fbeta = FBetaScore(
    name='fbeta',
    num_classes=y_train.shape[1],
    beta=2.0,
    average='weighted',
    threshold=0.2,
)

def get_model(input_shape=(224, 224, 3), fine_tune_at=165):
    """Build and compile a ResNet50-based multi-label classifier.

    An ImageNet-initialised ResNet50 (without its classification top) is
    applied to inputs run through ``resnet50.preprocess_input``. Every
    backbone layer before index ``fine_tune_at`` is frozen; the remaining
    layers stay trainable. The head is global average pooling followed by
    Dense(512, relu) -> BatchNormalization -> Dropout(0.6) ->
    Dense(128, relu) -> BatchNormalization -> Dropout(0.6), ending in a
    sigmoid Dense layer with one unit per column of the module-level
    ``y_train``.

    Args:
        input_shape: Shape of one input image, used for both the Input layer
            and the ResNet50 backbone. Defaults to ``(224, 224, 3)``.
        fine_tune_at: Index into ``backbone.layers``; layers before this index
            are frozen. Defaults to ``165``.

    Returns:
        A ``tf.keras`` Model compiled with binary cross-entropy loss, the
        ``'adam'`` optimizer, and the module-level ``fbeta`` metric plus AUC.
    """
    inputs = tf.keras.layers.Input(shape=input_shape)

    backbone = tf.keras.applications.resnet.ResNet50(input_shape=input_shape,
                                                     include_top=False,
                                                     weights='imagenet')
    # Report the backbone depth so `fine_tune_at` can be judged against it.
    print("Number of layers in the base model: ", len(backbone.layers))

    # Freeze all the layers before the `fine_tune_at` layer.
    for layer in backbone.layers[:fine_tune_at]:
        layer.trainable = False

    # Keep the backbone model and its output tensor under separate names
    # (the original rebound `base_model` from a Model to a tensor).
    preprocessed = tf.keras.applications.resnet50.preprocess_input(inputs)
    features = backbone(preprocessed)

    # Classification head.
    hidden = tf.keras.layers.GlobalAveragePooling2D()(features)
    hidden = tf.keras.layers.Dense(512, activation='relu')(hidden)
    hidden = tf.keras.layers.BatchNormalization()(hidden)
    hidden = tf.keras.layers.Dropout(0.6)(hidden)
    hidden = tf.keras.layers.Dense(128, activation='relu')(hidden)
    hidden = tf.keras.layers.BatchNormalization()(hidden)
    hidden = tf.keras.layers.Dropout(0.6)(hidden)

    # One independent sigmoid output per label column (multi-label setup).
    outputs = tf.keras.layers.Dense(y_train.shape[1], activation='sigmoid')(hidden)

    model = tf.keras.models.Model(inputs=[inputs], outputs=[outputs])

    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=[fbeta, tf.keras.metrics.AUC()])

    return model

# Build the compiled network once and print its layer summary.
model = get_model()
model.summary()

# gc.collect()

In [12]:
# Number of epochs used for every per-chunk fit() call.
EPOCHS = 10

# First training round: the first image chunk with label rows 0-9999.
history = model.fit(x=train_images_10k, y=y_train[:10000], epochs=EPOCHS)

In [13]:
# Drop the first training chunk and run a collection, presumably to free its
# memory before the next chunk is loaded.
del train_images_10k
gc.collect()

In [14]:
# Second training chunk: train_10000.jpg to train_19999.jpg, resized to 224x224.
train_images_10k2 = np.array([
    cv2.resize(imageio.imread(jpg_path), (224, 224))
    for jpg_path in (
        f'../input/planets-dataset/planet/planet/train-jpg/train_{idx}.jpg'
        for idx in tqdm(range(10000, 20000), miniters=1000)
    )
])

# Continue fitting the same model on this chunk and the label rows at the same positions.
history = model.fit(x=train_images_10k2, y=y_train[10000:20000], epochs=EPOCHS)

# Release the chunk before the next one is loaded.
del train_images_10k2
gc.collect()

In [15]:
# Third training chunk: train_20000.jpg to train_29999.jpg, resized to 224x224.
train_images_10k3 = np.array([
    cv2.resize(imageio.imread(jpg_path), (224, 224))
    for jpg_path in (
        f'../input/planets-dataset/planet/planet/train-jpg/train_{idx}.jpg'
        for idx in tqdm(range(20000, 30000), miniters=1000)
    )
])

# Continue fitting the same model on this chunk and the label rows at the same positions.
history = model.fit(x=train_images_10k3, y=y_train[20000:30000], epochs=EPOCHS)

# Release the chunk before the next one is loaded.
del train_images_10k3
gc.collect()

In [16]:
# Final training chunk: train_30000.jpg up to (but excluding) index train_len.
train_images_10k4 = np.array([
    cv2.resize(imageio.imread(jpg_path), (224, 224))
    for jpg_path in (
        f'../input/planets-dataset/planet/planet/train-jpg/train_{idx}.jpg'
        for idx in tqdm(range(30000, train_len), miniters=1000)
    )
])

# Only this last round passes validation_split=0.1 to fit().
history = model.fit(x=train_images_10k4,
                    y=y_train[30000:],
                    epochs=EPOCHS,
                    validation_split=0.1)

# Release the chunk now that training is finished.
del train_images_10k4
gc.collect()

In [17]:
# test_predictions = model.predict(test_ds)
# additional_predictions = model.predict(additional_ds)

# Per-tag probability cut-offs applied to the sigmoid outputs when decoding
# predictions into tags.
threshold = {
    'agriculture': 0.164,
    'artisinal_mine': 0.114,
    'bare_ground': 0.138,
    'blooming': 0.168,
    'blow_down': 0.2,
    'clear': 0.13,
    'cloudy': 0.076,
    'conventional_mine': 0.1,
    'cultivation': 0.204,
    'habitation': 0.17,
    'haze': 0.204,
    'partly_cloudy': 0.112,
    'primary': 0.204,
    'road': 0.156,
    'selective_logging': 0.154,
    'slash_burn': 0.38,
    'water': 0.182,
}

# Build the vector in the column order of the binarizer instead of relying on
# the dict's insertion order happening to match it. A tag present in
# multibzr.classes_ but missing from `threshold` now fails loudly (KeyError)
# rather than silently shifting every later cut-off onto the wrong column.
thresholds = np.array([threshold[label] for label in multibzr.classes_], dtype=float)

In [18]:
# Tag names in the column order used by the binarizer.
labels = [*multibzr.classes_]
labels

In [19]:
def get_tag(prediction):
    """Return the items of `prediction` joined into one space-separated string."""
    separator = ' '
    return separator.join(prediction)

# final_test_predictions = list(map(get_tag, test_predictions))
# final_additional_predictions = list(map(get_tag, additional_predictions))

In [20]:
# First test chunk: test_0.jpg to test_9999.jpg, resized to 224x224.
test_images_10k = np.array([
    cv2.resize(imageio.imread(jpg_path), (224, 224))
    for jpg_path in (
        f'../input/planets-dataset/planet/planet/test-jpg/test_{idx}.jpg'
        for idx in tqdm(range(10000), miniters=1000)
    )
])

# Keep only the predictions; the images are released straight away.
pred_10k = model.predict(x=test_images_10k)

del test_images_10k
gc.collect()

In [21]:
# Second test chunk: test_10000.jpg to test_19999.jpg, resized to 224x224.
test_images_10k2 = np.array([
    cv2.resize(imageio.imread(jpg_path), (224, 224))
    for jpg_path in (
        f'../input/planets-dataset/planet/planet/test-jpg/test_{idx}.jpg'
        for idx in tqdm(range(10000, 20000), miniters=1000)
    )
])

# Keep only the predictions; the images are released straight away.
pred_10k2 = model.predict(x=test_images_10k2)

del test_images_10k2
gc.collect()

In [22]:
# Third test chunk: test_20000.jpg to test_29999.jpg, resized to 224x224.
test_images_10k3 = np.array([
    cv2.resize(imageio.imread(jpg_path), (224, 224))
    for jpg_path in (
        f'../input/planets-dataset/planet/planet/test-jpg/test_{idx}.jpg'
        for idx in tqdm(range(20000, 30000), miniters=1000)
    )
])

# Keep only the predictions; the images are released straight away.
pred_10k3 = model.predict(x=test_images_10k3)

del test_images_10k3
gc.collect()

In [23]:
# Last test-jpg chunk: test_30000.jpg up to (but excluding) index test_len.
test_images_10k4 = np.array([
    cv2.resize(imageio.imread(jpg_path), (224, 224))
    for jpg_path in (
        f'../input/planets-dataset/planet/planet/test-jpg/test_{idx}.jpg'
        for idx in tqdm(range(30000, test_len), miniters=1000)
    )
])

# Keep only the predictions; the images are released straight away.
pred_10k4 = model.predict(x=test_images_10k4)

del test_images_10k4
gc.collect()

In [24]:
# First chunk of the additional test set: file_0.jpg to file_9999.jpg, resized to 224x224.
test_images_10kadd = np.array([
    cv2.resize(imageio.imread(jpg_path), (224, 224))
    for jpg_path in (
        f'../input/planets-dataset/test-jpg-additional/test-jpg-additional/file_{idx}.jpg'
        for idx in tqdm(range(10000), miniters=1000)
    )
])

# Keep only the predictions; the images are released straight away.
pred_10kadd = model.predict(x=test_images_10kadd)

del test_images_10kadd
gc.collect()

In [25]:
# Remaining additional test images: file_10000.jpg up to (but excluding) index test_add_len.
test_images_10kadd1 = np.array([
    cv2.resize(imageio.imread(jpg_path), (224, 224))
    for jpg_path in (
        f'../input/planets-dataset/test-jpg-additional/test-jpg-additional/file_{idx}.jpg'
        for idx in tqdm(range(10000, test_add_len), miniters=1000)
    )
])

# Keep only the predictions; the images are released straight away.
pred_10kadd1 = model.predict(x=test_images_10kadd1)

del test_images_10kadd1
gc.collect()

In [26]:
# Stack the per-chunk prediction arrays row-wise, in the order they were
# produced: the four test-jpg chunks first, then the two additional chunks.
preds = np.concatenate([
    pred_10k,
    pred_10k2,
    pred_10k3,
    pred_10k4,
    pred_10kadd,
    pred_10kadd1,
])

print(preds.shape)

In [27]:
# final_test_predictions = list(map(get_tag, preds))
# Dry run on the first ten predictions: apply the per-tag thresholds, decode
# the 0/1 rows back to tag tuples, and collapse any row containing 'cloudy'
# to the single tag ('cloudy',).
predos = [
    ('cloudy',) if 'cloudy' in tags else tags
    for tags in multibzr.inverse_transform((preds[:10] >= thresholds).astype('int'))
]

# Join each tuple into the space-separated string form used for the tags column.
seros = pd.Series(predos).apply(' '.join)
seros

In [28]:
# Apply the per-tag thresholds to every prediction, decode the 0/1 rows back to
# tag tuples, and collapse any row containing 'cloudy' to ('cloudy',).
final_predictions = [
    ('cloudy',) if 'cloudy' in tags else tags
    for tags in multibzr.inverse_transform((preds >= thresholds).astype('int'))
]

# Store the tags as space-separated strings, row by row in prediction order.
sample_sub['tags'] = [' '.join(tags) for tags in final_predictions]

In [29]:
# Write the submission frame (without the index column) to the Kaggle working
# directory, then preview its first ten rows.
sample_sub.to_csv('/kaggle/working/tolu_sub.csv', index=False)

sample_sub.head(10)

In [30]:
# Show the last ten rows of the submission frame.
sample_sub.tail(10)

In [35]:
# Reload two image chunks for the evaluate() calls in the following cells.
# `test_images_10kadd1` is already deleted right after its predict() call, so an
# unconditional `del` raises NameError under Restart & Run All. Only drop the
# name when it is still bound (e.g. when this cell is re-run on its own).
try:
    del test_images_10kadd1
except NameError:
    pass
gc.collect()

# Additional test images file_10000.jpg up to (but excluding) index test_add_len.
test_images_10kadd1 = np.array([cv2.resize(imageio.imread(f'../input/planets-dataset/test-jpg-additional/test-jpg-additional/file_{i}.jpg'),
                     (224, 224)) for i in tqdm(range(10000, test_add_len), miniters=1000)])

# First 10,000 test-jpg images (test_0.jpg to test_9999.jpg).
test_images_10k = np.array([cv2.resize(imageio.imread(f'../input/planets-dataset/planet/planet/test-jpg/test_{i}.jpg'),
                     (224, 224)) for i in tqdm(range(10000), miniters=1000)])

In [36]:
# NOTE(review): the "labels" passed here are preds[:10000], i.e. the model's own
# predict() outputs for these same images, not ground-truth tags - the reported
# loss/metrics only measure self-consistency. Confirm this is intentional.
model.evaluate(test_images_10k, preds[:10000])

In [37]:
# Compare the reloaded additional-test chunk with the matching tail of `preds`.
# The slice start is derived from the chunk length instead of the hard-coded
# 10522, so it stays aligned if the number of additional test files changes.
# NOTE(review): the "labels" are the model's own predict() outputs for these
# images, not ground-truth tags - the reported metrics only measure
# self-consistency. Confirm this is intentional.
model.evaluate(test_images_10kadd1, preds[len(preds) - len(test_images_10kadd1):])