In [22]:
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import numpy as np
import os
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.preprocessing import MultiLabelBinarizer

In [23]:
!ls -lha ../input
!ls -lha ../input/planets-dataset/planet

total 8.0K
drwxr-xr-x 4 root   root    4.0K Mar 21 22:07 .
drwxr-xr-x 5 root   root    4.0K Mar 21 22:07 ..
drwxr-xr-x 4 nobody nogroup    0 Mar  6 00:25 planets-dataset
drwxr-xr-x 2 nobody nogroup    0 Mar 21 20:38 test-mappimg
total 0
drwxr-xr-x 3 nobody nogroup 0 Mar  6 00:21 .
drwxr-xr-x 4 nobody nogroup 0 Mar  6 00:25 ..
drwxr-xr-x 4 nobody nogroup 0 Mar  6 00:25 planet


In [24]:
PLANET_KAGGLE_ROOT = os.path.abspath("../input/planets-dataset/planet/planet")
PLANET_KAGGLE_JPEG_DIR = os.path.join(PLANET_KAGGLE_ROOT, 'train-jpg')
PLANET_KAGGLE_TEST_JPEG_DIR=os.path.join(PLANET_KAGGLE_ROOT, 'test-jpg')
PLANET_KAGGLE_LABEL_CSV = os.path.join(PLANET_KAGGLE_ROOT, 'train_classes.csv')
PLANET_KAGGLE_submission_CSV = os.path.join(PLANET_KAGGLE_ROOT, 'sample_submission.csv')
PLANET_KAGGLE_TEST_MAPPING_CSV=os.path.abspath("../input/test-mapping/test_v2_file_mapping.csv")
assert os.path.exists(PLANET_KAGGLE_ROOT)
assert os.path.exists(PLANET_KAGGLE_JPEG_DIR)
assert os.path.exists(PLANET_KAGGLE_LABEL_CSV)

In [25]:
labels_df = pd.read_csv(PLANET_KAGGLE_LABEL_CSV)
labels_df.head()

Unnamed: 0,image_name,tags
0,train_0,haze primary
1,train_1,agriculture clear primary water
2,train_2,clear primary
3,train_3,clear primary
4,train_4,agriculture clear habitation primary road


In [26]:
submission_df = pd.read_csv(PLANET_KAGGLE_submission_CSV)
submission_df.head()

Unnamed: 0,image_name,tags
0,test_0,primary clear agriculture road water
1,test_1,primary clear agriculture road water
2,test_2,primary clear agriculture road water
3,test_3,primary clear agriculture road water
4,test_4,primary clear agriculture road water


In [27]:

# Add the .jpg extension to the filenames in the image_name column
labels_df['image_name'] = labels_df['image_name'].apply(lambda x: x + '.jpg')
labels_df.head()

Unnamed: 0,image_name,tags
0,train_0.jpg,haze primary
1,train_1.jpg,agriculture clear primary water
2,train_2.jpg,clear primary
3,train_3.jpg,clear primary
4,train_4.jpg,agriculture clear habitation primary road


In [28]:
from sklearn.model_selection import train_test_split
train_set,val_set=train_test_split(labels_df, test_size=0.2, random_state=42)

In [29]:
test_mapping=pd.read_csv("/kaggle/input/test-mappimg/test_v2_file_mapping.csv")
test_mapping.head()

Unnamed: 0,old,new
0,file_4158.tif,file_18830.tif
1,file_1668.tif,file_19164.tif
2,file_2411.tif,file_15749.tif
3,file_16047.tif,file_7531.tif
4,file_1271.tif,file_18347.tif


In [30]:
# Create a list of all unique tags
tags = set()
for s in labels_df['tags'].str.split():
    tags.update(s)

In [31]:
# Convert the tags into binary labels
mlb = MultiLabelBinarizer()
train_labels = mlb.fit_transform(train_set['tags'].str.split())
validation_labels=mlb.fit_transform(val_set['tags'].str.split())
num_classes = len(tags)
print(num_classes)
train_labels
print(validation_labels)

17
[[0 0 0 ... 0 0 0]
 [0 0 0 ... 0 0 1]
 [0 0 0 ... 0 0 1]
 ...
 [0 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 0]
 [1 0 0 ... 0 0 1]]


In [32]:
# Set the image size and batch size
img_size = 256
batch_size = 32

# Create an ImageDataGenerator for data augmentation
train_datagen = ImageDataGenerator(rescale=1./255,
                                   rotation_range=30,
                                   zoom_range=0.2,
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   fill_mode='nearest')

# Generate the train and validation datasets
train_generator = train_datagen.flow_from_dataframe(dataframe=labels_df,
                                                    directory=PLANET_KAGGLE_JPEG_DIR,
                                                    x_col='image_name',
                                                    y_col='tags',
                                                    batch_size=batch_size,
                                                    seed=42,
                                                    shuffle=True,
                                                    class_mode='categorical',
                                                    target_size=(img_size, img_size))

In [33]:
from PIL import Image

# Load an example image from your dataset
img = Image.open('/kaggle/input/planets-dataset/planet/planet/train-jpg/train_1.jpg')

# Get the size of the image
img_size = img.size
print(img_size)


(256, 256)


In [34]:
# Create an ImageDataGenerator object with data augmentation settings
datagen = ImageDataGenerator(
    rescale=1./225,
    dtype='float32',
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True
)

In [35]:
# Generate batches of augmented images from the train dataframe
train_generator = datagen.flow_from_dataframe(
    dataframe=train_set,
    directory=PLANET_KAGGLE_JPEG_DIR,
    x_col='image_name',
    y_col='tags',
    subset='training',
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode='categorical',  # for multiclass classification
    target_size=(256, 256)
)

Found 32383 validated image filenames belonging to 418 classes.


test_generator = test_datagen.flow_from_dataframe(
    dataframe=test_set,
    directory=PLANET_KAGGLE_JPEG_DIR,
    x_col='image_name',
    y_col='tags',
    subset='training',
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode='categorical',  # for multiclass classification
    target_size=(img_size, img_size)
)

In [36]:
# Generate the test dataset
val_datagen = ImageDataGenerator(rescale=1./255)
val_generator = val_datagen.flow_from_dataframe(dataframe=train_set,
                                                  directory=PLANET_KAGGLE_JPEG_DIR,
                                                  x_col='image_name',
                                                  y_col="tags",
                                                  batch_size=32,
                                                  seed=42,
                                                  shuffle=False,
                                                  class_mode="categorical",
                                                  target_size=(256, 256))

Found 32383 validated image filenames belonging to 418 classes.


In [37]:
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(256, 256, 3)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(256, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(512, activation='relu'),
    layers.Dense(num_classes, activation='sigmoid')
])

In [38]:
# Compile the model with the appropriate loss and optimizer
model.compile(loss=tf.keras.losses.categorical_crossentropy,
              optimizer=Adam(0.001),
              metrics=['accuracy'])

In [39]:
import math

batch_size = 32
steps_per_epoch = math.ceil(len(train_set) * 0.8 / batch_size)
validation_steps = math.ceil(len(val_set) * 0.2 / batch_size)

model.fit(
    train_generator,
    epochs=10,
    validation_data=val_generator,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps
)

Epoch 1/10


InvalidArgumentError: Graph execution error:

Detected at node 'categorical_crossentropy/softmax_cross_entropy_with_logits' defined at (most recent call last):
    File "/opt/conda/lib/python3.7/runpy.py", line 193, in _run_module_as_main
      "__main__", mod_spec)
    File "/opt/conda/lib/python3.7/runpy.py", line 85, in _run_code
      exec(code, run_globals)
    File "/opt/conda/lib/python3.7/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/opt/conda/lib/python3.7/site-packages/traitlets/config/application.py", line 1041, in launch_instance
      app.start()
    File "/opt/conda/lib/python3.7/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/opt/conda/lib/python3.7/site-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/opt/conda/lib/python3.7/asyncio/base_events.py", line 541, in run_forever
      self._run_once()
    File "/opt/conda/lib/python3.7/asyncio/base_events.py", line 1786, in _run_once
      handle._run()
    File "/opt/conda/lib/python3.7/asyncio/events.py", line 88, in _run
      self._context.run(self._callback, *self._args)
    File "/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 510, in dispatch_queue
      await self.process_one()
    File "/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 499, in process_one
      await dispatch(*args)
    File "/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 406, in dispatch_shell
      await result
    File "/opt/conda/lib/python3.7/site-packages/ipykernel/kernelbase.py", line 730, in execute_request
      reply_content = await reply_content
    File "/opt/conda/lib/python3.7/site-packages/ipykernel/ipkernel.py", line 387, in do_execute
      cell_id=cell_id,
    File "/opt/conda/lib/python3.7/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 2976, in run_cell
      raw_cell, store_history, silent, shell_futures, cell_id
    File "/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3030, in _run_cell
      return runner(coro)
    File "/opt/conda/lib/python3.7/site-packages/IPython/core/async_helpers.py", line 78, in _pseudo_sync_runner
      coro.send(None)
    File "/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3258, in run_cell_async
      interactivity=interactivity, compiler=compiler, result=result)
    File "/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3473, in run_ast_nodes
      if (await self.run_code(code, result,  async_=asy)):
    File "/opt/conda/lib/python3.7/site-packages/IPython/core/interactiveshell.py", line 3553, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_22/1381721746.py", line 12, in <module>
      validation_steps=validation_steps
    File "/opt/conda/lib/python3.7/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/lib/python3.7/site-packages/keras/engine/training.py", line 1650, in fit
      tmp_logs = self.train_function(iterator)
    File "/opt/conda/lib/python3.7/site-packages/keras/engine/training.py", line 1249, in train_function
      return step_function(self, iterator)
    File "/opt/conda/lib/python3.7/site-packages/keras/engine/training.py", line 1233, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/conda/lib/python3.7/site-packages/keras/engine/training.py", line 1222, in run_step
      outputs = model.train_step(data)
    File "/opt/conda/lib/python3.7/site-packages/keras/engine/training.py", line 1024, in train_step
      loss = self.compute_loss(x, y, y_pred, sample_weight)
    File "/opt/conda/lib/python3.7/site-packages/keras/engine/training.py", line 1083, in compute_loss
      y, y_pred, sample_weight, regularization_losses=self.losses
    File "/opt/conda/lib/python3.7/site-packages/keras/engine/compile_utils.py", line 265, in __call__
      loss_value = loss_obj(y_t, y_p, sample_weight=sw)
    File "/opt/conda/lib/python3.7/site-packages/keras/losses.py", line 152, in __call__
      losses = call_fn(y_true, y_pred)
    File "/opt/conda/lib/python3.7/site-packages/keras/losses.py", line 284, in call
      return ag_fn(y_true, y_pred, **self._fn_kwargs)
    File "/opt/conda/lib/python3.7/site-packages/keras/losses.py", line 2005, in categorical_crossentropy
      y_true, y_pred, from_logits=from_logits, axis=axis
    File "/opt/conda/lib/python3.7/site-packages/keras/backend.py", line 5539, in categorical_crossentropy
      labels=target, logits=output, axis=axis
Node: 'categorical_crossentropy/softmax_cross_entropy_with_logits'
logits and labels must be broadcastable: logits_size=[32,17] labels_size=[32,418]
	 [[{{node categorical_crossentropy/softmax_cross_entropy_with_logits}}]] [Op:__inference_train_function_4629]

In [None]:
from keras.models import Model
from tensorflow.keras.applications.vgg16 import VGG16

# load pre-trained VGG16 model
model = VGG16(weights='imagenet', include_top=False, input_shape=(256, 256, 3))

# freeze all layers in the model
for layer in model.layers:
    layer.trainable = False
    
# add custom layers on top of the pre-trained model
x = layers.Flatten()(model.output)
x = layers.Dense(256, activation='relu')(x)
x = layers.Dropout(0.5)(x)
x = layers.Dense(num_classes, activation='softmax')(x)

# create the final model
model = Model(model.input, x)


In [None]:
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.fit(train_generator, epochs=10, validation_data=val_generator)


In [40]:


conv_base = VGG16(weights='imagenet', include_top=False, input_shape=(150, 150, 3))

model = models.Sequential()
model.add(conv_base)
model.add(layers.GlobalAveragePooling2D())
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

conv_base.trainable = False

model.compile(optimizer='rmsprop', loss='binary_crossentropy', metrics=['acc'])

In [None]:
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

model.fit(train_generator, epochs=10, validation_data=(val_generator), callbacks=[early_stop])

Epoch 1/10

In [None]:
train_generator = datagen.flow_from_dataframe(
    dataframe=train_set,
    directory=PLANET_KAGGLE_JPEG_DIR,
    x_col='image_name',
    y_col='tags',
    subset='training',
    batch_size=32,
    seed=42,
    shuffle=True,
    class_mode='categorical',  # for multiclass classification
    target_size=(256, 256)
)