In [1]:
import numpy as np 
import pandas as pd 
import os
import gc
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from sklearn.metrics import fbeta_score
from tqdm import tqdm
import cv2
from PIL import Image
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D, BatchNormalization
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras import optimizers

from sklearn.model_selection import KFold, train_test_split
from sklearn.metrics import fbeta_score
import time
%matplotlib inline

In [2]:
# Load the training label table; later cells read its 'image_name' and 'tags' columns.
train_classes = pd.read_csv('../input/planets-dataset/planet/planet/train_classes.csv')
# (sample_submission.csv is read further down, right before the test images are loaded.)
train_classes.head()

In [3]:
# Number of rows in the label table (one row per labelled training image — TODO confirm no duplicates).
len(train_classes)

In [4]:
# Summary statistics for every column, including the non-numeric ones (include='all').
train_classes.describe(include='all')

In [5]:
# Append the '.jpg' extension so that image_name can be used directly as a file name.
# Names that already end in '.jpg' are left untouched: this cell mutates train_classes
# in place, and without the guard re-running it would produce 'name.jpg.jpg'.
train_classes['image_name'] = train_classes['image_name'].apply(
    lambda name: name if str(name).endswith('.jpg') else "{}{}".format(name, '.jpg')
)
train_classes.head()

In [6]:
# Build the vocabulary of distinct tags found in the 'tags' column.
# A set is used while collecting so that every tag is kept only once.
labels = set()
def splitting_tags(tags):
    """Add every whitespace-separated tag in `tags` to the module-level `labels` set.

    Parameters
    ----------
    tags : str
        Tag string of a single image, with tags separated by whitespace.

    Returns
    -------
    None
        The function is used only for its side effect on `labels`.
    """
    for tag in tags.split():
        labels.add(tag)

# Work on a copy so the one-hot columns added later do not modify train_classes itself.
train_classes1 = train_classes.copy()
train_classes1['tags'].apply(splitting_tags)

# Sort when converting to a list: the iteration order of a set of strings can change
# from one kernel start to the next, which would silently change the order of the
# one-hot columns (and therefore of the model outputs) between runs.
labels = sorted(labels)
print(labels)

In [7]:
# One-hot encode the tags: add one 0/1 indicator column per label to train_classes1.
for tag in labels:
    # Membership is tested against the split token list, so a label only matches
    # a complete tag and never a substring of a longer one.
    train_classes1[tag] = train_classes1['tags'].map(
        lambda tag_string: int(tag in tag_string.split())
    )

In [8]:
# Display the label table with the indicator columns added by the previous cell.
train_classes1

In [9]:
# Names of the label indicator columns: everything after the first two columns of the table.
columns = list(train_classes1.columns[2:])
columns

Build CNN

In [10]:
# Label matrix as a NumPy array: one row per image, one column per label indicator
# (all columns after the first two).
target = train_classes1.iloc[:, 2:].to_numpy()

In [11]:
# Read every training image from disk as a single-channel (grayscale) array.
# tqdm is already imported in the first cell; it is used here to show loading progress.
X_train = []

for img in tqdm(train_classes1['image_name'], desc='loading train images'):
    image_path = '../input/planets-dataset/planet/planet/train-jpg/{}'.format(img)
    pixels = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread does not raise on a missing or unreadable file, it returns None.
    # Fail here with the offending path instead of much later inside np.array / fit.
    if pixels is None:
        raise FileNotFoundError('could not read training image: {}'.format(image_path))
    X_train.append(pixels)

# `target` is already aligned row-for-row with train_classes1['image_name'], so the
# label matrix is stored once rather than once per image. It is kept wrapped in a list
# because a later cell unwraps it with `y_train[0]`.
y_train = [target]

In [12]:
# Stack the list of per-image arrays into a single ndarray
# (presumably (n_images, 256, 256), given the reshape further down — TODO confirm).
X_train = np.array(X_train)

In [13]:
# Shape of the stacked image array.
X_train.shape

In [14]:
# Use the label matrix itself as y_train. This is the same object the loading cell
# stored at y_train[0], but binding it from `target` keeps the cell idempotent:
# `y_train = y_train[0]` would shrink the labels to a single row if the cell ran twice.
y_train = target

In [15]:
# First row of the label matrix, i.e. the indicator vector of the first training image.
y_train[0]

In [16]:
# Shape of the image array before the channel axis is added in the next cell.
X_train.shape

In [17]:
# Add an explicit trailing channel axis so the array matches the model's
# input_shape of (256, 256, 1); the leading -1 lets NumPy infer the image count.
X_train = np.reshape(X_train, (-1, 256, 256, 1))

In [18]:
# Display the reshaped image array.
# NOTE(review): this echoes the whole array repr; X_train.shape or a small slice would be lighter.
X_train

In [19]:
import tensorflow as tf

def fbeta(y_true, y_pred, beta = 2, epsilon = 1e-4):
    """F-beta score computed per sample, for use as a Keras metric.

    Predictions are binarised at 0.5 before counting, and true positives,
    false positives and false negatives are summed along axis 1.

    Parameters
    ----------
    y_true : tensor
        Ground-truth indicators; cast to float32.
    y_pred : tensor
        Predicted scores; values strictly greater than 0.5 count as positive.
    beta : number, default 2
        Weight of recall relative to precision.
    epsilon : float, default 1e-4
        Small constant added to each denominator to avoid division by zero.

    Returns
    -------
    tensor
        One F-beta value per entry along axis 0 (no reduction over samples is done here).
    """
    truth = tf.cast(y_true, tf.float32)
    scores = tf.cast(y_pred, tf.float32)
    # Hard 0/1 decisions from the predicted scores.
    decisions = tf.cast(tf.greater(scores, tf.constant(0.5)), tf.float32)

    true_pos = tf.reduce_sum(truth * decisions, axis = 1)
    false_pos = tf.reduce_sum(decisions, axis = 1) - true_pos
    false_neg = tf.reduce_sum(truth, axis = 1) - true_pos

    precision = true_pos / (true_pos + false_pos + epsilon)
    recall = true_pos / (true_pos + false_neg + epsilon)

    beta_sq = beta**2
    numerator = (1 + beta_sq) * precision * recall
    denominator = beta_sq * precision + recall + epsilon
    return numerator / denominator

In [20]:
# Convolutional classifier for 256x256 single-channel images, built layer by layer.
# It has four convolutional stages (32, 64, 128, 256 filters), each ending in 2x2
# max-pooling and 10% dropout, followed by a dense head with sigmoid outputs.
cnn_model = Sequential()

# Stage 1: 32 filters.
cnn_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu', padding='same', input_shape=(256, 256, 1)))
cnn_model.add(Conv2D(32, kernel_size=(3, 3), activation='relu'))
cnn_model.add(MaxPooling2D(pool_size=(2, 2)))
cnn_model.add(Dropout(0.1))

# Stage 2: 64 filters.
cnn_model.add(Conv2D(64, kernel_size=(3, 3), activation='relu', padding='same'))
cnn_model.add(Conv2D(64, kernel_size=(3, 3), activation='relu'))
cnn_model.add(MaxPooling2D(pool_size=(2, 2)))
cnn_model.add(Dropout(0.1))

# Stage 3: 128 filters, three convolutions.
cnn_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu', padding='same'))
cnn_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
cnn_model.add(Conv2D(128, kernel_size=(3, 3), activation='relu'))
cnn_model.add(MaxPooling2D(pool_size=(2, 2)))
cnn_model.add(Dropout(0.1))

# Stage 4: 256 filters, three convolutions.
cnn_model.add(Conv2D(256, kernel_size=(3, 3), activation='relu', padding='same'))
cnn_model.add(Conv2D(256, kernel_size=(3, 3), activation='relu'))
cnn_model.add(Conv2D(256, kernel_size=(3, 3), activation='relu'))
cnn_model.add(MaxPooling2D(pool_size=(2, 2)))
cnn_model.add(Dropout(0.1))

# Classification head.
cnn_model.add(Flatten())
cnn_model.add(Dense(1024, activation='relu'))
# 17 independent sigmoid outputs — presumably one per label column; must equal len(columns).
cnn_model.add(Dense(17, activation='sigmoid'))

# Binary cross-entropy treats each output as its own yes/no decision; fbeta is tracked as a metric.
cnn_model.compile(loss='binary_crossentropy', optimizer='adam', metrics=[fbeta])
cnn_model.summary()



In [21]:
# Hold out 20% of the images (and their label rows) for validation; random_state fixes the shuffle.
X_train1, X_val, y_train1, y_val = train_test_split(X_train, y_train, test_size = 0.2, random_state = 1)

In [22]:
# Shapes of the four arrays produced by the split.
X_train1.shape, X_val.shape, y_train1.shape, y_val.shape

In [23]:
# Number of validation images and of validation label rows, shown side by side for comparison.
len(X_val), len(y_val)

In [None]:
# Train for 5 epochs in mini-batches of 128, evaluating on the held-out split after each epoch.
cnn_model.fit(
    x=X_train1,
    y=y_train1,
    validation_data=(X_val, y_val),
    batch_size=128,
    epochs=5,
    verbose=1,
)

In [None]:
# Shape of the full (pre-split) training image array.
X_train.shape

In [None]:
# Model outputs (sigmoid activations of the last layer) for the validation images.
# NOTE(review): y_pred is not used by any later cell visible here.
y_pred = cnn_model.predict(X_val, batch_size=128)

In [None]:
# The sample submission provides the list (and order) of test image names to predict.
sample_sub = pd.read_csv('../input/planets-dataset/planet/planet/sample_submission.csv')
X_test = []

for img2 in sample_sub['image_name']:
    test_image_path = '../input/planets-dataset/planet/planet/test-jpg/{}.jpg'.format(img2)
    test_pixels = cv2.imread(test_image_path, cv2.IMREAD_GRAYSCALE)
    # cv2.imread returns None instead of raising when the file is missing or unreadable.
    # Stop with the offending path rather than passing None entries on to the model.
    # NOTE(review): if sample_submission lists images stored outside test-jpg, this
    # now fails loudly on the first such name — confirm the dataset folder layout.
    if test_pixels is None:
        raise FileNotFoundError('could not read test image: {}'.format(test_image_path))
    X_test.append(test_pixels)


In [None]:
# X_test is a Python list of 2-D images. Stack it into one array and add the trailing
# channel axis so it has the same (n, 256, 256, 1) layout the model was trained on.
# Passing the raw list would be interpreted as many separate model inputs.
y_ = cnn_model.predict(np.asarray(X_test).reshape(-1, 256, 256, 1), batch_size=128)

In [None]:
# Wrap the test predictions in a DataFrame whose columns carry the label names.
y_df = pd.DataFrame(y_, columns = columns)

In [None]:
# Turn each row of scores into a tag string: keep the labels whose score exceeds 0.5
# and join them with a single space. Joining with '' would glue the tags together
# (e.g. 'clearprimary'), so the individual tags could not be recovered.
y_df = y_df.apply(lambda x: ' '.join(np.array(columns)[x.to_numpy() > 0.5]), axis = 1)

In [None]:
# Assemble the submission table: image names from the sample submission next to the
# predicted tag strings, aligned on the row index.
subDf = pd.concat([sample_sub['image_name'], y_df.rename('tags')], axis=1)
subDf.head()

In [None]:
# Write the submission file to the working directory, without the DataFrame index column.
subDf.to_csv('submissionDf.csv', index = False)