In [None]:
#importing libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
from tensorflow import keras
from keras import optimizers
from tensorflow.keras.models import Sequential 
import cv2

In [None]:
# Root directory of the competition data, relative to the notebook's working directory.
path = "../input/planet-understanding-the-amazon-from-space/"
# Show the entries found in the dataset directory.
os.listdir(path)

In [None]:
# Load the training table. The location is built from `path` (the dataset
# root defined earlier) instead of a second, hard-coded absolute copy of the
# same directory, so the data location only has to be changed in one place.
# The CSV sits inside a directory that carries the same name as the file.
train_csv = os.path.join(path, "train_v2.csv", "train_v2.csv")
train_data = pd.read_csv(train_csv)
train_data.head()

In [None]:
# Number of missing values in each column of the training table.
train_data.isna().sum()

In [None]:
# Summary statistics for the columns of the training table.
train_data.describe()

In [None]:
# Column names, dtypes, non-null counts and memory usage of the training table.
train_data.info()

In [None]:
labels = set()


def split_tags(tags):
    """Add every whitespace-separated tag in ``tags`` to the global ``labels`` set.

    Args:
        tags: A string of tags separated by whitespace.

    Returns:
        None. The module-level ``labels`` set is updated in place.
    """
    labels.update(tags.split())


# Work on a copy so the raw table loaded from disk stays untouched.
train_df = train_data.copy()
for tag_string in train_df['tags']:
    split_tags(tag_string)

# Sort instead of relying on set iteration order: string hashing can differ
# between interpreter runs, so list(set) would give a label order (and hence
# a column / output-unit order) that is not reproducible across kernel restarts.
labels = sorted(labels)
print(labels)

In [None]:
# One-hot encode the tags: add one 0/1 indicator column per label to train_df.
for tag in labels:
    train_df[tag] = train_df['tags'].apply(
        lambda tag_string: int(tag in tag_string.split())
    )
train_df

In [None]:
from tensorflow.keras.layers import Dense, BatchNormalization, Conv2D, MaxPooling2D
from tensorflow.keras.layers import Dropout, Flatten
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, TensorBoard
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.metrics import fbeta_score

## DATA VISUALIZATION

In [None]:
# Bar chart of how many rows carry each label, sorted from rarest to most common.
label_counts = train_df[labels].sum().sort_values()
ax = label_counts.plot.bar()
ax.set_xlabel('Labels')
ax.set_ylabel('Count')
ax.set_title('Distribution of labels')

## MODEL DEVELOPMENT

In [None]:
def fbeta(y_true, y_pred, beta=2.0, threshold=0.5):
    """Per-sample F-beta score, usable as a Keras metric function.

    Predictions are binarised at ``threshold`` and the score is computed over
    the last (label) axis of each sample.

    Args:
        y_true: Tensor of 0/1 targets, labels on the last axis.
        y_pred: Tensor of predicted scores with the same shape as ``y_true``.
        beta: Weight of recall relative to precision (2.0 gives F2).
        threshold: Score above which a label counts as predicted.

    Returns:
        Tensor with one F-beta value per sample (last axis reduced).
    """
    y_true = tf.cast(y_true, tf.float32)
    y_hat = tf.cast(tf.cast(y_pred, tf.float32) > threshold, tf.float32)

    true_pos = tf.reduce_sum(y_true * y_hat, axis=-1)
    false_pos = tf.reduce_sum((1.0 - y_true) * y_hat, axis=-1)
    false_neg = tf.reduce_sum(y_true * (1.0 - y_hat), axis=-1)

    beta_sq = beta ** 2
    numerator = (1.0 + beta_sq) * true_pos
    denominator = numerator + beta_sq * false_neg + false_pos
    # Small constant avoids 0/0 when a sample has no true and no predicted labels.
    return numerator / (denominator + 1e-7)


def build_model(input_shape=(128, 128, 3), num_classes=17, learning_rate=1e-2):
    """Build and compile the multi-label CNN classifier.

    The network is a batch-normalised input followed by four stages of
    (Conv 3x3, Conv 3x3, MaxPool 2x2, Dropout 0.2) with 32, 64, 128 and 256
    filters, then Flatten, Dense(512), Dropout(0.5) and a sigmoid output
    layer with one unit per label.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
            The convolutions are unpadded, so the spatial size must be large
            enough to survive the four stages.
        num_classes: Number of sigmoid output units (one per label).
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()
    model.add(BatchNormalization(input_shape=input_shape))

    # Four identical convolutional stages that differ only in filter count.
    for filters in (32, 64, 128, 256):
        model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu'))
        model.add(Conv2D(filters, kernel_size=(3, 3), activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(Dropout(0.2))

    model.add(Flatten())
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(0.5))
    # Sigmoid (not softmax): each label is an independent yes/no decision.
    model.add(Dense(num_classes, activation='sigmoid'))

    # `lr` is a deprecated alias that newer Keras versions reject;
    # `learning_rate` is the supported keyword.
    opt = Adam(learning_rate=learning_rate)

    # The string 'fbeta' is not a metric identifier Keras can resolve, so the
    # metric is passed as a function. Its __name__ keeps it reported as 'fbeta'.
    model.compile(loss='binary_crossentropy',
                  optimizer=opt,
                  metrics=[fbeta])

    return model

In [None]:
# Instantiate the compiled CNN returned by build_model().
model = build_model()

In [None]:
# Load the sample submission. The location is built from `path` (the dataset
# root defined earlier) instead of a second, hard-coded absolute copy of the
# same directory. The CSV sits inside a directory named like the file itself.
sample_csv = os.path.join(path, "sample_submission_v2.csv", "sample_submission_v2.csv")
sample_submission = pd.read_csv(sample_csv)
# Append the .jpg extension to every image name.
sample_submission['image_name'] = sample_submission['image_name'].apply(lambda x: '{}.jpg'.format(x))
sample_submission.head()

In [None]:
# Keep only the image_name column of the first 40669 rows, re-indexed from 0.
# NOTE(review): 40669 is presumably the number of .jpg test images - confirm.
test_df = (
    sample_submission
    .iloc[:40669][['image_name']]
    .reset_index(drop=True)
)
test_df.head()

In [None]:
# Write test_df to submission.csv without the index column.
# NOTE(review): test_df holds only the 'image_name' column at this point, so
# the file contains no predicted tags - confirm predictions are added before submitting.
test_df.to_csv('submission.csv',index=False)