In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import os
import gc

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import datasets, layers, models
from keras.layers import Dense, Dropout, Conv2D, MaxPooling2D, Flatten, GlobalMaxPooling2D, BatchNormalization
from keras.utils import to_categorical
from keras.regularizers import l2
from tensorflow.keras.models import Sequential 

from tensorflow.keras.preprocessing.image import ImageDataGenerator

import keras as k
from keras import backend as K
from keras.models import Sequential

import cv2
from tqdm import tqdm
from collections import Counter

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.metrics import fbeta_score

import plotly.express as px

# Directory the notebook reads its input from; listing it shows the entries it contains.
path = '../input/planets-dataset/'
os.listdir(path)

In [None]:
# Locations of the label CSVs and the image folders.
# All four entries sit under the same dataset folder, so they are built from a
# single relative root. This removes the two path typos ('../kaggle/input' and
# ',,/input') and keeps every entry in the same '../input/...' form.
planet_dir = '../input/planets-dataset/planet/planet'
train_path = f'{planet_dir}/train_classes.csv'
test_path = f'{planet_dir}/sample_submission.csv'
train_images = f'{planet_dir}/train-jpg'
test_images = f'{planet_dir}/test-jpg'


In [None]:
#train_path = '/kaggle/input/planet-understanding-the-amazon-from-space/train_v2.csv/train_v2.csv'
#test_path = '/kaggle/input/planet-understanding-the-amazon-from-space/test_v2_file_mapping.csv/test_v2_file_mapping.csv'
#submission_path = '/kaggle/input/planet-understanding-the-amazon-from-space/sample_submission_v2.csv/sample_submission_v2.csv'

In [None]:
# Read the training labels and the sample-submission file into DataFrames.
train_df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/planets-dataset/planet/planet/train_classes.csv'
)
test_df = pd.read_csv(
    filepath_or_buffer='/kaggle/input/planets-dataset/planet/planet/sample_submission.csv'
)



In [None]:
# Preview the first rows of the training table.
train_df.head()

In [None]:
# Preview the first rows of the test/submission table.
test_df.head()

In [None]:
# Report the (rows, columns) shape of each table, training first.
for frame in (train_df, test_df):
    print(frame.shape)


In [None]:
# Collect every distinct tag, in order of first appearance.
# split() with no argument tokenises the tag string the same way the one-hot
# encoding step does, and it cannot yield an empty-string label when a row
# contains repeated or trailing spaces (split(' ') can).
# dict.fromkeys drops duplicates while preserving insertion order.
label_list = list(dict.fromkeys(
    label
    for tag_split in train_df.tags.to_numpy()
    for label in tag_split.split()
))

In [None]:
# Number of distinct tags collected in label_list.
print(len(label_list))

In [None]:
# Map a positional index (0, 1, 2, ...) to each label.
# enumerate() sizes the mapping from label_list itself, so no label is
# silently dropped if the number of distinct tags is not exactly 17.
labels_dict = dict(enumerate(label_list))
labels_dict

In [None]:
# Add one 0/1 indicator column per label: 1 when the label is among the
# whitespace-separated tags of the row, 0 otherwise.
for tag in label_list:
    has_tag = train_df['tags'].apply(
        lambda row_tags, wanted=tag: int(wanted in row_tags.split())
    )
    train_df[tag] = has_tag

train_df.head()

In [None]:
# Total of each label indicator column, largest first.
label_counts = train_df[label_list].sum()
label_counts.sort_values(ascending=False)

In [None]:
# Run a garbage-collection pass before building the image generators.
gc.collect()

In [None]:
# Target columns handed to the generators as y_col: the label indicator columns.
# They are taken from label_list instead of a positional slice of
# train_df.columns, so the targets do not silently change if train_df has
# extra or reordered columns.
train_df_columns = list(label_list)
train_df_columns

In [None]:
# Append the '.jpg' extension so image_name holds file names (it is used as
# x_col when the generators look images up in the image directory).
# The endswith() check makes the cell safe to re-run: names that already carry
# the extension are left alone instead of becoming 'name.jpg.jpg'.
train_df['image_name'] = train_df['image_name'].apply(
    lambda name: name if name.endswith('.jpg') else f'{name}.jpg'
)
train_df.head()

In [None]:
# Training pipeline: scale pixel values by 1/255 and apply random augmentation
# (rotation, shifts, shear, zoom, horizontal flip). 20% of the rows are held
# out for validation.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

# Validation pipeline: rescale only. Validation images must not be randomly
# distorted, otherwise the validation metrics measure augmented data rather
# than the real images and change from epoch to epoch.
# NOTE(review): the 'training'/'validation' subsets are expected to be sliced
# from the dataframe by row position (not drawn at random), so using the same
# validation_split here selects exactly the rows the training generator leaves
# out — confirm if the Keras version changes.
val_datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# Batches of augmented training images with their multi-hot label vectors.
# class_mode='raw' passes the y_col values through unchanged.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=train_images,
    x_col='image_name',
    y_col=train_df_columns,
    subset='training',
    batch_size=64,
    seed=42,
    shuffle=True,
    class_mode='raw',
    target_size=(150, 150))

# Validation data: the 20% of train_df held out by validation_split.
# shuffle=False keeps the batch order fixed so results are repeatable and
# predictions can be lined up with the dataframe rows.
val_generator = val_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=train_images,
    x_col='image_name',
    y_col=train_df_columns,
    subset='validation',
    batch_size=32,
    seed=42,
    shuffle=False,
    class_mode='raw',
    target_size=(150, 150))
                                                              
                                                            

In [None]:
# Number of batches needed to pass over each generator once
# (the final batch may be smaller than batch_size).
def _batches_per_epoch(generator):
    """Return ceil(generator.samples / generator.batch_size) as an int."""
    return int(np.ceil(generator.samples / generator.batch_size))


step_train_size = _batches_per_epoch(train_generator)
step_val_size = _batches_per_epoch(val_generator)
print(step_train_size, step_val_size)

In [None]:
# Define the CNN model
#model = Sequential([
    #layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    #layers.MaxPooling2D((2, 2)),
    #layers.Conv2D(64, (3, 3), activation='relu'),
    #layers.MaxPooling2D((2, 2)),
    #layers.Conv2D(128, (3, 3), activation='relu'),
    #layers.MaxPooling2D((2, 2)),
    #layers.Flatten(),
    #layers.Dense(512, activation='relu'),
    #layers.Dropout(0.5),
    #layers.Dense (17, activation='sigmoid')
    
#])

# Compile the model
#model.compile(optimizer='adam',
              #loss='binary_crossentropy',
              #metrics=['accuracy'])

# Model architecture
#model = Sequential([
    #Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)),
    #MaxPooling2D(2, 2),
    #Conv2D(64, (3, 3), activation='relu'),
    #MaxPooling2D(2, 2),
    #Conv2D(128, (3, 3), activation='relu'),
    #MaxPooling2D(2, 2),
    #Flatten(),
    #Dense(512, activation='relu'),
    #Dropout(0.5),
    #Dense(num_classes, activation='sigmoid')
#])

#model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
#history = model.fit(train_generator, epochs=50, validation_data=val_generator)

# Define the CNN model.
# The input shape and the number of outputs are read from the data pipeline so
# the network always matches what the generators yield (the previous
# hard-coded 32x32 input and 10 outputs matched neither the 150x150 images nor
# the label columns).
input_shape = train_generator.image_shape  # shape of one generated image
num_labels = len(train_df_columns)         # one output per label column

model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    # Sigmoid: every label is an independent yes/no decision (multi-label).
    layers.Dense(num_labels, activation='sigmoid')
])

# Compile the model.
# Targets are multi-hot vectors and the outputs are sigmoid probabilities, so
# the loss is binary cross-entropy on probabilities (not a sparse categorical
# loss on logits, which expects a single integer class per sample).
# Precision and recall are tracked per epoch because plain accuracy is
# dominated by the many absent labels; an F-beta score can be computed from
# the predictions afterwards.
model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy'),
                       tf.keras.metrics.Precision(name='precision'),
                       tf.keras.metrics.Recall(name='recall')])

In [None]:
# Train the model for 10 epochs, validating after each one.
# The keyword is `steps_per_epoch`; the misspelt `steps_per_epochs` makes
# fit() raise a TypeError. validation_steps uses the matching step count
# computed for the validation generator.
history = model.fit(
    x=train_generator,
    validation_data=val_generator,
    steps_per_epoch=step_train_size,
    validation_steps=step_val_size,
    epochs=10,
    verbose=1
)

In [None]:
# Model architecture: a deeper CNN with dropout before the output layer.
# NOTE: this rebinds `model` and `history`, replacing the objects created by
# the earlier model/fit cells.
# The input shape and the output width come from the data pipeline so the
# network matches what the generators yield (the previous hard-coded
# 128x128 input did not match the 150x150 images they produce).
input_shape = train_generator.image_shape  # shape of one generated image
num_labels = len(train_df_columns)         # one output per label column

model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D(2, 2),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(2, 2),
    Flatten(),
    Dense(512, activation='relu'),
    Dropout(0.5),
    # Sigmoid + binary cross-entropy: independent yes/no decision per label.
    Dense(num_labels, activation='sigmoid')
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
history = model.fit(train_generator, epochs=10, validation_data=val_generator)