Understanding the Amazon Forest

In [None]:
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.preprocessing import image
from keras.layers import BatchNormalization
from sklearn.model_selection import train_test_split
from tqdm import tqdm

In [None]:
# Load the training label table; later cells read its 'image_name' and
# 'tags' columns.
df_train = pd.read_csv(
    "C:/Users/D-IKE/Desktop/HamoyeCODES/StageD/archive/train_classes.csv"
)
df_train.head()


In [None]:
# Every distinct label found in the whitespace-separated 'tags' column.
tag_set = set()


def add_tag(tags):
    """Add each whitespace-separated tag in *tags* to the module-level ``tag_set``."""
    tag_set.update(tags.split())


df_train['tags'].apply(add_tag)

# Sort so the label order is identical on every run: iterating a set of
# strings follows hash order, which can change between interpreter sessions
# and would silently reorder the label columns built from this list.
tag_list = sorted(tag_set)
print(tag_list)

In [None]:
# Add one 0/1 indicator column per tag: 1 when the row's 'tags' string
# contains that tag as a whole word, 0 otherwise.
tag_words = df_train['tags'].map(lambda text: text.split())
for tag in tag_list:
    df_train[tag] = tag_words.map(lambda words: 1 if tag in words else 0)
df_train.head()

In [None]:
# Restrict the working set to the first 6000 rows of the label table.
train_df = df_train.head(6000)
train_df.shape

In [None]:
# Directory holding the training JPEGs. File paths are formed by plain string
# concatenation with the image name, so the trailing slash is required.
image_directory = '../input/train-jpg/train-jpg/'

In [None]:
from keras.preprocessing import image
from tqdm import tqdm

# Edge length, in pixels, that every image is resized to.
size = 200

# One (size, size, 3) array per image, in the same order as train_df.
x_dataset = []

# Iterate over the file names themselves instead of looking rows up by
# integer label, so the loop does not depend on train_df having a 0..n-1 index.
for image_name in tqdm(train_df['image_name']):
    # target_size is documented as (img_height, img_width); the channel count
    # is determined by the colour mode, not by this tuple.
    img = image.load_img(image_directory + image_name + '.jpg', target_size=(size, size))
    img = image.img_to_array(img)
    img = img/255.0  # rescale pixel values to the [0, 1] range
    x_dataset.append(img)

x_train = np.array(x_dataset)

In [None]:
import gc

# The full label table is not referenced again (train_df holds the working
# rows), so drop the name and ask the collector to reclaim what it can.
del df_train
gc.collect()

In [None]:
import matplotlib.pyplot as plt

# Stack the per-image arrays into a single array of samples.
x_train = np.array(x_dataset)

# A notebook only echoes the final expression of a cell, so the shape has to
# be printed explicitly to be visible here.
print(x_train.shape)

# Keep the full image array under its own name; x_train is rebound when the
# data is split into train and test parts.
x = x_train

# Preview one sample as a sanity check on loading and scaling.
plt.imshow(x_train[24])

In [None]:
# Label matrix: everything in train_df except the file name and the raw tag
# string, i.e. the per-tag indicator columns.
label_frame = train_df.drop(columns=['image_name', 'tags'])
y = np.array(label_frame)

In [None]:
# Hold out 20% of the samples; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=20,
)

In [None]:
def fbeta(ytrue, ypred, beta = 2, epsilon = 1e-4):
    """Return the F-beta score of each sample, computed along axis 1.

    Predictions are binarised at 0.5 before counting. ``epsilon`` is added to
    every denominator so that samples with no positives do not divide by zero.

    Args:
        ytrue: Ground-truth labels; cast to float32.
            Assumed to be 0/1 with shape (batch, n_labels) - TODO confirm.
        ypred: Predicted scores; values greater than 0.5 count as positive.
        beta: Weight of recall relative to precision.
        epsilon: Small constant added to the denominators.

    Returns:
        A tensor holding one F-beta value per row of the inputs.
    """
    beta_squared = beta**2
    labels = tf.cast(ytrue, tf.float32)
    scores = tf.cast(ypred, tf.float32)
    decisions = tf.cast(tf.greater(scores, tf.constant(0.5)), tf.float32)

    # Per-sample confusion counts.
    hits = tf.reduce_sum(labels * decisions, axis = 1)
    false_alarms = tf.reduce_sum(decisions, axis = 1) - hits
    misses = tf.reduce_sum(labels, axis = 1) - hits

    precision = hits / (hits + false_alarms + epsilon)
    recall = hits / (hits + misses + epsilon)

    numerator = (1 + beta_squared) * precision * recall
    denominator = beta_squared * precision + recall + epsilon
    return numerator / denominator

In [None]:
def multi_label_acc(ytrue, ypred, epsilon = 1e-4):
    """Return, for each sample, the fraction of labels predicted correctly.

    Predictions are binarised at 0.5. The result is
    (TP + TN) / (TP + TN + FP + FN + epsilon), with counts taken along axis 1.

    Args:
        ytrue: Ground-truth labels; cast to float32.
            Assumed to be 0/1 with shape (batch, n_labels) - TODO confirm.
        ypred: Predicted scores; values greater than 0.5 count as positive.
        epsilon: Small constant added to the denominator to avoid dividing
            by zero.

    Returns:
        A tensor holding one accuracy value per row of the inputs.
    """
    ytrue = tf.cast(ytrue, tf.float32)
    ypred = tf.cast(tf.greater(tf.cast(ypred, tf.float32), tf.constant(0.5)), tf.float32)

    true_pos = tf.reduce_sum(ytrue * ypred, axis = 1)
    false_pos = tf.reduce_sum(ypred, axis = 1) - true_pos
    false_neg = tf.reduce_sum(ytrue, axis = 1) - true_pos

    # A label is a true negative when both the truth and the prediction are
    # "off": negate each as a boolean, then count where both negations hold.
    not_true = tf.cast(tf.logical_not(tf.cast(ytrue, tf.bool)), tf.float32)
    not_pred = tf.cast(tf.logical_not(tf.cast(ypred, tf.bool)), tf.float32)
    true_neg = tf.reduce_sum(not_true * not_pred, axis = 1)

    mla = (true_pos + true_neg) / (true_pos + true_neg + false_pos + false_neg + epsilon)
    return mla

In [None]:
def build_model(input_shape=(200, 200, 3), num_classes=17):
    """Build and compile the CNN used for multi-label tag prediction.

    Three stages of Conv2D(5x5, relu) -> MaxPooling2D(2x2) ->
    BatchNormalization -> Dropout(0.2), with 16, 64 and 64 filters, feed a
    Flatten layer and a 128 -> 64 dense head (Dropout(0.5) after each dense
    layer), ending in one sigmoid unit per label.

    Args:
        input_shape: Shape of a single input image. Defaults to
            (200, 200, 3).
        num_classes: Number of sigmoid output units, one per label.
            Defaults to 17.

    Returns:
        The compiled ``Sequential`` model (adam optimizer, binary
        cross-entropy loss, ``multi_label_acc`` and ``fbeta`` as metrics).
    """
    model = Sequential()

    for stage_index, filters in enumerate((16, 64, 64)):
        # Only the first layer of a Sequential model declares the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if stage_index == 0 else {}
        model.add(Conv2D(filters=filters, kernel_size=(5,5), activation='relu', **first_layer_kwargs))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(BatchNormalization())
        model.add(Dropout(0.2))

    model.add(Flatten())
    for units in (128, 64):
        model.add(Dense(units, activation='relu'))
        model.add(Dropout(0.5))

    # Sigmoid rather than softmax: each label is an independent yes/no
    # decision, which is also what binary cross-entropy expects.
    model.add(Dense(num_classes, activation='sigmoid'))

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics = [multi_label_acc, fbeta])

    return model

In [None]:
# Instantiate the compiled network.
model = build_model()

# Print the layer-by-layer architecture and parameter counts.
model.summary()

In [None]:
# Train for 15 epochs in batches of 50, scoring the held-out split after each
# epoch; the returned History object is kept for later inspection.
history = model.fit(
    x_train,
    y_train,
    batch_size=50,
    epochs=15,
    validation_data=(x_test, y_test),
)

In [None]:
# evaluate() returns the loss followed by the metrics in the order they were
# given to compile(): multi_label_acc first, then fbeta.
# The F-beta value is stored under its own name so that the fbeta() metric
# function is not overwritten by a float, which would break any later call
# that builds or compiles a model.
test_loss, mla, fbeta_score = model.evaluate(x_test, y_test)

print(f'Test loss: {test_loss}')
print(f'Fbeta score: {fbeta_score}')
print(f'Multilabel accuracy: {mla}')