### We can construct a mosaic of nearby tiles using this method: https://www.kaggle.com/c/planet-understanding-the-amazon-from-space/discussion/36738

# Import Necessary Libraries

In [1]:
import numpy as np
import os
import matplotlib
from matplotlib import pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow import keras as K
from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Dropout, Input, Dense, Activation, BatchNormalization, Flatten
from tensorflow.keras.models import Sequential, Model
import math

# Preprocess data
---

### Obtain Labels

In [2]:
train_data = pd.read_csv('data/train_v2.csv')

curr_count = 0
unique_labels = {}
multihot = {}
for line in train_data['tags'].values:
    for label in line.split():
        if label not in unique_labels:
            unique_labels[label] = curr_count
            curr_count += 1

mapping = {}

for k, v in unique_labels.items():
    mapping[k] = np.zeros(len(unique_labels))
    mapping[k][v] = 1

n_labels = len(mapping)
label2name = {v: k for k, v in unique_labels.items()}

print(label2name)

{0: 'haze', 1: 'primary', 2: 'agriculture', 3: 'clear', 4: 'water', 5: 'habitation', 6: 'road', 7: 'cultivation', 8: 'slash_burn', 9: 'cloudy', 10: 'partly_cloudy', 11: 'conventional_mine', 12: 'bare_ground', 13: 'artisinal_mine', 14: 'blooming', 15: 'selective_logging', 16: 'blow_down'}


### View Head of dataset

In [3]:
train_data.head(n = 10)

Unnamed: 0,image_name,tags
0,train_0,haze primary
1,train_1,agriculture clear primary water
2,train_2,clear primary
3,train_3,clear primary
4,train_4,agriculture clear habitation primary road
5,train_5,haze primary water
6,train_6,agriculture clear cultivation primary water
7,train_7,haze primary
8,train_8,agriculture clear cultivation primary
9,train_9,agriculture clear cultivation primary road


### Auxiliary Function for multi-hotting

In [4]:
def multihot(label):
    z = np.zeros(n_labels)
    tokens = label.split(' ')
    for k in range(len(tokens)):
        z += mapping[tokens[k]]

    return z

In [5]:
# first pass, construct a list of image strips

train_path = 'data/train-jpg/'

num_images = len(os.listdir(train_path))

X = []
y = []

num_jpgs = 1000
for iter, file in enumerate(os.listdir(train_path)[:num_jpgs]):
    X.append(train_path + file)
    y.append(train_data['tags'][iter])

    if iter % (0.05 * num_jpgs) == 0:
        print(f"{(100 * iter / num_jpgs):.2f} % complete")

# X_np = np.array(X) / 255

y_np = np.zeros(shape = (num_jpgs, n_labels))


for i, label in enumerate(y):
    y_np[i] = multihot(label)

y = y_np

0.00 % complete
5.00 % complete
10.00 % complete
15.00 % complete
20.00 % complete
25.00 % complete
30.00 % complete
35.00 % complete
40.00 % complete
45.00 % complete
50.00 % complete
55.00 % complete
60.00 % complete
65.00 % complete
70.00 % complete
75.00 % complete
80.00 % complete
85.00 % complete
90.00 % complete
95.00 % complete


### Split into a test and train set

In [6]:
validation_split = 0.2

indices = np.random.permutation(len(X))
train_length = math.floor(indices.shape[0] * (1 - validation_split))
train_indices, test_indices = indices[0:train_length], indices[train_length:]

X_train, X_test = [], []
y_train, y_test = [], []

for i in train_indices:
    X_train.append(X[i])
    y_train.append(y[i])

for j in test_indices:
    X_test.append(X[i])
    y_test.append(y[i])

In [7]:
def parse_function(filename, label):
    """Function that returns a tuple of normalized image array and labels array.
    Args:
        filename: string representing path to image
        label: 0/1 one-dimensional array of size N_LABELS
    """
    # Read an image from a file
    image_string = tf.io.read_file(filename)

    # Decode it into a dense vector
    image_decoded = tf.image.decode_jpeg(image_string, channels=3)

    # Resize it to fixed shape
    image_resized = tf.image.resize(image_decoded, [256, 256])

    print(f"{image_resized[0] = }")

    # Normalize it from [0, 255] to [0.0, 1.0]
    image_normalized = image_resized / 255.0
    return image_normalized, label

In [8]:
def create_dataset(filenames, labels, is_training=True):
    """Load and parse dataset.
    Args:
        filenames: list of image paths
        labels: numpy array of shape (BATCH_SIZE, N_LABELS)
        is_training: boolean to indicate training mode
    """
    
    # Create a first dataset of file paths and labels
    dataset = tf.data.Dataset.from_tensor_slices((filenames, labels))
    # Parse and preprocess observations in parallel
    dataset = dataset.map(parse_function)
    
    # if is_training == True:
    #     # This is a small dataset, only load it once, and keep it in memory.
    #     dataset = dataset.cache()
    #     # Shuffle the data each buffer size
    #     dataset = dataset.shuffle(buffer_size=SHUFFLE_BUFFER_SIZE)
        
    # Batch the data for multiple steps
    dataset = dataset.batch(256)
    # # Fetch batches in the background while the model is training.
    # dataset = dataset.prefetch(buffer_size=AUTOTUNE)
    
    return dataset

In [9]:
train_ds = create_dataset(X_train, y_train)
val_ds = create_dataset(X_test, y_test)

image_resized[0] = <tf.Tensor 'strided_slice:0' shape=(256, 3) dtype=float32>
image_resized[0] = <tf.Tensor 'strided_slice:0' shape=(256, 3) dtype=float32>


### Display a target image

In [10]:
def show_image(idx, X, y):
    img = X[idx]
    plt.imshow(img)
    plt.title(y[idx])
    plt.show()

### Define evaluation function

In [11]:
def macro_f1(y, y_hat, thresh=0.5):
    """Compute the macro F1-score on a batch of observations (average F1 across labels)
    
    Args:
        y (int32 Tensor): labels array of shape (BATCH_SIZE, N_LABELS)
        y_hat (float32 Tensor): probability matrix from forward propagation of shape (BATCH_SIZE, N_LABELS)
        thresh: probability value above which we predict positive
        
    Returns:
        macro_f1 (scalar Tensor): value of macro F1 for the batch
    """
    y_pred = tf.cast(tf.greater(y_hat, thresh), tf.float32)
    tp = tf.cast(tf.math.count_nonzero(y_pred * y, axis=0), tf.float32)
    fp = tf.cast(tf.math.count_nonzero(y_pred * (1 - y), axis=0), tf.float32)
    fn = tf.cast(tf.math.count_nonzero((1 - y_pred) * y, axis=0), tf.float32)
    f1 = 2*tp / (2*tp + fn + fp + 1e-16)
    macro_f1 = tf.reduce_mean(f1)
    return macro_f1

# Construct model
---

In [12]:
ds_model = Sequential()

ds_model.add(Conv2D(filters = 28,
    kernel_size = (3, 3),
    input_shape = (256, 256, 3),
    activation='relu',
    padding = 'Same'))
ds_model.add(MaxPooling2D(pool_size = (2, 2)))

ds_model.add(Conv2D(filters = 28,
    kernel_size = (3, 3),
    input_shape = (256, 256, 3),
    activation='relu'))
ds_model.add(MaxPooling2D(pool_size = (2, 2)))

ds_model.add(Conv2D(filters = 28,
    kernel_size = (3, 3),
    input_shape = (256, 256, 3),
    activation='relu'))
ds_model.add(MaxPooling2D(pool_size = (2, 2)))

ds_model.add(Flatten())

ds_model.add(Dense(200, activation = 'relu'))
ds_model.add(Dropout(0.2))

ds_model.add(Dense(100, activation = 'relu'))
ds_model.add(Dropout(0.1))

ds_model.add(Dense(n_labels, activation = 'sigmoid'))

### Compile the model

In [13]:
opt = K.optimizers.Adam(learning_rate=0.01)

ds_model.compile(optimizer=opt,
    loss = 'binary_crossentropy',
    metrics=[macro_f1])

# Train model
---

In [14]:
batchsize, epochs = 32, 30

ds_history = ds_model.fit(train_ds,
    epochs = epochs,
    batch_size = batchsize,
    validation_data = val_ds,
    verbose = 1)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


# View results
---

In [15]:
i = 0
batch1 = None
for batch in train_ds:
    batch1 = batch
    i += 1

    if i == 1:
        break


batch1[1]

<tf.Tensor: shape=(256, 17), dtype=float64, numpy=
array([[0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       ...,
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])>

In [128]:
y_hat_probs = ds_model.predict(batch1)

ValueError: in user code:

    File "C:\Users\notda\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\keras\engine\training.py", line 1621, in predict_function  *
        return step_function(self, iterator)
    File "C:\Users\notda\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\keras\engine\training.py", line 1611, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\notda\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\keras\engine\training.py", line 1604, in run_step  **
        outputs = model.predict_step(data)
    File "C:\Users\notda\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\keras\engine\training.py", line 1572, in predict_step
        return self(x, training=False)
    File "C:\Users\notda\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\notda\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.9_qbz5n2kfra8p0\LocalCache\local-packages\Python39\site-packages\keras\engine\input_spec.py", line 199, in assert_input_compatibility
        raise ValueError(f'Layer "{layer_name}" expects {len(input_spec)} input(s),'

    ValueError: Layer "sequential_4" expects 1 input(s), but it received 2 input tensors. Inputs received: [<tf.Tensor 'IteratorGetNext:0' shape=(32, 256, 256, 3) dtype=float32>, <tf.Tensor 'IteratorGetNext:1' shape=(32, 17) dtype=float64>]
