##Loading the Data

In [23]:
from google.colab import drive
drive.mount('/gdrive', force_remount=True)

Mounted at /gdrive


In [24]:
# Your file will probably be called this, if you used the regular Kaggle download.
!unzip -q /gdrive/MyDrive/comp8220data/living-thing-classification-coarse.zip     # CHANGE DIRECTORY

replace info.json? [y]es, [n]o, [A]ll, [N]one, [r]ename: A
y


In [25]:
import json
import numpy as np
import pandas as pd
import tensorflow as tf
from pathlib import Path
import matplotlib.pyplot as plt

In [26]:
batch_size = 64
img_size = 112
fine_grain = False
num_classes = 50 if fine_grain else 8

root = Path('')

In [27]:
def load_target_names(path='info.json'):
    with open(path) as f:
        info = json.load(f)
    return info['fine'], info['coarse']

def load_image(file_path, image_size=112):
    img = tf.io.read_file(file_path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)
    img = tf.image.resize(img, [image_size, image_size])
    return img

def create_dataset_tf(split, fine_grained, batch_size=32, image_size=112):
  df = pd.read_csv(root / f"{split}.csv")
  file_paths = df.filename.map(lambda fn: str(root / split / fn))
  labels = df.fine if fine_grained else df.coarse

  dataset = tf.data.Dataset.from_tensor_slices((file_paths, labels))
  dataset = dataset.map(lambda x, y: (load_image(x, image_size=image_size), y))
  dataset = dataset.batch(batch_size)
  dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
  return dataset


def create_dataset_sklearn(split, fine_grained, image_size=48, percent=0.1):
  dataset = create_dataset_tf(split=split, fine_grained=fine_grained, batch_size=1000, image_size=image_size)
  dataset = dataset.map(lambda x, y: (tf.image.rgb_to_grayscale(x), y))
  X = None
  Y = None
  n_batches = int(len(dataset) * percent)
  for x,y in dataset.take(n_batches):
    x, y = x.numpy(), y.numpy()
    x = x.reshape(x.shape[0], -1)
    X = x if X is None else np.vstack([X, x])
    Y = y if Y is None else np.hstack([Y, y])

  return X, Y


In [28]:
fine_names, coarse_names = load_target_names()
print(coarse_names)

['Aves', 'Reptilia', 'Mammalia', 'Arachnida', 'Magnoliopsida', 'Insecta', 'Liliopsida', 'Pinopsida']


## Creating Tensorflow Data

In [29]:
train_ds = create_dataset_tf('train', fine_grained=False, batch_size=batch_size)
val_ds = create_dataset_tf('val', fine_grained=False, batch_size=batch_size)

In [30]:
train_ds = train_ds.map(lambda x,y: (tf.keras.applications.xception.preprocess_input(x), y))
val_ds = val_ds.map(lambda x,y: (tf.keras.applications.xception.preprocess_input(x), y))

# Barebone Conv2d

In [None]:
from functools import partial
DefaultConv2D = partial(tf.keras.layers.Conv2D, kernel_size=3, padding="same", activation="relu", kernel_initializer="he_normal")
model_barebone = tf.keras.Sequential([
    DefaultConv2D(filters = 20, kernel_size=6, input_shape=[112,112,3]),
    tf.keras.layers.MaxPool2D(),
    DefaultConv2D(filters = 40),
    DefaultConv2D(filters = 40),
    tf.keras.layers.MaxPool2D(),
    DefaultConv2D(filters = 200),
    DefaultConv2D(filters = 200),
    tf.keras.layers.MaxPool2D(),
    DefaultConv2D(filters = 400),
    DefaultConv2D(filters = 400),
    tf.keras.layers.MaxPool2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=200, activation="relu", kernel_initializer="he_normal"),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(units=70, activation="relu", kernel_initializer="he_normal"),
    tf.keras.layers.Dropout(0.2),
    tf.keras.layers.Dense(units=50, activation="softmax")
])

model_barebone.summary()

Model: "sequential_11"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_8 (Conv2D)           (None, 112, 112, 40)      4360      
                                                                 
 max_pooling2d_5 (MaxPooling  (None, 56, 56, 40)       0         
 2D)                                                             
                                                                 
 conv2d_9 (Conv2D)           (None, 56, 56, 100)       36100     
                                                                 
 conv2d_10 (Conv2D)          (None, 56, 56, 100)       90100     
                                                                 
 max_pooling2d_6 (MaxPooling  (None, 28, 28, 100)      0         
 2D)                                                             
                                                                 
 conv2d_11 (Conv2D)          (None, 28, 28, 200)     

In [None]:
CustomAdam = partial(tf.keras.optimizers.Adam, learning_rate=0.0001, epsilon=1e-3)
model.compile(optimizer=CustomAdam(), loss="sparse_categorical_crossentropy", metrics=["accuracy"])

In [None]:
history = model.fit(train_ds,validation_data=val_ds, epochs = 6, verbose=1)

Epoch 1/6
Epoch 2/6
Epoch 3/6
Epoch 4/6
Epoch 5/6
Epoch 6/6


In [None]:
test_ds = create_dataset_tf('test', fine_grained=True, batch_size=batch_size)

In [None]:
y_pred_fine = model.predict(test_ds)



In [None]:
y_pred_classes = np.argmax(y_pred_fine, axis=1)

In [None]:
N = 50000     # num elements in test.csv
test_csv = pd.read_csv(root / f"test.csv")
test_csv.loc[:,"fine"] = y_pred_classes
test_csv = test_csv.drop(columns=['coarse'])
pred_file = "/gdrive/MyDrive/comp8220data/47506067-deep-fine.csv"  # CHANGE DIRECTORY, FILENAME as appropriate
test_csv.to_csv(pred_file)

1. Error in Softmax layer need to use sparse_categorical_crossentropy

In [None]:
total_elements = tf.data.experimental.cardinality(val_ds).numpy()
print(total_elements)
num_elements_to_take = int(0.5 * total_elements)
#train_ds = train_ds.take(num_elements_to_take)
val_ds = val_ds.take(num_elements_to_take)

782


In [None]:
total_elements = tf.data.experimental.cardinality(train_ds).numpy()
print(total_elements)

1563


# First Try:
1. resnet with
2. Try using learning rate scheduler, epsilon scheduler

In [31]:


# Load the ResNet50 model without the top (fully connected) layers
base_model = tf.keras.applications.xception.Xception(weights='imagenet', include_top=False)
data_augmentation = tf.keras.Sequential(
    [tf.keras.layers.RandomFlip(mode="horizontal", seed=42),
     tf.keras.layers.RandomFlip(mode="vertical", seed=42),
     tf.keras.layers.RandomZoom(0.2,0.2),
     tf.keras.layers.RandomRotation(factor=(-0.2,0.2), seed=42)]
)
# Freeze the layers in the base model
for layer in base_model.layers:
    layer.trainable = False

model = tf.keras.Sequential([
    data_augmentation,
    base_model,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(units=256, activation="relu"),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(units=128, activation="relu", kernel_regularizer=tf.keras.regularizers.l2(0.01)),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(units=8, activation="softmax")
])
model.build(input_shape=(None, 112, 112, 3))

# Print the summary of the model
model.summary()

Model: "sequential_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 sequential_4 (Sequential)   (None, 112, 112, 3)       0         
                                                                 
 xception (Functional)       (None, None, None, 2048)  20861480  
                                                                 
 global_average_pooling2d_2   (None, 2048)             0         
 (GlobalAveragePooling2D)                                        
                                                                 
 flatten_2 (Flatten)         (None, 2048)              0         
                                                                 
 dense_6 (Dense)             (None, 256)               524544    
                                                                 
 dropout_2 (Dropout)         (None, 256)               0         
                                                      

In [10]:
from functools import partial
CustomAdam = partial(tf.keras.optimizers.Adam, learning_rate=0.0001, epsilon=1e-3)
model.compile(optimizer=CustomAdam(), loss="sparse_categorical_crossentropy", metrics=["accuracy"])

In [11]:
checkpoint_path = "/gdrive/MyDrive/comp8220data/XceptionPreprocessInitialize"
#checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callback_initial = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

In [12]:
history = model.fit(train_ds,validation_data=val_ds, epochs = 3,callbacks=[cp_callback_initial], verbose=1)

Epoch 1/3
Epoch 1: saving model to /gdrive/MyDrive/comp8220data/XceptionPreprocessInitialize
Epoch 2/3
Epoch 2: saving model to /gdrive/MyDrive/comp8220data/XceptionPreprocessInitialize
Epoch 3/3
Epoch 3: saving model to /gdrive/MyDrive/comp8220data/XceptionPreprocessInitialize


In [33]:
for layer in base_model.layers[8:]:
    layer.trainable = True

In [34]:
from functools import partial
CustomAdam = partial(tf.keras.optimizers.Adam, learning_rate=0.00003, epsilon=1e-3)
model.compile(optimizer=CustomAdam(), loss="sparse_categorical_crossentropy", metrics=["accuracy"])

In [21]:
checkpoint_path = "/gdrive/MyDrive/comp8220data/XceptionPreprocessTest10Layer"
#checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

In [None]:
weight_for_0 = (2 / 20069) * (12500)
weight_for_1 = (1 / 1891) * (12500)
weight_for_2 = (1 / 1957) * (12500)
weight_for_3 = (1 / 2015) * (12500)
weight_for_4 = (2 / 36062) * (12500)
weight_for_5 = (2 / 26082) * (12500)
weight_for_6 = (1 / 9960) * (12500)
weight_for_7 = (1 / 1961) * (12500)
class_weight = {0: weight_for_0, 1: weight_for_1, 2: weight_for_2, 3: weight_for_3,4: weight_for_4, 5: weight_for_5,6: weight_for_6, 7: weight_for_7}


In [32]:
model.load_weights('/gdrive/MyDrive/comp8220data/XceptionPreprocessInitialize')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f9600c8ecb0>

In [None]:
history = model.fit(train_ds,validation_data=val_ds, epochs = 40, callbacks=[cp_callback], verbose=1)

Epoch 1/40

In [None]:
history = model.fit(train_ds,validation_data=val_ds, epochs = 40, callbacks=[cp_callback], verbose=1)

Epoch 1/40
Epoch 1: saving model to /gdrive/MyDrive/comp8220data/AftermathTest
Epoch 2/40
Epoch 2: saving model to /gdrive/MyDrive/comp8220data/AftermathTest
Epoch 3/40
Epoch 3: saving model to /gdrive/MyDrive/comp8220data/AftermathTest
Epoch 4/40
Epoch 4: saving model to /gdrive/MyDrive/comp8220data/AftermathTest
Epoch 5/40
Epoch 5: saving model to /gdrive/MyDrive/comp8220data/AftermathTest
Epoch 6/40
Epoch 6: saving model to /gdrive/MyDrive/comp8220data/AftermathTest
Epoch 7/40
Epoch 7: saving model to /gdrive/MyDrive/comp8220data/AftermathTest
Epoch 8/40
Epoch 8: saving model to /gdrive/MyDrive/comp8220data/AftermathTest
Epoch 9/40
Epoch 9: saving model to /gdrive/MyDrive/comp8220data/AftermathTest
Epoch 10/40
Epoch 10: saving model to /gdrive/MyDrive/comp8220data/AftermathTest
Epoch 11/40
Epoch 11: saving model to /gdrive/MyDrive/comp8220data/AftermathTest
Epoch 12/40
Epoch 12: saving model to /gdrive/MyDrive/comp8220data/AftermathTest
Epoch 13/40
Epoch 13: saving model to /gdrive/

In [None]:
model.load_weights('/gdrive/MyDrive/comp8220data')

# Add a new dense layer with 50 units for 50 classes
model.add(tf.keras.layers.Dense(units=200, activation="relu"))
model.add(tf.keras.layers.Dense(units=50, activation="softmax"))

In [None]:
model.pop()

In [None]:
checkpoint_path = "/gdrive/MyDrive/comp8220datafine"
#checkpoint_dir = os.path.dirname(checkpoint_path)

# Create a callback that saves the model's weights
cp_callbackfine = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 save_weights_only=True,
                                                 verbose=1)

In [None]:
history = model.fit(train_ds,validation_data=val_ds, epochs = 10, callbacks=[cp_callbackfine], class_weight=class_weight, verbose=1)

Epoch 1/10
Epoch 1: saving model to /gdrive/MyDrive/comp8220datafine
Epoch 2/10
Epoch 2: saving model to /gdrive/MyDrive/comp8220datafine
Epoch 3/10
 104/1563 [>.............................] - ETA: 4:04 - loss: 2.2868 - accuracy: 0.3531

KeyboardInterrupt: ignored

In [None]:
model.load_weights('/gdrive/MyDrive/comp8220datafine')

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7f5265b66e30>

In [None]:
!unzip -q /gdrive/MyDrive/comp8220data/living-thing-classification-coarse-private.zip

replace privtest_coarse/privtest_coarse.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: A


In [None]:
test_ds = create_dataset_tf('privtest_coarse/privtest_coarse', fine_grained=False, batch_size=batch_size)

In [None]:
total_elements = tf.data.experimental.cardinality(test_ds).numpy()
print(total_elements)

24


In [None]:
y_pred_fine = model.predict(test_ds)



In [None]:
y_pred_classes = np.argmax(y_pred_fine, axis=1)

In [None]:
N = 1500     # num elements in test.csv
privtest_csv = pd.read_csv(root / f"privtest_coarse/privtest_coarse.csv")
privtest_csv.loc[:,"coarse"] = y_pred_classes
#privtest_csv = privtest_csv.drop(columns=['fine'])
pred_file = "/gdrive/MyDrive/comp8220data/47506067-deep-fineprivatedata.csv"  # CHANGE DIRECTORY, FILENAME as appropriate
privtest_csv.to_csv(pred_file)

In [None]:
data_augmentation = tf.keras.Sequential(
    [tf.keras.layers.RandomFlip(mode="horizontal", seed=42),
     tf.keras.layers.RandomRotation(factor=0.05, seed=42)]
)

In [None]:
base_model = tf.keras.applications.xception.Xception(weights="imagenet", include_top=False)
avg = tf.keras.layers.GlobalAveragePooling2D()(base_model.output)
output = tf.keras.layers.Dense(8, activation="softmax")(avg)
model = tf.keras.Model(inputs=base_model.input, outputs=output)
model.summary()

In [None]:
for layer in base_model.layers:
  layer.trainable = False

optimizer = tf.keras.optimizers.SGD(learning_rate=0.1,momentum=0.9)

In [None]:
model.compile(loss="sparse_categorical_crossentropy", optimizer = optimizer, metrics=["accuracy"])

In [None]:
history = model.fit(train_set,validation_data=val_set, epochs = 3, verbose=1)

Epoch 1/3


InvalidArgumentError: ignored

## Last Step: Create CSV File With Prediction

In [None]:
N = 50000     # num elements in test.csv
test_csv = pd.read_csv(root / f"test.csv")
test_csv.loc[:,"coarse"] = predicted_y
test_csv = test_csv.drop(columns=['fine'])
pred_file = "/gdrive/MyDrive/comp8220data/my_test_preds.csv"  # CHANGE DIRECTORY, FILENAME as appropriate
test_csv.to_csv(pred_file)