In [2]:
!pip install kaggle



In [3]:
!mkdir ~/.kaggle
!touch ~/.kaggle/kaggle.json

In [4]:
import os
from getpass import getpass

# Prompt for the kaggle.json contents with getpass so the token is not echoed
# into the notebook output.
kaggle_secret = getpass('Enter the content of your kaggle.json: ')

# Resolve the credentials path from the current user's home directory rather
# than assuming the notebook runs as root, and make sure the directory exists.
kaggle_config_path = os.path.expanduser('~/.kaggle/kaggle.json')
os.makedirs(os.path.dirname(kaggle_config_path), exist_ok=True)

with open(kaggle_config_path, "w") as f:
  f.write(kaggle_secret)

# The file now holds an API token: restrict it to owner read/write.
os.chmod(kaggle_config_path, 0o600)

Enter the content of your kaggle.json: ··········


In [5]:
!kaggle competitions download -c tpu-getting-started
!unzip tpu-getting-started.zip -d data
!rm tpu-getting-started.zip

Downloading tpu-getting-started.zip to /content
100% 4.79G/4.79G [02:40<00:00, 37.9MB/s]
100% 4.79G/4.79G [02:40<00:00, 32.0MB/s]
Archive:  tpu-getting-started.zip
  inflating: data/sample_submission.csv  
  inflating: data/tfrecords-jpeg-192x192/test/00-192x192-462.tfrec  
  inflating: data/tfrecords-jpeg-192x192/test/01-192x192-462.tfrec  
  inflating: data/tfrecords-jpeg-192x192/test/02-192x192-462.tfrec  
  inflating: data/tfrecords-jpeg-192x192/test/03-192x192-462.tfrec  
  inflating: data/tfrecords-jpeg-192x192/test/04-192x192-462.tfrec  
  inflating: data/tfrecords-jpeg-192x192/test/05-192x192-462.tfrec  
  inflating: data/tfrecords-jpeg-192x192/test/06-192x192-462.tfrec  
  inflating: data/tfrecords-jpeg-192x192/test/07-192x192-462.tfrec  
  inflating: data/tfrecords-jpeg-192x192/test/08-192x192-462.tfrec  
  inflating: data/tfrecords-jpeg-192x192/test/09-192x192-462.tfrec  
  inflating: data/tfrecords-jpeg-192x192/test/10-192x192-462.tfrec  
  inflating: data/tfrecords-jpeg-19

In [26]:
import tensorflow as tf
import numpy as np
import re

# --- Configuration -----------------------------------------------------------
# IMAGE_SIZE is the single source of truth for the resolution: it selects the
# TFRecord directory below and is also the shape images are reshaped to after
# decoding, so the two can no longer drift apart.
IMAGE_SIZE = [512, 512]
BATCH_SIZE = 32
NUM_OF_CLASSES = 104

DATASET_PATH = "data"
# Resolves to "data/tfrecords-jpeg-512x512" for the IMAGE_SIZE above.
PATH = f"{DATASET_PATH}/tfrecords-jpeg-{IMAGE_SIZE[0]}x{IMAGE_SIZE[1]}"

# TFRecord shards for each split.
TRAINING_FILENAMES = tf.io.gfile.glob(PATH + "/train/*.tfrec")
VALIDATION_FILENAMES = tf.io.gfile.glob(PATH + "/val/*.tfrec")
TEST_FILENAMES = tf.io.gfile.glob(PATH + "/test/*.tfrec")

In [30]:
def decode_image(image_data):
  """Decode JPEG bytes into a float32 RGB tensor of shape [*IMAGE_SIZE, 3].

  Args:
    image_data: scalar string tensor holding the JPEG-encoded image.

  Returns:
    The decoded image cast to float32 and divided by 255.0.
  """
  pixels = tf.image.decode_jpeg(image_data, channels=3)
  scaled = tf.cast(pixels, tf.float32) / 255.0
  # Pin the static shape so downstream batching sees fixed dimensions.
  return tf.reshape(scaled, [*IMAGE_SIZE, 3])


def read_labelled_tfrecord(example):
  """Parse one serialized labelled example into (image, one-hot label).

  Args:
    example: serialized tf.train.Example with an "image" (string) feature
      and a "class" (int64) feature.

  Returns:
    A tuple of the decoded image and the class encoded as a one-hot vector
    of depth NUM_OF_CLASSES.
  """
  feature_spec = {
    "image": tf.io.FixedLenFeature([], tf.string),
    "class": tf.io.FixedLenFeature([], tf.int64),
  }
  parsed = tf.io.parse_single_example(example, feature_spec)
  class_index = tf.cast(parsed["class"], tf.int32)
  return decode_image(parsed["image"]), tf.one_hot(class_index, NUM_OF_CLASSES)


def read_unlabelled_tfrecord(example):
  """Parse one serialized unlabelled example into (image, id).

  Args:
    example: serialized tf.train.Example with an "image" (string) feature
      and an "id" (string) feature.

  Returns:
    A tuple of the decoded image and the raw "id" string tensor.
  """
  feature_spec = {
    "image": tf.io.FixedLenFeature([], tf.string),
    "id": tf.io.FixedLenFeature([], tf.string),
  }
  parsed = tf.io.parse_single_example(example, feature_spec)
  return decode_image(parsed["image"]), parsed["id"]


def load_dataset(filenames, labelled=True, ordered=False):
  """Build a tf.data pipeline of parsed examples from TFRecord files.

  Args:
    filenames: TFRecord file paths to read.
    labelled: if True, parse records with read_labelled_tfrecord
      (image, one-hot label); otherwise with read_unlabelled_tfrecord
      (image, id).
    ordered: if False, the pipeline is allowed to yield elements in a
      non-deterministic order; if True, the default tf.data options are kept.

  Returns:
    An unbatched tf.data.Dataset of parsed examples.
  """
  options = tf.data.Options()
  if not ordered:
    # `deterministic` replaces the deprecated `experimental_deterministic`.
    options.deterministic = False

  # Files are read in parallel; tf.data picks the level of parallelism.
  dataset = tf.data.TFRecordDataset(
    filenames, num_parallel_reads=tf.data.AUTOTUNE
  )
  dataset = dataset.with_options(options)

  parse_fn = read_labelled_tfrecord if labelled else read_unlabelled_tfrecord
  return dataset.map(parse_fn, num_parallel_calls=tf.data.AUTOTUNE)

In [31]:
def get_training_dataset(shuffle_buffer_size=256):
  """Build the infinite, shuffled, batched training pipeline.

  Args:
    shuffle_buffer_size: number of decoded examples held in the shuffle
      buffer. Each buffered example is a float32 image of shape
      [*IMAGE_SIZE, 3], so keep this small enough to fit in host memory.
      Pass 0 or None to disable shuffling.

  Returns:
    A tf.data.Dataset yielding (images, one-hot labels) batches of
    BATCH_SIZE forever. Callers must bound training with steps_per_epoch.
  """
  dataset = load_dataset(TRAINING_FILENAMES, labelled=True)
  if shuffle_buffer_size:
    # Shuffle before repeat so each pass over the data is shuffled on its own.
    dataset = dataset.shuffle(shuffle_buffer_size)
  dataset = dataset.repeat()
  dataset = dataset.batch(BATCH_SIZE)
  dataset = dataset.prefetch(1)
  return dataset


def get_validation_dataset(ordered=False):
  """Build the batched validation pipeline (a single pass, no repeat).

  Args:
    ordered: forwarded to load_dataset.

  Returns:
    A tf.data.Dataset of labelled batches of BATCH_SIZE with a prefetch
    buffer of one batch.
  """
  examples = load_dataset(VALIDATION_FILENAMES, labelled=True, ordered=ordered)
  return examples.batch(BATCH_SIZE).prefetch(1)


def get_test_dataset(ordered=False):
  """Build the batched test pipeline of unlabelled (image, id) examples.

  Args:
    ordered: forwarded to load_dataset.

  Returns:
    A tf.data.Dataset of unlabelled batches of BATCH_SIZE with a prefetch
    buffer of one batch.
  """
  examples = load_dataset(TEST_FILENAMES, labelled=False, ordered=ordered)
  return examples.batch(BATCH_SIZE).prefetch(1)


def count_data_items(filenames):
  """Sum the record counts encoded in TFRecord file names.

  Each file name is expected to end in "-<count>.<ext>", e.g.
  "00-512x512-798.tfrec" contributes 798.

  Args:
    filenames: iterable of file paths.

  Returns:
    The total count as a Python int (0 for an empty iterable).

  Raises:
    ValueError: if a file name contains no "-<digits>." group.
  """
  count_pattern = re.compile(r"-(\d+)\.")
  total = 0
  for filename in filenames:
    # Match against the file name only, so a "-<digits>." sequence in a parent
    # directory (e.g. "run-1.5/") cannot be mistaken for the count.
    basename = re.split(r"[\\/]", filename)[-1]
    match = count_pattern.search(basename)
    if match is None:
      raise ValueError(
        "Cannot read item count from file name: {!r}".format(filename)
      )
    total += int(match.group(1))
  return total

In [32]:
# Instantiate the input pipelines for each split with their default settings.
train_dataset = get_training_dataset()
validation_dataset = get_validation_dataset()
test_dataset = get_test_dataset()

# Training-set size as encoded in the shard file names. It is needed to bound
# an epoch, because the training pipeline repeats forever.
num_train_images = count_data_items(TRAINING_FILENAMES)

In [53]:
from tensorflow.keras import datasets, layers, models, callbacks

# Baseline CNN: three Conv2D + MaxPooling2D stages followed by a softmax
# classifier. Input shape and class count come from the shared config constants
# so the model stays in sync with the data pipeline.
model = models.Sequential()

model.add(layers.Conv2D(64, (3, 3), activation='relu',  input_shape=(*IMAGE_SIZE, 3)))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(32, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Conv2D(16, (3, 3), activation='relu'))
model.add(layers.MaxPooling2D((2, 2)))
model.add(layers.Flatten())
model.add(layers.Dense(NUM_OF_CLASSES, activation='softmax'))

model.summary()
# Labels are one-hot encoded by the input pipeline, hence categorical_crossentropy.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['acc'])

# NOTE: with epochs=1 below, the patience=10 early stopping can never trigger;
# it only becomes meaningful once the epoch count is raised.
es = callbacks.EarlyStopping(monitor='val_acc', mode='max', patience=10, verbose=1)
cp = callbacks.ModelCheckpoint(filepath='model', save_best_only=True, verbose=1, monitor='val_acc', mode='max')

# The training dataset repeats forever, so an epoch is bounded by a whole number
# of batches (integer division; at least one step).
steps_per_epoch = max(1, int(num_train_images) // BATCH_SIZE)

# Keep the History object for later inspection instead of leaving its repr as
# the cell output.
history = model.fit(
    train_dataset,
    validation_data=validation_dataset,
    epochs=1,
    steps_per_epoch=steps_per_epoch,
    callbacks=[es, cp]
)

Model: "sequential_30"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_99 (Conv2D)          (None, 510, 510, 64)      1792      
                                                                 
 max_pooling2d_99 (MaxPooli  (None, 255, 255, 64)      0         
 ng2D)                                                           
                                                                 
 conv2d_100 (Conv2D)         (None, 253, 253, 32)      18464     
                                                                 
 max_pooling2d_100 (MaxPool  (None, 126, 126, 32)      0         
 ing2D)                                                          
                                                                 
 conv2d_101 (Conv2D)         (None, 124, 124, 16)      4624      
                                                                 
 max_pooling2d_101 (MaxPool  (None, 62, 62, 16)      

<keras.src.callbacks.History at 0x7f4a062b2320>