[Dataset: Oxford-IIIT Cats Extended 10k (Kaggle)](https://www.kaggle.com/datasets/doctrinek/oxford-iiit-cats-extended-10k)

# Setup

### command line

In [None]:
# import the kaggle.json from kaggle API into colab
# do this command

# install kaggle library
!pip install kaggle
# make a directory named .kaggle
!mkdir ~/.kaggle
# copy the kaggle.json into this new directory
!cp kaggle.json ~/.kaggle/
# alocate the required permission for this file
!chmod 600 ~/.kaggle/kaggle.json
# download dataset
!kaggle datasets download doctrinek/oxford-iiit-cats-extended-10k

Downloading oxford-iiit-cats-extended-10k.zip to /content
 97% 967M/993M [00:07<00:00, 218MB/s]
100% 993M/993M [00:07<00:00, 138MB/s]


In [None]:
!pip install split-folders
!pip install pillow

Collecting split-folders
  Downloading split_folders-0.5.1-py3-none-any.whl (8.4 kB)
Installing collected packages: split-folders
Successfully installed split-folders-0.5.1


### import libraries

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Flatten, Dropout, Conv2D, MaxPooling2D, Input
from tensorflow.keras.models import Model
import numpy as np
import matplotlib.pyplot as plt
import pathlib, zipfile, os, splitfolders, datetime
from PIL import Image

### setup tensorboard

In [None]:
# Load the TensorBoard notebook extension; it provides the %tensorboard magic
# used near the end of this notebook.
%load_ext tensorboard

In [None]:
# Per-run TensorBoard log directory: logs/fit/<YYYYmmdd-HHMMSS>, stamped at cell execution time.
log_dir = f"logs/fit/{datetime.datetime.now():%Y%m%d-%H%M%S}"

In [None]:
# Delete the whole ./logs/ tree, removing logs written by earlier runs.
!rm -rf ./logs/

In [None]:
# Create a summary file writer targeting ./logs/.
# NOTE(review): the returned writer is not assigned to a name, so nothing visible
# in this notebook writes through it — confirm whether this cell is still needed.
tf.summary.create_file_writer("./logs/")

<tensorflow.python.ops.summary_ops_v2._ResourceSummaryWriter at 0x7ade6a3605b0>

In [None]:
# Keras callback that writes training logs into this run's `log_dir`;
# it is passed to model.fit() in the training cell.
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

### extract zip file

In [None]:
# Unpack the downloaded dataset archive into /content/dataset.
zip_path = "/content/oxford-iiit-cats-extended-10k.zip"
# The context manager closes the archive even if extraction fails part-way.
with zipfile.ZipFile(zip_path, "r") as zip_read:
  zip_read.extractall('/content/dataset')

In [None]:
# Inspect the top level of the extracted archive to find the dataset root folder.
os.listdir('/content/dataset/')

['CatBreedsRefined-v3']

### resize all the images

### split into train and validation

In [None]:
# Root folder of the extracted dataset; it is the input to the train/validation split below.
original_dir = '/content/dataset/CatBreedsRefined-v3'

In [None]:
# Folders the split is read from afterwards (train/ and val/ under the output directory).
train_dir = '/content/dataset/project/train'
validation_dir = '/content/dataset/project/val'

# Copy the images into an 80% / 20% split; the fixed seed keeps the split repeatable.
splitfolders.ratio(
    original_dir,
    output='/content/dataset/project',
    seed=6969,
    ratio=(0.8, 0.2),
)

Copying files: 10257 files [00:12, 809.04 files/s] 


### explore data samples

In [None]:
def total_sample(directory):
  """Count the entries inside every sub-folder of `directory`.

  Args:
    directory: path of a folder that holds one sub-folder per class.

  Returns:
    The summed number of entries found in the immediate sub-folders.
    Entries of `directory` that are not folders (stray files such as
    notes or OS metadata) are ignored instead of raising
    NotADirectoryError.
  """
  total = 0
  for folder in os.listdir(directory):
    folder_path = os.path.join(directory, folder)
    # Only class folders contain samples; skip loose files in the root.
    if os.path.isdir(folder_path):
      total += len(os.listdir(folder_path))

  return total

# Count the samples of each split, then report both counts and their sum.
train_sample_length, validation_sample_length = (
    total_sample(split_dir) for split_dir in (train_dir, validation_dir)
)
print(
    f"The train directory has {train_sample_length} samples",
    f"The validation directory has {validation_sample_length} samples",
    f"Which in total makes it {train_sample_length + validation_sample_length} samples",
    sep="\n",
)

The train directory has 8202 samples
The validation directory has 2055 samples
Which in total makes it 10257 samples


# Preprocess Data

In [None]:
batch_size = 48
target_size = (224, 224)

# Options shared by the training and validation directory iterators:
# one-hot labels, images resized to `target_size`, `batch_size` images per batch.
flow_options = dict(
    class_mode='categorical',
    target_size=target_size,
    batch_size=batch_size,
)

# Training images are rescaled by 1/255 and randomly augmented.
# NOTE(review): MobileNetV2's ImageNet weights are normally paired with
# mobilenet_v2.preprocess_input — confirm that plain 1/255 rescaling is intended.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    shear_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    channel_shift_range=0.2,
    # rotation_range=20 is currently disabled
    horizontal_flip=True,
    fill_mode='nearest',
)
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)

# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1.0 / 255)
validation_generator = validation_datagen.flow_from_directory(validation_dir, **flow_options)

Found 8202 images belonging to 12 classes.
Found 2055 images belonging to 12 classes.


# Model Creation

### callback function
- stops when both `accuracy` and `val_accuracy` have reached 92%
- stops when `accuracy` or `val_accuracy` stays below its best value so far for `limit_acc` epochs
- stops when `loss` or `val_loss` stays above 0.75 for `limit_loss` epochs


In [None]:
class SantaiDuluGakSih(tf.keras.callbacks.Callback):
  """Stop training early based on accuracy and loss patience counters.

  At the end of every epoch training is stopped when any of these holds:

  * `accuracy` or `val_accuracy` has been below its best value so far for
    `sabar_acc` consecutive epochs;
  * `loss` or `val_loss` has been above `max_loss` for `sabar_loss`
    consecutive epochs;
  * `accuracy` and `val_accuracy` of the current epoch are both at least
    `target_acc`.

  Args:
    sabar_acc: patience, in epochs, of the accuracy rule.
    sabar_loss: patience, in epochs, of the loss rule.
    target_acc: accuracy at which training is considered finished.
    max_loss: loss above which an epoch counts against the loss patience.
  """

  def __init__(self, sabar_acc=10, sabar_loss=10, target_acc=0.92, max_loss=0.75):
    super().__init__()
    self.limit_acc = sabar_acc
    self.limit_loss = sabar_loss
    self.target_acc = target_acc
    self.max_loss = max_loss
    self._reset_state()

  def _reset_state(self):
    """Restore both patience counters and forget the best accuracies seen."""
    self.sabar_acc = self.limit_acc
    self.sabar_loss = self.limit_loss
    self.max_acc = 0
    self.max_val_acc = 0

  def on_train_begin(self, logs=None):
    # Start every fit() call from a clean state so that reusing one instance
    # does not carry counters over from a previous run.
    self._reset_state()

  def on_epoch_end(self, epoch, logs=None):
    """Update the counters from this epoch's metrics and stop training if a rule fires.

    Raises:
      ValueError: if `logs` lacks one of the metrics the rules need (for
        example when fit() runs without validation data).
    """
    logs = logs or {}
    required = ('accuracy', 'val_accuracy', 'loss', 'val_loss')
    missing = [key for key in required if logs.get(key) is None]
    if missing:
      raise ValueError(
          f"{type(self).__name__} needs the metrics {missing} in the epoch logs; "
          "compile with metrics=['accuracy'] and pass validation_data to fit()."
      )

    acc, val_acc = logs['accuracy'], logs['val_accuracy']
    loss, val_loss = logs['loss'], logs['val_loss']

    self.max_acc = max(self.max_acc, acc)
    self.max_val_acc = max(self.max_val_acc, val_acc)

    # Patience is restored only when both accuracies are at their best value.
    if acc >= self.max_acc and val_acc >= self.max_val_acc:
      self.sabar_acc = self.limit_acc
    else:
      self.sabar_acc -= 1

    # A good epoch restores the loss patience (same scheme as the accuracy rule),
    # so the counter measures consecutive bad epochs and cannot bank extra credit.
    if loss > self.max_loss or val_loss > self.max_loss:
      self.sabar_loss -= 1
    else:
      self.sabar_loss = self.limit_loss

    if self.sabar_acc <= 0:
      print(f"The model accuracy has been below {self.max_acc} and {self.max_val_acc} for {self.limit_acc} epochs, Stopping training immediatly!!!")
      self.model.stop_training = True
    elif self.sabar_loss <= 0:
      print(f"The model loss has been above {self.max_loss:.0%} for {self.limit_loss} epochs, Stopping training immediatly!!!")
      self.model.stop_training = True
    # Compare this epoch's values, not the best-ever ones, which may come from different epochs.
    elif acc >= self.target_acc and val_acc >= self.target_acc:
      print(f"The model accuracy has reached {self.target_acc:.0%}, stopping training")
      self.model.stop_training = True

### transfer learning using MobileNetV2

In [23]:
# Empty Sequential container; the next cells append the backbone and the classifier head.
# NOTE(review): those cells call model.add(), so re-running them without re-running
# this cell stacks duplicate layers — run the three cells together.
model = tf.keras.Sequential()

In [24]:
# MobileNetV2 with ImageNet weights and without its top layers, fed 224x224 RGB inputs.
backbone_input = Input(shape=(224, 224, 3))
pre_trained_model = MobileNetV2(
    include_top=False,
    weights="imagenet",
    input_tensor=backbone_input,
)

# Freeze every backbone layer so that only the layers added afterwards are trained.
for backbone_layer in pre_trained_model.layers:
  backbone_layer.trainable = False

model.add(pre_trained_model)



In [25]:
# Classifier head appended to the model, ending in a 12-way softmax output.
# NOTE(review): the saved summary output lists this Conv2D with 64 filters, so it
# appears to come from an earlier version of this cell — re-run to refresh it.
classifier_head = [
    Conv2D(128, (3, 3), activation="relu"),
    MaxPooling2D(2, 2),
    Dropout(0.3),
    Flatten(),
    Dense(512, activation="relu"),
    Dense(256, activation="relu"),
    Dense(160, activation="relu"),
    Dense(128, activation="relu"),
    Dense(12, activation="softmax"),
]
for head_layer in classifier_head:
  model.add(head_layer)

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 mobilenetv2_1.00_224 (Func  (None, 7, 7, 1280)        2257984   
 tional)                                                         
                                                                 
 conv2d_2 (Conv2D)           (None, 5, 5, 64)          737344    
                                                                 
 max_pooling2d_1 (MaxPoolin  (None, 2, 2, 64)          0         
 g2D)                                                            
                                                                 
 dropout_1 (Dropout)         (None, 2, 2, 64)          0         
                                                                 
 flatten_1 (Flatten)         (None, 256)               0         
                                                                 
 dense_5 (Dense)             (None, 512)              

In [26]:
# Initial learning rate for the Adam optimizer.
int_lr = 1e-3
adam_optimizer = tf.optimizers.Adam(learning_rate=int_lr)

# 'accuracy' is the metric name the early-stopping callback reads from the epoch logs.
model.compile(
    loss='categorical_crossentropy',
    metrics=['accuracy'],
    optimizer=adam_optimizer,
)

In [27]:
# Early-stopping callback with a patience of 3 epochs for accuracy and 10 for loss.
berhenti_bang = SantaiDuluGakSih(sabar_acc=3, sabar_loss=10)
# NOTE: `modelku` receives the return value of fit(); the plotting cell reads
# the per-epoch metrics from it through `modelku.history`.
modelku = model.fit(
    train_generator,
    epochs=20,
    validation_data=validation_generator,
    callbacks=[berhenti_bang, tensorboard_callback],
    verbose=2
)

Epoch 1/20
171/171 - 650s - loss: 1.1299 - accuracy: 0.6025 - val_loss: 0.6682 - val_accuracy: 0.7708 - 650s/epoch - 4s/step
Epoch 2/20
171/171 - 642s - loss: 0.7313 - accuracy: 0.7553 - val_loss: 0.5731 - val_accuracy: 0.8058 - 642s/epoch - 4s/step
Epoch 3/20
171/171 - 639s - loss: 0.6689 - accuracy: 0.7820 - val_loss: 0.5742 - val_accuracy: 0.8005 - 639s/epoch - 4s/step
Epoch 4/20


KeyboardInterrupt: ignored

# Model evaluation

### plot loss and accuracy

In [None]:
# plot loss
plt.plot(modelku.history['loss'])
plt.plot(modelku.history['val_loss'])
plt.title('Model loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper right')
plt.show()

# plot acc
plt.plot(modelku.history['accuracy'])
plt.plot(modelku.history['val_accuracy'])
plt.title('Model accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='lower right')
plt.show()

### show tensorboard

In [None]:
# Open TensorBoard inside the notebook on logs/fit, the parent folder of each run's log_dir.
%tensorboard --logdir logs/fit

# Save model

In [None]:
# menyimpan model dalam format saved model
export_dir = 'saved_model/'
tf.saved_model.save(model, export_dir)

# convert SavedModel menjadi vegs.tflite
converter = tf.lite.TFLiteConverter.from_saved_model(export_dir)
tflite_model = converter.convert()

tflite_model_file = pathlib.Path('vegs.tflite')
tflite_model_file.write_bytes(tflite_model)