# Training and Validation Script

### Import Libraries

In [None]:
import os
import re
import zipfile
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub

from tensorflow.keras import optimizers
from tensorflow.keras.utils import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator

### Import Dataset

In [None]:
# Download the dataset zip file from
# https://drive.google.com/drive/folders/1nwR-wo-_9mQtqkVJd3Grhlw6YGPX4EKP?usp=share_link
# and place it in the current working directory before running this cell.
#
# Every file in the working directory whose name starts with "dataset" and
# ends with ".zip" is extracted into the working directory.
filenames = os.listdir()

for file in filenames:
    dataset = re.search(r'^dataset.*\.zip$', file)

    if dataset:
        zip_path = f'./{file}'
        # The context manager closes the archive even if extraction fails,
        # so the file handle is never leaked.
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(path='./')

In [None]:
# Root of the extracted dataset; it holds one sub-directory per split, and
# each split holds one sub-directory per class.
base_dir = './dataset/'

# Split directories.
train_dir, test_dir = (
    os.path.join(base_dir, split) for split in ('train', 'test')
)

# Per-class directories inside the training split.
train_caries_dir, train_nocaries_dir = (
    os.path.join(train_dir, label) for label in ('caries', 'no-caries')
)

# Per-class directories inside the test split.
test_caries_dir, test_nocaries_dir = (
    os.path.join(test_dir, label) for label in ('caries', 'no-caries')
)

In [None]:
# Report how many files each class directory contains, training split first.
image_count_report = (
    ('total training caries images :', train_caries_dir),
    ('total training no caries images :', train_nocaries_dir),
    ('total test caries images :', test_caries_dir),
    ('total test no caries images :', test_nocaries_dir),
)

for caption, image_dir in image_count_report:
    print(caption, len(os.listdir(image_dir)))

In [None]:
%matplotlib inline

import matplotlib.image as mpimg
import matplotlib.pyplot as plt


# Layout of the preview grid drawn by the next cell (4 x 4 = 16 images).
nrows, ncols = 4, 4

# Offset into the file-name lists; the next cell advances it on every run so
# that each run shows a new batch of images.
pic_index = 0

# File names available for preview, one list per class.
train_caries_fnames = os.listdir(train_caries_dir)
train_nocaries_fnames = os.listdir(train_nocaries_dir)

In [None]:
# Advance to the next batch of 8 images per class; re-running this cell
# shows the following batch.
pic_index += 8
batch = slice(pic_index - 8, pic_index)

next_caries_pix = [
    os.path.join(train_caries_dir, fname)
    for fname in train_caries_fnames[batch]
]
next_nocaries_pix = [
    os.path.join(train_nocaries_dir, fname)
    for fname in train_nocaries_fnames[batch]
]

# Size the current figure so that every grid cell is 4 x 4 inches.
fig = plt.gcf()
fig.set_size_inches(ncols * 4, nrows * 4)

# Caries images fill the first cells of the grid, no-caries images the rest.
# Subplot positions are 1-based.
for position, img_path in enumerate(next_caries_pix + next_nocaries_pix, start=1):
    sp = plt.subplot(nrows, ncols, position)
    sp.axis('Off')  # hide axes and gridlines
    sp.imshow(mpimg.imread(img_path))

plt.show()

### Building Model using Transfer Learning

In [None]:
# Transfer learning: reuse a pre-trained TF Hub feature-vector module as a
# frozen base for the classifier.
# Tuple layout: (module name, input edge length in pixels, feature size).
model_selection = ("mobilenet_v2", 224, 1280)
handle_base = model_selection[0]
pixels = model_selection[1]
FV_SIZE = model_selection[2]

# Square input resolution declared for the feature extractor.
IMAGE_SIZE = (pixels, pixels)

MODULE_HANDLE = "https://tfhub.dev/google/tf2-preview/{}/feature_vector/4".format(handle_base)

# Three colour channels are appended to the spatial size.
feature_extractor = hub.KerasLayer(
    MODULE_HANDLE,
    input_shape=(IMAGE_SIZE[0], IMAGE_SIZE[1], 3),
)
# Freeze the pre-trained weights; only layers added on top are trained.
feature_extractor.trainable = False

In [None]:
# Binary classifier: frozen feature extractor followed by a single sigmoid
# unit that outputs one probability per image.
#
# The head must have exactly one unit because the model is compiled with
# 'binary_crossentropy' and the generators use class_mode='binary', which
# yields one 0/1 label per image. A two-unit head does not match that label
# shape. (A two-unit softmax head would instead require a categorical loss
# and class_mode='categorical' or 'sparse'.)
model = tf.keras.Sequential([
    feature_extractor,
    tf.keras.layers.Dense(1, activation='sigmoid'),
])

model.summary()

In [None]:
# Configure training: RMSprop with a small learning rate, binary
# cross-entropy as the loss, and accuracy as the reported metric.
rmsprop = optimizers.RMSprop(learning_rate=0.0001)

model.compile(optimizer=rmsprop, loss='binary_crossentropy', metrics=['accuracy'])

### Preprocessing Images Using ImageDataGenerator

In [None]:
# Image generators for training and testing.
#
# Both generators rescale pixel values from [0, 255] to [0, 1]. The
# commented-out arguments are optional augmentation settings that can be
# enabled for the training generator when experimenting.
train_datagen = ImageDataGenerator(
        rescale=1./255,
        # rotation_range=40,
        # width_shift_range=0.2,
        # height_shift_range=0.2,
        # shear_range=0.2,
        # zoom_range=0.2,
        # horizontal_flip=True,
        # fill_mode='nearest'
)
test_datagen = ImageDataGenerator(rescale=1./255)

# Images are resized to IMAGE_SIZE because the feature extractor was built
# with input_shape=IMAGE_SIZE + (3,); any other target size does not match
# the model input.
# class_mode='binary' yields one 0/1 label per image.
train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=IMAGE_SIZE,
        batch_size=20,
        class_mode='binary')

test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=IMAGE_SIZE,
        batch_size=20,
        class_mode='binary')

### Model Training

In [None]:
# Train the model and validate on the test split after every epoch.
#
# The step counts are taken from the generators (number of batches per pass
# over the data) instead of being hard-coded, so every epoch covers each
# split exactly once whatever the dataset size. Fixed step counts larger
# than the number of available batches make Keras run out of data.
history = model.fit(
    train_generator,
    validation_data=test_generator,
    steps_per_epoch=len(train_generator),
    epochs=20,
    validation_steps=len(test_generator),
    verbose=2,
)

### Evaluation

In [None]:
import matplotlib.pyplot as plt
# Per-epoch metrics recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# First figure: training vs. validation accuracy.
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc=0)

# Second figure: training vs. validation loss. Without these plots the new
# figure would be shown empty and the loss values would go unused.
plt.figure()
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend(loc=0)

plt.show()

In [None]:
# Predict the class of a single image.
# Change filename to the path of the image you want to classify.
filename = '.jpeg'

# Resize to IMAGE_SIZE, the input size the feature extractor was built with.
img = load_img(filename, target_size=IMAGE_SIZE)

# Apply the same 1/255 rescaling the generators apply during training;
# feeding raw 0-255 pixel values would not match what the model was trained on.
x = img_to_array(img) / 255.0

# The model expects a batch, so add a leading batch axis of size 1.
images = np.expand_dims(x, axis=0)

classes = model.predict(images)
print(classes)