# Training and Validation Script

## Import Library

In [3]:
import os
import re
import zipfile
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
import matplotlib.pyplot as plt
import gdown
import zipfile

from tensorflow.keras.applications.inception_v3 import InceptionV3

from tensorflow.keras import layers
from tensorflow.keras import optimizers
from tensorflow.keras.utils import load_img, img_to_array
from tensorflow.keras.preprocessing.image import ImageDataGenerator

## Import Dataset (Need to be automated and consistent)

In [4]:
url = "https://drive.google.com/file/d/1tYM-45FgvabZgCORbWcsmAHDQ4s10Eoi/view?usp=drive_link"
output = "dataset.zip"
gdown.download(url, output, quiet=False, fuzzy=True)

Downloading...
From: https://drive.google.com/uc?id=1tYM-45FgvabZgCORbWcsmAHDQ4s10Eoi
To: /content/dataset.zip
100%|██████████| 9.15M/9.15M [00:00<00:00, 28.0MB/s]


'dataset.zip'

In [5]:
# download zip file dataset from https://drive.google.com/drive/folders/1nwR-wo-_9mQtqkVJd3Grhlw6YGPX4EKP?usp=share_link
filenames = os.listdir()

for file in filenames:
  dataset = re.search(r'^dataset.*\.zip$', file)

  if dataset:
    zip_path = f'./{file}'
    zip_ref = zipfile.ZipFile(zip_path, 'r')
    zip_ref.extractall(path='./')
    zip_ref.close()

## ETL (Extract Transform Load)

In [6]:
# Extract (Specifying path)
base_dir = './dataset/'

train_dir = os.path.join(base_dir, 'train')
test_dir = os.path.join(base_dir, 'test')

In [16]:
# Extract (Generating dataset from directory)
BATCH_SIZE = 32
IMAGE_SIZE = (255, 255)
VAL_SPLIT = 0.2

train_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    # labels='inferred',
    # label_mode='binary',
    # class_names=None,
    # batch_size=BATCH_SIZE,
    # image_size=IMAGE_SIZE,
    validation_split=VAL_SPLIT,
    subset='training',
    # shuffle=True,
    seed=123
)

val_ds = tf.keras.utils.image_dataset_from_directory(
    train_dir,
    # labels='inferred',
    # label_mode='binary',
    # class_names=None,
    # batch_size=BATCH_SIZE,
    # image_size=IMAGE_SIZE,
    validation_split=VAL_SPLIT,
    subset='validation',
    # shuffle=True,
    seed=123
)

test_ds = tf.keras.utils.image_dataset_from_directory(
    test_dir,
    # labels='inferred',
    # label_mode='binary',
    # class_names=['caries', 'no-caries'],
    # batch_size=BATCH_SIZE,
    # image_size=IMAGE_SIZE,
    # shuffle=True,
)

Found 500 files belonging to 2 classes.
Using 400 files for training.
Found 500 files belonging to 2 classes.
Using 100 files for validation.
Found 32 files belonging to 2 classes.


In [17]:
# Check batches' shape
for image_batch, labels_batch in train_ds:
  print(image_batch.shape)
  print(labels_batch.shape)
  break

(32, 256, 256, 3)
(32,)


### Preprocessing Images using keras preprocessing layers

In [25]:
# Transform (Preprocessing data)

# optimize data I/O
AUTOTUNE = tf.data.AUTOTUNE

IMG_SIZE = 255

image = next(iter(train_ds))

resize_and_rescale = tf.keras.Sequential([
  layers.Resizing(IMG_SIZE, IMG_SIZE),
  layers.Rescaling(1./255)
])

data_augmentation = tf.keras.Sequential([
  layers.RandomFlip("horizontal",
                    input_shape=(IMG_SIZE,
                                 IMG_SIZE,
                                 3)),
  layers.RandomRotation(0.1),
])

def prepare(ds, shuffle=False, augment=False):
  # Resize and rescale all datasets.
  ds = ds.map(lambda x, y: (resize_and_rescale(x), y), 
              num_parallel_calls=AUTOTUNE)

  if shuffle:
    ds = ds.shuffle(1000)

  # Use data augmentation only on the training set.
  if augment:
    ds = ds.map(lambda x, y: (data_augmentation(x), y), 
                num_parallel_calls=AUTOTUNE)

  # Use buffered prefetching on all datasets.
  return ds.prefetch(buffer_size=AUTOTUNE)


train_ds = train_ds.cache().prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

In [26]:
# Load (Apply preprocessing and load data)

train_ds = prepare(train_ds, shuffle=True, augment=True)
val_ds = prepare(val_ds)
test_ds = prepare(test_ds)

In [None]:
# experimental ImageDatagenerator for training
train_datagen = ImageDataGenerator(
        rescale=1./255,
        # rotation_range=40,
        # width_shift_range=0.2,
        # height_shift_range=0.2,
        # shear_range=0.2,
        # zoom_range=0.2,
        # horizontal_flip=True,
        # fill_mode='nearest'
)
test_datagen = ImageDataGenerator(rescale=1./255)

train_generator = train_datagen.flow_from_directory(
        train_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')

test_generator = test_datagen.flow_from_directory(
        test_dir,
        target_size=(150, 150),
        batch_size=20,
        class_mode='binary')

### Building Model using Transfer Learning

In [None]:
# transfer learning not final (maybe can use less layer?)
# model_selection = ("mobilenet_v2", 224, 1280) 
# handle_base, pixels, FV_SIZE = model_selection
# IMAGE_SIZE = (pixels, pixels)

# MODULE_HANDLE ="https://tfhub.dev/google/tf2-preview/{}/feature_vector/4".format(handle_base)
# feature_extractor = hub.KerasLayer(MODULE_HANDLE, input_shape=IMAGE_SIZE + (3,))
# feature_extractor.trainable = False



In [27]:
# Inception V3 model
pre_trained_model = InceptionV3(input_shape = (255, 255, 3), 
                                include_top = False, 
                                weights = 'imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5


In [28]:
for layer in pre_trained_model.layers:
    layer.trainable = False

In [None]:
# # not final
# model = tf.keras.Sequential([
#   feature_extractor,
#   tf.keras.layers.Dense(1, activation='sigmoid') # sigmoid/softmax
# ])

# model.summary()

In [29]:
x = layers.Flatten()(pre_trained_model.output)
# x = layers.Rescaling(1./255)(x)
x = layers.Dense(1024, activation="relu")(x)
x = layers.Dropout(0.2)(x)
x = layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(pre_trained_model.input, x)

In [30]:
model.compile(
  optimizer = optimizers.Adam(learning_rate=0.0001), 
  loss = 'binary_crossentropy', 
  metrics = ['accuracy']
)

## Model Training

In [32]:
history = model.fit(
  train_ds,
  validation_data = val_ds,
  steps_per_epoch = 100,
  epochs = 5,
  validation_steps = 50,
  verbose = 2)

Epoch 1/5




100/100 - 85s - loss: 1.0451 - accuracy: 0.8475 - val_loss: 0.1467 - val_accuracy: 0.9600 - 85s/epoch - 845ms/step


## Model Evaluation

In [None]:
import matplotlib.pyplot as plt
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc=0)
plt.figure()

plt.show()

In [None]:
# change filename to file you want to predict
filename = '.jpeg'
img = load_img(filename, target_size=(150, 150))
x = img_to_array(img)
x = np.expand_dims(x, axis=0)

images = np.vstack([x])
classes = model.predict(images, batch_size=10)
print(classes)

## TODO
Create save model function