In [None]:
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from inceptionv3_models import create_inceptionv3_model
import os
import wandb
from wandb.keras import WandbCallback

# Loading Data Set
Three sets are created: training, validation, and test. 
- Labels are inferred from the folder structure: each class name corresponds to the name of its subfolder.
- Images are loaded in batches of 32 to reduce memory usage.
- Label mode is set to categorical, which means that the labels are encoded as categorical vectors.

Bilinear interpolation is used by default; this specifies the method used when resizing the images. By default the aspect ratio (i.e., the ratio between image width and height) is not preserved.

One-hot encoding is used when the label mode is set to categorical.


The image load documentation is available [here](https://www.tensorflow.org/api_docs/python/tf/keras/utils/image_dataset_from_directory) and an example is available [here](https://keras.io/api/data_loading/image/).

In [None]:
def _load_split(directory, shuffle):
    """Build a batched image dataset from the images under ``directory``.

    Labels are inferred from the sub-folder names and encoded with the
    'categorical' label mode. Images are resized to 224x224 and delivered
    in batches of 32.

    Args:
        directory: Path of the split's root folder (one sub-folder per class).
        shuffle: Whether the data should be shuffled.

    Returns:
        The dataset object produced by ``keras.utils.image_dataset_from_directory``.
    """
    return keras.utils.image_dataset_from_directory(
        directory=directory,
        labels='inferred',
        label_mode='categorical',
        shuffle=shuffle,
        batch_size=32,
        image_size=(224, 224))


# Training and validation splits are shuffled; the test split keeps its order.
train_ds = _load_split('dataset/train/', shuffle=True)
val_ds = _load_split('dataset/val/', shuffle=True)
test_ds = _load_split('dataset/test/', shuffle=False)

# Visualizing the Data

In [None]:
# Histogram of the class distribution in the training set.
#
# Counts are gathered per class reported by the dataset instead of per raw
# directory entry. That way the chart matches the classes the dataset actually
# uses, and a stray file inside dataset/train (e.g. .DS_Store) can no longer
# make os.listdir fail with NotADirectoryError.
train_dir = "dataset/train"

class_dist = {}
for class_name in train_ds.class_names:
    class_dir = os.path.join(train_dir, class_name)
    # Count regular files only, so a nested folder is not mistaken for an image.
    n_files = sum(
        os.path.isfile(os.path.join(class_dir, entry))
        for entry in os.listdir(class_dir)
    )
    # Underscores are replaced by spaces to get readable tick labels.
    class_dist[class_name.replace('_', ' ')] = n_files

# Plot the histogram using the explicit figure/axes interface.
fig, ax = plt.subplots()
ax.bar(list(class_dist.keys()), list(class_dist.values()),
       color=['red', 'green', 'blue', 'cyan', 'magenta', 'yellow', 'black'])
ax.set_title('Class Distribution')
ax.set_xlabel('Class')
ax.set_ylabel('Frequency')
ax.tick_params(axis='x', labelrotation=90)
plt.show()

In [None]:
# Percentage of each class in the data set.
# The grand total is computed once up front rather than once per printed line.
total_images = sum(class_dist.values())
for label, count in class_dist.items():
    print(f'{label}: {count/total_images*100:.2f}%')

# Inception v3

In [None]:
# Values recorded as the configuration of the Weights & Biases run.
# NOTE(review): "name" is stored as a config entry here, not passed to
# wandb.init as the run name -- confirm that this is intended.
# NOTE(review): learning_rate is not handed to create_inceptionv3_model in
# this cell; verify where (or whether) it is applied.
run_config = {
    "learning_rate": 0.001,
    "epochs": 30,
    "verbose": 1,
    "name": "Inception_v3",
}

# Start the run.
wandb.init(project="Inception_v3", config=run_config)

# Handle used by later cells to read the configuration values.
cfg = wandb.config

# Build the model, passing the number of classes found in the training set.
model = create_inceptionv3_model(len(train_ds.class_names))

In [None]:
# Callbacks used during training; every one of them watches the validation loss.
#
# The early-stopping patience is deliberately longer than the LR-reduction
# patience. With both set to 5, training could be halted on the very epoch the
# learning rate was lowered, so the reduced rate never got a chance to help.
LR_PATIENCE = 5
EARLY_STOP_PATIENCE = 2 * LR_PATIENCE

callbacks = [
    # Logs to Weights & Biases and saves the model (save_model=True).
    WandbCallback(mode="min", monitor="val_loss", save_model=True),
    # Multiply the learning rate by 0.1 after LR_PATIENCE epochs without
    # improvement, never going below 0.0001.
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1,
                                         patience=LR_PATIENCE, min_lr=0.0001),
    # Stop once the validation loss has stalled for EARLY_STOP_PATIENCE epochs
    # and keep the weights of the best epoch instead of those of the last one.
    tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                     patience=EARLY_STOP_PATIENCE,
                                     restore_best_weights=True),
]

In [None]:
# Train the model.
#
# batch_size is intentionally NOT passed: train_ds and val_ds are created
# already batched (batch_size=32 at load time), and the Keras documentation
# says not to specify batch_size when the input is a dataset.
# The returned History object is kept so the training curves can be inspected
# afterwards, and the verbosity declared in the run config is actually applied.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=cfg.epochs,
    verbose=cfg.verbose,
    callbacks=callbacks,
)

In [None]:
# Close the Weights & Biases run that was opened with wandb.init above.
wandb.finish()