In [None]:
!pip install wandb



In [None]:
# using the wandb
import wandb
from wandb.keras import WandbCallback

# Step 0: Init the AutoML program

Run the cell to import necessary modules and training code.

In [None]:
import numpy as np
import os
import contextlib
import tensorflow as tf
import tempfile
import shutil
import zipfile
from PIL import Image
import numpy as np
import zipfile


@contextlib.contextmanager
def _tempdir():
    dirpath = tempfile.mkdtemp()
    yield dirpath
    shutil.rmtree(dirpath)


class ImageClassifier:
    """Image classifier that fine-tunes an ImageNet-pretrained MobileNetV2.

    Attributes are populated by ``train()`` or ``load()``:
        model: the Keras model, or None before training/loading.
        image_size: (height, width) the model expects, or None.
        class_names: list of class labels indexed by the model's output, or None.
    """

    # Size used when the input size cannot be read from a loaded model.
    _DEFAULT_IMAGE_SIZE = (160, 160)

    def __init__(self):
        self.model = None
        self.image_size = None
        self.class_names = None

    def _require_model(self):
        """Raise RuntimeError when no model has been trained or loaded yet."""
        if self.model is None:
            raise RuntimeError(
                'No model available: call train() or load() first.')

    @staticmethod
    def _input_image_size(model, default=(160, 160)):
        """Return the (height, width) of ``model``'s image input.

        Reads ``model.input_shape`` (expected as ``(batch, height, width, channels)``)
        and falls back to ``default`` when the shape is missing, not of that
        form, or has unknown spatial dimensions.
        """
        try:
            height, width = model.input_shape[1:3]
            return (int(height), int(width))
        except (AttributeError, TypeError, ValueError, IndexError):
            return tuple(default)

    def train(self, dataset_dir, batch_size=32, epochs=1, image_size=(160, 160), learning_rate=0.0001):
        """Fine-tune MobileNetV2 on the images found under ``dataset_dir``.

        Args:
            dataset_dir: directory with one sub-directory per class.
            batch_size: number of images per batch.
            epochs: number of passes over the training split.
            image_size: (height, width) images are resized to.
            learning_rate: learning rate for the Adam optimizer.

        Returns:
            The history object returned by ``model.fit``.
        """
        # Accept lists as well as tuples; tuple concatenation is used below.
        image_size = tuple(image_size)

        # Step1: Data prep
        # Both subsets share seed and split ratio so they do not overlap.
        split_options = dict(seed=1337,
                             validation_split=0.2,
                             batch_size=batch_size,
                             image_size=image_size)
        train_dataset = tf.keras.utils.image_dataset_from_directory(
            dataset_dir, subset='training', **split_options)
        validation_dataset = tf.keras.utils.image_dataset_from_directory(
            dataset_dir, subset='validation', **split_options)
        class_names = train_dataset.class_names

        print('Number of trian batches: %d' %
              tf.data.experimental.cardinality(train_dataset))
        print('Number of validation batches: %d' %
              tf.data.experimental.cardinality(validation_dataset))
        train_dataset = train_dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
        validation_dataset = validation_dataset.prefetch(
            buffer_size=tf.data.AUTOTUNE)

        # Step2: Model Architecture

        # Data augmentation layers are intentionally skipped because of
        # https://stackoverflow.com/questions/69955838/saving-model-on-tensorflow-2-7-0-with-data-augmentation-layer
        preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input
        image_shape = image_size + (3,)
        base_model = tf.keras.applications.MobileNetV2(input_shape=image_shape,
                                                       include_top=False,
                                                       weights='imagenet')
        # Freeze the first `fine_tune_at` backbone layers; the rest are fine-tuned.
        base_model.trainable = True
        fine_tune_at = 100
        for layer in base_model.layers[:fine_tune_at]:
            layer.trainable = False

        inputs = tf.keras.Input(shape=image_shape)
        x = preprocess_input(inputs)
        x = base_model(x, training=False)
        x = tf.keras.layers.GlobalAveragePooling2D()(x)
        x = tf.keras.layers.Dropout(0.2)(x)
        x = tf.keras.layers.Dense(len(class_names))(x)
        # The model outputs probabilities, hence from_logits=False in the loss.
        outputs = tf.nn.softmax(x)
        model = tf.keras.Model(inputs, outputs)
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
                      loss=tf.keras.losses.SparseCategoricalCrossentropy(
                          from_logits=False),
                      metrics=['accuracy'])
        model.summary()

        # Step 3: Start training
        history = model.fit(train_dataset,
                            epochs=epochs,
                            validation_data=validation_dataset,
                            callbacks=[WandbCallback()])

        # Step 4: Evaluation
        loss, accuracy = model.evaluate(validation_dataset)
        print('Test accuracy :', accuracy)

        # Show predictions next to the labels for one validation batch.
        image_batch, label_batch = next(validation_dataset.as_numpy_iterator())
        predictions = model.predict_on_batch(image_batch)
        # The model already outputs probabilities; argmax picks the class index.
        predictions = tf.math.argmax(predictions, axis=-1)
        print('Predictions:\n', predictions.numpy())
        print('Labels:\n', label_batch)

        self.model = model
        self.class_names = class_names
        # Remember the size actually used for training so predict() matches it.
        self.image_size = image_size

        return history

    def save(self, modelfile):
        """Write the model and its class names into the zip archive ``modelfile``.

        Raises:
            RuntimeError: if no model has been trained or loaded.
        """
        self._require_model()
        with _tempdir() as modelpath:
            self.model.save(modelpath)
            with open(f'{modelpath}/class_names.txt', 'w') as f:
                for class_name in self.class_names:
                    print(class_name, file=f)

            with zipfile.ZipFile(modelfile, 'w', zipfile.ZIP_DEFLATED) as zf:
                for dirname, subdirs, files in os.walk(modelpath):
                    # Path of this directory relative to the saved-model root.
                    arc_dirname = dirname[len(modelpath):]
                    print(f'dir : {arc_dirname}/')
                    zf.write(dirname, arc_dirname)
                    for filename in files:
                        print(f'file: {arc_dirname}/{filename}')
                        zf.write(os.path.join(dirname, filename),
                                 os.path.join(arc_dirname, filename))

    def load(self, modelfile):
        """Restore the model and class names from an archive written by ``save``.

        The expected image size is read from the loaded model's input shape,
        falling back to 160x160 when it cannot be determined.
        """
        with _tempdir() as dirpath:
            with zipfile.ZipFile(modelfile, 'r') as zip_ref:
                zip_ref.extractall(dirpath)
            model = tf.keras.models.load_model(dirpath)
            with open(f"{dirpath}/class_names.txt") as f:
                class_names = [line.strip() for line in f]

        self.image_size = self._input_image_size(
            model, self._DEFAULT_IMAGE_SIZE)
        self.class_names = class_names
        self.model = model

    def predict(self, img_path):
        """Classify the image file at ``img_path``.

        Returns:
            Tuple ``(class_name, scores)`` as returned by ``predict_img``.
        """
        with Image.open(img_path) as image:
            return self.predict_img(image)

    def predict_img(self, image):
        """Classify an already opened PIL image.

        Returns:
            Tuple ``(class_name, scores)`` where ``scores`` is the model output
            for the image as a NumPy array.

        Raises:
            RuntimeError: if no model has been trained or loaded.
        """
        self._require_model()
        # image_size is (height, width) as used by Keras; PIL wants (width, height).
        height, width = self.image_size
        image = image.resize((width, height)).convert("RGB")
        x = tf.keras.preprocessing.image.img_to_array(image)
        x = tf.expand_dims(x, 0)
        result = tf.squeeze(self.model(x))
        cls_idx = int(tf.math.argmax(result, axis=-1))
        return (self.class_names[cls_idx], result.numpy())


# Step 1: Prepare the dataset

The dataset should be a 'zip' file with the following format.

```
flowers_photos/
  daisy/
  dandelion/
  roses/
  sunflowers/
  tulips/
```  

For details, see the TensorFlow documentation on loading images:
https://www.tensorflow.org/tutorials/load_data/images


There are two ways to prepare the dataset:

1. Download the example dataset
1. Use your own dataset

After Step 1, the variable `dataset_dir` should be set to the directory of your dataset.

## Download the example dataset

If you would like to have a quick try, you can use the example dataset.

In [None]:
# _DATASET_URL = 'https://storage.googleapis.com/infuseai-auto-img-cls/datasets/noodles_v1.2.zip'

# !wget $_DATASET_URL

# import os
# import tempfile

# basename = os.path.basename(_DATASET_URL)

# with zipfile.ZipFile(basename, 'r') as zip_ref:
#     zip_ref.extractall(".")

# base, ext = os.path.splitext(basename)
# dataset_dir = f"{base}"


# print(f"dataset dir: {dataset_dir}")




# Step 2: Train

Now that we have a dataset, we can start training a model from it.

Most of the time, this step is not a single run: we adjust the parameters (also called hyperparameters) over several runs to get the best result.

There are three parameters we can adjust:
1. `EPOCHS`: how many times training goes through the whole dataset
2. `BATCH_SIZE`: how many samples are processed before each weight update
3. `LEARNING_RATE`: how large a step is taken when updating the weights for each batch

In the training process, we split the dataset into two parts:
- Training set (80%): used to train the model and update the weights
- Validation set (20%): used to validate the model

The goal for each experiment is to get the best accuracy in the validation dataset.

In [None]:

import wandb

# Open a W&B run so the metrics reported by WandbCallback during training are tracked.
wandb.init(project="toy-project", name='auto-img-cls', job_type='training')
# Variant that additionally assigns the run to a group:
# wandb.init(project="toy-project", name='auto-img-cls', job_type='training', group='group-1')

# Fetch the dataset artifact and use the downloaded location as the dataset directory.
artifact = wandb.use_artifact('qrtt1/toy-project-dataset2/noodles:v0', type='dataset')
dataset_dir = artifact.download()

EPOCHS = 3
BATCH_SIZE = 25
# NOTE(review): the recorded output of this cell shows chance-level validation
# accuracy with every prediction in one class; a smaller learning rate (the
# ImageClassifier.train default is 0.0001) may be worth trying.
LEARNING_RATE = 0.0025

# Record the hyperparameters of this run ("lr" mirrors "learning_rate").
wandb.config.update({
  "epochs": EPOCHS,
  "batch_size": BATCH_SIZE,
  "learning_rate": LEARNING_RATE,
  "lr": LEARNING_RATE,
})

classifier = ImageClassifier()
history = classifier.train(dataset_dir, epochs=EPOCHS, batch_size=BATCH_SIZE, learning_rate=LEARNING_RATE)





[34m[1mwandb[0m: Downloading large artifact noodles:v0, 119.85MB. 304 files... Done. 0:0:0


Found 300 files belonging to 3 classes.
Using 240 files for training.
Found 300 files belonging to 3 classes.
Using 60 files for validation.
Number of trian batches: 10
Number of validation batches: 3
Model: "model_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 160, 160, 3)]     0         
                                                                 
 tf.math.truediv_2 (TFOpLamb  (None, 160, 160, 3)      0         
 da)                                                             
                                                                 
 tf.math.subtract_2 (TFOpLam  (None, 160, 160, 3)      0         
 bda)                                                            
                                                                 
 mobilenetv2_1.00_160 (Funct  (None, 5, 5, 1280)       2257984   
 ional)                                                 

  layer_config = serialize_layer_fn(layer)


Epoch 2/3
Epoch 3/3
Test accuracy : 0.38333332538604736
Predictions:
 [2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
Labels:
 [1 1 2 1 0 1 1 1 2 2 1 1 0 0 2 0 2 2 2 0 0 2 2 0 0]


In [None]:
import wandb

# Project that both the sweep and its runs are logged to. Without passing it to
# wandb.sweep the sweep is created in the default "uncategorized" project.
SWEEP_PROJECT = 'tuning-for-toy-project'

# Random search over the number of epochs and the learning rate.
sweep_config = {
  "name" : "my-sweep",
  "method" : "random",
  "parameters" : {
    "epochs" : {
      "values" : [5, 10, 20]
    },
    "learning_rate" :{
      "min": 0.0001,
      "max": 0.1
    }
  }
}

sweep_id = wandb.sweep(sweep_config, project=SWEEP_PROJECT)

classifier = ImageClassifier()

def train():
    """Run one sweep trial using the hyperparameters chosen by the sweep.

    Reads ``epochs`` and ``learning_rate`` from ``wandb.config`` and trains the
    shared ``classifier`` on ``dataset_dir`` with a fixed batch size of 25.
    """
    with wandb.init(project=SWEEP_PROJECT):
        config = wandb.config
        classifier.train(dataset_dir,
                         epochs=config['epochs'],
                         batch_size=25,
                         learning_rate=config['learning_rate'])

count = 5 # number of runs to execute
wandb.agent(sweep_id, function=train, project=SWEEP_PROJECT, count=count)

Create sweep with ID: do63fokz
Sweep URL: https://wandb.ai/qrtt1/uncategorized/sweeps/do63fokz


[34m[1mwandb[0m: Agent Starting Run: 7ysa3c6s with config:
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.07972940326716206


Found 300 files belonging to 3 classes.
Using 240 files for training.
Found 300 files belonging to 3 classes.
Using 60 files for validation.
Number of trian batches: 10
Number of validation batches: 3
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 160, 160, 3)]     0         
                                                                 
 tf.math.truediv_5 (TFOpLamb  (None, 160, 160, 3)      0         
 da)                                                             
                                                                 
 tf.math.subtract_5 (TFOpLam  (None, 160, 160, 3)      0         
 bda)                                                            
                                                                 
 mobilenetv2_1.00_160 (Funct  (None, 5, 5, 1280)       2257984   
 ional)                                                   

  layer_config = serialize_layer_fn(layer)


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test accuracy : 0.30000001192092896
Predictions:
 [1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Labels:
 [1 0 0 1 2 0 2 2 2 2 2 1 1 1 2 2 0 1 1 0 1 2 2 2 1]


VBox(children=(Label(value=' 0.00MB of 0.00MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▆▄▆▁█
epoch,▁▃▅▆█
loss,▆█▂▁▁
val_accuracy,▂█▁▁▁
val_loss,█▄▂▁▁

0,1
accuracy,0.34583
best_epoch,4.0
best_val_loss,26.10916
epoch,4.0
loss,43.83113
val_accuracy,0.3
val_loss,26.10916


[34m[1mwandb[0m: Agent Starting Run: wnwxiyq3 with config:
[34m[1mwandb[0m: 	epochs: 20
[34m[1mwandb[0m: 	learning_rate: 0.08415230039430407


Found 300 files belonging to 3 classes.
Using 240 files for training.
Found 300 files belonging to 3 classes.
Using 60 files for validation.
Number of trian batches: 10
Number of validation batches: 3
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 160, 160, 3)]     0         
                                                                 
 tf.math.truediv_6 (TFOpLamb  (None, 160, 160, 3)      0         
 da)                                                             
                                                                 
 tf.math.subtract_6 (TFOpLam  (None, 160, 160, 3)      0         
 bda)                                                            
                                                                 
 mobilenetv2_1.00_160 (Funct  (None, 5, 5, 1280)       2257984   
 ional)                                                   

VBox(children=(Label(value=' 16.39MB of 16.39MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

0,1
accuracy,▄▅▅▂▅▆▆▁█▂▄▆▄▁▁█▃█▃▇
epoch,▁▁▂▂▂▃▃▄▄▄▅▅▅▆▆▇▇▇██
loss,▇█▅▄▂▁▃▃▁▂▂▂▁▂▂▂▁▁▂▁
val_accuracy,▂▂▂██▂█▁▂▁▂█▁▁▂██▁█▂
val_loss,█▇▇▄▂▄▂▄▂▃▂▁▂▃▄▂▂▁▂▂

0,1
accuracy,0.35417
best_epoch,17.0
best_val_loss,3.44474
epoch,19.0
loss,22.83447
val_accuracy,0.31667
val_loss,25.1992


[34m[1mwandb[0m: Agent Starting Run: w8wff6wk with config:
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	learning_rate: 0.004980061012178363


Found 300 files belonging to 3 classes.
Using 240 files for training.
Found 300 files belonging to 3 classes.
Using 60 files for validation.
Number of trian batches: 10
Number of validation batches: 3
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 160, 160, 3)]     0         
                                                                 
 tf.math.truediv_7 (TFOpLamb  (None, 160, 160, 3)      0         
 da)                                                             
                                                                 
 tf.math.subtract_7 (TFOpLam  (None, 160, 160, 3)      0         
 bda)                                                            
                                                                 
 mobilenetv2_1.00_160 (Funct  (None, 5, 5, 1280)       2257984   
 ional)                                                   

VBox(children=(Label(value=' 0.11MB of 0.11MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

0,1
accuracy,▁▁▁▂▃██▄▅▃
epoch,▁▂▃▃▄▅▆▆▇█
loss,█▁▁▁▁▁▁▁▁▁
val_accuracy,▁█▁▂▁▁▂▂▂▁
val_loss,▃▁▁▁▃▃▂▂▃█

0,1
accuracy,0.3125
best_epoch,1.0
best_val_loss,1.14939
epoch,9.0
loss,2.49223
val_accuracy,0.3
val_loss,4.00154


[34m[1mwandb[0m: Agent Starting Run: i2d7kqgu with config:
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.053317799987105806


Found 300 files belonging to 3 classes.
Using 240 files for training.
Found 300 files belonging to 3 classes.
Using 60 files for validation.
Number of trian batches: 10
Number of validation batches: 3
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 160, 160, 3)]     0         
                                                                 
 tf.math.truediv_8 (TFOpLamb  (None, 160, 160, 3)      0         
 da)                                                             
                                                                 
 tf.math.subtract_8 (TFOpLam  (None, 160, 160, 3)      0         
 bda)                                                            
                                                                 
 mobilenetv2_1.00_160 (Funct  (None, 5, 5, 1280)       2257984   
 ional)                                                   

VBox(children=(Label(value=' 23.29MB of 23.29MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

0,1
accuracy,▆▅▂█▁
epoch,▁▃▅▆█
loss,█▃▅▁▄
val_accuracy,▂▁█▁█
val_loss,▃█▁▂▃

0,1
accuracy,0.25833
best_epoch,2.0
best_val_loss,10.03741
epoch,4.0
loss,40.06866
val_accuracy,0.38333
val_loss,20.86859


[34m[1mwandb[0m: Agent Starting Run: 6xtlyxxm with config:
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	learning_rate: 0.021540870433390626


Found 300 files belonging to 3 classes.
Using 240 files for training.
Found 300 files belonging to 3 classes.
Using 60 files for validation.
Number of trian batches: 10
Number of validation batches: 3
Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 160, 160, 3)]     0         
                                                                 
 tf.math.truediv_9 (TFOpLamb  (None, 160, 160, 3)      0         
 da)                                                             
                                                                 
 tf.math.subtract_9 (TFOpLam  (None, 160, 160, 3)      0         
 bda)                                                            
                                                                 
 mobilenetv2_1.00_160 (Funct  (None, 5, 5, 1280)       2257984   
 ional)                                                   

VBox(children=(Label(value=' 23.29MB of 23.29MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

0,1
accuracy,▂▄█▂▁
epoch,▁▃▅▆█
loss,█▆▃▂▁
val_accuracy,████▁
val_loss,█▄▁▁▁

0,1
accuracy,0.30417
best_epoch,4.0
best_val_loss,8.11717
epoch,4.0
loss,14.21688
val_accuracy,0.3
val_loss,8.11717


# Step 3: Predict

Use an image to test the model

In [None]:
# !wandb agent qrtt1/new-project/hqztda7p

# Step 4: Save and Load

After several experiments, we can pick the best model and save it.

## Save the model

In [None]:
# Name of the zip archive the classifier is exported to.
_MODEL_FILE = "model.zip"

# Export the current classifier, then log the archive to W&B as an artifact
# with name 'model' and type 'model'.
classifier.save(_MODEL_FILE)
wandb.log_artifact(_MODEL_FILE, name='model', type='model')

INFO:tensorflow:Assets written to: /tmp/tmpzpa9qnao/assets


  layer_config = serialize_layer_fn(layer)
  return generic_utils.serialize_keras_object(obj)


dir : /
file: /class_names.txt
file: /keras_metadata.pb
file: /saved_model.pb
dir : /assets/
dir : /variables/
file: /variables/variables.index
file: /variables/variables.data-00000-of-00001


<wandb.sdk.wandb_artifacts.Artifact at 0x7fb64d898e90>

In [None]:
# End the active W&B run.
wandb.finish()

VBox(children=(Label(value=' 45.03MB of 45.03MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.…

0,1
accuracy,▂█▁
epoch,▁▅█
loss,█▁▁
val_accuracy,█▁█
val_loss,▆█▁

0,1
accuracy,0.28333
best_epoch,2.0
best_val_loss,1.1126
epoch,2.0
loss,1.81057
val_accuracy,0.38333
val_loss,1.1126
