In [1]:
import tensorflow as tf
import os
import argparse
from tensorflow.python.keras.callbacks import Callback



class MyFashionMnist(object):
    """Train a small dense classifier on Fashion-MNIST from CLI hyperparameters."""

    def train(self):
        """Parse hyperparameters, build the model, train it and evaluate it.

        Command-line options (all optional):
            --learning_rate  Adam learning rate (default 0.001).
            --dropout_rate   Dropout rate after each hidden layer (default 0.2).
            --epoch          Number of training epochs (default 5).
            --act            Activation of the hidden Dense layers (default 'relu').
            --layer          Number of hidden Dense+Dropout pairs (default 1).

        Returns:
            The value returned by ``model.evaluate`` on the test split
            (with the metrics compiled below: loss followed by accuracy).
        """
        # Accept hyperparameters from the command line.
        parser = argparse.ArgumentParser()
        parser.add_argument('--learning_rate', required=False, type=float, default=0.001)
        parser.add_argument('--dropout_rate', required=False, type=float, default=0.2)
        # epoch 5 ~ 15
        parser.add_argument('--epoch', required=False, type=int, default=5)
        # relu, sigmoid, softmax, tanh
        parser.add_argument('--act', required=False, type=str, default='relu')
        # layer 1 ~ 5
        parser.add_argument('--layer', required=False, type=int, default=1)

        args = parser.parse_args()

        (x_train, y_train), (x_test, y_test) = tf.keras.datasets.fashion_mnist.load_data()
        # Scale pixel values by 1/255 before feeding them to the network.
        x_train, x_test = x_train / 255.0, x_test / 255.0

        model = tf.keras.models.Sequential()
        model.add(tf.keras.layers.Flatten(input_shape=(28, 28)))

        # args.layer is already an int (argparse type=int); the index is unused.
        for _ in range(args.layer):
            model.add(tf.keras.layers.Dense(128, activation=args.act))
            model.add(tf.keras.layers.Dropout(args.dropout_rate))

        model.add(tf.keras.layers.Dense(10, activation='softmax'))
        model.summary()

        # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
        # The metric is named 'acc' on purpose: KatibMetricLog reads the
        # 'acc' / 'val_acc' keys from the Keras logs.
        model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=args.learning_rate),
                      loss='sparse_categorical_crossentropy',
                      metrics=['acc'])

        model.fit(x_train, y_train,
                  verbose=0,
                  validation_data=(x_test, y_test),
                  epochs=args.epoch,
                  callbacks=[KatibMetricLog()])

        # Return the final test metrics instead of discarding them.
        return model.evaluate(x_test, y_test, verbose=0)

class KatibMetricLog(Callback):
    """Keras callback that prints training metrics as ``name=value`` pairs.

    The metric keys read here ('acc', 'loss', 'val_acc', 'val_loss') match a
    model compiled with ``metrics=['acc']`` and trained with validation data.
    NOTE(review): the class name suggests the printed lines are scraped from
    stdout by a Katib metrics collector -- confirm before changing the format.
    """

    def on_batch_end(self, batch, logs=None):
        """Print the batch index with the running accuracy and loss."""
        # Avoid a shared mutable default and tolerate an explicit None.
        logs = logs or {}
        print("batch=" + str(batch),
              "accuracy=" + str(logs.get('acc')),
              "loss=" + str(logs.get('loss')))

    def on_epoch_begin(self, epoch, logs=None):
        """Print a header line with the index of the epoch that is starting."""
        print("epoch " + str(epoch) + ":")

    def on_epoch_end(self, epoch, logs=None):
        """Print the validation accuracy and loss at the end of an epoch."""
        logs = logs or {}
        print("Validation-accuracy=" + str(logs.get('val_acc')),
              "Validation-loss=" + str(logs.get('val_loss')))

if __name__ == '__main__':
    # When FAIRING_RUNTIME is set, run the training directly; otherwise
    # configure Fairing to build an image and submit the training as a job.
    # NOTE(review): presumably Fairing sets FAIRING_RUNTIME inside the
    # container it launches -- confirm against the Fairing docs.
    if os.getenv('FAIRING_RUNTIME') is not None:
        remote_train = MyFashionMnist()
        remote_train.train()
    else:
        # Imported lazily: only the submitting side needs the Fairing SDK.
        from kubeflow import fairing
        from kubeflow.fairing.kubernetes import utils as k8s_utils

        DOCKER_REGISTRY = 'jaewoo201'

        # Build with the 'append' builder on top of the base image and push
        # the result to the registry.
        fairing.config.set_builder(
            'append',
            image_name='fairing-job',
            base_image='jaewoo201/kubeflow-jupyter-lab:tf2.0-cpu',
            registry=DOCKER_REGISTRY,
            push=True,
        )

        # Deploy as a job in 'kf-namespace' with a resource mutator of
        # cpu=1, memory=2 (memory unit presumably GiB -- confirm).
        fairing.config.set_deployer(
            'job',
            namespace='kf-namespace',
            pod_spec_mutators=[
                k8s_utils.get_resource_mutator(cpu=1, memory=2),
            ],
        )

        fairing.config.run()

[I 200810 15:03:36 config:134] Using preprocessor: <kubeflow.fairing.preprocessors.converted_notebook.ConvertNotebookPreprocessor object at 0x7f1878d51fd0>
[I 200810 15:03:36 config:136] Using builder: <kubeflow.fairing.builders.append.append.AppendBuilder object at 0x7f182b0cf198>
[I 200810 15:03:36 config:138] Using deployer: <kubeflow.fairing.deployers.job.job.Job object at 0x7f182c0b7ba8>
[W 200810 15:03:36 append:50] Building image using Append builder...
[I 200810 15:03:36 base:107] Creating docker context: /tmp/fairing_context_h_2pgljd
[I 200810 15:03:36 converted_notebook:127] Converting fashion-mnist-katib.ipynb to fashion-mnist-katib.py
[I 200810 15:03:36 docker_creds_:234] Loading Docker credentials for repository 'jaewoo201/kubeflow-jupyter-lab:tf2.0-cpu'
[W 200810 15:03:37 append:54] Image successfully built in 1.0331726599251851s.
[W 200810 15:03:37 append:94] Pushing image jaewoo201/fairing-job:11A187CF...
[I 200810 15:03:37 docker_creds_:234] Loading Docker credentials 

2020-08-10 15:03:46.401838: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcudart.so.10.1'; dlerror: libcudart.so.10.1: cannot open shared object file: No such file or directory
2020-08-10 15:03:46.401877: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/train-images-idx3-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-labels-idx1-ubyte.gz
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/t10k-images-idx3-ubyte.gz
2020-08-10 15:03:50.245467: W tensorflow/stream_executor/platform/default/dso_loader.cc:59] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shar

[W 200810 15:04:30 job:173] Cleaning up job fairing-job-lw2w8...
