In [1]:
!pip install --upgrade -q mlflow
!pip install --upgrade -q boto3

In [2]:
import os
import pandas as pd
import numpy as np
import json

import boto3

import mlflow

from tensorflow import keras
from tensorflow.keras import layers

2025-08-21 05:46:50.523470: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2025-08-21 05:46:52.441549: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX512F FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2025-08-21 05:46:57.785347: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.


In [3]:
# ARN of the MLflow tracking server that MLflow is pointed at for this notebook.
tracking_server_arn = 'arn:aws:sagemaker:ap-southeast-2:954690186719:mlflow-tracking-server/SageMaker-Experiment-Lab'
mlflow.set_tracking_uri(tracking_server_arn)

In [4]:
# Resolve the AWS region from the default boto3 session; it is used to build
# the name of the public example-data bucket.
boto_session = boto3.Session()
region = boto_session.region_name

# region_name is None when no default region is configured. Fail here with a
# clear message instead of later requesting a bucket whose name ends in "None".
if region is None:
    raise RuntimeError(
        "No AWS region is configured for the default boto3 session; "
        "set AWS_DEFAULT_REGION (or a default region in the AWS config) and re-run."
    )

In [5]:
s3_client = boto3.client("s3")

# Local directory that receives the downloaded arrays. Create it up front:
# download_file writes to the given path and does not create missing parents.
local_dataset_dir = 'datasets'
os.makedirs(local_dataset_dir, exist_ok=True)

train_input_path = os.path.join(local_dataset_dir, 'train_input.npy')
test_input_path = os.path.join(local_dataset_dir, 'test_input.npy')
train_target_path = os.path.join(local_dataset_dir, 'train_target.npy')
test_target_path = os.path.join(local_dataset_dir, 'test_target.npy')

# S3 object keys always use '/' as the separator, so they are built with plain
# string formatting rather than os.path.join (which follows the local OS).
example_bucket = f'sagemaker-example-files-prod-{region}'
mnist_key_prefix = 'datasets/image/MNIST/numpy'

# (object name in the bucket, local destination) for each of the four arrays.
for s3_object_name, local_path in (
    ('input_train.npy', train_input_path),
    ('input_test.npy', test_input_path),
    ('input_train_labels.npy', train_target_path),
    ('input_test_labels.npy', test_target_path),
):
    s3_client.download_file(
        example_bucket,
        f'{mnist_key_prefix}/{s3_object_name}',
        local_path
    )

In [6]:
def load_datasets(train_input_path, test_input_path, train_target_path, test_target_path, num_classes=10):
    """Load the image/label arrays from .npy files and prepare them for training.

    Images are reshaped to (N, 28, 28), converted to float32, divided by 255
    (assumes 8-bit pixel values) and given a trailing channel axis, yielding
    (N, 28, 28, 1). Labels are flattened to (N,) and one-hot encoded with
    ``keras.utils.to_categorical``. The number of samples N is inferred from
    each file rather than hard-coded, so subsets of the data also load.

    Args:
        train_input_path: Path to the .npy file holding the training images.
        test_input_path: Path to the .npy file holding the test images.
        train_target_path: Path to the .npy file holding the training labels.
        test_target_path: Path to the .npy file holding the test labels.
        num_classes: Width of the one-hot label encoding.

    Returns:
        Tuple ``(train_input, test_input, train_target, test_target)``.
    """
    train_input = np.load(train_input_path)
    test_input = np.load(test_input_path)
    train_target = np.load(train_target_path)
    test_target = np.load(test_target_path)

    # -1 lets NumPy infer the sample count from the data instead of requiring
    # exactly 60000 / 10000 rows; the 28x28 image size is still enforced.
    train_input = np.reshape(train_input, (-1, 28, 28))
    test_input = np.reshape(test_input, (-1, 28, 28))
    train_target = np.reshape(train_target, (-1,))
    test_target = np.reshape(test_target, (-1,))

    train_input = train_input.astype("float32") / 255
    test_input = test_input.astype("float32") / 255

    # Append the single channel axis expected by the Conv2D input.
    train_input = np.expand_dims(train_input, -1)
    test_input = np.expand_dims(test_input, -1)

    train_target = keras.utils.to_categorical(train_target, num_classes)
    test_target = keras.utils.to_categorical(test_target, num_classes)

    print(
        f"Train Inpu Shape : {train_input.shape}",
        f"\n{train_input.shape[0]} train samples",
        f"\n{test_input.shape[0]} test samples"
    )

    return train_input, test_input, train_target, test_target

In [7]:
# Load and preprocess the four downloaded arrays (arguments follow the
# positional order of load_datasets: train/test inputs, then train/test targets).
train_input, test_input, train_target, test_target = load_datasets(
    train_input_path, test_input_path, train_target_path, test_target_path
)

Train Inpu Shape : (60000, 28, 28, 1) 
60000 train samples 
10000 test samples


In [15]:
# Fixed properties of the dataset/model interface.
CFG = dict(
    input_shape=(28, 28, 1),
    num_classes=10,
)

# Tunable training hyperparameters; these are also logged to MLflow.
PARAM = dict(
    batch_size=256,
    epochs=8,
    dropout=0.3,
)

In [9]:
def get_model(input_shape, num_classes, dropout=0.5):
    """Build and compile a small convolutional classifier.

    The network is two Conv2D + MaxPool2D stages (32 then 64 filters, 3x3
    kernels, 2x2 pooling) followed by Flatten, Dropout and a softmax Dense
    layer with ``num_classes`` units. It is compiled with the Adam optimizer,
    categorical cross-entropy loss and the accuracy metric.

    Args:
        input_shape: Shape passed to ``keras.Input``.
        num_classes: Number of units in the softmax output layer.
        dropout: Rate passed to the Dropout layer before the output layer.

    Returns:
        The compiled ``keras.Sequential`` model.
    """
    conv_stages = [
        keras.Input(shape=input_shape),
        layers.Conv2D(32, kernel_size=(3, 3), activation='relu'),
        layers.MaxPool2D(pool_size=(2, 2)),
        layers.Conv2D(64, kernel_size=(3, 3), activation='relu'),
        layers.MaxPool2D(pool_size=(2, 2)),
    ]
    classifier_head = [
        layers.Flatten(),
        layers.Dropout(dropout),
        layers.Dense(num_classes, activation="softmax"),
    ]

    cnn = keras.Sequential(conv_stages + classifier_head)
    cnn.compile(
        optimizer="adam",
        loss="categorical_crossentropy",
        metrics=["accuracy"],
    )
    return cnn

In [16]:
# Instantiate the classifier from the shared configuration dictionaries.
model = get_model(CFG['input_shape'], CFG['num_classes'], dropout=PARAM['dropout'])

In [17]:
class ExperimentCallback(keras.callbacks.Callback):
    """Keras callback that sends each epoch's metrics to MLflow and prints them."""

    def __init__(self, run=None):
        super().__init__()
        # Optional MLflow run context; it is stored but not read by this class.
        self.run = run

    def on_epoch_end(self, epoch, logs=None):
        """Log every entry of ``logs`` with ``mlflow.log_metric`` at step ``epoch``."""
        if logs is None:
            return

        # Record every key (metric) found in the logs dictionary.
        for key in logs:
            mlflow.log_metric(key, logs[key], step=epoch)
            print(f"\n{key} -> {logs[key]}")

In [18]:
# Select the MLflow experiment that will own the run.
experiment_name = "keras-mlflow-experiment-lab"
mlflow.set_experiment(experiment_name)

with mlflow.start_run(run_name="keras-exp-2") as run:
    mlflow.tensorflow.autolog()

    # Hyperparameters, logged under their display names.
    for param_label, param_key in (
        ('BatchSize', 'batch_size'),
        ('Epochs', 'epochs'),
        ('Dropout', 'dropout'),
    ):
        mlflow.log_param(param_label, PARAM[param_key])

    # Attach the dataset files used for this run as artifacts.
    for dataset_file in (
        train_input_path,
        train_target_path,
        test_input_path,
        test_target_path,
    ):
        mlflow.log_artifact(dataset_file)

    # Train, holding out 10% of the training data for validation.
    epoch_logger = ExperimentCallback(run=run)
    model.fit(
        train_input,
        train_target,
        batch_size=PARAM['batch_size'],
        epochs=PARAM['epochs'],
        validation_split=0.1,
        callbacks=[epoch_logger]
    )

    # score[0] is the loss and score[1] the accuracy from the compiled metrics.
    score = model.evaluate(test_input, test_target)
    print(f"Test Loss : {score[0]}\nTest Accuracy : {score[1]}")

    for metric_label, score_index in (
        ("TestLoss_CrossEntropy", 0),
        ("TestAccuracy", 1),
    ):
        mlflow.log_metric(metric_label, score[score_index])

2025-08-21 05:59:11.640065: W external/local_xla/xla/tsl/framework/cpu_allocator_impl.cc:84] Allocation of 169344000 exceeds 10% of free system memory.


Epoch 1/8
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 135ms/step - accuracy: 0.7058 - loss: 0.9696
accuracy -> 0.856925904750824

loss -> 0.48390501737594604

val_accuracy -> 0.9731666445732117

val_loss -> 0.10326249897480011




[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m33s[0m 149ms/step - accuracy: 0.7065 - loss: 0.9674 - val_accuracy: 0.9732 - val_loss: 0.1033
Epoch 2/8
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 179ms/step - accuracy: 0.9610 - loss: 0.1314
accuracy -> 0.9643333554267883

loss -> 0.1195259690284729

val_accuracy -> 0.9815000295639038

val_loss -> 0.06386993080377579




[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m50s[0m 190ms/step - accuracy: 0.9610 - loss: 0.1313 - val_accuracy: 0.9815 - val_loss: 0.0639
Epoch 3/8
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step - accuracy: 0.9732 - loss: 0.0889
accuracy -> 0.9734259247779846

loss -> 0.08618266135454178

val_accuracy -> 0.984333336353302

val_loss -> 0.05696840584278107




[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 163ms/step - accuracy: 0.9732 - loss: 0.0889 - val_accuracy: 0.9843 - val_loss: 0.0570
Epoch 4/8
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 136ms/step - accuracy: 0.9768 - loss: 0.0755
accuracy -> 0.9780370593070984

loss -> 0.07107127457857132

val_accuracy -> 0.9865000247955322

val_loss -> 0.04748576506972313




[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 150ms/step - accuracy: 0.9768 - loss: 0.0755 - val_accuracy: 0.9865 - val_loss: 0.0475
Epoch 5/8
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 154ms/step - accuracy: 0.9799 - loss: 0.0649
accuracy -> 0.9809259176254272

loss -> 0.06232111155986786

val_accuracy -> 0.987333357334137

val_loss -> 0.04161537066102028




[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 166ms/step - accuracy: 0.9799 - loss: 0.0649 - val_accuracy: 0.9873 - val_loss: 0.0416
Epoch 6/8
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 201ms/step - accuracy: 0.9841 - loss: 0.0521
accuracy -> 0.9833518266677856

loss -> 0.05359245091676712

val_accuracy -> 0.9879999756813049

val_loss -> 0.04158106446266174




[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 214ms/step - accuracy: 0.9841 - loss: 0.0521 - val_accuracy: 0.9880 - val_loss: 0.0416
Epoch 7/8
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 152ms/step - accuracy: 0.9853 - loss: 0.0486
accuracy -> 0.9850925803184509

loss -> 0.04786858335137367

val_accuracy -> 0.9878333210945129

val_loss -> 0.04144691303372383




[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m71s[0m 164ms/step - accuracy: 0.9853 - loss: 0.0486 - val_accuracy: 0.9878 - val_loss: 0.0414
Epoch 8/8
[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 137ms/step - accuracy: 0.9859 - loss: 0.0444
accuracy -> 0.9862037301063538

loss -> 0.044576164335012436

val_accuracy -> 0.9901666641235352

val_loss -> 0.03727125748991966




[1m211/211[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m40s[0m 158ms/step - accuracy: 0.9859 - loss: 0.0444 - val_accuracy: 0.9902 - val_loss: 0.0373
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 319ms/step




[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 9ms/step - accuracy: 0.9874 - loss: 0.0397
Test Loss : 0.03252928704023361
Test Accuracy : 0.989300012588501
🏃 View run keras-exp-2 at: https://ap-southeast-2.experiments.sagemaker.aws/#/experiments/2/runs/a48cffce7a1e4f04ab16c26f3b0987ad
🧪 View experiment at: https://ap-southeast-2.experiments.sagemaker.aws/#/experiments/2
