# Explore Pipeline

We wish to explore the impact of our data pipeline; we will consider seven things:

- train-validation split size.
- Model batch size.
- rescaling
- including rotations to the images.
- including translations of the images.
- including added noise to the brightness of the images.
- zooming on the images.

## Summary

Based on the findings below, the following pipeline settings and data augmentation values seem to be a good choice:

|Augmentation|Value|
|------------|-----|
|train-val split|0.8 / 0.9|
|batch size|64 or 128|
|rescaling| 1/255.|
|rotations| 0.05 (18deg)|
|translations| 0.1|
|brightness| 0.1|
|zoom|0.1|

In [16]:
import os
import sys
from dotenv import load_dotenv, find_dotenv
from pathlib import Path
import tensorflow as tf
import matplotlib.pyplot as plt
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
from src import DrDataLoader
import mlflow
import mlflow.tensorflow
# Point MLflow at the tracking server expected on localhost:5000.
mlflow.set_tracking_uri("http://localhost:5000")
# Turn on MLflow's automatic logging for TensorFlow/Keras training runs.
mlflow.tensorflow.autolog()
# Load environment variables (RAW_FP is read below) from the .env file located by find_dotenv.
load_dotenv(find_dotenv())

# The RAW_FP environment variable is joined onto the parent directory and handed to the loader.
# NOTE(review): os.getenv returns None when RAW_FP is unset, which Path() would reject — confirm .env is present.
data_loader = DrDataLoader(Path('../') / Path(os.getenv('RAW_FP'))) #type: ignore

def get_rescale_im(factor):
    """Return a single-layer Sequential model wrapping ``Rescaling(factor)``."""
    rescaling_layer = tf.keras.layers.Rescaling(factor)
    return tf.keras.Sequential([rescaling_layer])

def get_rotate_im(rotation):
    """Return a single-layer Sequential model wrapping ``RandomRotation``.

    ``rotation`` is forwarded unchanged as the layer's ``factor`` argument.
    """
    rotation_layer = tf.keras.layers.RandomRotation(factor=rotation)
    return tf.keras.Sequential([rotation_layer])

def get_translate_im(translation):
    """Build a single-layer Keras model that applies a random translation.

    Args:
        translation: Sequence whose first element is passed as
            ``height_factor`` and whose second element is passed as
            ``width_factor`` of ``tf.keras.layers.RandomTranslation``.

    Returns:
        A ``tf.keras.Sequential`` containing only the translation layer.
    """
    return tf.keras.Sequential([
        tf.keras.layers.RandomTranslation(
            height_factor=translation[0],
            # The width must come from the second element; reusing
            # translation[0] makes pairs such as (0.0, 0.1) and (0.1, 0.0)
            # indistinguishable from (0.0, 0.0) and (0.1, 0.1).
            width_factor=translation[1]
        )
    ])

def get_brighten_im(brightness):
    """Return a single-layer Sequential model wrapping ``RandomBrightness``.

    ``brightness`` is forwarded unchanged as the layer's ``factor`` argument.
    """
    brightness_layer = tf.keras.layers.RandomBrightness(factor=brightness)
    return tf.keras.Sequential([brightness_layer])

def get_zoom_im(factor):
    """Return a single-layer Sequential model wrapping ``RandomZoom(factor)``."""
    zoom_layer = tf.keras.layers.RandomZoom(factor)
    return tf.keras.Sequential([zoom_layer])

# Shorthand passed as num_parallel_calls to the dataset map calls in prepare().
AUTOTUNE = tf.data.AUTOTUNE

def prepare(
    ds,
    rescale=None,
    batch_size = None,
    rotation=None,
    translation=None,
    brightness=None,
    zoom=None,
    ):
    """Apply the optional preprocessing/augmentation steps to a dataset.

    Each step is skipped when its argument is ``None``. The enabled steps
    run in a fixed order: brightness, rescale, rotation, translation, zoom,
    and finally batching. Every image step is mapped over ``(x, y)`` pairs
    and only transforms ``x``.

    Args:
        ds: Dataset exposing ``map`` and ``batch``.
        rescale: Value handed to ``get_rescale_im``.
        batch_size: Batch size handed to ``ds.batch``.
        rotation: Value handed to ``get_rotate_im``.
        translation: Value handed to ``get_translate_im``.
        brightness: Value handed to ``get_brighten_im``.
        zoom: Value handed to ``get_zoom_im``.

    Returns:
        The dataset after all enabled steps.
    """

    def _mapped(dataset, layer):
        # Transform only the image component; the label passes through.
        return dataset.map(
            lambda x, y: (layer(x), y),
            num_parallel_calls=AUTOTUNE,
        )

    # Brightness is applied first, i.e. before any rescaling of pixel values.
    if brightness is not None:
        ds = _mapped(ds, get_brighten_im(brightness))

    if rescale is not None:
        ds = _mapped(ds, get_rescale_im(rescale))

    if rotation is not None:
        ds = _mapped(ds, get_rotate_im(rotation))

    if translation is not None:
        ds = _mapped(ds, get_translate_im(translation))

    if zoom is not None:
        ds = _mapped(ds, get_zoom_im(zoom))

    # Batching comes last, so the steps above see unbatched elements.
    return ds if batch_size is None else ds.batch(batch_size)




## Experiment 1
We explore the impact of the train - validation split size.

In [6]:
# Experiment: vary the train/validation split fraction; every other pipeline option stays off.
# Register a new MLflow experiment whose artifacts are stored under ../artifacts.
experiment_id = mlflow.create_experiment(
    'pipeline-experiments-split_size', #type: ignore
    artifact_location=Path.cwd().joinpath('../artifacts').as_uri(),
    tags={'version': 'v1', 'priority': 'P1'},
)
# Echo the experiment metadata so the notebook output records what was created.
experiment = mlflow.get_experiment(experiment_id)
print(f'Name: {experiment.name}')
print(f'Experiment_id: {experiment.experiment_id}')
print(f'Artifact Location: {experiment.artifact_location}')
print(f'Tags: {experiment.tags}')
print(f'Lifecycle_stage: {experiment.lifecycle_stage}')
print(f'Creation timestamp: {experiment.creation_time}')

# Fixed settings for this sweep; only SPLIT_SIZE varies between runs.
BATCH_SIZE = 128
SPLIT_SIZES = [0.5, 0.6, 0.7, 0.8, 0.9]
RESCALE = None
ROTATION = None
TRANSLATION = None
BRIGHTEN = None
EPOCHS = 10


# One MLflow run per candidate split fraction.
for SPLIT_SIZE in SPLIT_SIZES:
    with mlflow.start_run(
        run_name=f'run_split_size_{str(SPLIT_SIZE).zfill(3)}',
        experiment_id=experiment_id
        ):
        # Record the pipeline configuration used by this run.
        mlflow.log_param('SPLIT_SIZE', SPLIT_SIZE)
        mlflow.log_param("BATCH_SIZE", BATCH_SIZE)
        mlflow.log_param('RESCALE', RESCALE)
        mlflow.log_param('ROTATION', ROTATION)
        mlflow.log_param('TRANSLATION', TRANSLATION)
        mlflow.log_param('BRIGHTEN', BRIGHTEN)
        mlflow.log_param('EPOCHS', EPOCHS)


        # Split the loader's training data; left_size is the fraction assigned to train_ds.
        train_ds, val_ds = tf.keras.utils.split_dataset(
            data_loader.load_training_data(),
            left_size = SPLIT_SIZE,
            shuffle=True
        )

        # Both splits use the same settings; the augmentation values are None, so only batching applies.
        train_ds = prepare(
            train_ds,
            rescale=RESCALE,
            batch_size=BATCH_SIZE,
            rotation=ROTATION,
            translation=TRANSLATION,
            brightness=BRIGHTEN
        )

        val_ds = prepare(
            val_ds,
            rescale=RESCALE,
            batch_size=BATCH_SIZE,
            rotation=ROTATION,
            translation=TRANSLATION,
            brightness=BRIGHTEN
        )
        
        # Small dense classifier: flatten the 28x28x1 input, one hidden layer, 10 output logits.
        model = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(10)
            ])  
        model.compile(
            optimizer=tf.keras.optimizers.Adam(0.001),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['sparse_categorical_accuracy'])
        # Train for EPOCHS epochs with val_ds as the validation data.
        history = model.fit(train_ds, epochs=EPOCHS, validation_data=val_ds)

Name: pipeline-experiments-split_size
Experiment_id: 1
Artifact Location: file:///Users/b7064522/Documents/DataScience/kaggle-digit-recognizer/notebooks/../artifacts
Tags: {'version': 'v1', 'priority': 'P1'}
Lifecycle_stage: active
Creation timestamp: 1669993601097
Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp8605ahti/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp8605ahti/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmplw30oqzh/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmplw30oqzh/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpgk_agshx/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpgk_agshx/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp7gks39oj/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp7gks39oj/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpm_19_03r/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpm_19_03r/model/data/model/assets


## Experiment Batch size

We explore the impact of the batch size.

In [7]:
# Experiment: vary the batch size; split fraction is fixed and no rescaling/augmentation is used.
# Register a new MLflow experiment whose artifacts are stored under ../artifacts.
experiment_id = mlflow.create_experiment(
    'pipeline-experiments-batch_size', #type: ignore
    artifact_location=Path.cwd().joinpath('../artifacts').as_uri(),
    tags={'version': 'v1', 'priority': 'P1'},
)
# Echo the experiment metadata so the notebook output records what was created.
experiment = mlflow.get_experiment(experiment_id)
print(f'Name: {experiment.name}')
print(f'Experiment_id: {experiment.experiment_id}')
print(f'Artifact Location: {experiment.artifact_location}')
print(f'Tags: {experiment.tags}')
print(f'Lifecycle_stage: {experiment.lifecycle_stage}')
print(f'Creation timestamp: {experiment.creation_time}')

# Fixed settings for this sweep; only BATCH_SIZE varies between runs.
BATCH_SIZES = [32, 64, 128, 256, 512]
SPLIT_SIZE = 0.8
RESCALE = None
ROTATION = None
TRANSLATION = None
BRIGHTEN = None
EPOCHS = 10


# One MLflow run per candidate batch size (run names are zero-padded to three digits).
for BATCH_SIZE in BATCH_SIZES:
    with mlflow.start_run(
        run_name=f'run_batch_size_{str(BATCH_SIZE).zfill(3)}',
        experiment_id=experiment_id
        ):
        # Record the pipeline configuration used by this run.
        mlflow.log_param('SPLIT_SIZE', SPLIT_SIZE)
        mlflow.log_param("BATCH_SIZE", BATCH_SIZE)
        mlflow.log_param('RESCALE', RESCALE)
        mlflow.log_param('ROTATION', ROTATION)
        mlflow.log_param('TRANSLATION', TRANSLATION)
        mlflow.log_param('BRIGHTEN', BRIGHTEN)
        mlflow.log_param('EPOCHS', EPOCHS)


        # Split the loader's training data; left_size is the fraction assigned to train_ds.
        train_ds, val_ds = tf.keras.utils.split_dataset(
            data_loader.load_training_data(),
            left_size = SPLIT_SIZE,
            shuffle=True
        )

        # Both splits use the same settings; the augmentation values are None, so only batching applies.
        train_ds = prepare(
            train_ds,
            rescale=RESCALE,
            batch_size=BATCH_SIZE,
            rotation=ROTATION,
            translation=TRANSLATION,
            brightness=BRIGHTEN
        )

        val_ds = prepare(
            val_ds,
            rescale=RESCALE,
            batch_size=BATCH_SIZE,
            rotation=ROTATION,
            translation=TRANSLATION,
            brightness=BRIGHTEN
        )
        
        # Small dense classifier: flatten the 28x28x1 input, one hidden layer, 10 output logits.
        model = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(10)
            ])  
        model.compile(
            optimizer=tf.keras.optimizers.Adam(0.001),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['sparse_categorical_accuracy'])
        # Train for EPOCHS epochs with val_ds as the validation data.
        history = model.fit(train_ds, epochs=EPOCHS, validation_data=val_ds)


Name: pipeline-experiments-batch_size
Experiment_id: 2
Artifact Location: file:///Users/b7064522/Documents/DataScience/kaggle-digit-recognizer/notebooks/../artifacts
Tags: {'version': 'v1', 'priority': 'P1'}
Lifecycle_stage: active
Creation timestamp: 1669993788865
Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp7sdhmwm1/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp7sdhmwm1/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpmzryxuk_/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpmzryxuk_/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpafuzoq28/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpafuzoq28/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmppr4mmmpf/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmppr4mmmpf/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp52ow1hnu/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp52ow1hnu/model/data/model/assets


## Experiment Rescaling images

In [9]:
# Experiment: compare training without rescaling against rescaling pixel values by 1/255.
# Register a new MLflow experiment whose artifacts are stored under ../artifacts.
experiment_id = mlflow.create_experiment(
    'pipeline-experiments-rescale_size', #type: ignore
    artifact_location=Path.cwd().joinpath('../artifacts').as_uri(),
    tags={'version': 'v1', 'priority': 'P1'},
)
# Echo the experiment metadata so the notebook output records what was created.
experiment = mlflow.get_experiment(experiment_id)
print(f'Name: {experiment.name}')
print(f'Experiment_id: {experiment.experiment_id}')
print(f'Artifact Location: {experiment.artifact_location}')
print(f'Tags: {experiment.tags}')
print(f'Lifecycle_stage: {experiment.lifecycle_stage}')
print(f'Creation timestamp: {experiment.creation_time}')

# Fixed settings for this sweep; only RESCALE varies (None disables the rescaling step in prepare()).
BATCH_SIZE = 128
SPLIT_SIZE = 0.8
RESCALES = [None, 1./255.]
ROTATION = None
TRANSLATION = None
BRIGHTEN = None
EPOCHS = 10


# One MLflow run per rescaling option.
for RESCALE in RESCALES:
    with mlflow.start_run(
        run_name=f'run_rescale_{str(RESCALE)}',
        experiment_id=experiment_id
        ):
        # Record the pipeline configuration used by this run.
        mlflow.log_param('SPLIT_SIZE', SPLIT_SIZE)
        mlflow.log_param("BATCH_SIZE", BATCH_SIZE)
        mlflow.log_param('RESCALE', RESCALE)
        mlflow.log_param('ROTATION', ROTATION)
        mlflow.log_param('TRANSLATION', TRANSLATION)
        mlflow.log_param('BRIGHTEN', BRIGHTEN)
        mlflow.log_param('EPOCHS', EPOCHS)


        # Split the loader's training data; left_size is the fraction assigned to train_ds.
        train_ds, val_ds = tf.keras.utils.split_dataset(
            data_loader.load_training_data(),
            left_size = SPLIT_SIZE,
            shuffle=True
        )

        # Training and validation data receive the same rescaling; augmentation values are None.
        train_ds = prepare(
            train_ds,
            rescale=RESCALE,
            batch_size=BATCH_SIZE,
            rotation=ROTATION,
            translation=TRANSLATION,
            brightness=BRIGHTEN
        )

        val_ds = prepare(
            val_ds,
            rescale=RESCALE,
            batch_size=BATCH_SIZE,
            rotation=ROTATION,
            translation=TRANSLATION,
            brightness=BRIGHTEN
        )
        
        # Small dense classifier: flatten the 28x28x1 input, one hidden layer, 10 output logits.
        model = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(10)
            ])  
        model.compile(
            optimizer=tf.keras.optimizers.Adam(0.001),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['sparse_categorical_accuracy'])
        # Train for EPOCHS epochs with val_ds as the validation data.
        history = model.fit(train_ds, epochs=EPOCHS, validation_data=val_ds)


Name: pipeline-experiments-rescale_size
Experiment_id: 3
Artifact Location: file:///Users/b7064522/Documents/DataScience/kaggle-digit-recognizer/notebooks/../artifacts
Tags: {'version': 'v1', 'priority': 'P1'}
Lifecycle_stage: active
Creation timestamp: 1669994899543
Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp0739j1jo/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp0739j1jo/model/data/model/assets


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpibqbbhr4/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpibqbbhr4/model/data/model/assets


## Experiment - add random rotation to the training set.

In [10]:
# Experiment: vary the random-rotation factor applied to the training set (rescaling fixed at 1/255).
# Register a new MLflow experiment whose artifacts are stored under ../artifacts.
experiment_id = mlflow.create_experiment(
    'pipeline-experiments-rotation', #type: ignore
    artifact_location=Path.cwd().joinpath('../artifacts').as_uri(),
    tags={'version': 'v1', 'priority': 'P1'},
)
# Echo the experiment metadata so the notebook output records what was created.
experiment = mlflow.get_experiment(experiment_id)
print(f'Name: {experiment.name}')
print(f'Experiment_id: {experiment.experiment_id}')
print(f'Artifact Location: {experiment.artifact_location}')
print(f'Tags: {experiment.tags}')
print(f'Lifecycle_stage: {experiment.lifecycle_stage}')
print(f'Creation timestamp: {experiment.creation_time}')

# Fixed settings for this sweep; only ROTATION varies between runs.
BATCH_SIZE = 128
SPLIT_SIZE = 0.8
RESCALE = 1./255
ROTATIONS = [0.05, 0.1, 0.15, 0.2, 0.25]
TRANSLATION = None
BRIGHTEN = None
EPOCHS = 10


# One MLflow run per candidate rotation factor.
for ROTATION in ROTATIONS:
    with mlflow.start_run(
        run_name=f'run_rotation_{str(ROTATION)}',
        experiment_id=experiment_id
        ):
        # Record the pipeline configuration used by this run.
        mlflow.log_param('SPLIT_SIZE', SPLIT_SIZE)
        mlflow.log_param("BATCH_SIZE", BATCH_SIZE)
        mlflow.log_param('RESCALE', RESCALE)
        mlflow.log_param('ROTATION', ROTATION)
        mlflow.log_param('TRANSLATION', TRANSLATION)
        mlflow.log_param('BRIGHTEN', BRIGHTEN)
        mlflow.log_param('EPOCHS', EPOCHS)


        # Split the loader's training data; left_size is the fraction assigned to train_ds.
        train_ds, val_ds = tf.keras.utils.split_dataset(
            data_loader.load_training_data(),
            left_size = SPLIT_SIZE,
            shuffle=True
        )

        # Augmentation is applied to the training split only.
        train_ds = prepare(
            train_ds,
            rescale=RESCALE,
            batch_size=BATCH_SIZE,
            rotation=ROTATION,
            translation=TRANSLATION,
            brightness=BRIGHTEN
        )

        # The validation split is only rescaled and batched.
        val_ds = prepare(
            val_ds,
            rescale=RESCALE,
            batch_size=BATCH_SIZE,
            rotation=None,
            translation=None,
            brightness=None
        )
        
        # Small dense classifier: flatten the 28x28x1 input, one hidden layer, 10 output logits.
        model = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(10)
            ])  
        model.compile(
            optimizer=tf.keras.optimizers.Adam(0.001),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['sparse_categorical_accuracy'])
        # Train for EPOCHS epochs with val_ds as the validation data.
        history = model.fit(train_ds, epochs=EPOCHS, validation_data=val_ds)


Name: pipeline-experiments-rotation
Experiment_id: 4
Artifact Location: file:///Users/b7064522/Documents/DataScience/kaggle-digit-recognizer/notebooks/../artifacts
Tags: {'version': 'v1', 'priority': 'P1'}
Lifecycle_stage: active
Creation timestamp: 1669995314046
Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp76156n6b/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp76156n6b/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpfx_fxkqi/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpfx_fxkqi/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp3picwefw/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp3picwefw/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpxr_cbrb8/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpxr_cbrb8/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmph36u5n89/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmph36u5n89/model/data/model/assets


## Experiment - random translations

In [14]:
# Experiment: vary the random-translation setting applied to the training set
# (rescaling fixed at 1/255, rotation fixed at 0.05).
# Register a new MLflow experiment whose artifacts are stored under ../artifacts.
experiment_id = mlflow.create_experiment(
    'pipeline-experiments-translation-rerun', #type: ignore
    artifact_location=Path.cwd().joinpath('../artifacts').as_uri(),
    tags={'version': 'v1', 'priority': 'P1'},
)
# Echo the experiment metadata so the notebook output records what was created.
experiment = mlflow.get_experiment(experiment_id)
print(f'Name: {experiment.name}')
print(f'Experiment_id: {experiment.experiment_id}')
print(f'Artifact Location: {experiment.artifact_location}')
print(f'Tags: {experiment.tags}')
print(f'Lifecycle_stage: {experiment.lifecycle_stage}')
print(f'Creation timestamp: {experiment.creation_time}')

# Fixed settings for this sweep; only TRANSLATION varies between runs.
# Each candidate is a pair handed to prepare(); get_translate_im decides how its elements are used.
BATCH_SIZE = 128
SPLIT_SIZE = 0.8
RESCALE = 1./255
ROTATION = 0.05
TRANSLATIONS = [(0.025, 0.025), (0.05,0.05), (0.1,0.1), (0.2, 0.2), (0.0, 0.1), (0.1, 0.0)]
BRIGHTEN = None
EPOCHS = 10


# One MLflow run per candidate translation pair; both elements appear in the run name.
for TRANSLATION in TRANSLATIONS:
    with mlflow.start_run(
        run_name=f'run_translation_{str(TRANSLATION[0])}_{str(TRANSLATION[1])}',
        experiment_id=experiment_id
        ):
        # Record the pipeline configuration used by this run.
        mlflow.log_param('SPLIT_SIZE', SPLIT_SIZE)
        mlflow.log_param("BATCH_SIZE", BATCH_SIZE)
        mlflow.log_param('RESCALE', RESCALE)
        mlflow.log_param('ROTATION', ROTATION)
        mlflow.log_param('TRANSLATION', TRANSLATION)
        mlflow.log_param('BRIGHTEN', BRIGHTEN)
        mlflow.log_param('EPOCHS', EPOCHS)


        # Split the loader's training data; left_size is the fraction assigned to train_ds.
        train_ds, val_ds = tf.keras.utils.split_dataset(
            data_loader.load_training_data(),
            left_size = SPLIT_SIZE,
            shuffle=True
        )

        # Augmentation is applied to the training split only.
        train_ds = prepare(
            train_ds,
            rescale=RESCALE,
            batch_size=BATCH_SIZE,
            rotation=ROTATION,
            translation=TRANSLATION,
            brightness=BRIGHTEN
        )

        # The validation split is only rescaled and batched.
        val_ds = prepare(
            val_ds,
            rescale=RESCALE,
            batch_size=BATCH_SIZE,
            rotation=None,
            translation=None,
            brightness=None
        )
        
        # Small dense classifier: flatten the 28x28x1 input, one hidden layer, 10 output logits.
        model = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(10)
            ])  
        model.compile(
            optimizer=tf.keras.optimizers.Adam(0.001),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['sparse_categorical_accuracy'])
        # Train for EPOCHS epochs with val_ds as the validation data.
        history = model.fit(train_ds, epochs=EPOCHS, validation_data=val_ds)


Name: pipeline-experiments-translation-rerun
Experiment_id: 6
Artifact Location: file:///Users/b7064522/Documents/DataScience/kaggle-digit-recognizer/notebooks/../artifacts
Tags: {'version': 'v1', 'priority': 'P1'}
Lifecycle_stage: active
Creation timestamp: 1669997123916
Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp4eapmpce/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp4eapmpce/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp_vougly3/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp_vougly3/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpz0oda8ph/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpz0oda8ph/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpwdi4k19a/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpwdi4k19a/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmprife8o6c/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmprife8o6c/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpgzhllvnh/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpgzhllvnh/model/data/model/assets


## Experiment - random brightness

In [15]:
# Experiment: vary the random-brightness factor applied to the training set
# (rescaling, rotation and translation are fixed).
# Register a new MLflow experiment whose artifacts are stored under ../artifacts.
experiment_id = mlflow.create_experiment(
    'pipeline-experiments-brightness', #type: ignore
    artifact_location=Path.cwd().joinpath('../artifacts').as_uri(),
    tags={'version': 'v1', 'priority': 'P1'},
)
# Echo the experiment metadata so the notebook output records what was created.
experiment = mlflow.get_experiment(experiment_id)
print(f'Name: {experiment.name}')
print(f'Experiment_id: {experiment.experiment_id}')
print(f'Artifact Location: {experiment.artifact_location}')
print(f'Tags: {experiment.tags}')
print(f'Lifecycle_stage: {experiment.lifecycle_stage}')
print(f'Creation timestamp: {experiment.creation_time}')

# Fixed settings for this sweep; only BRIGHTEN varies between runs.
BATCH_SIZE = 128
SPLIT_SIZE = 0.8
RESCALE = 1./255
ROTATION = 0.05
TRANSLATION = (0.1, 0.1)
BRIGHTENS = [0.05, 0.1, 0.2, 0.3]
EPOCHS = 10


# One MLflow run per candidate brightness factor.
# Note: BRIGHTEN stays bound to the last value after the loop and is visible to later cells.
for BRIGHTEN in BRIGHTENS:
    with mlflow.start_run(
        run_name=f'run_brighten_{str(BRIGHTEN)}',
        experiment_id=experiment_id
        ):
        # Record the pipeline configuration used by this run.
        mlflow.log_param('SPLIT_SIZE', SPLIT_SIZE)
        mlflow.log_param("BATCH_SIZE", BATCH_SIZE)
        mlflow.log_param('RESCALE', RESCALE)
        mlflow.log_param('ROTATION', ROTATION)
        mlflow.log_param('TRANSLATION', TRANSLATION)
        mlflow.log_param('BRIGHTEN', BRIGHTEN)
        mlflow.log_param('EPOCHS', EPOCHS)


        # Split the loader's training data; left_size is the fraction assigned to train_ds.
        train_ds, val_ds = tf.keras.utils.split_dataset(
            data_loader.load_training_data(),
            left_size = SPLIT_SIZE,
            shuffle=True
        )

        # Augmentation is applied to the training split only.
        train_ds = prepare(
            train_ds,
            rescale=RESCALE,
            batch_size=BATCH_SIZE,
            rotation=ROTATION,
            translation=TRANSLATION,
            brightness=BRIGHTEN
        )

        # The validation split is only rescaled and batched.
        val_ds = prepare(
            val_ds,
            rescale=RESCALE,
            batch_size=BATCH_SIZE,
            rotation=None,
            translation=None,
            brightness=None
        )
        
        # Small dense classifier: flatten the 28x28x1 input, one hidden layer, 10 output logits.
        model = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(10)
            ])  
        model.compile(
            optimizer=tf.keras.optimizers.Adam(0.001),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['sparse_categorical_accuracy'])
        # Train for EPOCHS epochs with val_ds as the validation data.
        history = model.fit(train_ds, epochs=EPOCHS, validation_data=val_ds)


Name: pipeline-experiments-brightness
Experiment_id: 7
Artifact Location: file:///Users/b7064522/Documents/DataScience/kaggle-digit-recognizer/notebooks/../artifacts
Tags: {'version': 'v1', 'priority': 'P1'}
Lifecycle_stage: active
Creation timestamp: 1669998180228
Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpexwa4hot/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpexwa4hot/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpvo4ynod9/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpvo4ynod9/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp7r1jijuo/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp7r1jijuo/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpihfzcwue/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpihfzcwue/model/data/model/assets


## Experiment Random zoom

In [18]:
# Experiment: vary the random-zoom factor applied to the training set
# (rescaling, rotation, translation and brightness are fixed).
# Register a new MLflow experiment whose artifacts are stored under ../artifacts.
experiment_id = mlflow.create_experiment(
    'pipeline-experiments-zoom_rerun', #type: ignore
    artifact_location=Path.cwd().joinpath('../artifacts').as_uri(),
    tags={'version': 'v1', 'priority': 'P1'},
)
# Echo the experiment metadata so the notebook output records what was created.
experiment = mlflow.get_experiment(experiment_id)
print(f'Name: {experiment.name}')
print(f'Experiment_id: {experiment.experiment_id}')
print(f'Artifact Location: {experiment.artifact_location}')
print(f'Tags: {experiment.tags}')
print(f'Lifecycle_stage: {experiment.lifecycle_stage}')
print(f'Creation timestamp: {experiment.creation_time}')

# Fixed settings for this sweep; only ZOOM varies between runs.
BATCH_SIZE = 128
SPLIT_SIZE = 0.8
RESCALE = 1./255
ROTATION = 0.05
TRANSLATION = (0.1, 0.1)
# Must be BRIGHTEN (singular): the logging and prepare() calls below read
# BRIGHTEN, so any other name would leave them using whatever value an
# earlier cell left behind (or fail in a fresh kernel).
BRIGHTEN = 0.1
ZOOMS = [0.05, 0.1, 0.2]
EPOCHS = 10


# One MLflow run per candidate zoom factor.
for ZOOM in ZOOMS:
    with mlflow.start_run(
        run_name=f'run_zoom_{str(ZOOM)}',
        experiment_id=experiment_id
        ):
        # Record the pipeline configuration used by this run.
        mlflow.log_param('SPLIT_SIZE', SPLIT_SIZE)
        mlflow.log_param("BATCH_SIZE", BATCH_SIZE)
        mlflow.log_param('RESCALE', RESCALE)
        mlflow.log_param('ROTATION', ROTATION)
        mlflow.log_param('TRANSLATION', TRANSLATION)
        mlflow.log_param('BRIGHTEN', BRIGHTEN)
        mlflow.log_param('ZOOM', ZOOM)
        mlflow.log_param('EPOCHS', EPOCHS)


        # Split the loader's training data; left_size is the fraction assigned to train_ds.
        train_ds, val_ds = tf.keras.utils.split_dataset(
            data_loader.load_training_data(),
            left_size = SPLIT_SIZE,
            shuffle=True
        )

        # Augmentation is applied to the training split only.
        train_ds = prepare(
            train_ds,
            rescale=RESCALE,
            batch_size=BATCH_SIZE,
            rotation=ROTATION,
            translation=TRANSLATION,
            brightness=BRIGHTEN,
            zoom=ZOOM
        )

        # The validation split is only rescaled and batched.
        val_ds = prepare(
            val_ds,
            rescale=RESCALE,
            batch_size=BATCH_SIZE,
            rotation=None,
            translation=None,
            brightness=None,
            zoom=None
        )

        # Small dense classifier: flatten the 28x28x1 input, one hidden layer, 10 output logits.
        model = tf.keras.Sequential([
            tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
            tf.keras.layers.Dense(128, activation='relu'),
            tf.keras.layers.Dense(10)
            ])
        model.compile(
            optimizer=tf.keras.optimizers.Adam(0.001),
            loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
            metrics=['sparse_categorical_accuracy'])
        # Train for EPOCHS epochs with val_ds as the validation data.
        history = model.fit(train_ds, epochs=EPOCHS, validation_data=val_ds)


Name: pipeline-experiments-zoom_rerun
Experiment_id: 9
Artifact Location: file:///Users/b7064522/Documents/DataScience/kaggle-digit-recognizer/notebooks/../artifacts
Tags: {'version': 'v1', 'priority': 'P1'}
Lifecycle_stage: active
Creation timestamp: 1669999343678
Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp22pi46f_/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmp22pi46f_/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpoildcsvo/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpoildcsvo/model/data/model/assets


Epoch 1/10




Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10




INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpk9tgkbg8/model/data/model/assets


INFO:tensorflow:Assets written to: /var/folders/4g/6c1htv314w9dcb25vhpxqfscjwsh63/T/tmpk9tgkbg8/model/data/model/assets
