<a href="https://www.kaggle.com/code/kentvejrupmadsen/classification-of-letters?scriptVersionId=133635583" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
%pip install ipykernel
!python -m ipykernel install --user --name=notebook_environment

%pip install matplotlib
%pip install keras
%pip install tensorflow
%pip install kaggle
%pip install numpy
%pip install wandb

In [None]:
from random \
    import SystemRandom

import tensorflow

from tensorflow.data \
    import AUTOTUNE

from kaggle_secrets \
    import UserSecretsClient

from keras.losses \
    import SparseCategoricalCrossentropy

from keras.utils \
    import image_dataset_from_directory

from keras \
    import \
    Model, \
    Sequential

from keras \
    import layers

import wandb

from wandb.integration.keras \
    import \
    WandbCallback, \
    WandbEvalCallback, \
    WandbMetricsLogger, \
    WandbModelCheckpoint

In [None]:
# OS-backed random number generator; used below to draw dataset seeds.
random_generator = SystemRandom()

# Client used to read secrets attached to the Kaggle notebook.
secrets = UserSecretsClient()

# Constants
def label_training() -> str:
    """Return the subset name used to request the training split."""
    subset_name = 'training'
    return subset_name

def label_validation() -> str:
    """Return the subset name used to request the validation split."""
    subset_name = 'validation'
    return subset_name

def empty_string() -> str:
    """Return the empty string (used as the fallback for a missing secret)."""
    blank = ''
    return blank

def zero() -> int:
    """Return the integer constant 0."""
    nothing: int = 0
    return nothing

def get_log_runs() -> bool:
    """Report whether runs should be logged to Weights & Biases.

    Reads the module-level ``log_runs`` flag at call time, so the flag only
    has to exist before the first call, not before this definition.
    """
    return log_runs

def get_gpus_to_use() -> list:
    """Return the device names handed to the distribution strategy."""
    number_of_gpus = 2
    return [f'/gpu:{index}' for index in range(number_of_gpus)]

# Dynamic values: accessors for the mutable configuration and random seeds
def set_size_of_training_set_categories(value: int) -> None:
    """Store the number of training categories in the shared configuration.

    Args:
        value: Category count written to ``dataset -> training -> size``.
    """
    training_settings = configuration['dataset']['training']
    training_settings['size'] = value

def set_labels_of_training_set(value: list) -> None:
    """Record the training class labels and keep the category count in sync.

    Args:
        value: Class labels written to ``dataset -> training -> labels``;
            its length becomes the stored training category count.
    """
    training_settings = configuration['dataset']['training']
    training_settings['labels'] = value

    number_of_labels = len(value)
    set_size_of_training_set_categories(number_of_labels)

def get_size_of_training_set_categories() -> int:
    """Return the stored number of training categories."""
    training_settings = configuration['dataset']['training']
    return training_settings['size']

def set_size_of_validation_set_categories(value: int) -> None:
    """Store the number of validation categories in the shared configuration.

    Args:
        value: Category count written to ``dataset -> validation -> size``.
    """
    validation_settings = configuration['dataset']['validation']
    validation_settings['size'] = value

def set_labels_of_validation_set(value: list) -> None:
    """Record the validation class labels and keep the category count in sync.

    Args:
        value: Class labels written to ``dataset -> validation -> labels``;
            its length becomes the stored validation category count.
    """
    validation_settings = configuration['dataset']['validation']
    validation_settings['labels'] = value

    number_of_labels = len(value)
    set_size_of_validation_set_categories(number_of_labels)

def get_size_of_validation_set_categories() -> int:
    """Return the stored number of validation categories."""
    validation_settings = configuration['dataset']['validation']
    return validation_settings['size']

def get_max_random_value() -> int:
    """Return the inclusive upper bound for generated seeds (2**24 - 1)."""
    upper_bound = 0xFFFFFF
    return upper_bound

def get_min_random_value() -> int:
    """Return the inclusive lower bound for generated seeds."""
    lower_bound = zero()
    return lower_bound

def generate_random_seed() -> int:
    """Draw a random integer seed from the module-level generator.

    Returns:
        An integer between ``get_min_random_value()`` and
        ``get_max_random_value()``, both bounds included.
    """
    lowest = get_min_random_value()
    highest = get_max_random_value()

    return random_generator.randint(lowest, highest)

In [2]:
# Weights & Biases API key, read from the notebook secret named '__wandb__'.
__wandb_key: str = secrets.get_secret('__wandb__')

    
def get_wandb_secret() -> str:
    """Return the stored W&B API key, or an empty string when it is None."""
    return empty_string() if __wandb_key is None else __wandb_key


In [None]:
def generate_config_for_a_dataset() -> dict:
    """Build the default settings for one dataset subset.

    Returns:
        A dict with an empty label list, a category count of zero, a freshly
        drawn random seed, and the ``shuffle`` / ``crop`` flags.
    """
    return {
        'labels': [],
        'size': zero(),
        'seed': generate_random_seed(),
        'shuffle': True,
        'crop': False
    }


def generate_config_view() -> dict:
    """Build the image geometry settings (height, width, colour channels)."""
    return {
        'height': 512,
        'width': 512,
        'channels': 3
    }


def generate_config_datasets() -> dict:
    """Build the dataset section of the configuration.

    Returns:
        A dict holding the fraction of data reserved for validation and one
        independently generated settings dict per subset.
    """
    return {
        'split_validation_at': 0.45,
        'training': generate_config_for_a_dataset(),
        'validation': generate_config_for_a_dataset()
    }


def generate_config() -> dict:
    """Build the complete run configuration.

    Returns:
        A dict with the batch size, the number of epochs, the ``view``
        (image geometry) section and the ``dataset`` section.
    """
    return {
        'batch_size': 12,
        'epochs': 10,

        'view': generate_config_view(),
        'dataset': generate_config_datasets(),
    }


# Created only after every generate_config* helper above is defined; calling
# generate_config() before its definition raises NameError on a fresh kernel.
configuration = generate_config()

def get_configuration() -> dict:
    """Return the shared, module-level configuration dict (not a copy)."""
    return configuration

def get_configuration_by_key(key: str):
    """Look up a top-level entry of the configuration.

    Args:
        key: Name of the top-level entry.

    Raises:
        KeyError: If ``key`` is not present.
    """
    settings = get_configuration()
    return settings[key]

def get_configuration_in_view_by_key(key: str):
    """Look up an entry of the ``view`` section of the configuration.

    Args:
        key: Name of the entry inside ``view``.

    Raises:
        KeyError: If ``key`` is not present.
    """
    view_settings = get_configuration_by_key('view')
    return view_settings[key]

def get_configuration_in_dataset_by_key(key: str):
    """Look up an entry of the ``dataset`` section of the configuration.

    Args:
        key: Name of the entry inside ``dataset``.

    Raises:
        KeyError: If ``key`` is not present.
    """
    dataset_settings = get_configuration_by_key('dataset')
    return dataset_settings[key]

def get_configuration_in_training_set_by_key(key: str):
    """Look up an entry of the training subset settings.

    Args:
        key: Name of the entry inside ``dataset -> training``.

    Raises:
        KeyError: If ``key`` is not present.
    """
    training_settings = get_configuration_in_dataset_by_key('training')
    return training_settings[key]

def get_configuration_in_validation_set_by_key(key: str):
    """Look up an entry of the validation subset settings.

    Args:
        key: Name of the entry inside ``dataset -> validation``.

    Raises:
        KeyError: If ``key`` is not present.
    """
    validation_settings = get_configuration_in_dataset_by_key('validation')
    return validation_settings[key]


In [None]:
# Notebook-wide switches
use_cache: bool = False
log_runs: bool = True

cache_buffer_size: int = AUTOTUNE

# Values read once from the configuration
epochs: int = get_configuration_by_key('epochs')
dataset_batch_size: int = get_configuration_by_key('batch_size')

vision_height: int = get_configuration_in_view_by_key('height')
vision_width: int = get_configuration_in_view_by_key('width')
vision_number_of_color_channels: int = get_configuration_in_view_by_key('channels')

# (height, width) and (height, width, channels)
vision_size = (vision_height, vision_width)
vision_size_w_channels = (*vision_size, vision_number_of_color_channels)

split_dataset_at: float = get_configuration_in_dataset_by_key('split_validation_at')

# Locations on disk
path_to_model: str = '/kaggle/working/model'
path_to_dataset: str = '/kaggle/input/letter-images-dataset/dataset'

# Placeholders that later cells fill in
training_dataset = None
validation_dataset = None

pointer_to_training_dataset = None
pointer_to_validation_dataset = None

history = None
model = None

# Callbacks
wandb_callback = None

fit_callbacks = []

# result
results: list = []

# Strategy that mirrors the model across the listed GPU devices
strategy = tensorflow.distribute.MirroredStrategy(devices=get_gpus_to_use())

In [None]:
def size_of_fit_callbacks() -> int:
    """Return how many callbacks are currently registered for training."""
    number_of_callbacks = len(fit_callbacks)
    return number_of_callbacks

def fit_callbacks_is_empty() -> bool:
    """Return True when no training callbacks are registered."""
    number_of_callbacks = size_of_fit_callbacks()
    return number_of_callbacks == zero()

def fit_callbacks_has_content() -> bool:
    """Return True when at least one training callback is registered."""
    number_of_callbacks = size_of_fit_callbacks()
    return number_of_callbacks > zero()

def setup_callbacks() -> None:
    """Register the prepared training callbacks in ``fit_callbacks``.

    The module-level ``wandb_callback`` is appended when it has been created;
    it is ``None`` until the logging cell builds it. Previously this function
    was a no-op, so the callback was constructed but never passed to
    ``model.fit``.

    Calling the function again does not register the same callback twice.
    """
    global fit_callbacks

    # Nothing to register while the callback has not been built.
    if wandb_callback is None:
        return

    if wandb_callback not in fit_callbacks:
        fit_callbacks.append(wandb_callback)

In [None]:
def start_process() -> None:
    """Log in to Weights & Biases when run logging is enabled."""
    if not get_log_runs():
        return

    api_key = get_wandb_secret()
    wandb.login(key=api_key)
        
def initialise_process() -> None:
    """Start a Weights & Biases run when run logging is enabled.

    The module-level ``configuration`` dict is passed as the run config.
    """
    if not get_log_runs():
        return

    run_settings = {
        'project': 'letter-identification',
        'entity': 'designermadsen',
        'config': configuration,
        'sync_tensorboard': True,
        'save_code': True,
        'monitor_gym': True,
    }
    wandb.init(**run_settings)

def end_process() -> None:
    """Finish the Weights & Biases run when run logging is enabled."""
    if not get_log_runs():
        return

    wandb.finish()


In [None]:
start_process()

# Both subsets must be carved out of the SAME shuffled file order. With two
# different seeds the training and validation subsets are taken from
# different shuffles and can share images, which inflates validation accuracy.
split_seed = get_configuration_in_training_set_by_key('seed')

# Keep the stored configuration (later sent to W&B) consistent with the seed
# that is actually used for the validation subset.
get_configuration_in_dataset_by_key('validation')['seed'] = split_seed

# Options shared by both subsets; only `subset` differs between the two calls.
shared_loader_options = dict(
    validation_split=split_dataset_at,
    seed=split_seed,
    image_size=vision_size,
    batch_size=dataset_batch_size,
    shuffle=True,
    crop_to_aspect_ratio=True,
)

training_dataset = image_dataset_from_directory(
    path_to_dataset,
    subset=label_training(),
    **shared_loader_options
)

# Also stores the number of categories used to size the output layers.
set_labels_of_training_set(
    training_dataset.class_names
)

validation_dataset = image_dataset_from_directory(
    path_to_dataset,
    subset=label_validation(),
    **shared_loader_options
)

set_labels_of_validation_set(
    validation_dataset.class_names
)

# Started after loading so the run config contains the discovered labels.
initialise_process()

In [None]:
def generate_input_layers(ml_layers: list) -> list:
    """Append the input stage of the network to ``ml_layers``.

    The stage is one ``Rescaling`` layer that multiplies pixel values by
    1/255 and declares the input shape (height, width, channels).

    Args:
        ml_layers: List of layers to extend; it is modified in place.

    Returns:
        The same list object, with the rescaling layer appended.
    """
    rescale_pixels = layers.Rescaling(
        1./255,
        input_shape=vision_size_w_channels,
        trainable=True
    )
    ml_layers.append(rescale_pixels)

    return ml_layers


In [None]:
def generate_middle_layers(
    ml_layers: list
) -> list:
    """Append the convolutional feature extractor to ``ml_layers``.

    The extractor is a sequence of stages. Each stage is a run of 3x3
    convolutions ('same' padding, ReLU) with a fixed filter count, followed by
    one 2x2 max-pooling layer. The stage plan below yields the same layers, in
    the same order, as the previous hand-written sequence of append calls.

    Args:
        ml_layers: List of layers to extend; it is modified in place.

    Returns:
        The same list object, with the convolution and pooling layers appended.
    """
    pool_size = (2, 2)
    kernel_size = 3

    # (number of filters, number of consecutive convolutions) per stage
    stages = (
        (256, 2),
        (128, 1),
        (64, 4),
        (64, 4),
        (32, 4),
        (16, 4),
    )

    for filters, number_of_convolutions in stages:
        for _ in range(number_of_convolutions):
            ml_layers.append(
                layers.Conv2D(
                    filters,
                    kernel_size,
                    padding='same',
                    activation='relu'
                )
            )

        # Every stage ends with a pooling layer.
        ml_layers.append(
            layers.MaxPooling2D(
                pool_size=pool_size
            )
        )

    return ml_layers

In [None]:
def generate_output_layers(ml_layers: list) -> list:
    """Append the classification head to ``ml_layers``.

    The head flattens the feature maps, applies a ReLU dense layer with eight
    units per training category, and ends with a dense layer that has one
    unit per training category and no activation.

    Args:
        ml_layers: List of layers to extend; it is modified in place.

    Returns:
        The same list object, with the three head layers appended.
    """
    ml_layers.append(layers.Flatten())

    number_of_categories = get_size_of_training_set_categories()

    hidden_layer = layers.Dense(
        number_of_categories * 8,
        activation='relu',
    )
    ml_layers.append(hidden_layer)

    # No activation here: the layer outputs raw per-category scores.
    output_layer = layers.Dense(number_of_categories)
    ml_layers.append(output_layer)

    return ml_layers

In [None]:
def generate_layers() -> list:
    """Build the full layer list: input stage, feature extractor, then head.

    Returns:
        A new list containing every layer of the network in order.
    """
    network_layers = generate_input_layers([])
    network_layers = generate_middle_layers(network_layers)

    return generate_output_layers(network_layers)

def make_model():
    """Create the (uncompiled) sequential model from the generated layers.

    Returns:
        A ``Sequential`` model wrapping the list from ``generate_layers()``.
    """
    return Sequential(
        generate_layers()
    )

In [None]:
# Build the model inside the distribution strategy's scope when one is set,
# otherwise build it directly.
if strategy is None:
    model = make_model()
else:
    with strategy.scope():
        model = make_model()

In [None]:
# The last layer has no activation, so the loss is told to expect logits.
loss_on_logits = SparseCategoricalCrossentropy(from_logits=True)

model.compile(
    optimizer='adam',
    loss=loss_on_logits,
    metrics=['accuracy'],
)

model.summary()

In [None]:
# Train on the datasets as loaded by default...
pointer_to_training_dataset = training_dataset
pointer_to_validation_dataset = validation_dataset

# ...or on cached, prefetching versions when caching is switched on.
if use_cache:
    pointer_to_training_dataset = training_dataset.cache().prefetch(
        buffer_size=cache_buffer_size
    )
    pointer_to_validation_dataset = validation_dataset.cache().prefetch(
        buffer_size=cache_buffer_size
    )

In [None]:
# Build the W&B Keras callback only when run logging is enabled; otherwise
# wandb_callback keeps its previous value.
if get_log_runs():
    wandb_callback_settings = {
        'log_weights': True,
        'log_gradients': True,
        'training_data': pointer_to_training_dataset,
        'validation_data': pointer_to_validation_dataset,
        'log_evaluation': True,
        'monitor': 'val_accuracy',
        'mode': 'max',
    }
    wandb_callback = WandbCallback(**wandb_callback_settings)

In [None]:
# Populate the callback list once; skipped when callbacks are already present.
if not fit_callbacks_has_content():
    setup_callbacks()

In [None]:
# Options common to every training run.
fit_options = {
    'validation_data': pointer_to_validation_dataset,
    'epochs': epochs,
}

# Pass callbacks only when at least one is registered.
if fit_callbacks_has_content():
    fit_options['callbacks'] = fit_callbacks

history = model.fit(
    pointer_to_training_dataset,
    **fit_options
)

In [None]:
# Per-epoch metric lists recorded during training.
recorded_metrics = history.history

validation_scores = {
    'accuracy': recorded_metrics['val_accuracy'],
    'loss': recorded_metrics['val_loss']
}

training_scores = {
    'accuracy': recorded_metrics['accuracy'],
    'loss': recorded_metrics['loss']
}

result = {
    'validation': validation_scores,
    'result': training_scores
}

results.append(result)

In [None]:
# Send the collected results to W&B when run logging is enabled.
if get_log_runs():
    logged_payload = {'training': results}
    wandb.log(logged_payload)

# The model is saved whether or not logging is enabled.
model.save(path_to_model)

In [None]:
# Close the run: end_process() calls wandb.finish() when logging is enabled.
end_process()