<a href="https://www.kaggle.com/code/kentvejrupmadsen/classification-of-letters?scriptVersionId=133570719" target="_blank"><img align="left" alt="Kaggle" title="Open in Kaggle" src="https://kaggle.com/static/images/open-in-kaggle.svg"></a>

In [None]:
%pip install ipykernel
!python -m ipykernel install --user --name=notebook_environment

%pip install matplotlib
%pip install keras
%pip install tensorflow
%pip install kaggle
%pip install numpy
%pip install wandb

In [None]:
from random \
    import SystemRandom

import tensorflow

from tensorflow.data \
    import AUTOTUNE

from keras.losses \
    import SparseCategoricalCrossentropy

from keras.utils \
    import image_dataset_from_directory

from keras \
    import \
    Model, \
    Sequential

from keras \
    import layers

import wandb

from wandb.integration.keras \
    import \
    WandbCallback, \
    WandbEvalCallback, \
    WandbMetricsLogger, \
    WandbModelCheckpoint

In [None]:
# Shared random source used by generate_random_seed(); SystemRandom draws from
# the operating system's entropy source, so it cannot be seeded or replayed.
random_generator = SystemRandom()

def get_max_random_value() -> int:
    """Return the inclusive upper bound for generated seeds (2**24 - 1)."""
    return 0xFFFFFF

def get_min_random_value() -> int:
    """Return the inclusive lower bound for generated seeds."""
    lower_bound: int = 0
    return lower_bound

def generate_random_seed() -> int:
    """Draw a seed from the closed range [min, max] of allowed seed values.

    The bounds come from get_min_random_value() and get_max_random_value();
    the draw uses the module-level ``random_generator``.
    """
    lower = get_min_random_value()
    upper = get_max_random_value()

    return random_generator.randint(lower, upper)

In [None]:
# Constants
def label_training() -> str:
    """Return the subset name that selects the training split."""
    subset_name: str = 'training'
    return subset_name

def label_validation() -> str:
    """Return the subset name that selects the validation split."""
    subset_name: str = 'validation'
    return subset_name

In [None]:
def get_log_runs() -> bool:
    """Return the module-level ``log_runs`` flag.

    The rest of the notebook uses it to decide whether to call into wandb.
    """
    return log_runs

def get_gpus_to_use() -> list:
    """Return the device names handed to the distribution strategy."""
    first_gpu, second_gpu = '/gpu:0', '/gpu:1'
    return [first_gpu, second_gpu]

In [None]:
def set_size_of_training_set_categories(
    value: int
) -> None:
    """Store the number of training categories in ``configuration``."""
    training_entry = configuration['dataset']['training']
    training_entry['size'] = value

def set_labels_of_training_set(
    value: list
) -> None:
    """Store the training labels and keep the category count in sync.

    The count written through set_size_of_training_set_categories() is
    always ``len(value)``.
    """
    training_entry = configuration['dataset']['training']
    training_entry['labels'] = value

    category_count = len(value)
    set_size_of_training_set_categories(category_count)

def get_size_of_training_set_categories() -> int:
    """Return the number of training categories held in ``configuration``."""
    training_entry = configuration['dataset']['training']
    return training_entry['size']



def set_size_of_validation_set_categories(
    value: int
) -> None:
    """Store the number of validation categories in ``configuration``."""
    validation_entry = configuration['dataset']['validation']
    validation_entry['size'] = value


def set_labels_of_validation_set(
    value: list
) -> None:
    """Store the validation labels and keep the category count in sync.

    The count written through set_size_of_validation_set_categories() is
    always ``len(value)``.
    """
    validation_entry = configuration['dataset']['validation']
    validation_entry['labels'] = value

    category_count = len(value)
    set_size_of_validation_set_categories(category_count)


def get_size_of_validation_set_categories() -> int:
    """Return the number of validation categories held in ``configuration``."""
    validation_entry = configuration['dataset']['validation']
    return validation_entry['size']

In [None]:
# Settings for the whole run. The same dict is later passed to wandb.init as
# the run config when logging is enabled.
configuration = {
    'batch_size': 15,  # batch size used when loading both datasets
    'epochs': 10,  # number of epochs passed to model.fit
    'view': {
        # Target image size for dataset loading and the model's input shape.
        'height': 512,
        'width': 512,
        'channels': 3
    },
    'dataset':
    {
        # Fraction of the images reserved for the validation subset.
        'split_validation_at': 0.25,
        'training':
        {
            # Filled in by set_labels_of_training_set() after loading.
            'labels': [],
            'size': 0
        },
        'validation':
        {
            # Filled in by set_labels_of_validation_set() after loading.
            'labels': [],
            'size': 0
        }
    },
    # Drawn once when this cell runs; shared by both dataset loaders.
    'seed': generate_random_seed()
}

In [None]:
# Run-wide switches and module-level state derived from `configuration`.
use_cache: bool = False  # True: cache + prefetch the datasets before fitting
log_runs: bool = False  # True: report the run to wandb (see get_log_runs)

epochs = configuration['epochs']
# Passed as the prefetch buffer_size when use_cache is enabled.
cache_buffer_size: int = AUTOTUNE

vision_height: int = configuration['view']['height']
vision_width: int = configuration['view']['width']

vision_number_of_color_channels: int = configuration['view']['channels']

# (height, width) is the image_size for dataset loading; the variant with
# channels is the model's input_shape.
vision_size = (vision_height, vision_width) 
vision_size_w_channels = (vision_height, vision_width, vision_number_of_color_channels)

split_dataset_at: float = configuration['dataset']['split_validation_at']

dataset_seed: int = configuration['seed']
dataset_batch_size: int = configuration['batch_size']

# Destination handed to model.save after training.
# NOTE(review): newer Keras releases require a `.keras`/`.h5` suffix for
# model.save — confirm against the installed version.
path_to_model: str = '/kaggle/working/model'
path_to_dataset: str = '/kaggle/input/letter-images-dataset/dataset'

# Datasets as loaded from disk; assigned in the loading cell.
training_dataset = None
validation_dataset = None

# Datasets actually fed to model.fit (optionally cached/prefetched).
pointer_to_training_dataset = None
pointer_to_validation_dataset = None

model = None
    
# Callbacks
wandb_callback = None

# Callbacks handed to model.fit; stays empty when logging is disabled.
fit_callbacks = []

# Per-fit metric histories, appended to after training.
results: list = []



In [None]:
# Distribution strategy over the devices named by get_gpus_to_use(); the model
# is later built inside strategy.scope().
strategy = tensorflow.distribute.MirroredStrategy(
    devices=get_gpus_to_use()
)

In [None]:
# Authenticate with wandb only when run logging is switched on.
if get_log_runs():
    wandb.login()

In [None]:
# Load the training subset. Both loaders below receive the same directory,
# validation_split and seed and differ only in the `subset` they request.
training_dataset = image_dataset_from_directory(
    path_to_dataset,
    validation_split = split_dataset_at,
    subset= label_training(),
    seed = dataset_seed,
    image_size = vision_size,
    batch_size = dataset_batch_size,
    shuffle = True,
    crop_to_aspect_ratio = True
)

# Record the class names (and thereby the category count used by make_model).
set_labels_of_training_set(
    training_dataset.class_names
)

# Load the validation subset with the same split and seed as above.
validation_dataset = image_dataset_from_directory(
    path_to_dataset,
    validation_split = split_dataset_at,
    subset = label_validation(),
    seed = dataset_seed,
    image_size = vision_size,
    batch_size = dataset_batch_size,
    shuffle = True,
    crop_to_aspect_ratio = True
)

set_labels_of_validation_set(
    validation_dataset.class_names
)

In [None]:
# Start a wandb run (only when logging is on), attaching `configuration` as
# the run config so the seed and labels are recorded with the run.
# NOTE(review): monitor_gym and sync_tensorboard relate to Gym video capture
# and TensorBoard logs; neither is used elsewhere in this notebook — confirm
# they are needed.
if get_log_runs():
    wandb.init(
        project = 'letter-identification',
        entity = 'designermadsen', 
        config = configuration,
        sync_tensorboard = True,
        save_code = True,
        monitor_gym = True
    )

In [None]:
def make_model():
    """Build the (uncompiled) convolutional classifier for the letter images.

    The network is a Rescaling layer, followed by seven convolution stages,
    followed by a small dense head. Each stage is a run of 3x3 'same'-padded
    ReLU convolutions closed by a 2x2 max-pool. The input shape is the
    module-level ``vision_size_w_channels``; the width of the head depends on
    get_size_of_training_set_categories(), so the training labels must have
    been registered before this is called.

    Returns:
        A keras ``Sequential`` model. Its final Dense layer has no
        activation, so it produces one raw score (logit) per category.
    """
    # (filters, number of stacked convolutions) per stage, in network order.
    stages = (
        (256, 1),
        (128, 1),
        (64, 4),
        (64, 4),
        (32, 4),
        (16, 4),
        (16, 4),
    )
    pool_window = (2, 2)
    category_count = get_size_of_training_set_categories()

    # Maps the input by a factor of 1/255 before the first convolution.
    stack = [
        layers.Rescaling(
            1./255,
            input_shape=vision_size_w_channels,
            trainable=True
        )
    ]

    # Layers are created strictly in network order so the automatically
    # generated layer names match the hand-written layer list this replaces.
    for filters, depth in stages:
        for _ in range(depth):
            stack.append(
                layers.Conv2D(
                    filters,
                    3,
                    padding='same',
                    activation='relu'
                )
            )

        stack.append(
            layers.MaxPooling2D(
                pool_size=pool_window
            )
        )

    # Dense head: a hidden layer eight times the category count, then one
    # output unit per category.
    stack.append(layers.Flatten())
    stack.append(
        layers.Dense(
            (category_count * 8),
            activation='relu',
        )
    )
    stack.append(
        layers.Dense(
            category_count
        )
    )

    return Sequential(stack)

In [None]:
# Build the model directly when no strategy is set; otherwise build it inside
# the strategy's scope.
if strategy is None:
    model = make_model()
else:
    with strategy.scope():
        model = make_model()

In [None]:
# The model's last Dense layer has no activation, so the loss is told to
# expect logits. Labels are integer class indices (sparse loss).
model.compile(
    optimizer='adam', 
    loss=SparseCategoricalCrossentropy(
        from_logits=True
    ), 
    metrics=['accuracy'], 
)
# Print the layer-by-layer overview of the compiled model.
model.summary()

In [None]:
# Feed the datasets to training as loaded, unless caching is requested, in
# which case each one is wrapped in cache() followed by prefetch().
pointer_to_training_dataset = training_dataset
pointer_to_validation_dataset = validation_dataset

if use_cache:
    pointer_to_training_dataset = (
        training_dataset
        .cache()
        .prefetch(buffer_size=cache_buffer_size)
    )

    pointer_to_validation_dataset = (
        validation_dataset
        .cache()
        .prefetch(buffer_size=cache_buffer_size)
    )

In [None]:
# Create the wandb Keras callback only when run logging is enabled; otherwise
# wandb_callback keeps its initial None value.
# NOTE(review): the W&B docs describe training_data for gradient logging as
# an (X, y) tuple, while a dataset object is passed here — confirm that
# log_gradients works with this input.
if get_log_runs():
    wandb_callback = WandbCallback(
        log_weights=True, 
        log_gradients=True, 
        training_data=pointer_to_training_dataset, 
        validation_data=pointer_to_validation_dataset,

        log_evaluation=True, 

        # Track the best epoch by highest validation accuracy.
        monitor='val_accuracy',
        mode='max'
    )

In [None]:
# Register the wandb callback for fitting, but only if one was created.
if wandb_callback is not None:
    fit_callbacks.append(wandb_callback)

In [None]:
# Placeholder for the History object returned by model.fit.
history = None

In [None]:
# Train the model. An empty callback list is passed on as None, which is the
# default of model.fit and therefore the same as omitting the argument.
history = model.fit(
    pointer_to_training_dataset,
    validation_data=pointer_to_validation_dataset,
    epochs=epochs,
    callbacks=fit_callbacks if fit_callbacks else None
)

In [None]:
# Collect the per-epoch metric lists from the fit history: 'validation' holds
# the val_* series, 'result' holds the training series.
result = {
    'validation': {
        'accuracy': history.history['val_accuracy'],
        'loss': history.history['val_loss']
    },
    'result': {
        'accuracy': history.history['accuracy'],
        'loss': history.history['loss']
    }
}

# Keep every fit's metrics so they can be logged together afterwards.
results.append(result)

In [None]:
# Report the collected metrics to wandb when run logging is enabled.
if get_log_runs():
    wandb.log({'training': results})

# The trained model is written to disk whether or not logging is enabled.
model.save(path_to_model)

# Close the wandb run once the model has been saved.
if get_log_runs():
    wandb.finish()