In [1]:
import random as rn
import os
from itertools import product
from PIL import Image
import numpy as np
from plotly.figure_factory import create_distplot
import plotly.graph_objects as go

import tensorflow as tf
# from keras.models import load_model

from func import read_img, set_model, fit_model, evaluate_model, plot_history, create_data

# Reproducibility setup. Run this cell once, in a fresh kernel, before TensorFlow executes any op:
# the tf.config calls below raise RuntimeError once the TensorFlow runtime has been initialised.

# PYTHONHASHSEED is read when the interpreter starts, so assigning it here does not change
# hashing in the running kernel (only child processes inherit it). Export it before launching
# Jupyter if hash randomisation matters.
os.environ['PYTHONHASHSEED'] = '0'

# Ensure the environment is set to deterministic operations
os.environ['TF_DETERMINISTIC_OPS'] = '1'

# Optionally disable GPU for full control (GPU computations can be non-deterministic)
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'

# tensorflow is already imported at this point, so the same two requests are repeated through
# the runtime API rather than relying only on environment variables set after the import.
tf.config.set_visible_devices([], 'GPU')
tf.config.experimental.enable_op_determinism()

# Disable parallelism to ensure operations are executed in a consistent order
tf.config.threading.set_inter_op_parallelism_threads(1)
tf.config.threading.set_intra_op_parallelism_threads(1)

# Seed every random number generator used by the notebook with the same value
seed = 0
np.random.seed(seed)
rn.seed(seed)
tf.random.set_seed(seed)




In [2]:
# Count data: number of files per class folder in the train and test splits

total_amount_train, total_amount_test = 0, 0
# sorted() keeps the report order independent of the filesystem's listing order;
# the loop variable no longer shadows the builtin dir()
for class_name in sorted(os.listdir('data/train')):
    count_train = len(os.listdir(f'data/train/{class_name}'))
    # Every class folder found under data/train is also expected under data/test
    count_test = len(os.listdir(f'data/test/{class_name}'))
    print(f'{class_name}: {count_train} / {count_test}')

    total_amount_train += count_train
    total_amount_test += count_test

print('TOTAL AMOUNT train, test: ', f'{total_amount_train} /', total_amount_test)

Avulsion fracture: 98 / 43
Comminuted fracture: 153 / 66
Compression-Crush fracture: 105 / 45
Fracture Dislocation: 111 / 48
Greenstick fracture: 95 / 41
Hairline Fracture: 97 / 42
Impacted fracture: 112 / 49
Intra-articular fracture: 72 / 32
Longitudinal fracture: 90 / 39
Oblique fracture: 86 / 38
Pathological fracture: 90 / 39
Spiral Fracture: 93 / 41
TOTAL AMOUNT train, test:  1202 / 523


# Data preparation

In [3]:
# Find max image shape in dataset

# One shape tuple per training image, reused by the following cells
shape_lst = []
for class_name in os.listdir('data/train'):
    for file_name in os.listdir(f'data/train/{class_name}'):
        img = read_img(f'data/train/{class_name}/{file_name}', as_array=True, black_white=True)

        # Unpacking doubles as a check that the image is 2-D.
        # NOTE(review): for a NumPy image array axis 0 is normally rows (height) and
        # axis 1 columns (width) — confirm against read_img before labelling the axes.
        axis0_size, axis1_size = img.shape
        shape_lst.append((axis0_size, axis1_size))

# Per-axis maxima; the two values may come from different images
shape_max = max(shape[0] for shape in shape_lst), max(shape[1] for shape in shape_lst)
shape_max


Palette images with Transparency expressed in bytes should be converted to RGBA images



(6714, 4430)

In [4]:
# Image height and width distributions

# NOTE(review): index 0 of each shape is plotted as 'width' and index 1 as 'height';
# if shape_lst holds NumPy array shapes these are presumably (height, width) — confirm the labels.
axis0_sizes = [shape[0] for shape in shape_lst]
axis1_sizes = [shape[1] for shape in shape_lst]

# Histogram only: the density curve and the rug plot are switched off
distplot_options = dict(
    bin_size=100,
    histnorm='probability',
    show_hist=True,
    show_curve=False,
    show_rug=False,
)
fig = create_distplot(
    hist_data=[axis0_sizes, axis1_sizes],
    group_labels=['width', 'height'],
    **distplot_options,
)
fig

In [None]:
# Share of images whose two axes are both strictly smaller than pad_shape_value

pad_shape_value = 250
pad_shape = (pad_shape_value,) * 2

# An image qualifies when its larger axis is still below the threshold
n_fitting = sum(1 for shape in shape_lst if max(shape[0], shape[1]) < pad_shape_value)
n_fitting / len(shape_lst)

0.24958402662229617

In [7]:
# Root folder of the image set used from here on; later cells read its 'train' and 'test' subfolders.
# NOTE(review): earlier cells read from 'data/' and nothing in this notebook creates
# 'data_processed' — confirm how it is produced before a Restart & Run All.
data_dir_name = 'data_processed'

In [None]:
# Process images with format, not allowed by keras (turn it to .png)
# Destructive: a converted file replaces the original on disk.

allowed_formats = ['jpeg', 'png', 'bmp', 'gif']
allowed_formats += [i.upper() for i in allowed_formats]

for sample in ('train', 'test'):
    for label_dir in os.listdir(f'{data_dir_name}/{sample}'):

        path = f'{data_dir_name}/{sample}/{label_dir}'
        # Read one label data
        for img_name in os.listdir(path):

            img_path = f'{path}/{img_name}'
            # The context manager closes the file handle on every path, including the skip below
            with Image.open(img_path) as img:
                if img.format in allowed_formats:
                    continue
                # convert() returns a NEW image; keep the result, otherwise the original mode is saved
                img_rgb = img.convert("RGB")

            name = img_name.rsplit('.', maxsplit=1)[0]
            img_path_new = f'{path}/{name}.png'

            # NOTE: an existing '<name>.png' in the same folder is overwritten
            img_rgb.save(img_path_new)

            # A mislabelled file already ending in '.png' is rewritten in place;
            # removing it afterwards would delete the converted image
            if img_path_new != img_path:
                os.remove(img_path)

# Model structure optimization

In [None]:
# Single training run with a fixed hyperparameter triple; keeps the curves and the metrics.

# Passed positionally to set_model (the grid-search cell names these lr, n, p)
hyperparams = (2.6e-3, 60, 0.4)

# Datasets are built before the model, as in the other run cells
train_data, valid_data, test_data = create_data(
    data_dir_name=data_dir_name,
    batch_size=50,
    pad_shape=pad_shape,
)

model = set_model(*hyperparams, pad_shape=pad_shape)
his = fit_model(model, train_data, valid_data, n_epochs=50)

# Fresh, empty figures for this run's loss and accuracy curves
fig_loss, fig_accuracy = plot_history(
    history=his,
    params=hyperparams,
    fig_loss=go.Figure(layout={'title': 'Loss'}),
    fig_accuracy=go.Figure(layout={'title': 'Accuracy'}),
)

metrics = evaluate_model(model, [train_data, valid_data, test_data])

Found 1202 files belonging to 12 classes.
Using 1022 files for training.
Found 1202 files belonging to 12 classes.
Using 180 files for validation.
Found 523 files belonging to 12 classes.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [23]:
# Variant: add dropout to every convolution. Displays the 'Accuracy' figure.
# NOTE(review): the run cells call set_model identically, so the architecture change
# presumably lives in set_model (func.py) — confirm.
fig_accuracy

In [None]:
# Single training run with a fixed hyperparameter triple; keeps the curves and the metrics.

# Passed positionally to set_model (the grid-search cell names these lr, n, p)
hyperparams = (2.6e-3, 60, 0.4)

# Datasets are built before the model, as in the other run cells
train_data, valid_data, test_data = create_data(
    data_dir_name=data_dir_name,
    batch_size=50,
    pad_shape=pad_shape,
)

model = set_model(*hyperparams, pad_shape=pad_shape)
his = fit_model(model, train_data, valid_data, n_epochs=50)

# Fresh, empty figures for this run's loss and accuracy curves
fig_loss, fig_accuracy = plot_history(
    history=his,
    params=hyperparams,
    fig_loss=go.Figure(layout={'title': 'Loss'}),
    fig_accuracy=go.Figure(layout={'title': 'Accuracy'}),
)

metrics = evaluate_model(model, [train_data, valid_data, test_data])

Found 1202 files belonging to 12 classes.
Using 1022 files for training.
Found 1202 files belonging to 12 classes.
Using 180 files for validation.
Found 523 files belonging to 12 classes.
Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [19]:
# Variant: add another hidden layer with dropout to the fully connected part. Displays the 'Accuracy' figure.
# NOTE(review): the run cells call set_model identically, so the architecture change
# presumably lives in set_model (func.py) — confirm.
fig_accuracy

In [None]:
# Single training run with a raised first hyperparameter (5e-3 instead of 2.6e-3).

# One definition feeds both set_model and plot_history, so the values recorded with the
# curves always match the model that was actually trained.
# Passed positionally to set_model (the grid-search cell names these lr, n, p).
hyperparams = (5e-3, 60, 0.4)

train_data, valid_data, test_data = create_data(data_dir_name=data_dir_name, batch_size=50, pad_shape=pad_shape)

fig_loss = go.Figure(layout={'title': 'Loss'})
fig_accuracy = go.Figure(layout={'title': 'Accuracy'})

model = set_model(*hyperparams, pad_shape=pad_shape)

his = fit_model(model, train_data, valid_data, n_epochs=50)

fig_loss, fig_accuracy = plot_history(
    history=his,
    params=hyperparams,
    fig_loss=fig_loss,
    fig_accuracy=fig_accuracy,
)

metrics = evaluate_model(model, [train_data, valid_data, test_data])

Found 1202 files belonging to 12 classes.
Using 1022 files for training.
Found 1202 files belonging to 12 classes.
Using 180 files for validation.
Found 523 files belonging to 12 classes.


Epoch 1/50


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [15]:
# Variant: add another hidden layer with dropout to the fully connected part, and increase the learning rate.
# Displays the 'Accuracy' figure.
# NOTE(review): the architecture change presumably lives in set_model (func.py) — confirm.
fig_accuracy

In [None]:
# Single training run with a fixed hyperparameter triple; keeps the curves and the metrics.

# Passed positionally to set_model (the grid-search cell names these lr, n, p)
hyperparams = (2.6e-3, 60, 0.4)

# Datasets are built before the model, as in the other run cells
train_data, valid_data, test_data = create_data(
    data_dir_name=data_dir_name,
    batch_size=50,
    pad_shape=pad_shape,
)

model = set_model(*hyperparams, pad_shape=pad_shape)
his = fit_model(model, train_data, valid_data, n_epochs=50)

# Fresh, empty figures for this run's loss and accuracy curves
fig_loss, fig_accuracy = plot_history(
    history=his,
    params=hyperparams,
    fig_loss=go.Figure(layout={'title': 'Loss'}),
    fig_accuracy=go.Figure(layout={'title': 'Accuracy'}),
)

metrics = evaluate_model(model, [train_data, valid_data, test_data])

Found 1202 files belonging to 12 classes.
Using 1022 files for training.
Found 1202 files belonging to 12 classes.
Using 180 files for validation.
Found 523 files belonging to 12 classes.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [20]:
# Variant: add another hidden layer with dropout to the fully connected part, plus BatchNorm
# after the first Dense and before its Dropout. Displays the 'Accuracy' figure.
# NOTE(review): the architecture change presumably lives in set_model (func.py) — confirm.
fig_accuracy

In [None]:
# Single training run with a fixed hyperparameter triple; keeps the curves and the metrics.

# Passed positionally to set_model (the grid-search cell names these lr, n, p)
hyperparams = (2.6e-3, 60, 0.4)

# Datasets are built before the model, as in the other run cells
train_data, valid_data, test_data = create_data(
    data_dir_name=data_dir_name,
    batch_size=50,
    pad_shape=pad_shape,
)

model = set_model(*hyperparams, pad_shape=pad_shape)
his = fit_model(model, train_data, valid_data, n_epochs=50)

# Fresh, empty figures for this run's loss and accuracy curves
fig_loss, fig_accuracy = plot_history(
    history=his,
    params=hyperparams,
    fig_loss=go.Figure(layout={'title': 'Loss'}),
    fig_accuracy=go.Figure(layout={'title': 'Accuracy'}),
)

metrics = evaluate_model(model, [train_data, valid_data, test_data])

Found 1202 files belonging to 12 classes.
Using 1022 files for training.
Found 1202 files belonging to 12 classes.
Using 180 files for validation.
Found 523 files belonging to 12 classes.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [16]:
# Variant: add BatchNorm after the last conv2d and its maxpool. Displays the 'Accuracy' figure.
# NOTE(review): the architecture change presumably lives in set_model (func.py) — confirm.
fig_accuracy

In [None]:
# Single training run with a fixed hyperparameter triple; keeps the curves and the metrics.

# Passed positionally to set_model (the grid-search cell names these lr, n, p)
hyperparams = (2.6e-3, 60, 0.4)

# Datasets are built before the model, as in the other run cells
train_data, valid_data, test_data = create_data(
    data_dir_name=data_dir_name,
    batch_size=50,
    pad_shape=pad_shape,
)

model = set_model(*hyperparams, pad_shape=pad_shape)
his = fit_model(model, train_data, valid_data, n_epochs=50)

# Fresh, empty figures for this run's loss and accuracy curves
fig_loss, fig_accuracy = plot_history(
    history=his,
    params=hyperparams,
    fig_loss=go.Figure(layout={'title': 'Loss'}),
    fig_accuracy=go.Figure(layout={'title': 'Accuracy'}),
)

metrics = evaluate_model(model, [train_data, valid_data, test_data])

Found 1202 files belonging to 12 classes.
Using 1022 files for training.
Found 1202 files belonging to 12 classes.
Using 180 files for validation.
Found 523 files belonging to 12 classes.
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [19]:
# Variant: add BatchNorm after the 2nd and 3rd conv2d, following their maxpool. Displays the 'Accuracy' figure.
# NOTE(review): the architecture change presumably lives in set_model (func.py) — confirm.
fig_accuracy

# Hyperparameter optimization

In [20]:
# Hyperparameters grid

# Candidate values per hyperparameter, each built as np.arange(start, stop, step).
# The float-step ranges depend on rounding for their length; the recorded run gave 90 combinations.
params_grid = {
    'lr': np.arange(1e-3, 4e-3, 2e-4),
    'n': np.arange(60, 70, 5),
    'p': np.arange(0.3, 0.6, 0.1),
}

# Number of combinations in the full Cartesian grid
count = len(list(product(*params_grid.values())))
count

90

In [None]:
# Grid search: train one model per (lr, n, p) combination and add its curves to two shared figures.
fig_loss = go.Figure(layout={'title': 'Loss'})
fig_accuracy = go.Figure(layout={'title': 'Accuracy'})

n_epochs = 20

# Axes are taken in the fixed order lr, n, p so each combination unpacks positionally
grid_axes = (params_grid[axis_name] for axis_name in ('lr', 'n', 'p'))

for i, params in enumerate(product(*grid_axes)):
    print(f'START: {i}, {params}')
    lr, n, p = params

    # Datasets are rebuilt for every combination before the model is created
    train_data, valid_data, test_data = create_data(
        data_dir_name=data_dir_name,
        batch_size=50,
        pad_shape=pad_shape,
    )
    model = set_model(lr, n, p, pad_shape=pad_shape)
    his = fit_model(model, train_data, valid_data, n_epochs)

    fig_loss, fig_accuracy = plot_history(
        history=his,
        params=params,
        fig_loss=fig_loss,
        fig_accuracy=fig_accuracy,
    )

    # Both HTML files are rewritten after every combination
    # (presumably so partial results survive an interrupted search)
    for figure, html_name in (
        (fig_loss, 'hyperparams_loss.html'),
        (fig_accuracy, 'hyperparams_accuracy.html'),
    ):
        figure.write_html(html_name)

# Build model with best hyperparameters

In [9]:
# Final model: retrain with the hyperparameters selected as best and export its curves.
fig_loss = go.Figure(layout={'title': 'Loss'})
fig_accuracy = go.Figure(layout={'title': 'Accuracy'})

n_epochs = 16

train_data, valid_data, test_data = create_data(data_dir_name=data_dir_name, batch_size=50, pad_shape=pad_shape)

# lr is the third value of the grid's learning-rate range (about 1.4e-3); the range is
# rebuilt here so this cell does not depend on the grid cell having been run
lr, n, p = np.arange(1e-3, 4e-3, 2e-4)[2], 60, 0.4 # Best hyperparams
model = set_model(lr, n, p, pad_shape=pad_shape)

his = fit_model(model, train_data, valid_data, n_epochs)

fig_loss, fig_accuracy = plot_history(
    history=his,
    params=(lr, n, p),
    fig_loss=fig_loss,
    fig_accuracy=fig_accuracy,
)

# File name fixed: was 'nodel_best_accuracy.html', inconsistent with 'model_best_loss.html'
fig_loss.write_html('model_best_loss.html')
fig_accuracy.write_html('model_best_accuracy.html')

Found 1202 files belonging to 12 classes.
Using 1022 files for training.
Found 1202 files belonging to 12 classes.
Using 180 files for validation.
Found 523 files belonging to 12 classes.


Epoch 1/16


Epoch 2/16
Epoch 3/16
Epoch 4/16
Epoch 5/16
Epoch 6/16
Epoch 7/16
Epoch 8/16
Epoch 9/16
Epoch 10/16
Epoch 11/16
Epoch 12/16
Epoch 13/16
Epoch 14/16
Epoch 15/16
Epoch 16/16


In [10]:
# Evaluate the final model on the train, validation and test datasets, in that order.
# The recorded output holds one pair of numbers per dataset — presumably [loss, accuracy];
# confirm in evaluate_model.
evaluate_model(model, [train_data, valid_data, test_data])



[[1.3873101472854614, 0.9011741876602173],
 [2.1659295558929443, 0.4166666567325592],
 [2.180077075958252, 0.3594646155834198]]

In [11]:
# Persist the trained model under the relative path 'model.keras'.
model.save('model.keras')