In [1]:
import os
import sys
from PIL import Image

import numpy as np
import pandas as pd
from keras import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten
from keras.utils import to_categorical

In [2]:
# Count data

total_amount_train, total_amount_test = 0, 0
for dir in os.listdir('data/train'):

    print(dir, end=': ')
    count_train = len(os.listdir(f'data/train/{dir}'))
    count_test = len(os.listdir(f'data/test/{dir}'))
    print(f'{count_train} /', count_test)
    total_amount_train += count_train
    total_amount_test += count_test

print(f'TOTAL AMOUNT train, test: ', f'{total_amount_train} /', total_amount_test)

Avulsion fracture: 98 / 43
Comminuted fracture: 153 / 66
Compression-Crush fracture: 105 / 45
Fracture Dislocation: 111 / 48
Greenstick fracture: 95 / 41
Hairline Fracture: 97 / 42
Impacted fracture: 112 / 49
Intra-articular fracture: 72 / 32
Longitudinal fracture: 90 / 39
Oblique fracture: 86 / 38
Pathological fracture: 90 / 39
Spiral Fracture: 93 / 41
TOTAL AMOUNT train, test:  1202 / 523


In [3]:
def read_img(filepath, as_array=False, black_white=False):
    """Read image, turn it to black and white scale if needed. The result can be returned as img or array"""
    img = Image.open(filepath)
    if black_white:
        img = img.convert('L')
    if as_array:
        return np.asarray(img)
    return img


In [97]:
def read_data(sample, to_normalize=True, to_pad=False, shape_pad=0):
    """Read data and return tuple of X and Y data. Extend arrays to some shape with zeros if needed"""
    data, labels = [], []

    # Read sample data
    for i, dir in enumerate(os.listdir(f'data/{sample}')):

        path = f'data/{sample}/{dir}'
        # Read one label data
        for file in os.listdir(path):

            img = read_img(f'{path}/{file}', as_array=True, black_white=True)
            if to_pad:
                # Extend data to specified shape by putting values to zeros array of shape needed
                img_padded = np.zeros(shape_pad)
                img_padded[:img.shape[0],:img.shape[1]] = img
                img_padded = img_padded.astype('float16')
                data.append(img_padded)
            else:
                data.append(img)
            labels.append(i)

    data = np.array(data, dtype='object')

    if to_normalize:
        data = data / 255

    return data, np.array(labels)


# Data preparation

In [72]:
# Find max image shape in dataset

shape_width_max, shape_height_max = 0, 0
for dir in os.listdir('data/train'):
    for file in os.listdir(f'data/train/{dir}'):
        img = read_img(f'data/train/{dir}/{file}', as_array=True, black_white=True)

        shape_width, shape_height = img.shape

        if shape_width > shape_width_max:
            shape_width_max = shape_width

        if shape_height > shape_height_max:
            shape_height_max = shape_height

shape_max = shape_width_max, shape_height_max
shape_max



(6714, 4430)

In [98]:
# Read train data

X_train, y_train = read_data('train', to_pad=True, shape_pad=shape_max)

print(X_train.shape, y_train.shape)

MemoryError: Unable to allocate 56.7 MiB for an array with shape (6714, 4430) and data type float16

In [75]:
X_test, y_test = read_data('test', to_pad=True, shape_pad=shape_max)

print(X_test.shape, y_test.shape)

MemoryError: Unable to allocate 227. MiB for an array with shape (6714, 4430) and data type float64

In [152]:
y_train_cat = to_categorical(y_train, 12)
y_test_cat = to_categorical(y_test, 12)

print(y_train_cat.shape, y_test_cat.shape)

(1202, 12) (523, 12)


In [153]:
X_train = X_train / 255
X_test = X_test / 255

In [158]:
X_train[0][0][0]

0.22745098039215686

# Building model

In [None]:
model = Sequential([
    Conv2D(
        64, # Количество фильтров (каналов)
        (5,5), # Размер каждого фильтра
        padding='same', # На выходе получится изображение той же размерности, за исключением глубины
        activation='relu',
    ),
    MaxPool2D(
        (2,2), # Размер окна
        strides=1, # Шаг сканирования
        padding='valid', # Не добавлять нулевых значений на границах
    ),
    Flatten(),
    Dense(128, activation='relu'),
    Dense(12, activation='softmax'),
])

model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
his = model.fit(
    X_train,
    y_train_cat,
    batch_size=32,
    epochs=5,
    validation_split=0.2,
)