# Импорт библиотек

In [None]:
import numpy as np
import pandas as pd
import os
import PIL
import pathlib
import tensorflow as tf
import matplotlib 
from matplotlib.image import imread
import matplotlib.pyplot as plt
from tensorflow import keras
from keras import layers
from IPython.display import display
# Make random values reproducible: fix the NumPy and TensorFlow seeds.
np.random.seed(290)
tf.random.set_seed(290)
# TF1-compat variant of the same call, kept disabled:
#tf.compat.v1.set_random_seed(290)

### Скачивание датасета

In [None]:
# Example of how the dataset could be downloaded (it is already downloaded).
# The snippet below is wrapped in a string literal, so none of it is executed;
# the statement only builds the string and discards it.
# NOTE(review): the trailing ';' presumably suppresses the notebook's echo of
# the string as cell output - confirm.
'''dataset_url = "https://storage.googleapis.com/download.tensorflow.org/example_images/flower_photos.tgz"
archive = tf.keras.utils.get_file(origin=dataset_url, extract=True, cache_dir=os.getcwd())
data_dir = pathlib.Path(archive).with_suffix('')''';

### Разметка данных (изначально в датасете нет размытых изображений)

In [None]:
# Fractions of every source folder that go to the train / validation / test splits.
TRAIN_PERCENT, VAL_PERCENT, TEST_PERCENT = 0.60, 0.20, 0.20

# Folder with the raw photos (no blur applied, no labels).
PATH_TO_UNPREPARED_DATASET = 'flower_photos'
# Folder where the labelled data will be stored.
PATH_TO_SAVE_DATASET = 'dataset'

# Skeleton of the labelled dataset: split name -> class name -> list of path chunks.
# Each inner list is a fresh object, so the six buckets never alias each other.
dataset = {
    split_name: {class_name: [] for class_name in ('blur', 'sharp')}
    for split_name in ('train', 'val', 'test')
}

In [None]:
# Distribute all photos over the train / val / test splits and, inside every
# split, over the "blur" half (blurred later) and the "sharp" half (kept as is).
#
# Chunks are gathered in a local structure and written to `dataset` only at the
# end, so the cell can be re-run without appending to already-concatenated arrays.
collected = {split_name: {'blur': [], 'sharp': []} for split_name in dataset}

for dir_path, dir_names, file_names in os.walk(PATH_TO_UNPREPARED_DATASET):
    # dir_path   - path of the folder being processed (prefixed with the walk root)
    # dir_names  - names of its sub-folders
    # file_names - names of the files it contains

    # Keep only .jpg images; sorting makes the split independent of the
    # arbitrary order in which the file system lists directory entries.
    jpg_names = sorted(name for name in file_names if name.endswith('.jpg'))
    if not jpg_names:
        continue
    print(dir_path, dir_names, len(jpg_names), 'items in current path')

    # Prefix every file name with its folder and cast to a numpy ndarray.
    image_paths = np.array([f'{dir_path}/{name}' for name in jpg_names])
    total = image_paths.shape[0]

    # Cut the array into 3 consecutive parts according to the configured percentages.
    train_part, val_part, test_part = np.split(
        image_paths,
        [int(total * TRAIN_PERCENT), int(total * (TRAIN_PERCENT + VAL_PERCENT))],
    )

    # First half of each part is labelled "blur", the rest "sharp".
    # The test split is built from test_part, so it does not overlap validation.
    for split_name, part in (('train', train_part),
                             ('val', val_part),
                             ('test', test_part)):
        half = part.shape[0] // 2
        collected[split_name]['blur'].append(part[:half])
        collected[split_name]['sharp'].append(part[half:])

# Merge the per-folder chunks of every bucket into a single ndarray.
# np.concatenate raises ValueError when no images were found at all.
for split_name, classes in collected.items():
    for class_name, chunks in classes.items():
        dataset[split_name][class_name] = np.concatenate(chunks)

In [None]:
# Report every split: per class, the number of collected paths and the first three of them.
banner = '|' * 8
for split_name, classes in dataset.items():
    print(banner, f'{split_name} part', banner)
    for class_name, paths in classes.items():
        print('\t', banner, f'{class_name} part', banner)
        print('\t', f'Total items: {paths.shape[0]}')
        # sep='\n' puts the tab, the caption and each sample path on its own line.
        print('\t', 'Example (first 3 items):', *paths[:3], sep='\n')
    print()

### Обработка размеченных данных (применение размытия) и помещение в новую папку

In [None]:
from PIL import Image, ImageFilter

# Write the labelled dataset to disk as
# PATH_TO_SAVE_DATASET/<split>/<class>/<file name>.
# Images of the "blur" class get a box blur of radius 5; every other class
# ("sharp") is re-saved unchanged.
for split_name, classes in dataset.items():
    for class_name, image_paths in classes.items():
        # Target folder for this split/class; exist_ok makes re-runs safe.
        target_dir = f'{PATH_TO_SAVE_DATASET}/{split_name}/{class_name}'
        os.makedirs(target_dir, exist_ok=True)

        should_blur = class_name == 'blur'
        for image_path in image_paths:
            # The context manager closes the source file after each image, so
            # file handles are not left open while thousands of files are processed.
            with Image.open(image_path) as source_image:
                if should_blur:
                    output_image = source_image.filter(ImageFilter.BoxBlur(5))
                else:
                    output_image = source_image
                output_image.save(f'{target_dir}/{os.path.basename(image_path)}')

### Чтение путей обработанных изображений в датафрейм

In [None]:
# Index of all saved images: one row per .jpg found under PATH_TO_SAVE_DATASET.
#   path    - pathlib.Path of the image
#   dataset - name of the split folder (grandparent directory of the file)
#   blur    - 1 when the file lies in a "blur" folder, 0 otherwise
# Sorting the paths gives a stable row order regardless of file-system listing order.
image_paths = sorted(pathlib.Path(PATH_TO_SAVE_DATASET).rglob('*.jpg'))
dataset_df = pd.DataFrame(data=image_paths, columns=['path'])
# Path components are taken through pathlib rather than by splitting on '/',
# so the labels are also correct where the OS path separator is not '/'.
dataset_df['dataset'] = dataset_df['path'].map(lambda p: p.parent.parent.name)
dataset_df['blur'] = dataset_df['path'].map(lambda p: 1 if p.parent.name == 'blur' else 0)
display(dataset_df)

In [None]:
# Show only the rows that belong to the test split.
is_test_row = dataset_df['dataset'] == 'test'
display(dataset_df[is_test_row])

# Создание модели

In [None]:
# Training hyper-parameters of the network.
BATCH_SIZE = 32
IMAGE_HEIGHT, IMAGE_WIDTH = 256, 256

LEARNING_RATE = 0.001

# Binary cross-entropy loss, optimised with SGD at LEARNING_RATE.
LOSS_FUNCTION = keras.losses.BinaryCrossentropy()
OPTIMIZER = keras.optimizers.SGD(learning_rate=LEARNING_RATE)

# Metrics to track: loss-like values, precision/recall and both AUC variants.
METRICS = [
    keras.metrics.BinaryCrossentropy(),
    keras.metrics.MeanSquaredError(),
    keras.metrics.Precision(name='Precision'),
    keras.metrics.Recall(name='Recall'),
    keras.metrics.AUC(curve='PR', name='PR_AUC'),
    keras.metrics.AUC(curve='ROC', name='ROC_AUC'),
]

### Блок предобработки изображений

In [None]:
# Preprocessing pipeline: resize to IMAGE_HEIGHT x IMAGE_WIDTH, then multiply
# pixel values by 1/255.
resize_and_rescale = tf.keras.Sequential()
resize_and_rescale.add(layers.Resizing(height=IMAGE_HEIGHT, width=IMAGE_WIDTH))
resize_and_rescale.add(layers.Rescaling(scale=1. / 255))

### Блок аугментации изображений

In [None]:
# Augmentation pipeline: random horizontal/vertical flips followed by a random
# rotation (factor 1) whose empty corners are filled by reflection.
augment_data = tf.keras.Sequential(
    [
        layers.RandomFlip(mode="horizontal_and_vertical"),
        layers.RandomRotation(factor=1, fill_mode='reflect'),
        # Disabled augmentations, kept for reference:
        #layers.RandomTranslation(0.2,0.2, fill_mode="reflect"),
        #layers.RandomZoom(0.3,0.3, fill_mode="reflect")
    ]
)

### Архитектура модели

In [None]:
# Binary classifier (blurred vs. sharp): four Conv2D + MaxPooling2D stages,
# then two dense layers with dropout and a single sigmoid output unit.
# The input shape is derived from IMAGE_HEIGHT / IMAGE_WIDTH so the model stays
# in sync with the resizing done during preprocessing.
model = keras.Sequential([
    layers.Conv2D(32, (3, 3), strides=1, activation='relu',
                  input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, 3)),
    layers.MaxPooling2D(),
    layers.Conv2D(32, (3, 3), strides=1, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(32, (3, 3), strides=1, activation='relu'),
    layers.MaxPooling2D(),
    layers.Conv2D(16, (3, 3), strides=1, activation='relu'),
    layers.MaxPooling2D(),
    layers.Flatten(),
    layers.Dense(64, activation='selu'),
    layers.Dropout(0.5),
    layers.Dense(32, activation='selu'),
    layers.Dropout(0.5),
    layers.Dense(1, activation='sigmoid'),
])

# Compilation is left disabled in this cell:
#model.compile(optimizer=OPTIMIZER, loss=LOSS_FUNCTION, metrics=METRICS)

# model summary; summary() prints the table itself and returns None, so it is
# not wrapped in print().
model.summary()

### Пример сырого изображения, предобработанного и аугментированного

In [None]:
# Show one image at three stages side by side: raw, preprocessed, augmented.
FONT_SIZE = 15
fig, axes = plt.subplots(1, 3)

fig.set_figwidth(22)
fig.set_figheight(8)

# read image; a leading batch axis is added because the Keras pipelines are
# applied to a batch here, and element [0] is taken back out for plotting
path = dataset_df['path'].iloc[0]
img = np.expand_dims(imread(path), axis=0)
axes[0].imshow(img[0])
axes[0].set_title("Raw image", fontsize=FONT_SIZE, pad=15)

# preprocess image
img = resize_and_rescale(img)
axes[1].imshow(img[0])
axes[1].set_title("Preprocessed image", fontsize=FONT_SIZE, pad=15)

# augment image; training=True is passed explicitly so the random layers are
# applied regardless of the default training mode
img = augment_data(img, training=True)
# imshow needs a single (H, W, 3) image, not the whole 4-D batch
axes[2].imshow(img[0])
axes[2].set_title("Augmented and preprocessed image image", fontsize=FONT_SIZE, pad=15)

plt.show()