In [None]:
# dependencies import
from common_dependencies import *
from typing import NamedTuple
import model_versions as mv
import logging
# Logger registered under the name 'main.model_train'.
logger = logging.getLogger(f'main.model_train')
# Fix the random seeds so that the random values responsible for choosing the
# initial weights and biases are deterministic between runs.
# NOTE(review): `tf` and `np` are not imported explicitly in this cell; presumably
# they come from the star import of common_dependencies — confirm.
tf.compat.v1.set_random_seed(290)
tf.random.set_seed(290)
np.random.seed(290)

In [None]:
# paths for files with original data
# Files with the original data, keyed by run name. Each value is the tuple
# (data CSV, defects CSV, pipe CSV) for that run.
PATH_TO_DATA = {
    f'run_{run_no}': tuple(
        f'data/original_data/run_{run_no}/run{run_no}_WM32_{kind}.csv'
        for kind in ('data', 'defects', 'pipe')
    )
    for run_no in (1, 2)
}

# Crop size handed to the dataset builder and to the model factory.
CROP_SIZE = 20
#CROP_STEP = 4


class DataCrop(NamedTuple):
    """Bounds of a rectangular part of a data frame.

    The fields are used as `iloc[top:bottom, left:right]` slice bounds.
    Callers in this notebook also pass None for a bound to leave that side
    of the slice open.
    """

    top: int
    bottom: int
    left: int
    right: int

In [None]:
def get_dataset_gen(path_to_data_tuple: tuple,
                    x_shift: int,
                    data_part: DataCrop,
                    crop_size: int,
                    crop_step: int,
                    time_channel_count: int = 32):
    """Read one run, cut a part out of it and return crop generators.

    Steps, in order: read the x/y frames, roll both by `x_shift`, slice both
    with `data_part`, extend both so they divide into crops, convert to
    arrays, standardize, and create the crop generators. The defects map is
    drawn after the roll, after the slice and after the extension.

    Args:
        path_to_data_tuple: positional arguments unpacked into
            `dw.get_x_and_y_data` (the notebook passes a tuple of CSV paths).
        x_shift: shift passed to `dw.roll_df` for both frames.
        data_part: `iloc` slice bounds (`top:bottom`, `left:right`) applied to
            both frames; a None bound leaves that side open.
        crop_size: crop size passed to the `dw` crop helpers.
        crop_step: crop step passed to the `dw` crop helpers.
        time_channel_count: index along the last axis of the input array at
            which it is split into the "time" part (before the index) and the
            "amp" part (from the index on). Defaults to 32.

    Returns:
        A 4-tuple of generators:
        (time crops, amplitude crops, binary labels, depth labels).
        A binary label is 1 when any value of the label crop is > 0, else 0;
        a depth label is the maximum value of the label crop.
    """
    logger.debug("start")
    x_df, y_df = dw.get_x_and_y_data(*path_to_data_tuple)
    #dw.draw_defects_map(y_df, title='Read defects map')

    # Inputs and labels are rolled with the same arguments so they stay aligned.
    x_df = dw.roll_df(x_df, x_shift, 1)
    y_df = dw.roll_df(y_df, x_shift, 1)
    dw.draw_defects_map(y_df, title=f'The shifted by {x_shift} on x axis read defects map')

    # The same rectangular part is taken from inputs and labels.
    rows = slice(data_part.top, data_part.bottom)
    cols = slice(data_part.left, data_part.right)
    x_df = x_df.iloc[rows, cols]
    y_df = y_df.iloc[rows, cols]
    dw.draw_defects_map(y_df, title='The part took for learning')

    x_df = dw.extend_df_for_crops_dividing(x_df, crop_size, crop_step)
    y_df = dw.extend_df_for_crops_dividing(y_df, crop_size, crop_step)
    dw.draw_defects_map(y_df, title='The part after extending for crop dividing')

    x_arr = dw.df_to_numpy(x_df)
    y_arr = y_df.to_numpy()

    # The two parts of the last axis are standardized independently.
    x_arr_time = dw.standardize_data(x_arr[:, :, :time_channel_count])
    x_arr_amp = dw.standardize_data(x_arr[:, :, time_channel_count:])
    # NOTE(review): the labels are standardized before the `> 0` test below.
    # If dw.standardize_data shifts the values, `> 0` may no longer mean
    # "a defect is present" — confirm against dw.standardize_data.
    y_arr = dw.standardize_data(y_arr)

    # NOTE(review): four independent crop generators are created. This assumes
    # dw.get_augmented_crop_generator yields crops in the same order on every
    # call, so that inputs and labels line up — confirm.
    x_arr_time_crops_gen = dw.get_augmented_crop_generator(x_arr_time, crop_size, crop_step)
    x_arr_amp_crops_gen = dw.get_augmented_crop_generator(x_arr_amp, crop_size, crop_step)

    y_data_binary_gen = (int(np.any(crop > 0)) for crop in
                         dw.get_augmented_crop_generator(y_arr, crop_size, crop_step))
    y_data_depth_gen = (np.max(crop) for crop in
                        dw.get_augmented_crop_generator(y_arr, crop_size, crop_step))

    logger.debug("end")
    return x_arr_time_crops_gen, x_arr_amp_crops_gen, y_data_binary_gen, y_data_depth_gen

In [None]:
# Training data comes from two runs, both rolled by 200:
#   run_1 - columns from index 200 onward,
#   run_2 - columns up to index 200.
(train_x_time_gen1,
 train_x_amp_gen1,
 train_y_binary_gen1,
 train_y_depth_gen1) = get_dataset_gen(
    path_to_data_tuple=PATH_TO_DATA['run_1'],
    x_shift=200,
    data_part=DataCrop(top=None, bottom=None, left=200, right=None),
    crop_size=CROP_SIZE,
    crop_step=20,
)

(train_x_time_gen2,
 train_x_amp_gen2,
 train_y_binary_gen2,
 train_y_depth_gen2) = get_dataset_gen(
    path_to_data_tuple=PATH_TO_DATA['run_2'],
    x_shift=200,
    data_part=DataCrop(top=None, bottom=None, left=None, right=200),
    crop_size=CROP_SIZE,
    crop_step=20,
)

# Drain the generators (run_1 first, then run_2) into in-memory arrays.
train_x_time = np.stack(list(itertools.chain(train_x_time_gen1, train_x_time_gen2)))
train_x_amp = np.stack(list(itertools.chain(train_x_amp_gen1, train_x_amp_gen2)))
train_y_binary = np.array(list(itertools.chain(train_y_binary_gen1, train_y_binary_gen2)))
train_y_depth = np.array(list(itertools.chain(train_y_depth_gen1, train_y_depth_gen2)))

In [None]:
# Validation data: run_1 rolled by 200, columns up to index 200.
(val_x_time_gen,
 val_x_amp_gen,
 val_y_binary_gen,
 val_y_depth_gen) = get_dataset_gen(
    path_to_data_tuple=PATH_TO_DATA['run_1'],
    x_shift=200,
    data_part=DataCrop(top=None, bottom=None, left=None, right=200),
    crop_size=CROP_SIZE,
    crop_step=20,
)

# Drain the generators into in-memory arrays.
val_x_time = np.stack(list(val_x_time_gen))
val_x_amp = np.stack(list(val_x_amp_gen))
val_y_binary = np.array(list(val_y_binary_gen))
val_y_depth = np.array(list(val_y_depth_gen))

In [None]:
# Print the shape of every array of the training split, a blank line,
# then the same for the validation split.
for label, arr in (('train_x_time', train_x_time),
                   ('train_x_amp', train_x_amp),
                   ('train_y_binary', train_y_binary),
                   ('train_y_depth', train_y_depth)):
    print(f'{label}.shape={arr.shape!r}')
print()
for label, arr in (('val_x_time', val_x_time),
                   ('val_x_amp', val_x_amp),
                   ('val_y_binary', val_y_binary),
                   ('val_y_depth', val_y_depth)):
    print(f'{label}.shape={arr.shape!r}')

In [None]:
# Assorted constants used by the cells below.

#///////////////////////////////// compilation

# Step size of back-propagation (learning rate). Alternative value: 0.0000002
CMP_learning_rate = 0.0001
# Optimizer.
#CMP_solver = keras.optimizers.Adam(CMP_learning_rate)
CMP_solver = keras.optimizers.SGD(learning_rate=CMP_learning_rate)
# Loss function.
CMP_loss_funcs = keras.losses.BinaryCrossentropy()
# One metric object per class listed below; each metric is named after its class.
CMP_metrics = [
    getattr(keras.metrics, metric_name)(name=metric_name)
    for metric_name in (
        'BinaryAccuracy',
        'MeanSquaredError',
        'TruePositives',
        'FalsePositives',
        'TrueNegatives',
        'FalseNegatives',
        'Precision',
        'Recall',
        'AUC',
    )
]
#///////////////////////////////// callbacks

# EarlyStopping settings. Disabled: kept as comments instead of a bare
# triple-quoted string so that no stray string expression is left in the cell.
#ES_patience = 3 # number of epochs without improvement
#ES_min_delta = 0.00001 # minimal improvement of the monitored value over cur_patience
#ES_monitor_parametr =  'loss' # monitored quantity
#ES_save_best_weights = True # whether to keep the weights of the best-performing network

# ReduceLROnPlateau settings
RLPOP_monitor_parametr = 'BinaryAccuracy'  # monitored quantity
RLPOP_factor = 0.3 # multiplier for the new learning rate (new_learning_rate = old_learning_rate*RLPOP_factor)
RLPOP_patience = 1 # number of epochs without improvement
RLPOP_verbose = 1 # whether to print learning-rate changes as they happen
# Direction of improvement of the monitored quantity. Set explicitly to 'max':
# the monitored quantity is an accuracy, and Keras' 'auto' mode only recognises
# monitor names containing the lowercase substring "acc", which
# 'BinaryAccuracy' does not, so 'auto' would treat a decrease as the improvement.
RLPOP_mode = 'max'
RLPOP_min_delta = 0.0001 # threshold for a change of the monitored value
RLPOP_cooldown = 2 # number of epochs to wait after a learning-rate change before resuming
RLPOP_min_lr = 0 # lower bound of the learning rate

# CallbackList settings
CBL_add_history = True # whether to add the History callback (if it was not added manually)
CBL_add_progbar = True # whether to add the ProgbarLogger callback (if it was not added manually)
    
#///////////////////////////////// training

FIT_batch_size = 1 # batch size used for training/testing
#FIT_shuffle = True # whether to shuffle the data
FIT_verbose = True # whether to print the training progress while it runs
FIT_epochs = 2 # number of training epochs
#FIT_validation_split = 0.10 #0.20 # share of the data split off for validation

In [None]:
# Build model version v10 (CROP_SIZE is passed to the factory) and compile it
# with the optimizer, loss and metrics configured above.
model = mv.get_model_v10(CROP_SIZE)

model.compile(optimizer=CMP_solver, loss=CMP_loss_funcs, metrics=CMP_metrics)

# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
model.summary()

In [None]:
# Draw the model graph top-to-bottom ("TB") with tensor shapes and layer
# names shown; dtypes, nested models, activations and trainable flags are
# switched off. The call is the last expression of the cell, so its return
# value is what the notebook displays.
plot_model(
    model,
    show_shapes=True,
    show_dtype=False,
    show_layer_names=True,
    rankdir="TB",
    expand_nested=False,
    dpi=200,
    show_layer_activations=False,
    show_trainable=False,
)

In [None]:
# Create and configure the callbacks

callback_list = [] # callbacks collected before being handed to CallbackList

# The EarlyStopping / ReduceLROnPlateau setup below is disabled: it is wrapped
# in a string literal, so callback_list stays empty.
'''temp = keras.callbacks.EarlyStopping(
            monitor = ES_monitor_parametr, 
            min_delta = ES_min_delta, 
            patience = ES_patience,
            restore_best_weights = ES_save_best_weights
            )
callback_list.append(temp)

temp = keras.callbacks.ReduceLROnPlateau(
            monitor = RLPOP_monitor_parametr, 
            factor = RLPOP_factor, 
            patience = RLPOP_patience, 
            verbose = RLPOP_verbose,
            mode = RLPOP_mode, 
            min_delta = RLPOP_min_delta, 
            cooldown = RLPOP_cooldown, 
            min_lr = RLPOP_min_lr
            )
callback_list.append(temp)'''

# Wrap the collected callbacks in a CallbackList bound to the model; the
# CBL_* flags control whether the History and progress-bar callbacks are added.
FIT_callback_list = keras.callbacks.CallbackList(
            callbacks = callback_list, 
            add_history = CBL_add_history, 
            add_progbar = CBL_add_progbar, 
            model = model
            )

In [None]:
# Train on the [time, amplitude] input pair against the binary labels and
# validate on the validation split after every epoch. Only the binary labels
# are used here; the depth labels are not passed to fit().
history = model.fit(
    x=[train_x_time, train_x_amp],
    y=train_y_binary,
    validation_data=([val_x_time, val_x_amp], val_y_binary),
    batch_size=FIT_batch_size,
    epochs=FIT_epochs,
    shuffle=True,
    verbose=FIT_verbose,
    callbacks=FIT_callback_list,
)

In [None]:
FONT_SIZE = 15

# One figure per pair of metrics: the first metric of the pair goes to the
# left panel, the second to the right panel. Every panel shows the training
# curve and the matching `val_` curve from the fit history.
for key1, key2 in zip(
    ['BinaryAccuracy', 'Precision', 'MeanSquaredError', 'TruePositives', 'TrueNegatives'],
    ['loss', 'Recall', 'AUC', 'FalseNegatives', 'FalsePositives'],
):
    fig, axes = plt.subplots(1, 2)
    fig.set_size_inches(22, 8)

    for ax, key in zip(axes, (key1, key2)):
        ax.plot(history.history[key],
                label='Train dataset', linewidth=1.5, color='blue')
        ax.plot(history.history[f'val_{key}'], linestyle='--',
                label='Validation dataset', linewidth=3, color='red')
        ax.set_xlabel('Epoch number', fontsize=FONT_SIZE)
        ax.set_ylabel(f'{key} value', fontsize=FONT_SIZE)
        ax.set_title(f"Learning process {key} plot", fontsize=FONT_SIZE, pad=15)
        ax.tick_params(axis='both', which='both', labelsize=FONT_SIZE)
        # Major grid in thick lines, minor grid dotted gray.
        ax.minorticks_on()
        ax.grid(which='major', linewidth=2)
        ax.grid(which='minor', color='gray', linestyle=':')
        ax.legend(fontsize=FONT_SIZE, facecolor="white", loc='upper right')

    plt.show()

In [None]:
# save the model to a file
#model.save(f"networks/CNN/id=v10n01_in(16x16+16x16)_out(1)_train=0dot0_test=0dot0.keras")

In [None]:
# Disabled alternative: one figure per non-validation key of history.history.
# Kept as `#` comments instead of a bare triple-quoted string, because a string
# that is the last expression of a notebook cell is echoed as the cell output.
#for key in [k for k in history.history.keys() if not k.startswith('val')]:
#    fig, ax = plt.subplots()
#
#    fig.set_figwidth(12)
#    fig.set_figheight(8)
#
#    plt.plot(history.history[key],
#             label='Train dataset',  linewidth=1.5, color='blue')
#    plt.plot(history.history[f'val_{key}'], linestyle = '--',
#             label='Validation dataset',  linewidth=3, color='red')
#
#    ax.set_xlabel('Epoch number', fontsize=20)
#    ax.set_ylabel(f'{key} value', fontsize=20)
#    ax.set_title(f"Learning process {key} plot", fontsize=20, pad=15)
#
#    ax.patch.set_alpha(0)
#
#    # Set the tick formatting:
#    ax.tick_params(axis='both', which='both', labelsize = 20)
#
#    # Show and configure the grid
#    ax.minorticks_on()
#    ax.grid(which='major', linewidth=2)
#    ax.grid(which='minor', color = 'gray', linestyle = ':')
#
#    ax.legend(fontsize = 20, facecolor = "white", loc = 'upper right')
#
#    plt.show()

#### 