In [None]:
# dependencies import
from common_dependencies import *
from typing import NamedTuple
#import model_versions as mv
import logging
# Notebook-level logger, registered under the 'main' logger hierarchy.
logger = logging.getLogger('main.ae_model_test')

# Загрузка модели для тестирования

In [None]:
# Pin every random generator that influences the initial weights and biases,
# so repeated runs of the notebook start from the same state.
SEED = 290
np.random.seed(SEED)
tf.random.set_seed(SEED)
tf.compat.v1.set_random_seed(SEED)

## Поиск модели по идентификатору

In [None]:
# Which model to load: file names carry the identifier as `id=v<VER>n<NUM>`.
MODEL_VER = 1
MODEL_NUM = 1

# `0*` tolerates zero-padded numbers. The trailing `(?!\d)` is required:
# without it MODEL_NUM=1 also matches n10, n11, ... and the "exactly one" check fails.
model_id_pattern = re.compile(rf'id=v0*{MODEL_VER}n0*{MODEL_NUM}(?!\d)')
model_pathes = [path for path in pathlib.Path('networks/AE/').rglob('*.keras')
                if model_id_pattern.search(path.name)]

# Exactly one file must match; show the candidates before failing so the clash is visible.
if len(model_pathes) != 1:
    print(model_pathes)
    raise ValueError('Few or none model have been found instead of one')
PATH_TO_MODEL = model_pathes[0]

# Generated images go to <results root>/<model folder name>/<model file name without suffix>.
PATH_TO_SAVE_IMAGES = pathlib.Path('data/generated_content/AE models results')/PATH_TO_MODEL.parent.stem/PATH_TO_MODEL.stem

print(f'{PATH_TO_MODEL=}')
print(f'{PATH_TO_SAVE_IMAGES=}')

### Загрузка модели

In [None]:
# Load the trained autoencoder from disk.
model = keras.models.load_model(PATH_TO_MODEL)
# Width of the encoding = the smallest second dimension among all layer outputs.
# assumes every layer output has a static shape[1] — TODO confirm for non-dense architectures
ENCODED_SIZE = min(layer.output.shape[1] for layer in model.layers)

# Create the output folder; exist_ok makes the cell safe to re-run.
os.makedirs(PATH_TO_SAVE_IMAGES, exist_ok=True)

# summary() prints the table itself and returns None, so it must not be wrapped in print().
model.summary()

# Disabled: render the architecture diagram.
# tf.keras.utils.plot_model(
#     model,
#     show_shapes=True,
#     show_dtype=False,
#     show_layer_names=True,
#     rankdir="TB",
#     expand_nested=False,
#     dpi=200,
#     show_layer_activations=False,
#     show_trainable=False,
# )

# Загрузка данных для тестирования

## Параметры  выборок

In [None]:
MAX_VAL = 1000  # do not change
XSHIFT = 200  # do not change

# Both runs are described identically; only the source path differs.
dataset_desc = {
    run_name: (DatasetPartDescription(PATH_TO_DATA[run_name],
                                      DataCrop(0, 0, MAX_VAL, MAX_VAL),
                                      SlidingCrop(1, 1),
                                      XSHIFT),)
    for run_name in ('run_1', 'run_2')
}

In [None]:
# Input and output data.
# Each run is read through chain_dataset_gens(); the generators it yields are
# materialised into arrays and paired, in order, with the keys 'x', 'y', 'bin'.
# The arrays are flattened on the spot: 'x' to rows of 64 values, 'y' and 'bin' to 1-D.
part_keys = ['x', 'y', 'bin']
flat_shapes = {'x': (-1, 64), 'y': (-1,), 'bin': (-1,)}

dataset = {
    run_name: {
        key: values.reshape(flat_shapes[key])
        for key, values in zip(part_keys,
                               [np.array(list(gen)) for gen in chain_dataset_gens(dataset_desc[run_name])])
    }
    for run_name in ('run_1', 'run_2')
}

## Размерности данных в обучающих выборках

In [None]:
# Log (debug level) the shape and value range of every array of every run.
logger.debug('\nDataset')
banner = '|' * 8
for run_name, run_data in dataset.items():
    logger.debug(f'{banner}{run_name}{banner}')
    for key, values in run_data.items():
        largest = np.max(values)
        smallest = np.min(values)
        logger.debug(f'{key}.shape: {values.shape}, max={largest}, min={smallest}')

# Анализ качества кодирования и декодирования

## Кодирование и декодирование данных

In [None]:
# Run every run's inputs through the autoencoder and keep the reconstruction next to them.
for run_data in dataset.values():
    run_data['autoencode_result'] = model.predict(run_data['x'], verbose=0)

In [None]:
# Print the shape of every array stored for every run, one run per paragraph.
for run_name, run_data in dataset.items():
    print(run_name)
    for key in run_data:
        print(key, run_data[key].shape)
    print()

## Вывести примеры кодирования и декодирования графиков

In [None]:
# Plot original vs. decoded curves for the first rows of each group (defect / non-defect)
# of every run and save each figure as a JPEG.

FONT_SIZE = 20
N_EXAMPLES = 10  # maximum number of figures per group and per run

# (file-name prefix, figure title, select rows with bin != 0?, draw the legend?)
example_groups = (
    ('defect', 'Результат декодирования для замера датчика в области дефекта', True, True),
    ('nondefect', 'Результат декодирования для замера датчика в области без дефектов', False, False),
)

for dataset_part_name, dataset_part in dataset.items():
    path_to_save = f'{PATH_TO_SAVE_IMAGES}/{dataset_part_name}'
    os.makedirs(path_to_save, exist_ok=True)

    for file_prefix, plot_title, select_defects, with_legend in example_groups:
        if select_defects:
            group_mask = dataset_part['bin'] != 0
        else:
            group_mask = dataset_part['bin'] == 0
        # Resolve the row numbers once instead of re-masking the full arrays for every
        # curve; slicing also copes with groups that hold fewer than N_EXAMPLES rows.
        row_ids = np.flatnonzero(group_mask)[:N_EXAMPLES]

        for i, row_id in enumerate(row_ids):
            source_row = dataset_part['x'][row_id]
            decoded_row = dataset_part['autoencode_result'][row_id]

            fig, ax = plt.subplots()
            fig.set_figwidth(12)
            fig.set_figheight(8)

            # Columns 1..31 go on the x axis and columns 33..63 on the y axis;
            # columns 0 and 32 are not plotted.
            # NOTE(review): presumably the 64 features are 32 time + 32 amplitude values —
            # confirm why the first value of each half is skipped.
            ax.plot(source_row[1:32], source_row[33:], c='blue', linestyle='--', label='Исходные данные')
            ax.plot(decoded_row[1:32], decoded_row[33:], c='red', label='Декодированные данные')

            ax.set_title(plot_title, fontsize=FONT_SIZE)
            if with_legend:
                ax.legend(fontsize=FONT_SIZE)
            ax.set_xlabel('Время', fontsize=FONT_SIZE)
            ax.set_ylabel('Амплитуда', fontsize=FONT_SIZE)
            ax.tick_params(axis='both', labelsize=FONT_SIZE)
            ax.grid(True)

            plt.savefig(f'{path_to_save}/{file_prefix}_{i}.jpg', bbox_inches='tight')
            plt.show()
            plt.close(fig)

In [None]:
# The loss function is just an example, the reduction is the important one
#model.compile(optimizer=keras.optimizers.Adam(0.01), 
#              loss=keras.losses.MeanSquaredError(reduction=tf.compat.v1.losses.Reduction.NONE) ) # keep your original optimizer


#pred_y = model.predict(dataset['run_1']['x'], verbose=0)
#loss = keras.losses.MeanSquaredError(reduction='sum')(dataset['run_1']['x'], pred_y) # sum_over_batch_size
#print(loss)
# And then you'll get each loss for each instance within a batch
#for dataset_part_name, dataset_part in dataset.items():
#    res = model.evaluate(dataset_part['x'], dataset_part['y'], batch_size=128)
#    print(res)

## Посчитать функцию ошибки для каждого наблюдения в выборках

In [None]:
# Per-observation reconstruction error: mean squared difference between each input row
# and its reconstruction. This is the value MeanSquaredError(reduction='sum') returns for
# a single row, computed for all rows at once instead of one Keras call per row.
for dataset_part_name, dataset_part in dataset.items():
    reconstruction = model.predict(dataset_part['x'], verbose=0)
    residual = dataset_part['x'] - reconstruction
    # einsum sums the squared residuals per row without a second full-size temporary.
    squared_error_sums = np.einsum('ij,ij->i', residual, residual)
    dataset[dataset_part_name]['all_losses'] = (squared_error_sums / residual.shape[1]).astype(np.float64)

In [None]:
# One loss value is expected per observation: show the array shapes for both runs.
for run_name in ('run_1', 'run_2'):
    print(dataset[run_name]['all_losses'].shape)

## Вывести распределение размеров функции ошибки для наблюдений

In [None]:
FONT_SIZE = 15

# Two panels: losses of rows with bin != 0 on the left, rows with bin == 0 on the right.
fig, axes = plt.subplots(1, 2)
fig.set_figwidth(16)
fig.set_figheight(8)
fig.suptitle('Гистограммы распределения величины функции ошибки (Mean squared error) к разным частям выборки',
             fontsize=FONT_SIZE)

defect_ax, nondefect_ax = axes
defect_ax.set_title('Для наблюдений, относящихся к местам дефектов', fontsize=FONT_SIZE)
nondefect_ax.set_title('Для наблюдений, не относящихся к местам дефектов', fontsize=FONT_SIZE)

# Runs are overlaid semi-transparently, one colour per run.
for clr, (run_name, run_data) in zip(['red', 'blue'], dataset.items()):
    losses = run_data['all_losses']
    run_label = f'файл: {run_name}'
    defect_ax.hist(losses[run_data['bin'] != 0], label=run_label, color=clr, alpha=0.5)
    nondefect_ax.hist(losses[run_data['bin'] == 0], label=run_label, color=clr, alpha=0.5)

# Shared cosmetics for both panels.
for ax in axes:
    ax.set_xlabel('Значение mean squared error', fontsize=FONT_SIZE)
    ax.set_ylabel('Количество наблюдений', fontsize=FONT_SIZE)
    ax.grid(True)
    ax.tick_params(axis='both', which='both', labelsize=FONT_SIZE)
    ax.margins(0.05)
    ax.legend(fontsize=FONT_SIZE)

plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, wspace=0.15, hspace=0.1)
plt.savefig(f'{PATH_TO_SAVE_IMAGES}/losses_encode_to_{ENCODED_SIZE}.jpg', bbox_inches='tight')
plt.show()
plt.close()

# Анализ результатов кодирования

In [None]:
# Encoder sub-model: shares the autoencoder's input and stops at the layer output
# with the smallest second dimension (taken to be the encoding layer).
# Alternative when the layer is named:
#   keras.Model(inputs=ae.input, outputs=ae.get_layer('enc_output').output)
layer_outputs = [layer.output for layer in model.layers]
narrowest_output = min(layer_outputs, key=lambda tensor: tensor.shape[1])
enc_model = keras.Model(inputs=model.input, outputs=narrowest_output)

# Encode every run and keep the codes next to the inputs.
for run_data in dataset.values():
    run_data['encode_result'] = enc_model.predict(run_data['x'])

## Можно запустить для моделей со слоем кодирования равным 2

In [None]:
FONT_SIZE = 20
name = 'Отображения признаков из 64-x мерного пространства на двух мерное'

# One panel per run: the first two encoded coordinates, coloured by the 'bin' label.
fig, axes = plt.subplots(1, 2)
fig.set_figwidth(22)
fig.set_figheight(8)
fig.suptitle(name, fontsize=FONT_SIZE)

for i, (run_name, run_data) in enumerate(dataset.items()):
    panel = axes[i]
    codes = run_data['encode_result']
    clean_rows = run_data['bin'] == 0
    defect_rows = run_data['bin'] != 0

    panel.set_title(f'Результат для {run_name}', fontsize=FONT_SIZE)
    # Non-defect points first, so the defect points are drawn on top of them.
    panel.scatter(codes[clean_rows, 0], codes[clean_rows, 1], c='blue', s=3, label='non defect')
    panel.scatter(codes[defect_rows, 0], codes[defect_rows, 1], c='red', s=3, label='defect')

# Shared cosmetics for both panels.
for ax in axes:
    ax.set_xlabel('X', fontsize=FONT_SIZE)
    ax.set_ylabel('Y', fontsize=FONT_SIZE)
    ax.grid(True)
    ax.tick_params(axis='both', which='both', labelsize=FONT_SIZE)
    ax.margins(0.05)
    ax.legend(fontsize=FONT_SIZE)

plt.subplots_adjust(left=0.1, bottom=0.1, right=0.9, top=0.9, wspace=0.15, hspace=0.2)

plt.savefig(f'{PATH_TO_SAVE_IMAGES}/encoded_to_{ENCODED_SIZE}_distribution.jpg', bbox_inches='tight')
plt.show()
plt.close()

### Если на распределении видны скопления точек - можно руками тут выделить эти точки срезами по осям x и y

In [None]:
# Disabled code kept as a string literal (nothing below is executed).
# It would label each 2-D encoded point with a class 0..3 using fixed cuts on the
# 'x' and 'y' columns (0.2 and 0.6) and display both frames.
'''df_1 = pd.DataFrame(data=dataset['run_1']['encode_result'],columns=['x','y'])
df_2 = pd.DataFrame(data=dataset['run_2']['encode_result'],columns=['x','y'])

def label_class(row):
    if row['y'] < 0.2:
        return 0
    if row['x'] < 0.2:
        return 1
    elif row['x'] >= 0.2 and row['x'] < 0.6:
        return 2
    else :
        return 3
        
df_1['class'] = df_1.apply(label_class, axis=1)
df_2['class'] = df_2.apply(label_class, axis=1)

display(df_1)
display(df_2)''';

### Если выделил точки - тут можно отобразить их положение в файлах

In [None]:
# Disabled code kept as a string literal (nothing below is executed).
# For each of the 4 classes in df_1 it would paint the matching positions on the
# run_1 map and draw it.
# NOTE(review): it calls `dataset.get_x_and_y_data_dfs`, but in this notebook `dataset`
# is bound to a dict of arrays — presumably a project module was meant; confirm before re-enabling.
'''for class_i in range(4):
    df = df_1[df_1['class']==class_i]
    
    class_vals = df.index.to_list()
    
    x_df, y_df = dataset.get_x_and_y_data_dfs(dw.DataPart(path=PATH_TO_DATA['run_1']))

    xshift = 200
    crop_size = 1
    crop_step = 1
    
    x_df = dw.roll_df(x_df, xshift, 1)
    y_df = dw.roll_df(y_df, xshift, 1)

    it = 0
    
    for i in range(0, y_df.shape[0] - crop_size + 1, crop_step):  
        for j in range(0, y_df.shape[1] - crop_size + 1, crop_step): 
            if it in class_vals:
                y_df.iloc[i:i+crop_size, j:j+crop_size] = 1
            else:
                y_df.iloc[i:i+crop_size, j:j+crop_size] = 0
            it+=1
            
    dw.draw_defects_map(y_df, 
        title = f'Точки run_1 относящиеся к классу {class_i} ({len(class_vals)} точек)',
        path_to_save = f'run_1_encoded_to_2_class={class_i}_dot_quantity={len(class_vals)}.jpg')
''';

In [None]:
# Disabled code kept as a string literal (nothing below is executed).
# Same as the run_1 variant, but driven by df_2 and titled/saved as run_2.
# NOTE(review): the map is still loaded from PATH_TO_DATA['run_1'] although everything
# else refers to run_2 — looks like a copy-paste slip; confirm before re-enabling.
# NOTE(review): `dataset` is a dict in this notebook, so `dataset.get_x_and_y_data_dfs`
# would fail — presumably a project module was meant.
'''for class_i in range(4):
    df = df_2[df_2['class']==class_i]
    
    class_vals = df.index.to_list()
    
    x_df, y_df = dataset.get_x_and_y_data_dfs(dw.DataPart(path=PATH_TO_DATA['run_1']))

    xshift = 200
    crop_size = 1
    crop_step = 1
    
    x_df = dw.roll_df(x_df, xshift, 1)
    y_df = dw.roll_df(y_df, xshift, 1)

    it = 0
    
    for i in range(0, y_df.shape[0] - crop_size + 1, crop_step):  
        for j in range(0, y_df.shape[1] - crop_size + 1, crop_step): 
            if it in class_vals:
                y_df.iloc[i:i+crop_size, j:j+crop_size] = 1
            else:
                y_df.iloc[i:i+crop_size, j:j+crop_size] = 0
            it+=1
            
    dw.draw_defects_map(y_df, 
        title = f'Точки run_2 относящиеся к классу {class_i} ({len(class_vals)} точек)',
        path_to_save = f'run_2_encoded_to_2_class={class_i}_dot_quantity={len(class_vals)}.jpg')
''';

In [None]:
# Disabled code kept as a string literal (nothing below is executed).
# It would split the 2-D encoded points of both runs into a "down" group (y < 0.2) and
# an "up" group, print the group sizes and draw one map per run with the flag painted
# position by position.
# NOTE(review): the second map (for df_2 / run_2) is also loaded from PATH_TO_DATA['run_1'] —
# looks like a copy-paste slip; confirm before re-enabling.
# NOTE(review): `dataset` is a dict in this notebook, so `dataset.get_x_and_y_data_dfs`
# would fail — presumably a project module was meant.
'''df_1 = pd.DataFrame(data=dataset['run_1']['encode_result'],columns=['x','y'])
df_2 = pd.DataFrame(data=dataset['run_2']['encode_result'],columns=['x','y'])

df_1['down'] = df_1['y'].map(lambda y: 1 if y < 0.2 else 0)
df_2['down'] = df_2['y'].map(lambda y: 1 if y < 0.2 else 0)

print(f'run_1 up: {df_1[df_1["down"]==0].shape[0]}')
print(f'run_1 down: {df_1[df_1["down"]==1].shape[0]}')
print(f'run_2 up: {df_2[df_2["down"]==0].shape[0]}')
print(f'run_2 down: {df_2[df_2["down"]==1].shape[0]}')
#display(df_1)
#display(df_2)

x_df, y_df = dataset.get_x_and_y_data_dfs(dw.DataPart(path=PATH_TO_DATA['run_1']))

xshift = 200
crop_size = 1
crop_step = 1

x_df = dw.roll_df(x_df, xshift, 1)
y_df = dw.roll_df(y_df, xshift, 1)

down = iter(df_1['down'].values)

for i in range(0, y_df.shape[0] - crop_size + 1, crop_step):  
        for j in range(0, y_df.shape[1] - crop_size + 1, crop_step):  
            y_df.iloc[i:i+crop_size, j:j+crop_size] = next(down)

dw.draw_defects_map(y_df, 
                    title = f'Точки run_1 относящиеся к нижнему скоплению - яркие ({df_1[df_1["down"]==1].shape[0]} точек), к верхнему - темные ({df_1[df_1["down"]==0].shape[0]} точек)')


x_df, y_df = dataset.get_x_and_y_data_dfs(dw.DataPart(path=PATH_TO_DATA['run_1']))
x_df = dw.roll_df(x_df, xshift, 1)
y_df = dw.roll_df(y_df, xshift, 1)

down = iter(df_2['down'].values)

for i in range(0, y_df.shape[0] - crop_size + 1, crop_step):  
        for j in range(0, y_df.shape[1] - crop_size + 1, crop_step):  
            y_df.iloc[i:i+crop_size, j:j+crop_size] = next(down)

dw.draw_defects_map(y_df, title = f'Точки run_2 относящиеся к нижнему скоплению - яркие ({df_2[df_2["down"]==1].shape[0]} точек), к верхнему - темные ({df_2[df_2["down"]==0].shape[0]} точек)')''';

## Можно запустить для моделей со слоем кодирования более 2

In [None]:
# Histogram of every encoded parameter of run_1, with the chosen split values marked in red.
df_1 = pd.DataFrame(data=dataset['run_1']['encode_result'])
df_1.columns = [f"{col} параметр" for col in df_1.columns.to_list()]
axarr = df_1.hist(figsize=(20,20), bins=ENCODED_SIZE)

for ax in axarr.flat:
    ax.set_xlabel("Значение")
    ax.set_ylabel("Кол-во наблюдений")

# Position of the panel in the grid (row-major) -> split value to mark.
# On a 4-column grid these are exactly the panels [0][0], [1][1], [2][3], [3][0], [3][2], [3][3].
# NOTE(review): the position equals the parameter number only if hist() draws the columns
# in frame order — confirm for the pandas version in use.
split_thresholds = {0: 1.5, 5: 1.5, 11: 2, 12: 2.5, 14: 1, 15: 1}
for panel_idx, threshold in split_thresholds.items():
    # Skip marks that do not fit the grid (encodings with fewer parameters) instead of raising IndexError.
    if panel_idx < axarr.size:
        axarr.flat[panel_idx].axvline(threshold, c='r', linewidth=2)

In [None]:
# Histogram of every encoded parameter of run_2, with the chosen split values marked in red.
df_2 = pd.DataFrame(data=dataset['run_2']['encode_result'])
df_2.columns = [f"{col} параметр" for col in df_2.columns.to_list()]
axarr = df_2.hist(figsize=(20,20), bins=ENCODED_SIZE)

for ax in axarr.flat:
    ax.set_xlabel("Значение")
    ax.set_ylabel("Кол-во наблюдений")

# Position of the panel in the grid (row-major) -> split value to mark.
# On a 4-column grid these are exactly the panels [0][0], [1][1], [2][3], [3][0], [3][2], [3][3].
# NOTE(review): the position equals the parameter number only if hist() draws the columns
# in frame order — confirm for the pandas version in use.
split_thresholds = {0: 1.5, 5: 1.5, 11: 2, 12: 2.5, 14: 1, 15: 1}
for panel_idx, threshold in split_thresholds.items():
    # Skip marks that do not fit the grid (encodings with fewer parameters) instead of raising IndexError.
    if panel_idx < axarr.size:
        axarr.flat[panel_idx].axvline(threshold, c='r', linewidth=2)

#### Если есть распределения в форме параболы - можно по ним разделить точки на N классов

In [None]:
# Split the run_1 observations into classes: each selected encoded parameter is turned
# into a 0/1 flag by its threshold, and every distinct combination of flags is one class.
df_1 = pd.DataFrame(data=dataset['run_1']['encode_result'])
df_1.hist(figsize=(20,20), bins=20)

# Encoded parameter (column) number -> split value.
split_thresholds = {0: 1.5, 5: 1.5, 11: 2, 12: 2.5, 14: 1, 15: 1}
div_columns = [f'{param_idx}_div' for param_idx in split_thresholds]
for param_idx, threshold in split_thresholds.items():
    # 1 when the parameter is strictly above its threshold, else 0.
    df_1[f'{param_idx}_div'] = (df_1[param_idx] > threshold).astype(int)

display(df_1)
df_1_unique_rows = df_1[div_columns].drop_duplicates(ignore_index=True)
display(df_1_unique_rows)

# Class id = row number of the flag combination in df_1_unique_rows (order of first appearance).
# A dict lookup per row replaces the nested row-by-row comparison loop.
class_of_pattern = {pattern: class_id
                    for class_id, pattern in enumerate(df_1_unique_rows.itertuples(index=False, name=None))}
df_1['class'] = [class_of_pattern[pattern]
                 for pattern in df_1[div_columns].itertuples(index=False, name=None)]
df_1['class'].hist(figsize=(10,10), bins=64)



# Disabled code kept as a string literal (nothing below is executed; as the last
# expression of the cell the string itself is echoed as the cell output).
# For each class id 0..63 it would paint the positions of that class on the run_1 map
# and save one image per class under <PATH_TO_SAVE_IMAGES>/run_1/classes.
# NOTE(review): `dataset` is a dict in this notebook, so `dataset.get_x_and_y_data_dfs`
# would fail — presumably a project module was meant.
'''for class_i in range(64):
    x_df, y_df = dataset.get_x_and_y_data_dfs(dw.DataPart(path=PATH_TO_DATA['run_1']))

    xshift = 200
    crop_size = 1
    crop_step = 1
    
    x_df = dw.roll_df(x_df, xshift, 1)
    y_df = dw.roll_df(y_df, xshift, 1)

    it = 0
    items_iter = df_1[df_1['class']==class_i].index.to_list()
    
    for i in range(0, y_df.shape[0] - crop_size + 1, crop_step):  
            for j in range(0, y_df.shape[1] - crop_size + 1, crop_step):
                if it in items_iter:
                    y_df.iloc[i:i+crop_size, j:j+crop_size] = 1
                else:
                    y_df.iloc[i:i+crop_size, j:j+crop_size] = 0
                it+=1
                
    path_to_save = f'{PATH_TO_SAVE_IMAGES}/run_1/classes'
    if not os.path.exists(path_to_save):
        os.makedirs(path_to_save)
        
    dw.draw_defects_map(y_df, title = f'Точки относящиеся к классу {class_i} ({len(items_iter)} точек)',
                        path_to_save = f'{path_to_save}/class={class_i}_dot_quantity={len(items_iter)}.jpg')'''

In [None]:
def draw_chosen_classes(path_to_data, file_num, new_title, path_to_save, choosen_df, xshift, crop_size, crop_step):
    """Draw the defect map of a file and a second map with the chosen observations marked.

    The defect map is loaded, rolled by ``xshift`` and fitted to the crop grid. Crops are
    then numbered 0, 1, 2, ... row by row; a crop is marked (+1 on its cells) when its
    number is present in ``choosen_df.index``. Both maps are drawn through
    ``dw.draw_defects_map``.

    Args:
        path_to_data: Location of the source data, passed to ``dw.DataPart(path=...)``.
        file_num: File number used in the names of the saved images.
        new_title: Title of the second map. Must consist of at least three lines
            separated by ``'\\n'``: the first and third lines become part of the file name.
        path_to_save: Folder for the saved images.
        choosen_df: DataFrame whose index holds the numbers of the crops to mark.
            May be empty, in which case nothing is marked.
        xshift: Shift passed to ``dw.roll_df``.
        crop_size: Side of a square crop, in cells.
        crop_step: Step between neighbouring crops, in cells.

    Raises:
        IndexError: If ``new_title`` has fewer than three lines.
    """
    # NOTE(review): `dataset` must be an object providing get_x_and_y_data_dfs (presumably a
    # project module); the data-loading cell of this notebook binds `dataset` to a dict — confirm.
    _, y_df = dataset.get_x_and_y_data_dfs(dw.DataPart(path=path_to_data))

    y_df = dw.roll_df(y_df, xshift, 1)
    y_df = dw.match_df_for_crops_dividing(y_df, crop_size, crop_step)

    new_y_df = y_df.copy()
    new_y_df.iloc[:,:] = 0

    # A set gives the same result as walking a sorted iterator, but cannot get stuck:
    # previously, once the last chosen number was consumed, the crop counter stopped
    # advancing and every remaining crop was marked and counted.
    chosen_numbers = set(choosen_df.index.to_list())
    y_values = y_df.to_numpy()  # converted once; the map is only read from here on

    cur_ind = 0
    all_defects = 0
    current_defects = 0
    for i in range(0, y_values.shape[0] - crop_size + 1, crop_step):
        for j in range(0, y_values.shape[1] - crop_size + 1, crop_step):
            has_defect = np.max(y_values[i:i+crop_size, j:j+crop_size]) > 0
            if has_defect:
                all_defects += 1
            if cur_ind in chosen_numbers:
                if has_defect:
                    current_defects += 1
                new_y_df.iloc[i:i+crop_size, j:j+crop_size] += 1
            cur_ind += 1

    title = 'Развернутая карта дефектов'
    dw.draw_defects_map(y_df, title=title, path_to_save=f'{path_to_save}/{title} для файла {file_num}.jpeg')

    # Split outside the f-string: nested quotes and backslashes inside the braces
    # are only accepted from Python 3.12 on.
    title_lines = new_title.split('\n')
    dw.draw_defects_map(new_y_df,
                        title=f'{new_title}\n({current_defects} дефектов из {all_defects})',
                        path_to_save=f'{path_to_save}/{title_lines[0]}{title_lines[2]} для файла {file_num}.jpeg')


In [None]:
def defect(path_to_data, xshift=200, crop_size=1,crop_step=1):
    """Return the maximum defect-map value of every crop, crop by crop.

    The defect map is loaded, rolled by ``xshift`` and fitted to the crop grid; crops are
    visited row by row, in the same order used to number observations elsewhere in the
    notebook.

    Args:
        path_to_data: Location of the source data, passed to ``dw.DataPart(path=...)``.
        xshift: Shift passed to ``dw.roll_df``.
        crop_size: Side of a square crop, in cells.
        crop_step: Step between neighbouring crops, in cells.

    Returns:
        list: One value per crop — the maximum of the map inside that crop
        (greater than 0 is treated as "has a defect" by the callers).
    """
    # NOTE(review): `dataset` must be an object providing get_x_and_y_data_dfs (presumably a
    # project module); the data-loading cell of this notebook binds `dataset` to a dict — confirm.
    _, y_df = dataset.get_x_and_y_data_dfs(dw.DataPart(path=path_to_data))

    y_df = dw.roll_df(y_df, xshift, 1)
    y_df = dw.match_df_for_crops_dividing(y_df, crop_size, crop_step)

    # Convert once: slicing the array is far cheaper than .iloc + .to_numpy() per crop.
    y_values = y_df.to_numpy()
    n_rows, n_cols = y_values.shape
    return [y_values[i:i+crop_size, j:j+crop_size].max()
            for i in range(0, n_rows - crop_size + 1, crop_step)
            for j in range(0, n_cols - crop_size + 1, crop_step)]

# Attach the per-crop defect value of run_1 to df_1 (defaults: xshift=200, crop_size=1, crop_step=1).
# assumes defect() yields exactly one value per row of df_1, in the same order — TODO confirm
df_1['defect'] = defect(PATH_TO_DATA['run_1'])

In [None]:
# Select every class that contains at least one observation from a defect zone and
# draw all observations of those classes on the run_1 map.
print(df_1.shape)
def_df = df_1.query("defect > 0")
print(def_df.shape)
# Plain ints computed once: NumPy 2 would otherwise print np.int64(...) into the
# title and the file name; it also keeps quotes out of the f-string braces below.
defect_classes = sorted(int(class_id) for class_id in def_df['class'].unique())
print(defect_classes)

often_df = df_1[df_1['class'].isin(defect_classes)]

draw_chosen_classes(PATH_TO_DATA['run_1'], 1,
    f'Развернутая карта для классов\n {defect_classes} \n (все классы в которые попало хоть 1 наблюдение для дефектной зоны) \n({often_df.shape[0]} точек из {df_1.shape[0]})',
    f'{PATH_TO_SAVE_IMAGES}/run_1/few_classes', often_df, 200, 1, 1)

In [None]:
# Per-class share of observations inside and outside defect zones.
#
# Disabled earlier variant, kept for reference:
# axarr = df_1[df_1['defect']==0]['class'].hist(figsize=(5,5), bins=64)
# axarr.set_title('Гистограмма кол-ва наблюдений, \nне относящихся к зонам дефектов для каждого класса')
# axarr.set_xlabel("Номер класса")
# axarr.set_ylabel("Кол-во наблюдений")

FONT_SIZE = 15

# Count the observations of every class directly. The previous version unpacked
# plt.hist(...) into a variable named `pd`, which replaced the pandas module for
# the rest of the session.
n_classes = int(df_1['class'].max()) + 1
class_ids = np.arange(n_classes)
defect_counts = np.bincount(df_1.loc[df_1['defect'] > 0, 'class'], minlength=n_classes)
nondefect_counts = np.bincount(df_1.loc[df_1['defect'] == 0, 'class'], minlength=n_classes)

fig, ax = plt.subplots()
fig.set_figwidth(5)
fig.set_figheight(5)

# Each series is scaled by its own largest class so both fit the same axis;
# max(..., 1) avoids dividing by zero when a series is empty.
ax.bar(class_ids, defect_counts / max(defect_counts.max(), 1),
       width=1.0, label='Дефект', color='r', alpha=0.5)
ax.bar(class_ids, nondefect_counts / max(nondefect_counts.max(), 1),
       width=1.0, label='Не дефект', color='b', alpha=0.5)

ax.grid(True)
ax.tick_params(axis='both', which='both', labelsize = FONT_SIZE)
ax.margins(0.05)
# The legend is created after the bars, otherwise it has nothing to list.
ax.legend(fontsize = FONT_SIZE)

ax.set_title('Гистограмма соотношения кол-во наблюдений относящегося и нет к зонам дефектов')
ax.set_xlabel("Номер класса")
ax.set_ylabel("Кол-во наблюдений")
plt.show()

In [None]:
# Select the classes that hold at most `val` observations and draw them on the run_1 map.
val = 2500

# Count per actual class id. Histogram bin numbers are not class ids unless the ids
# happen to span exactly 0..63, and plt.hist also left a stray figure behind.
class_sizes = df_1['class'].value_counts().sort_index()
options = class_sizes[class_sizes <= val].index.to_list()

often_df = df_1[df_1['class'].isin(options)]
draw_chosen_classes(PATH_TO_DATA['run_1'], 1,
                    f'Развернутая карта для классов\n {options} \n (в каждом меньше или равно {val} элементов) \n({often_df.shape[0]} точек из {df_1.shape[0]})',
                    f'{PATH_TO_SAVE_IMAGES}/run_1/few_classes', often_df, 200, 1, 1)

In [None]:
# Scratch check: shows how a list is rendered inside an f-string (the cell echoes the result).
f'{[1,2,3,4,5]}'

In [None]:
# Disabled code kept as a string literal (nothing below is executed).
# run_2 counterpart of the class-splitting cell: it would threshold encoded parameters
# 0, 5, 11, 12, 14 and 15 into 0/1 flags, number every distinct flag combination as a
# class, and save one map per class id 0..63.
# NOTE(review): `dataset` is a dict in this notebook, so `dataset.get_x_and_y_data_dfs`
# would fail — presumably a project module was meant.
'''df_2 = pd.DataFrame(data=dataset['run_2']['encode_result'])
#display(df_1)
df_2.hist(figsize=(20,20), bins=20)
# 0 1.5
# 5 1.5
# 11 2
# 12 2.5
# 14 1
# 15 1
df_2['0_div'] = df_2[0].map(lambda x: 1 if x > 1.5 else 0)
df_2['5_div'] = df_2[5].map(lambda x: 1 if x > 1.5 else 0)
df_2['11_div'] = df_2[11].map(lambda x: 1 if x > 2 else 0)
df_2['12_div'] = df_2[12].map(lambda x: 1 if x > 2.5 else 0)
df_2['14_div'] = df_2[14].map(lambda x: 1 if x > 1 else 0)
df_2['15_div'] = df_2[15].map(lambda x: 1 if x > 1 else 0)

display(df_2)
df_2_unique_rows = df_2[['0_div','5_div','11_div','12_div','14_div','15_div']].drop_duplicates(ignore_index=True)
display(df_2_unique_rows)

new_column = []
for i in range(df_2.shape[0]):
    row = df_2.iloc[i]
    for j in range(df_2_unique_rows.shape[0]):
        class_row = df_2_unique_rows.iloc[j]
        if  (row['0_div'] == class_row['0_div'] and
            row['5_div'] == class_row['5_div'] and
            row['11_div'] == class_row['11_div'] and
            row['12_div'] == class_row['12_div'] and
            row['14_div'] == class_row['14_div'] and
            row['15_div'] == class_row['15_div']):
            new_column.append(j)
            
print(len(new_column))
df_2['class'] = new_column

df_2['class'].hist(figsize=(10,10), bins=64)

for class_i in range(64):
    x_df, y_df = dataset.get_x_and_y_data_dfs(dw.DataPart(path=PATH_TO_DATA['run_2']))

    xshift = 200
    crop_size = 1
    crop_step = 1
    
    x_df = dw.roll_df(x_df, xshift, 1)
    y_df = dw.roll_df(y_df, xshift, 1)

    it = 0
    items_iter = df_2[df_2['class']==class_i].index.to_list()
    
    for i in range(0, y_df.shape[0] - crop_size + 1, crop_step):  
            for j in range(0, y_df.shape[1] - crop_size + 1, crop_step):
                if it in items_iter:
                    y_df.iloc[i:i+crop_size, j:j+crop_size] = 1
                else:
                    y_df.iloc[i:i+crop_size, j:j+crop_size] = 0
                it+=1
    
    dw.draw_defects_map(y_df, title = f'Точки run_2 относящиеся к классу {class_i} ({len(items_iter)} точек)',
                        path_to_save = f'run_2_encoded_to_16_class={class_i}_dot_quantity={len(items_iter)}.jpg')''';