### Инструкция
Постройте сегментацию изображений облаков типа Fish, используя сети Unet, PSPNet или FPN. В качестве базовых сетей можно использовать ResNet, MobileNet, DenseNet или любые другие подходящие. Можно использовать обученные модели сетей (входные размеры 384х256).

Постройте ансамбль предсказаний, выбирая среднее значение из нескольких. Выгрузите результаты предсказания в требуемом формате (sample_submission.csv).

Данные:
* video.ittensive.com/machine-learning/clouds/train.csv.gz (54 Мб)
* video.ittensive.com/machine-learning/clouds/train_images_small.tar.gz (212 Мб)
* video.ittensive.com/machine-learning/clouds/test_images_small.tar.gz (142 Мб)
* video.ittensive.com/machine-learning/clouds/sample_submission.csv.gz

Модели:
* video.ittensive.com/machine-learning/clouds/unet.fish.h5
* video.ittensive.com/machine-learning/clouds/fpn.fish.h5
* video.ittensive.com/machine-learning/clouds/pspnet.fish.h5

Итоговый файл с кодом (.py или .ipynb) выложите в github с портфолио.

### Подключаем библиотеки 

In [84]:
import numpy as np
import pandas as pd
import keras
keras.utils.generic_utils.get_custom_objects = keras.utils.get_custom_objects 
import segmentation_models as sm
from keras.utils import load_img, img_to_array  # замена from keras.preprocessing import image  + нужно править функции в которой image использовался 
import os
import sys




### Вспомогательные функции 

In [85]:
# Mini-batch size setting.
batch_size = 10

# Image folders: the training set and the test set used for prediction.
filesDir = "train_images_small"
filesDir_test = "test_images_small"

# Network input geometry. Per the original note the pictures were reduced
# (from 525 x 350) to the size the models require.
image_x, image_y = 384, 256
image_ch = 3  # three RGB channels

# Mask geometry.
mask_x, mask_y = 384, 256

def mask_rate(a, x, y):
    """Rescale a flat pixel position from the source encoding to an x-by-y grid.

    Args:
        a: Flat pixel position (integer or integer array) in the source
            encoding. It is split into ``a // 1400`` and ``a % 1400`` —
            presumably column and row of a 1400-row, 2100-column frame;
            TODO confirm against the dataset description.
        x: Target grid width.
        y: Target grid height.

    Returns:
        The rescaled flat position as ``uint32``.
    """
    column = a // 1400
    scaled_column = column * x // 2100
    scaled_row = y * (a % 1400) // 1400
    # NOTE(review): the column term is multiplied by ``x`` rather than ``y``;
    # verify that this stride matches the layout used by the callers.
    return np.round(x * scaled_column + scaled_row).astype("uint32")

def calc_mask (px, x=image_x, y=image_y):
    """Decode a run-length string into a binary mask on an x-by-y grid.

    Args:
        px: Space-separated integers read as consecutive ``start length``
            pairs.
        x: Mask width (defaults to ``image_x``).
        y: Mask height (defaults to ``image_y``).

    Returns:
        A ``uint8`` array of shape ``(x, y)`` with 1 inside every decoded run.
    """
    p = np.array([int(n) for n in px.split(' ')]).reshape(-1, 2)
    mask = np.zeros(x*y, dtype='uint8')
    for i, l in p:
        # mask_rate returns uint32. Subtracting 1 from a zero either wraps
        # around or goes negative depending on the NumPy version, and both
        # cases silently drop the run. Work with Python ints and clamp at 0.
        start = max(int(mask_rate(i, x, y)) - 1, 0)
        stop = int(mask_rate(l + i, x, y))
        mask[start:stop] = 1
    return mask.reshape(y, x).transpose()

def load_y(df):
    """Build the mask tensor for the rows of ``df``.

    Args:
        df: Frame with an ``EncodedPixels`` column. A NaN entry means the row
            has no mask.

    Returns:
        A ``float32`` array of shape ``(len(df), mask_y, mask_x, 1)``.
    """
    def _flat_mask(ep):
        # NaN is the only value that is not equal to itself.
        if ep != ep:
            return np.zeros(mask_x * mask_y, dtype="i1")
        return calc_mask(ep, mask_x, mask_y).transpose().flatten()

    y = [_flat_mask(ep) for ep in df["EncodedPixels"]]
    # Per the original note, returning the integer array failed with an error
    # about "type int16 of argument 'x'"; the float32 cast lets the run start.
    return np.array(y).astype("float32").reshape(len(df), mask_y, mask_x, 1)


def load_x(df):
    """Load the pictures named in ``df["Image"]`` as one array.

    Files are read from the module-level ``filesDir`` (looked up at call
    time) and resized to ``image_y`` x ``image_x`` while loading.

    Args:
        df: Frame with an ``Image`` column of file names.

    Returns:
        An array of shape ``(len(df), image_y, image_x, image_ch)``.
    """
    frames = [
        np.expand_dims(
            img_to_array(
                load_img(os.path.join(filesDir, file),
                         target_size=(image_y, image_x))
            ),
            axis=0,
        )
        for file in df["Image"]
    ]
    return np.array(frames).reshape(len(df), image_y, image_x, image_ch)

def load_data (df, batch_size):
    """Yield ``(images, masks)`` batches from ``df`` in an endless cycle.

    After the last (possibly shorter) batch the generator starts again from
    the first row, so the consumer decides how many batches to draw.

    Args:
        df: Sliceable collection of rows; each slice is passed to ``load_x``
            and ``load_y``.
        batch_size: Number of rows per batch, at least 1.

    Yields:
        Tuples ``(load_x(batch), load_y(batch))``.

    Raises:
        ValueError: On the first ``next()`` call when ``df`` is empty or
            ``batch_size`` is smaller than 1. Without this check the loop
            below would spin forever without producing a batch.
    """
    total = len(df)
    if total == 0:
        raise ValueError("load_data: df is empty, no batches can be produced")
    if batch_size < 1:
        raise ValueError("load_data: batch_size must be at least 1")
    while True:
        for batch_start in range(0, total, batch_size):
            # Slicing past the end is clipped, so the last batch may be short.
            batch = df[batch_start:batch_start + batch_size]
            yield (load_x(batch), load_y(batch))

### Загрузка данных

In [86]:
data = pd.read_csv("data/sample_submission.csv.gz")  # load the rows that predictions have to be produced for

In [87]:
# Split the combined "Image_Label" key into the file name and the cloud
# category, then keep only the Fish rows because the models were trained on Fish.
label_parts = data["Image_Label"].str.split("_")
data["Image"] = label_parts.str[0]
data["Label"] = label_parts.str[1]
data.drop(columns=["Image_Label"], inplace=True)
is_fish = data["Label"] == "Fish"
data_fish = pd.DataFrame(data[is_fish])
print(data_fish.head())

   EncodedPixels        Image Label
0            1 1  002f507.jpg  Fish
4            1 1  0035ae9.jpg  Fish
8            1 1  0038327.jpg  Fish
12           1 1  004f759.jpg  Fish
16           1 1  005ba08.jpg  Fish


### Загрузка и подготовка моделей
Загрузим обученные модели сетей Unet, PSPNet и FPN.


In [88]:
# Loading the saved models requires these custom objects to be supplied.
custom_object_scope = {
    'dice_loss': sm.losses.dice_loss,
    'iou_score': sm.metrics.iou_score,
}

In [89]:
# Unet model
model_Unet = keras.models.load_model(
    "unet.fish.h5",
    custom_objects=custom_object_scope,
)
model_Unet.summary()

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 data (InputLayer)              [(None, 256, 384, 3  0           []                               
                                )]                                                                
                                                                                                  
 bn_data (BatchNormalization)   (None, 256, 384, 3)  9           ['data[0][0]']                   
                                                                                                  
 zero_padding2d_1 (ZeroPadding2  (None, 262, 390, 3)  0          ['bn_data[0][0]']                
 D)                                                                                               
                                                                                            

In [90]:
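# PSPNet model (disabled)
# model_PSPNet = keras.models.load_model("pspnet.fish.h5", custom_object_scope)
# model_PSPNet.summary()

# Loading this file fails with "ValueError: bad marshal data (unknown type code)",
# so PSPNet is not used below (possibly the file was saved under a different Python version - not verified).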

In [91]:
# FPN model
model_FPN = keras.models.load_model(
    "fpn.fish.h5",
    custom_objects=custom_object_scope,
)
model_FPN.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 256, 384, 3  0           []                               
                                )]                                                                
                                                                                                  
 zero_padding2d_1 (ZeroPadding2  (None, 262, 390, 3)  0          ['input_1[0][0]']                
 D)                                                                                               
                                                                                                  
 conv1/conv (Conv2D)            (None, 128, 192, 64  9408        ['zero_padding2d_1[0][0]']       
                                )                                                           

### Построение предсказания

In [92]:
filesDir = filesDir_test # point the loader at the test pictures (load_x, called from load_data, reads this global)


In [93]:
# FPN prediction: one picture per batch, one step per row of data_fish.
prediction_FPN = model_FPN.predict(
    load_data(data_fish, 1),
    steps=len(data_fish),
    verbose=1,
)



In [94]:
# Unet prediction: one picture per batch, one step per row of data_fish.
prediction_Unet = model_Unet.predict(
    load_data(data_fish, 1),
    steps=len(data_fish),
    verbose=1,
)



In [95]:
# Ensemble of the loaded models: average the predicted values per pixel,
# then binarise the average.

# Never abbreviate large arrays in NumPy's text output.
np.set_printoptions(threshold=sys.maxsize)

predictions = [prediction_FPN, prediction_Unet]

# Accumulate in place so only one extra array of prediction size is allocated.
# The values must stay floating point until after averaging: casting each
# model's output to int8 first truncates every value below 1.0 to 0.
mean_prediction = np.zeros_like(predictions[0], dtype="float32")
for model_prediction in predictions:
    mean_prediction += model_prediction
mean_prediction /= len(predictions)

# One flat row of pixel values per picture.
mean_prediction = mean_prediction.reshape(
    mean_prediction.shape[0], int(np.prod(mean_prediction.shape[1:]))
)

# masks[i]: flat 0/1 mask of picture i (1 where the averaged value exceeds 0.5).
masks = (mean_prediction > 0.5).astype("int8")

# target[i]: 1 when the summed averaged values exceed a tenth of the mask area.
target = (mean_prediction.sum(axis=1) > mask_x * mask_y / 10).astype("int8")


Сформируем TargetPixels и добавим к данным

In [96]:
# Serialise every mask as one line of space-separated pixel values.
# str.join is used instead of np.array2string: the latter wraps its output at
# the print line width (embedding line breaks in the string) and its result
# depends on the global print options.
# NOTE(review): these are raw pixel values, not "start length" pairs like the
# EncodedPixels strings parsed by calc_mask - confirm the expected encoding.
TargetPixels = [
    " ".join(map(str, np.asarray(mask).flatten().astype("int8").tolist()))
    for mask in masks
]

In [97]:
# Store the serialised masks and the per-picture flag next to the Fish rows.
for column, values in (("EncodedPixels", TargetPixels), ("target", target)):
    data_fish[column] = values
print(data_fish.head())

                                        EncodedPixels        Image Label  \
0   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  002f507.jpg  Fish   
4   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  0035ae9.jpg  Fish   
8   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  0038327.jpg  Fish   
12  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  004f759.jpg  Fish   
16  0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  005ba08.jpg  Fish   

    target  
0        1  
4        1  
8        1  
12       1  
16       1  


In [98]:
# Rebuild the combined "<image>_<label>" key for the Fish rows.
data_fish["Image_Label"] = data_fish["Image"].str.cat(data_fish["Label"], sep="_")
print(data_fish.head())

                                        EncodedPixels        Image Label  \
0   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  002f507.jpg  Fish   
4   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  0035ae9.jpg  Fish   
8   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  0038327.jpg  Fish   
12  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  004f759.jpg  Fish   
16  0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  005ba08.jpg  Fish   

    target       Image_Label  
0        1  002f507.jpg_Fish  
4        1  0035ae9.jpg_Fish  
8        1  0038327.jpg_Fish  
12       1  004f759.jpg_Fish  
16       1  005ba08.jpg_Fish  


In [99]:
# Keep only the key and the serialised mask.
data_fish.drop(columns=["Image", "Label", "target"], inplace=True)
print(data_fish.head())

                                        EncodedPixels       Image_Label
0   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  002f507.jpg_Fish
4   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  0035ae9.jpg_Fish
8   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  0038327.jpg_Fish
12  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  004f759.jpg_Fish
16  0 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...  005ba08.jpg_Fish


In [100]:
# Rebuild the combined "<image>_<label>" key for every row of the full table.
data["Image_Label"] = data["Image"].str.cat(data["Label"], sep="_")

print(data.head())

  EncodedPixels        Image   Label         Image_Label
0           1 1  002f507.jpg    Fish    002f507.jpg_Fish
1           1 1  002f507.jpg  Flower  002f507.jpg_Flower
2           1 1  002f507.jpg  Gravel  002f507.jpg_Gravel
3           1 1  002f507.jpg   Sugar   002f507.jpg_Sugar
4           1 1  0035ae9.jpg    Fish    0035ae9.jpg_Fish


In [101]:
# Reduce the full table to its key column.
data.drop(columns=["Image", "Label", "EncodedPixels"], inplace=True)
print(data.head())

          Image_Label
0    002f507.jpg_Fish
1  002f507.jpg_Flower
2  002f507.jpg_Gravel
3   002f507.jpg_Sugar
4    0035ae9.jpg_Fish


In [102]:
# Left join: every key of the full table is kept, and rows without a match in
# data_fish get NaN in EncodedPixels.
data_merge = data.merge(data_fish, how="left", on="Image_Label")

In [103]:
# Preview the merged table.
print(data_merge.head())

          Image_Label                                      EncodedPixels
0    002f507.jpg_Fish  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...
1  002f507.jpg_Flower                                                NaN
2  002f507.jpg_Gravel                                                NaN
3   002f507.jpg_Sugar                                                NaN
4    0035ae9.jpg_Fish  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...


In [104]:
# Arrange the columns for export.
# NOTE(review): EncodedPixels is placed first - confirm that this matches the
# column order of the reference sample_submission file.
data_sample_submission = data_merge.reindex(columns=["EncodedPixels", "Image_Label"])

In [105]:
# Preview the first 25 rows of the table that will be written out.
print(data_sample_submission.head(25))

                                        EncodedPixels         Image_Label
0   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...    002f507.jpg_Fish
1                                                 NaN  002f507.jpg_Flower
2                                                 NaN  002f507.jpg_Gravel
3                                                 NaN   002f507.jpg_Sugar
4   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...    0035ae9.jpg_Fish
5                                                 NaN  0035ae9.jpg_Flower
6                                                 NaN  0035ae9.jpg_Gravel
7                                                 NaN   0035ae9.jpg_Sugar
8   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...    0038327.jpg_Fish
9                                                 NaN  0038327.jpg_Flower
10                                                NaN  0038327.jpg_Gravel
11                                                NaN   0038327.jpg_Sugar
12  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 

### Выгружаем результаты предсказания в требуемом формате (sample_submission.csv).

In [106]:
# Write the submission without the index column (the .gz suffix presumably
# selects gzip through pandas' default compression inference - confirm).
data_sample_submission.to_csv('sample_submission.csv.gz', index=False) 

In [107]:
# Check the written file by reading it back.
data_2 = pd.read_csv('sample_submission.csv.gz')
print(data_2.head(25))

                                        EncodedPixels         Image_Label
0   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...    002f507.jpg_Fish
1                                                 NaN  002f507.jpg_Flower
2                                                 NaN  002f507.jpg_Gravel
3                                                 NaN   002f507.jpg_Sugar
4   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...    0035ae9.jpg_Fish
5                                                 NaN  0035ae9.jpg_Flower
6                                                 NaN  0035ae9.jpg_Gravel
7                                                 NaN   0035ae9.jpg_Sugar
8   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 ...    0038327.jpg_Fish
9                                                 NaN  0038327.jpg_Flower
10                                                NaN  0038327.jpg_Gravel
11                                                NaN   0038327.jpg_Sugar
12  0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 