In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os
# Print the full path of every file found below the Kaggle input directory.
input_paths = (
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk('/kaggle/input')
    for name in names
)
for input_path in input_paths:
    print(input_path)

In [None]:
import shutil
import matplotlib.pyplot as plt
%matplotlib inline
from PIL import Image

import tensorflow as tf
from tensorflow.keras.layers import Input, Add, Dense, Activation, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D, Dropout
from tensorflow.keras import Model
from tensorflow.keras.applications import ResNet50, MobileNetV2, ResNet101
from tensorflow.keras.optimizers import Adam, SGD
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator, save_img

import cv2
import random
import multiprocessing

In [None]:
! unzip -o /kaggle/input/platesv2/plates.zip

# TRANSFORM IMAGES

In [None]:
# Start from a clean slate: drop the output trees left over from a previous run.
for stale_root in ('data_augment', 'data_test'):
    if os.path.exists(stale_root):
        shutil.rmtree(stale_root)

# Augmented training images: one folder per class, plus a parallel "valid/"
# tree that receives the images held out for validation.
for split_prefix in ('data_augment/', 'data_augment/valid/'):
    for class_name in ('cleaned', 'dirty'):
        os.makedirs(split_prefix + 'plates/train/' + class_name)

# Pre-processed test images.
os.makedirs('data_test/plates/test')

In [None]:
# Image processing / generation helper functions


## Per-image standardization
def image_standardization(img):
    """Return `img` after `tf.image.per_image_standardization` has been applied to it."""
    standardized = tf.image.per_image_standardization(img)
    return standardized


## Grayscale
def image_grayscale(img):
    """Convert an RGB image into a three-channel grayscale image.

    Parameters
    ----------
    img : array-like
        Image indexed as (row, column, channel) in RGB order. Values are cast
        to ``uint8`` before conversion.

    Returns
    -------
    numpy.ndarray
        ``uint8`` image with three channels that all carry the gray value.
    """
    # RGB -> BGR (OpenCV's channel order). The reversed slice is a
    # negative-stride view, so materialise it as a contiguous array for OpenCV.
    bgr = np.ascontiguousarray(np.array(img, dtype=np.uint8)[:, :, ::-1])

    gray = cv2.cvtColor(bgr, cv2.COLOR_BGR2GRAY)

    # `gray` has a single channel, so it must be expanded with GRAY2RGB;
    # BGR2RGB only accepts 3- or 4-channel input.
    return cv2.cvtColor(gray, cv2.COLOR_GRAY2RGB)


## Background removal
def grabCutFirst(img):
    """Replace the background of an RGB image with white using GrabCut.

    Parameters
    ----------
    img : array-like
        Image indexed as (row, column, channel) in RGB order. Values are cast
        to ``uint8``.

    Returns
    -------
    numpy.ndarray
        ``uint8`` RGB image of the same size in which every pixel GrabCut
        labelled as (probable) background is pure white and every other pixel
        is unchanged.
    """
    # RGB -> BGR (OpenCV's channel order), as a contiguous array.
    bgr = np.ascontiguousarray(np.array(img, dtype=np.uint8)[:, :, ::-1])

    height, width = bgr.shape[:2]
    # (x, y, w, h): GrabCut is initialised with everything outside this
    # rectangle, i.e. a 15 px border on each side, as background.
    rect = (15, 15, width - 30, height - 30)

    mask = np.zeros((height, width), np.uint8)
    bgdModel = np.zeros((1, 65), np.float64)
    fgdModel = np.zeros((1, 65), np.float64)
    cv2.grabCut(bgr, mask, rect, bgdModel, fgdModel, 5, cv2.GC_INIT_WITH_RECT)

    # Mask values 0 (GC_BGD) and 2 (GC_PR_BGD) mark background pixels.
    is_background = (mask == cv2.GC_BGD) | (mask == cv2.GC_PR_BGD)

    # Whiten the background straight from the mask. The previous
    # "all channels > 0" test left background pixels containing a zero channel
    # (pure black, saturated colours) un-whitened.
    result = bgr.copy()
    result[is_background] = 255

    # Back to RGB for the PIL/Keras side of the pipeline.
    return cv2.cvtColor(result, cv2.COLOR_BGR2RGB)


## Center crop
def crop(img, l):
    """Cut a centered square out of `img` and scale it back to the input size.

    Parameters
    ----------
    img : numpy.ndarray
        Image array; it is cast to ``uint8`` and wrapped in a PIL image.
    l : int
        Requested edge length of the square. The actual edge is
        ``2 * (l // 2)``, so an odd `l` yields a square one pixel smaller.

    Returns
    -------
    PIL.Image.Image
        The cropped region resized to the original width and height.
    """
    pil_img = Image.fromarray(img.astype(np.uint8))
    width, height = pil_img.size

    half_edge = l // 2
    center_x, center_y = width // 2, height // 2
    box = (
        center_x - half_edge,
        center_y - half_edge,
        center_x + half_edge,
        center_y + half_edge,
    )

    # Crop around the center, then resize back to the input dimensions.
    return pil_img.crop(box).resize((width, height))


## Training image processing / augmentation
def image_transform_for_training(org_image_dir_path, crop_size_list, rotation_range, sum_data_num, valid_data_num):
    """Augment every ``.jpg`` of one class folder and save the results to disk.

    Each source image has its background removed, is rotated by
    ``ImageDataGenerator``, has its background removed again, and is then
    center-cropped, converted to grayscale and saved once per crop size.
    Images selected for validation go to ``data_augment/valid/<dir>``, all
    others to ``data_augment/<dir>``; the output folders must already exist.

    Parameters
    ----------
    org_image_dir_path : str
        Folder with the source images; also used as the output sub-path.
    crop_size_list : list of int
        Crop edge lengths passed to ``crop``.
    rotation_range : int or float
        Forwarded to ``ImageDataGenerator(rotation_range=...)``; also fixes
        the number of variants per image (``360 / rotation_range``).
    sum_data_num : int
        Size of the index range the validation images are drawn from
        (presumably the number of ``.jpg`` files in the folder — verify
        against the caller).
    valid_data_num : int or float
        Number of images to hold out for validation (truncated with ``int``).

    Raises
    ------
    ValueError
        From ``random.sample`` if `valid_data_num` exceeds `sum_data_num`.
    """
    print('START:image_transform_for_training - ' + org_image_dir_path + ' - ' + str(crop_size_list[0]))

    # Rotation is the only augmentation enabled.
    datagen = ImageDataGenerator(
           rotation_range=rotation_range,
           width_shift_range=0,
           height_shift_range=0,
           shear_range=0,
           zoom_range=0,
           horizontal_flip=False,
           vertical_flip=False)

    # This function is run in several processes for the same folder (one per
    # crop size). Seeding a private RNG with the folder path makes all of them
    # hold out the SAME source images; with the shared global RNG each worker
    # drew a different split, so one plate could end up in both the training
    # and the validation set.
    split_rng = random.Random(org_image_dir_path)
    valid_indices = set(split_rng.sample(range(sum_data_num), int(valid_data_num)))

    # Sorted so that index i refers to the same file in every worker.
    jpg_file_names = sorted(
        name for name in os.listdir(org_image_dir_path)
        if os.path.splitext(name)[1] == '.jpg'
    )

    variants_per_image = 360 / rotation_range

    for i, org_image_file_name in enumerate(jpg_file_names):

        root, ext = os.path.splitext(org_image_file_name)

        # Keep training and validation images in completely separate trees.
        image_dir_path = org_image_dir_path
        if i in valid_indices:
            image_dir_path = 'valid/' + org_image_dir_path

        # Load as PIL image, convert to ndarray, remove the background.
        img = image.load_img(org_image_dir_path + '/' + org_image_file_name)
        img = image.img_to_array(img)
        x = grabCutFirst(img)
        # (height, width, 3) -> (1, height, width, 3)
        x = x.reshape((1,) + x.shape)

        # datagen.flow() never stops on its own; the break below ends it.
        j = 0
        for d in datagen.flow(x, batch_size=1):

            grab_cut_img = grabCutFirst(d[0])

            for l in crop_size_list:
                crop_img = image.img_to_array(crop(grab_cut_img, l))
                gray_img = image_grayscale(crop_img)
                # NOTE(review): the `j * rotation_range` suffix only makes the
                # file name unique; the applied angle is chosen by
                # ImageDataGenerator — confirm nothing relies on it being exact.
                save_img('data_augment/' + image_dir_path + '/' + root + '_' + str(l) + '_' + str(j * rotation_range) + ext, Image.fromarray(gray_img.astype(np.uint8)))

            j = j + 1
            if variants_per_image <= j:
                break

    # Was logged as 'START:' which made the end of a worker indistinguishable
    # from its beginning.
    print('END:image_transform_for_training - ' + org_image_dir_path + ' - ' + str(crop_size_list[0]))


## Test image processing
def image_transfrom_for_test(org_image_dir_path, crop_size_list):
    """Pre-process every ``.jpg`` of the test folder once per crop size.

    Each image has its background removed, is center-cropped, converted to
    grayscale and saved under ``data_test/<org_image_dir_path>/<crop size>/``
    with its original file name.

    Parameters
    ----------
    org_image_dir_path : str
        Folder with the source images; also used as the output sub-path.
    crop_size_list : list of int
        Crop edge lengths passed to ``crop``; one output folder per entry.
    """
    print('START:image_transform_for_test - ' + org_image_dir_path + ' - ' + str(crop_size_list[0]))

    output_root = 'data_test/' + org_image_dir_path

    # Create the per-crop-size folders once, up front. `exist_ok=True` replaces
    # the check-then-create pattern, which can fail if another process creates
    # the folder between the check and the call.
    for l in crop_size_list:
        os.makedirs(output_root + '/' + str(l), exist_ok=True)

    for org_image_file_name in os.listdir(org_image_dir_path):

        _, ext = os.path.splitext(org_image_file_name)
        if ext != '.jpg':
            continue

        img = image.load_img(org_image_dir_path + '/' + org_image_file_name)
        img = image.img_to_array(img)
        img = grabCutFirst(img)  # background removal

        for l in crop_size_list:
            crop_img = image.img_to_array(crop(img, l))
            gray_img = image_grayscale(crop_img)
            save_img(output_root + '/' + str(l) + '/' + org_image_file_name, Image.fromarray(gray_img.astype(np.uint8)))

    print('END:image_transform_for_test - ' + org_image_dir_path + ' - ' + str(crop_size_list[0]))

In [None]:
# Center-crop edge lengths used during preprocessing; the test images are
# cropped with the same sizes as the training images.
crop_size_training_list = [91, 171, 251]
crop_size_test_list = list(crop_size_training_list)

# Rotation setting for augmentation: handed to ImageDataGenerator as
# `rotation_range` and used to derive the number of variants per image.
rotation_range = 90

In [None]:
# Preprocess / augment the images in parallel: one worker process per
# (image folder, crop size) pair.

# Index range the validation split is drawn from and the number of images
# held out per class (30 % of 20). Passed as an explicit integer instead of
# the float `20 * 0.3`.
# NOTE(review): assumes each training class folder holds 20 images — confirm.
TRAIN_IMAGES_PER_CLASS = 20
VALID_IMAGES_PER_CLASS = 6

processes = []

## Training images: augmentation workers
for l in crop_size_training_list:
    for class_dir in ('plates/train/cleaned', 'plates/train/dirty'):
        processes.append(multiprocessing.Process(
            name='train:' + class_dir + ':' + str(l),
            target=image_transform_for_training,
            args=(class_dir, [l], rotation_range, TRAIN_IMAGES_PER_CLASS, VALID_IMAGES_PER_CLASS),
        ))

## Test images: preprocessing workers
for l in crop_size_test_list:
    processes.append(multiprocessing.Process(
        name='test:plates/test:' + str(l),
        target=image_transfrom_for_test,
        args=('plates/test', [l]),
    ))

## Start all workers
for p in processes:
    p.start()

## Wait for all workers to finish
for p in processes:
    p.join()

## A worker that raised only prints its traceback and exits non-zero; without
## this check the notebook would carry on with missing images.
failed_workers = [p.name for p in processes if p.exitcode != 0]
if failed_workers:
    raise RuntimeError('image preprocessing failed in: ' + ', '.join(failed_workers))

# PREPARE FOR LEARNING MODEL

In [None]:
# Training settings shared by the cells below.
image_size = (224, 224)  # image size for dataset loading, the model input shape and the test generators
batch_size = 4           # batch size of the training / validation datasets
epochs = 30              # number of epochs passed to model.fit

In [None]:
# Training and validation datasets, read from the two separate directory
# trees written by the augmentation step (one sub-folder per class).
def _load_plate_dataset(directory):
    """Load the labelled image dataset stored under `directory`."""
    return tf.keras.preprocessing.image_dataset_from_directory(
        directory,
        seed=1307,
        image_size=image_size,
        batch_size=batch_size,
    )


# Training data
train_ds = _load_plate_dataset("data_augment/plates/train")

# Validation data
val_ds = _load_plate_dataset("data_augment/valid/plates/train")

In [None]:
# Training data | visual check of the first batch
plt.figure(figsize=(20, 20))
for images, labels in train_ds.take(1):
    # Use the number of images actually in the batch (capped at the 7x5 grid):
    # a batch can be shorter than `batch_size`, and indexing past its end fails.
    for i in range(min(int(images.shape[0]), 7 * 5)):
        plt.subplot(7, 5, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(int(labels[i]))
        plt.axis("off")

In [None]:
# Validation data | visual check of the first batch
plt.figure(figsize=(20, 20))
for images, labels in val_ds.take(1):
    # Use the number of images actually in the batch (capped at the 7x5 grid):
    # a batch can be shorter than `batch_size`, and indexing past its end fails.
    for i in range(min(int(images.shape[0]), 7 * 5)):
        plt.subplot(7, 5, i + 1)
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.title(int(labels[i]))
        plt.axis("off")

# LEARNING MODEL

In [None]:
# Deep-learning model
def get_model():
    """Build a binary classifier: frozen ImageNet ResNet101 plus a dense head.

    Reads the module-level ``image_size`` for the input resolution.

    Returns
    -------
    tensorflow.keras.Model
        Uncompiled model that ends in a single sigmoid unit.
    """
    # Pretrained base for transfer learning, without its classification top.
    ## https://keras.io/ja/applications/#resnet50
    # NOTE(review): no ResNet `preprocess_input` is applied anywhere in the
    # visible pipeline — confirm that feeding raw pixel values is intended.
    backbone = ResNet101(include_top=False, input_shape=image_size + (3,), weights='imagenet')

    # Freeze every backbone layer after the input layer so that only the
    # layers added below are updated during training.
    for backbone_layer in backbone.layers[1:]:
        backbone_layer.trainable = False

    # Added head: flatten, then three Dense -> ReLU -> Dropout stages.
    features = Flatten()(backbone.output)
    for units in (256, 256, 128):
        features = Dense(units)(features)
        features = Activation('relu')(features)
        features = Dropout(.5)(features)

    # Single output unit squashed by a sigmoid.
    logit = Dense(1)(features)
    outputs = Activation('sigmoid')(logit)

    # Combine: pretrained base + added head.
    return Model(backbone.input, outputs)

In [None]:
# Model instances | ensemble of three models with the identical architecture,
# keyed 0..2.
models = {member_index: get_model() for member_index in range(3)}

In [None]:
# Training
def learning(key, model):
    """Compile and fit `model`, then leave it holding its best-epoch weights.

    Trains on the module-level ``train_ds`` with ``val_ds`` for validation for
    ``epochs`` epochs. The weights of the epoch with the best
    ``val_binary_accuracy`` are checkpointed and loaded back into `model`
    before returning.

    Parameters
    ----------
    key : hashable
        Identifier of the model; used to build the checkpoint file name.
    model : tensorflow.keras.Model
        Model to train; it is compiled here and modified in place.

    Returns
    -------
    tensorflow.keras.callbacks.History
        The object returned by ``model.fit``.
    """
    ## Compile
    model.compile(optimizer=Adam(decay=0.1), loss='binary_crossentropy', metrics=['binary_accuracy'])

    ### Early stopping
    # NOTE(review): patience (30) equals the module-level `epochs` (30), so
    # this callback cannot trigger within a run — confirm the intended value.
    early_stopping = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        patience=30
    )

    ### Checkpoint of the best epoch (by val_binary_accuracy).
    # Weights only, in a '.weights.h5' file, so they can be read straight back
    # with `load_weights` below.
    checkpoint_path = '/tmp/checkpoint_' + str(key) + '.weights.h5'
    checkpoint = tf.keras.callbacks.ModelCheckpoint(
        checkpoint_path,
        monitor='val_binary_accuracy',
        save_best_only=True,
        save_weights_only=True
    )

    ### Fit
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=epochs,
        callbacks=[early_stopping, checkpoint]
    )

    # Restore the best checkpoint. It used to be written but never read, so
    # predictions were made with the last-epoch weights.
    model.load_weights(checkpoint_path)

    return history

In [None]:
# Train every ensemble member in turn and keep its training history by key.
results = {}
for key, model in models.items():
    print(f'=== model-{key} fiting ===')
    results[key] = learning(key, model)

In [None]:
# Visualise the training history of the first model: accuracy on the left,
# loss on the right, training vs. validation in each panel.
result = results[0]
his_range = len(result.history['loss'])
epoch_axis = range(1, his_range + 1)

fig, (ax_accuracy, ax_loss) = plt.subplots(1, 2, figsize=(15, 5))

panels = (
    (ax_accuracy, 'binary_accuracy', 'Accuracy'),
    (ax_loss, 'loss', 'Loss'),
)
for ax, metric, y_label in panels:
    ax.plot(epoch_axis, result.history[metric], label="training")
    ax.plot(epoch_axis, result.history['val_' + metric], label="validation")
    ax.set_xlabel('Epochs')
    ax.set_ylabel(y_label)
    ax.legend()

plt.show()

# MAKE submission.csv

In [None]:
! ls plates/test/ | head 

In [None]:
# Test generator
def create_test_generator(l):
    """Return an unshuffled image generator over the test images of crop size `l`.

    The images are read from the sub-folder ``str(l)`` of
    ``data_test/plates/test`` and resized to the module-level ``image_size``,
    in batches of 100. No class mode is set (``class_mode=None``).
    """
    flow_options = dict(
        classes=[str(l)],
        target_size=image_size,
        batch_size=100,
        shuffle=False,
        class_mode=None,
    )
    return ImageDataGenerator().flow_from_directory('data_test/plates/test', **flow_options)

In [None]:
# One test generator per crop size, keyed by the crop size as a string.
test_generators = {
    str(crop_size): create_test_generator(crop_size)
    for crop_size in crop_size_test_list
}

In [None]:
# ナイーブテスト_1

## 画像の読み込み
#img = Image.open('data_test/plates/test/0028.jpg')
#img = img.resize(image_size)
#plt.imshow(img)

## 予測
#img = np.array(img)
#model.predict([img[None,...]])

In [None]:
# Naive test 2: eyeball the first model's predictions on the first test batch
model = models[0]

## Rewind the generator of the first crop size to its first file
test_generator = test_generators[str(crop_size_test_list[0])]
test_generator.reset()

## Visual sanity check: print the prediction and show the image for the
## first 30 images of the first batch
d = next(iter(test_generator))
for i in range(30):
    single_image_batch = d[i][None, ...]
    print(model.predict([single_image_batch]))
    plt.imshow(d[i].astype(np.uint8))
    plt.show()

In [None]:
# Prediction: one column of scores per (model, crop size) combination
predicts = {}
for key, model in models.items():
    for key_gen, test_generator in test_generators.items():
        # Rewind so the scores follow the generator's file order.
        test_generator.reset()

        # One step per BATCH. The previous `steps=len(filenames)` asked for one
        # step per file; with batches of 100 the generator kept wrapping around
        # and the test set was predicted many times over. `predict` replaces
        # the deprecated `predict_generator`.
        scores = model.predict(test_generator, steps=len(test_generator))

        # Flatten the (n, 1) output to one dimension.
        predicts['model:' + str(key) + ' - inputsize:' + str(key_gen)] = pd.Series(np.ravel(scores))

In [None]:
# Collect the per-(model, crop size) score series into one table and preview it.
predicts_df = pd.DataFrame.from_dict(predicts)
predicts_df.head(30)

In [None]:
# Load the sample submission as the template for the output file. The absolute
# path matches the '/kaggle/input/platesv2' location used for the image
# archive and no longer depends on the current working directory.
sub_df = pd.read_csv('/kaggle/input/platesv2/sample_submission.csv')

In [None]:
def f(x):
    """Map an averaged score to a label: above 0.5 is 'dirty', otherwise 'cleaned'."""
    if x > 0.5:
        return 'dirty'
    return 'cleaned'


# Average the scores of all (model, crop size) columns per row; the
# assignment aligns the averages with the submission rows by index.
ensemble_mean = predicts_df.mean(axis=1)
sub_df['label'] = ensemble_mean
sub_df['label'] = sub_df['label'].apply(f)
sub_df.head(30)

In [None]:
# Show how many test images were assigned to each label.
sub_df['label'].value_counts()

In [None]:
# Write the submission file, without the DataFrame index column.
sub_df.to_csv('submission.csv', index=False)