In [3]:
# Import the modules used in this notebook
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import shutil
import os

from pathlib import Path
from tqdm import tqdm
from rembg import remove, new_session

import tensorflow as tf
import keras

In [None]:
import zipfile

# Unpack the competition archive into the Kaggle working directory.
archive_path = '/kaggle/input/platesv2/plates.zip'
extract_dir = '/kaggle/working/'
with zipfile.ZipFile(archive_path, mode='r') as archive:
    archive.extractall(path=extract_dir)

In [None]:
# Root folder of the dataset under the Kaggle working directory.
# NOTE(review): the other cells visible here spell this path out instead of
# using data_root — confirm whether the variable is still needed.
data_root = '/kaggle/working/plates/'

In [None]:
session = new_session()
labels = ['cleaned', 'dirty']

# Folder creation and file writes share one absolute root, so this cell no
# longer depends on the current working directory being /kaggle/working.
working_root = Path('/kaggle/working')
raw_train_root = working_root / 'plates' / 'train'

# Create the output folders: {train,val}/{cleaned,dirty}
for dir_name in ['train', 'val']:
    for label in labels:
        (working_root / dir_name / label).mkdir(parents=True, exist_ok=True)

# Remove the background from every training image. Within each class, every
# 5th image (index 0, 5, 10, ...) goes to the validation split, the rest to
# the training split.
for label in labels:
    # sorted() makes the split reproducible (glob order depends on the
    # filesystem) and gives tqdm a known total for its progress bar.
    image_files = sorted((raw_train_root / label).glob('*.jpg'))
    for idx, file in enumerate(tqdm(image_files, desc=label)):
        split = 'val' if idx % 5 == 0 else 'train'
        output_path = working_root / split / label / f"{file.stem}.jpg"
        # Process first, write afterwards: a failure inside remove() no longer
        # leaves an empty output file behind.
        # NOTE(review): rembg appears to return PNG-encoded bytes for bytes
        # input, so these ".jpg" files likely contain PNG data — confirm that
        # the image loader used downstream detects the format from content.
        cutout_bytes = remove(file.read_bytes(), session=session)
        output_path.write_bytes(cutout_bytes)

In [None]:
# Background-removed test images go into a single "unknown" sub-folder of
# test/. The folder is created with the same absolute path that the files are
# written to, so the cell does not depend on the current working directory.
test_output_dir = Path('/kaggle/working/test/unknown')
test_output_dir.mkdir(parents=True, exist_ok=True)

session = new_session()

# Materialising the sorted file list gives tqdm a total for its progress bar.
test_files = sorted(Path('/kaggle/working/plates/test').glob('*.jpg'))
for file in tqdm(test_files):
    output_path = test_output_dir / f"{file.stem}.jpg"
    # Process first, write afterwards: a failure inside remove() no longer
    # leaves an empty output file behind.
    cutout_bytes = remove(file.read_bytes(), session=session)
    output_path.write_bytes(cutout_bytes)

In [None]:
# Settings shared by the training and validation directory iterators.
shared_flow_args = dict(
    target_size=(224, 224),
    keep_aspect_ratio=True,
    class_mode='binary',
)

# Random augmentation is applied to training images only.
augmentation_args = dict(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
)
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(**augmentation_args)
train_ds = train_datagen.flow_from_directory(
    "./train",
    batch_size=32,
    shuffle=True,
    **shared_flow_args,
)

# Validation images are passed through unchanged and in a fixed order.
val_datagen = tf.keras.preprocessing.image.ImageDataGenerator()
val_ds = val_datagen.flow_from_directory(
    "./val",
    batch_size=8,
    shuffle=False,
    **shared_flow_args,
)

In [None]:
def get_image_and_label_batch(dataset, n, labels=None):
    """Plot up to ``n`` images from the next batch of ``dataset`` in a grid.

    Images are laid out in rows of at most 8. The grid is sized from the
    number of images actually drawn, so no empty trailing row is created
    (e.g. ``n=8`` yields a single row).

    Args:
        dataset: Iterator whose ``next()`` yields an ``(images, labels)`` pair.
        n: Maximum number of images to draw. Fewer are drawn when the batch
            (or ``labels``) holds fewer than ``n`` items.
        labels: Optional sequence of titles, one per image. When omitted, the
            labels that come with the batch are used as titles.

    Returns:
        None. The figure is drawn through ``matplotlib.pyplot``.
    """
    images, batch_labels = next(dataset)
    if labels is None:
        labels = batch_labels

    pairs = list(zip(images[:n], labels[:n]))
    if not pairs:
        # Nothing to draw; skip creating a zero-sized figure.
        return

    n_cols = min(len(pairs), 8)
    n_rows = -(-len(pairs) // n_cols)  # ceiling division
    plt.figure(figsize=(3 * n_cols, 4 * n_rows))
    for i, (img, title) in enumerate(pairs):
        plt.subplot(n_rows, n_cols, i + 1)
        # imshow expects integer pixel values in 0..255 for uint8 data.
        plt.imshow(img.astype('uint8'))
        plt.title(title)
        plt.axis("off")

In [None]:
# Preview 4 images, titled with their labels, from the next training batch.
get_image_and_label_batch(train_ds, 4)

In [None]:
# Preview 4 images, titled with their labels, from the next validation batch.
get_image_and_label_batch(val_ds, 4)

In [None]:
# ImageNet-pretrained ResNet152 backbone without its classification head.
base_model = keras.applications.ResNet152(
    include_top=False,
    weights="imagenet",
    input_shape=(224, 224, 3),
)
# Keep the backbone weights fixed; only the new head is trained.
base_model.trainable = False

inputs = keras.Input(shape=(224, 224, 3))

# Apply the ResNet-specific input preprocessing inside the model.
preprocessed = keras.applications.resnet.preprocess_input(inputs)

# training=False keeps the backbone's batch-norm layers in inference mode,
# which also holds if the backbone is later unfrozen for fine-tuning.
backbone_features = base_model(preprocessed, training=False)

# Classification head: pooling -> dense -> dropout -> single sigmoid unit.
head_layers = [
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Dense(400, activation='relu'),
    keras.layers.Dropout(0.25),
    keras.layers.Dense(1, activation='sigmoid'),
]
outputs = backbone_features
for head_layer in head_layers:
    outputs = head_layer(outputs)

model = keras.Model(inputs, outputs)

adam = keras.optimizers.Adam(learning_rate=0.0003, amsgrad=True)
model.compile(
    loss='binary_crossentropy',
    optimizer=adam,
    metrics=['binary_accuracy'],
)

model.summary()

In [None]:
# Stop once val_loss has not improved for 7 consecutive epochs.
# restore_best_weights=True rolls the model back to the epoch with the best
# val_loss; without it the model keeps the weights of the last epoch, i.e.
# 7 epochs past the best one.
cb_early_stopper = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=7,
    restore_best_weights=True,
)

hist = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=200,  # upper bound; early stopping normally ends training sooner
    callbacks=[cb_early_stopper],
)

In [None]:
# Per-epoch training history as a table, one column per tracked quantity.
history_frame = pd.DataFrame(data=hist.history)

# Loss curves first, then accuracy curves, each in its own figure.
loss_columns = ['loss', 'val_loss']
accuracy_columns = ['binary_accuracy', 'val_binary_accuracy']
history_frame[loss_columns].plot()
history_frame[accuracy_columns].plot()

In [None]:
# Make sure test/unknown holds one image per raw test file WITHOUT clobbering
# images that are already there. An unconditional copytree(...,
# dirs_exist_ok=True) overwrites same-named files, which would replace the
# background-removed test images with the raw ones, so predictions would be
# made on images unlike the background-removed training data.
raw_test_dir = Path('plates/test')
test_unknown_dir = Path('test/unknown')
test_unknown_dir.mkdir(parents=True, exist_ok=True)

for raw_file in sorted(raw_test_dir.iterdir()):
    target_file = test_unknown_dir / raw_file.name
    # Only fill gaps (e.g. when the background-removal cell was skipped).
    if raw_file.is_file() and not target_file.exists():
        shutil.copy2(raw_file, target_file)

In [None]:
# Test images are read without augmentation and in a fixed (unshuffled) order.
test_datagen = tf.keras.preprocessing.image.ImageDataGenerator()
test_flow_args = dict(
    target_size=(224, 224),
    keep_aspect_ratio=True,
    batch_size=32,
    shuffle=False,
)
test_ds = test_datagen.flow_from_directory('./test', **test_flow_args)

In [None]:
# Reset the iterator before predicting so prediction starts from the first
# test batch, then run the model over the whole test set.
test_ds.reset()
preds = model.predict(test_ds, verbose=True)

In [None]:
# Peek at the first 10 raw model outputs.
preds[:10]

In [None]:
# Reset the iterator, then show the first 4 test images titled with the
# model's raw outputs (passed in via labels=preds).
test_ds.reset()
get_image_and_label_batch(test_ds, 4, labels=preds)

In [None]:
def _label_for(score):
    """Map one model output to its class name using a 0.5 threshold."""
    if score > 0.5:
        return 'dirty'
    return 'cleaned'


# One class name per prediction, in prediction order.
labels = list(map(_label_for, preds))
labels[:8]

In [None]:
# Load the sample submission file; its 'label' column is filled in below.
submission_df = pd.read_csv('/kaggle/input/platesv2/sample_submission.csv')

In [None]:
# Write the predicted class names into the 'label' column and display the frame.
# NOTE(review): assumes the rows of sample_submission.csv are in the same
# order as the images yielded by test_ds — confirm.
submission_df['label'] = labels
submission_df

In [None]:
# Save the submission as CSV without the DataFrame index column.
submission_df.to_csv('submission.csv', index=False)