In [1]:
import os
import cv2
import numpy as np

from pathlib import Path
from config import DATA_FOLDER
from sklearn.model_selection import train_test_split

# All data paths are resolved relative to the parent of the working directory.
BASE_PATH = Path("..")
# DATA_FOLDER is provided by the project's config module.
data_path = BASE_PATH / DATA_FOLDER
# Folder of the "hazelnut" category inside the MVTecAD directory.
tdd_path = data_path.joinpath("MVTecAD", "hazelnut")

### Unsupervised Train and Validation

In [2]:
def load_images_from_folder(folder):
    """Load every readable image in ``folder`` as an RGB array.

    Files are visited in sorted filename order so the result is reproducible
    across runs and machines (``os.listdir`` returns entries in arbitrary
    order). Entries that OpenCV cannot decode (``cv2.imread`` returns
    ``None``) are skipped.

    Args:
        folder: Directory to scan (``str`` or path-like); not recursive.

    Returns:
        ``np.ndarray`` built with ``np.asarray`` from the list of loaded
        images, each converted from BGR to RGB channel order.
    """
    images = []
    for filename in sorted(os.listdir(folder)):
        img = cv2.imread(os.path.join(folder, filename))
        # Check for a failed read *before* converting: cvtColor raises on None.
        if img is None:
            continue
        images.append(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
    return np.asarray(images)

# Load the images under train/good; each one is given the label 0.
x_train = load_images_from_folder(tdd_path.joinpath("train", "good"))
y_train = np.zeros(len(x_train))

In [3]:
# Sanity check: shapes of the loaded image array and its label vector.
(x_train.shape, y_train.shape)

((391, 1024, 1024, 3), (391,))

In [4]:
# Hold out 12.5% of the training images as a validation set (fixed seed).
# NOTE: this rebinds x_train / y_train, so re-running the cell splits the
# already-reduced training set again.
x_train, x_val, y_train, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.125,
    stratify=y_train,
    random_state=42,
)

In [5]:
# Persist the training split (NumPy appends ".npz" to the target name),
# then display the shapes of what was written.
np.savez(tdd_path.joinpath('train'), x=x_train, y=y_train)
(x_train.shape, y_train.shape)

((342, 1024, 1024, 3), (342,))

In [6]:
# Persist the validation split (NumPy appends ".npz" to the target name),
# then display the shapes of what was written.
np.savez(tdd_path.joinpath('val'), x=x_val, y=y_val)
(x_val.shape, y_val.shape)

((49, 1024, 1024, 3), (49,))

### Supervised Test

In [7]:
# Collect the test images, their class names and per-pixel masks.
x_test = []
y_test = []
y_mask_test = []

for root, dirs, files in os.walk(tdd_path / "test"):
    # Sorting `dirs` in place makes os.walk descend in a deterministic order;
    # together with sorted file names this keeps the sample order reproducible.
    dirs.sort()
    label = Path(root).name  # the containing folder's name is the class name
    for file in sorted(files):
        img = cv2.imread(str(Path(root) / file))
        if img is None:
            # Not a decodable image: skip it before cvtColor, which raises
            # on None.
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        if label == "good":
            # Masks for "good" samples are not read from disk; an all-zero
            # H x W mask is used. float64 (the np.zeros default) is kept so
            # the dtype of the saved mask array is unchanged for consumers.
            # NOTE(review): uint8 would be far smaller if downstream allows.
            mask = np.zeros(img.shape[:2])
        else:
            mask_path = tdd_path / "ground_truth" / label / file.replace('.png', '_mask.png')
            mask = cv2.imread(str(mask_path), cv2.IMREAD_GRAYSCALE)
            if mask is None:
                # Fail loudly instead of appending None, which would break
                # the stacking into a single array below.
                raise FileNotFoundError(f"Could not read ground-truth mask: {mask_path}")

        x_test.append(img)
        y_test.append(label)
        y_mask_test.append(mask)

x_test = np.asarray(x_test)
y_test = np.asarray(y_test)
y_mask_test = np.asarray(y_mask_test)

In [8]:
# Sanity check: shapes of the test images, class names and masks.
(x_test.shape, y_test.shape, y_mask_test.shape)

((110, 1024, 1024, 3), (110,), (110, 1024, 1024))

In [9]:
# A class name's position in this list is the integer id assigned to it.
classes = ['good', 'crack', 'cut', 'hole', 'print']


def convert_labels(labels, classes):
    """Translate class names into their positions within ``classes``.

    Args:
        labels: Iterable of class names.
        classes: Sequence of known class names; position defines the id.

    Returns:
        list: One integer index per entry of ``labels``, in the same order.

    Raises:
        ValueError: If a label is not present in ``classes``.
    """
    return list(map(classes.index, labels))

In [10]:
# Replace the class names in y_test with their integer ids.
# NOTE: y_test is rebound here, so re-running this cell on the already
# converted values raises ValueError (integers are not in `classes`).
y_test = convert_labels(labels=y_test, classes=classes)

In [11]:
# Persist the test set: x = images, y = integer labels, z = masks
# (NumPy appends ".npz" to the target name).
np.savez(tdd_path.joinpath('test'), x=x_test, y=y_test, z=y_mask_test)