# Defect Classifier


In [None]:
import os, cv2, numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras import layers
import xml.etree.ElementTree as ET


In [35]:
DATA = Path('/Users/willekjellberg/.cache/kagglehub/datasets/zhangyunsheng/defects-class-and-location/versions/1')
CLASSES = ['crease', 'crescent_gap', 'inclusion', 'oil_spot', 'punching_hole', 'rolled_pit', 'silk_spot']

def load(path):
    img = cv2.imread(str(path), 0)
    if img is None: return None, None
    
    # Location from XML
    xml = DATA / 'label/label' / (path.stem + '.xml')
    loc = np.array([0.5, 0.5, 0.5, 0.5], np.float32)
    if xml.exists():
        try:
            root = ET.parse(xml).getroot()
            w, h = float(root.find('.//width').text), float(root.find('.//height').text)
            box = root.find('.//bndbox')
            xmin, ymin = float(box.find('xmin').text)/w, float(box.find('ymin').text)/h
            xmax, ymax = float(box.find('xmax').text)/w, float(box.find('ymax').text)/h
            loc = np.array([(xmin+xmax)/2, (ymin+ymax)/2, xmax-xmin, ymax-ymin], np.float32)
        except: pass
    
    # Preprocess
    mask = img > 10
    if mask.any():
        r, c = np.where(mask)
        img = img[r.min():r.max()+1, c.min():c.max()+1]
    img = cv2.resize(img, (128, 128))
    img = cv2.createCLAHE(2.0, (8,8)).apply(img).astype(np.float32)
    m, s = img.mean(), img.std()
    if s > 0: img = (img - m) / s
    mn, mx = img.min(), img.max()
    if mx > mn: img = (img - mn) / (mx - mn)
    return img[..., None], loc


In [36]:
# Load
X_img, X_loc, y = [], [], []
for i, cls in enumerate(CLASSES):
    for p in (DATA / 'images/images' / cls).glob('*.jpg'):
        img, loc = load(p)
        if img is not None:
            X_img.append(img); X_loc.append(loc); y.append(i)

X_img, X_loc, y = np.array(X_img), np.array(X_loc), np.array(y)
X_img_train, X_img_test, X_loc_train, X_loc_test, y_train, y_test = train_test_split(
    X_img, X_loc, y, test_size=0.2, random_state=42, stratify=y)
class_weights = {i: len(y_train)/(len(CLASSES)*c) for i,c in enumerate(np.bincount(y_train))}
print(f"Loaded {len(y)} images")


Loaded 1598 images


In [None]:
# Model (improved)
img_in = layers.Input((128,128,1), name='image')
loc_in = layers.Input((4,), name='location')

x = layers.RandomRotation(.3)(img_in)
x = layers.RandomTranslation(.2,.2)(x)
x = layers.RandomZoom(.2)(x)
x = layers.RandomFlip('horizontal')(x)

# Deeper network with batch norm
for f in [32,32,64,64,128,128,256]:
    x = layers.Conv2D(f, 3, padding='same', use_bias=False)(x)
    x = layers.BatchNormalization()(x)
    x = layers.Activation('relu')(x)
    if f in [32,64,128]: x = layers.MaxPooling2D()(x); x = layers.Dropout(.3)(x)
x = layers.GlobalAveragePooling2D()(x)

# Stronger location branch
loc = layers.Dense(64, 'relu')(layers.Dense(32, 'relu')(loc_in))
loc = layers.Dropout(.3)(loc)

x = layers.Concatenate()([x, loc])
x = layers.Dense(256, 'relu')(x)
x = layers.Dropout(.5)(x)
x = layers.Dense(128, 'relu')(x)
x = layers.Dropout(.5)(x)

model = tf.keras.Model([img_in, loc_in], layers.Dense(len(CLASSES), 'softmax')(x))
model.compile(tf.keras.optimizers.Adam(1e-3), 'sparse_categorical_crossentropy', metrics=['accuracy'])


In [None]:
# Train
train = tf.data.Dataset.from_tensor_slices(({'image': X_img_train, 'location': X_loc_train}, y_train)).shuffle(2048).batch(32).prefetch(2)
val = tf.data.Dataset.from_tensor_slices(({'image': X_img_test, 'location': X_loc_test}, y_test)).batch(32).prefetch(2)

model.fit(train, validation_data=val, epochs=int(os.getenv("EPOCHS", "40")), class_weight=class_weights,
    callbacks=[tf.keras.callbacks.EarlyStopping('val_accuracy', 20, restore_best_weights=True),
               tf.keras.callbacks.ReduceLROnPlateau('val_accuracy', .5, 7, min_lr=1e-7)])


Epoch 1/30
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 87ms/step - accuracy: 0.2183 - loss: 1.9125 - val_accuracy: 0.4594 - val_loss: 1.7428 - learning_rate: 0.0010
Epoch 2/30
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 54ms/step - accuracy: 0.3881 - loss: 1.8170 - val_accuracy: 0.4500 - val_loss: 1.6642 - learning_rate: 0.0010


<keras.src.callbacks.history.History at 0x139d9bdd0>

In [None]:
# Eval & confusion matrix
acc = model.evaluate({'image': X_img_test, 'location': X_loc_test}, y_test, verbose=0)[1]
cm = tf.math.confusion_matrix(y_test, model.predict({'image': X_img_test, 'location': X_loc_test}, verbose=0).argmax(1), len(CLASSES)).numpy()
cm_norm = cm / np.maximum(cm.sum(1, keepdims=True), 1)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 5))
for ax, data, title in [(ax1, cm, 'Count'), (ax2, cm_norm, 'Normalized')]:
    im = ax.imshow(data, cmap='Blues')
    ax.set_xticks(range(len(CLASSES))); ax.set_yticks(range(len(CLASSES)))
    ax.set_xticklabels(CLASSES, rotation=45, ha='right')
    ax.set_yticklabels(CLASSES)
    ax.set_xlabel('Predicted'); ax.set_ylabel('True')
    ax.set_title(f'{title} - Acc: {acc:.1%}')
    for i in range(len(CLASSES)):
        for j in range(len(CLASSES)):
            ax.text(j, i, f'{data[i,j]:.0f}' if title=='Count' else f'{data[i,j]:.2f}',
                   ha='center', va='center', color='white' if data[i,j]>data.max()/2 else 'black', fontsize=9)
    plt.colorbar(im, ax=ax)
plt.tight_layout(); plt.show()


Accuracy: 45.9%

Confusion (row=true, col=pred):
 [[  4   0   0   0   0   0   7]
 [  2   0   0   0   7   0  36]
 [  0   0   0   0  26   0  17]
 [  0   0   0   0  26   0  15]
 [  0   0   0   0  33   0  11]
 [  0   0   0   0   0   0   6]
 [  0   0   0   0  20   0 110]]

Normalized:
 [[0.36 0.   0.   0.   0.   0.   0.64]
 [0.04 0.   0.   0.   0.16 0.   0.8 ]
 [0.   0.   0.   0.   0.6  0.   0.4 ]
 [0.   0.   0.   0.   0.63 0.   0.37]
 [0.   0.   0.   0.   0.75 0.   0.25]
 [0.   0.   0.   0.   0.   0.   1.  ]
 [0.   0.   0.   0.   0.15 0.   0.85]]


In [40]:
model.save('defect_classifier_model.keras')
