In [None]:
#Imports
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

plt.style.use('ggplot')
import os
import keras

from keras import Sequential
from keras.src.optimizers import Adam
from keras.src.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.src.callbacks import ModelCheckpoint
from keras.src.saving import load_model
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
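# Download dataset (optional): uncomment the two lines below to fetch it via kagglehub.
# Note: `kagglehub` is not imported in the imports cell, so add `import kagglehub` first.

# path = kagglehub.dataset_download("ravirajsinh45/real-life-industrial-dataset-of-casting-product")
# print("Path to dataset files:", path)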

In [None]:
# Check Dataset
# Each split directory holds one sub-directory per label:
# 'def_front/' (defective castings) and 'ok_front/' (acceptable castings).

# Specify directory of train data
train_dir = 'dataset/casting_data/train/'
#train_dir = "../kaggle/input/real-life-industrial-dataset-of-casting-product/casting_data/casting_data/train/"
train_def_dir = train_dir + 'def_front/'  # Label: Defective
train_ok_dir = train_dir + 'ok_front/'  # Label: OK

# Specify directory of test data
test_dir = 'dataset/casting_data/test/'
#test_dir = "../kaggle/input/real-life-industrial-dataset-of-casting-product/casting_data/casting_data/test/"
test_def_dir = test_dir + 'def_front/'
test_ok_dir = test_dir + 'ok_front/'

# Plot samples of defective and non-defective casting.
# os.listdir() returns entries in arbitrary order, so the listing is sorted to
# make the displayed sample the same file on every run and platform.
sample_def = plt.imread(os.path.join(train_def_dir, sorted(os.listdir(train_def_dir))[0]))
sample_ok = plt.imread(os.path.join(train_ok_dir, sorted(os.listdir(train_ok_dir))[0]))

fig, axes = plt.subplots(1, 2, figsize=(8, 4))
panels = [(sample_def, 'Casting Sample: Defective'),
          (sample_ok, 'Casting Sample: OK')]
for ax, (sample, title) in zip(axes, panels):
    ax.imshow(sample)
    ax.set_title(title, loc='left')
    ax.grid(False)  # the 'ggplot' style draws a grid by default; hide it over images
plt.show()

In [None]:
# Check Dataset for Data Imbalance
# File counts per label, ordered [OK, Defective]. next(os.walk(d))[2] is the list
# of non-directory entries directly inside d (sub-directories are not descended).
train_len = [len(next(os.walk(d))[2]) for d in (train_ok_dir, train_def_dir)]
test_len = [len(next(os.walk(d))[2]) for d in (test_ok_dir, test_def_dir)]

# Share of each label across the train and test sets combined.
ok_count = train_len[0] + test_len[0]
def_count = train_len[1] + test_len[1]
ok_pct = ok_count / (ok_count + def_count) * 100
def_pct = def_count / (ok_count + def_count) * 100

labels = [f'OK ({ok_pct:.2f}%)', f'Defective ({def_pct:.2f}%)']
x = range(len(labels))

# Stacked bars: training counts at the bottom, test counts on top.
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(x, train_len, label='Training Set')
ax.bar(x, test_len, bottom=train_len, label='Test Set')

ax.set_xticks(x)
ax.set_xticklabels(labels)
ax.set_ylabel('Number of Files')
ax.set_title('File Distribution by Label and Dataset')
ax.legend()

fig.tight_layout()
plt.show()


In [None]:
# Both generators multiply pixel values by 1/255 (presumably mapping 8-bit
# intensities into [0, 1] - confirm against the image format).
pixel_scale = 1. / 255

# The training generator additionally reserves 20% of its images for validation.
train_generator = ImageDataGenerator(rescale=pixel_scale, validation_split=0.2)
test_generator = ImageDataGenerator(rescale=pixel_scale)

In [None]:
# Specify parameters/arguments for data generation

img_size, batch_size, rand_seed = (300, 300), 64, 0

# `classes` must be an ordered list of sub-directory names: the position in the
# list is the integer label (ok_front -> 0, def_front -> 1). A dict only worked
# by accident, because iterating it yields its keys and the values were ignored.
class_dirs = ['ok_front', 'def_front']

# Keyword arguments for flow_from_directory on the training/validation data.
arg_train = {'target_size': img_size,
             'color_mode': 'grayscale',
             'classes': list(class_dirs),
             'class_mode': 'binary',
             'batch_size': batch_size,
             'seed': rand_seed}

# Same settings for the test data, but with shuffling disabled so the order of
# predictions stays aligned with the order of the files/labels.
arg_test = {**arg_train,
            'classes': list(class_dirs),
            'shuffle': False}

In [None]:
# Build the three directory iterators.
# Training and validation read the same directory through the same generator;
# `subset` selects which side of that generator's validation split is served.
train_set = train_generator.flow_from_directory(train_dir, subset='training', **arg_train)
valid_set = train_generator.flow_from_directory(train_dir, subset='validation', **arg_train)

# The test images live in their own directory and use the test generator.
test_set = test_generator.flow_from_directory(test_dir, **arg_test)

In [None]:
# Define CNN model architecture

# Seed the Python, NumPy and backend random generators so that weight
# initialisation (and therefore training) is repeatable across runs.
keras.utils.set_random_seed(rand_seed)

cnn_model = Sequential([
    # Explicit input: single-channel (grayscale) images of size `img_size`.
    # Passing `input_shape` to the first layer is deprecated in Keras 3.
    keras.Input(shape=img_size + (1,)),

    # First block
    Conv2D(32, 3, activation='relu', padding='same', strides=2),
    MaxPooling2D(pool_size=2, strides=2),

    # Second block
    Conv2D(64, 3, activation='relu', padding='same', strides=2),
    MaxPooling2D(pool_size=2, strides=2),

    # Flattening
    Flatten(),

    # Fully connected layers
    Dense(128, activation='relu'),
    Dense(1, activation='sigmoid')  # Single output: probability of label 1
])

# Compile model
cnn_model.compile(
    optimizer=Adam(learning_rate=0.001),  # Default lr
    loss='binary_crossentropy',
    metrics=['accuracy'])

# Display summary of model architecture
cnn_model.summary()

In [None]:
# Fit model using train set and validation set
n_epochs = 20

# Keep only the weights from the epoch with the lowest validation loss.
checkpoint = ModelCheckpoint(
    'models/CNN_Casting_Inspection.keras',
    save_best_only=True,
    monitor='val_loss')

# `callbacks` is documented as a list of callbacks, so the checkpoint is wrapped
# in one. The returned History object is assigned so the cell does not end by
# printing its repr; it is also still reachable as `cnn_model.history`.
history = cnn_model.fit(
    train_set,
    validation_data=valid_set,
    epochs=n_epochs,
    callbacks=[checkpoint],
    verbose=1)

In [None]:
# Plot learning curve from model history
histo_dict = cnn_model.history.history
histo_df = pd.DataFrame(histo_dict)
# Number the epochs from 1 using the number of epochs actually recorded, so the
# plot still works if training ran for fewer (or more) epochs than `n_epochs`.
histo_df.index = pd.RangeIndex(1, len(histo_df) + 1)

fig, ax = plt.subplots(figsize=(8, 5))
for m in histo_df.columns:
    # One line per recorded metric, labelled with the metric's name.
    ax.plot(histo_df.index, histo_df[m], label=m)
ax.set_xlabel('Epoch')
ax.set_ylabel('Metric value')
ax.set_title('Learning Curve', loc='left', weight='bold')
ax.legend()
plt.show()

In [None]:
# Reload the model from the checkpoint file written during training
best_model = load_model('models/CNN_Casting_Inspection.keras')

# Predicted probability of label 1 for every image in the test set
y_pred_prob = best_model.predict(test_set, verbose=1)

# Threshold at 0.5 and flatten to a 1-D boolean vector
y_pred = (y_pred_prob >= 0.5).ravel()

# True labels, arranged in the order the iterator served the images
served_order = test_set.index_array
y_true = test_set.classes[served_order]

In [None]:
# Visualize the confusion matrix (rows: actual label, columns: predicted label)
cm = confusion_matrix(y_true, y_pred)
tick_names = ['OK', 'Defective']

fig, ax = plt.subplots(figsize=(4, 3))
ax = sns.heatmap(cm, ax=ax, annot=True, fmt='d', cbar=False, cmap='Blues',
                 annot_kws={'size': 14, 'weight': 'bold'})
ax.set_xticklabels(tick_names)
ax.set_yticklabels(tick_names, va='center')
ax.tick_params(axis='both', labelsize=14, length=0)
ax.set_ylabel('Actual', size=14, weight='bold')
ax.set_xlabel('Predicted', size=14, weight='bold')
plt.show()

In [None]:
# Text summary of per-label metrics on the test set, shown to 4 decimal places
test_report = classification_report(y_true, y_pred, digits=4)
print(test_report)

In [None]:
# Show the first few test images with their actual and predicted labels.
class_map = {0: 'OK', 1: 'Defective'}
n_show = 3

# Take the first batch and keep only the images that will be displayed. Slicing
# (rather than reshaping to `batch_size`) also works when the batch is short.
images, labels = next(iter(test_set))
images, labels = images[:n_show], labels[:n_show]

# One batched forward pass instead of one predict() call per image.
pred_probs = best_model.predict(
    images.reshape(-1, *img_size, 1), verbose=0).reshape(-1)

fig, axes = plt.subplots(1, n_show, figsize=(9, 4), squeeze=False)
fig.suptitle('Prediction on Test Images', y=0.98, weight='bold', size=14)
for ax, img, label, pred_prob in zip(axes.flat, images, labels, pred_probs):
    ax.imshow(img.reshape(*img_size), cmap='gray')
    pred_label = class_map[int(pred_prob >= 0.5)]
    # Cast explicitly instead of relying on a NumPy float hashing like the int key.
    true_label = class_map[int(label)]
    # Report the probability of whichever label was predicted.
    prob_class = 100 * pred_prob if pred_label == 'Defective' else 100 * (1 - pred_prob)
    ax.set_title(f'Actual: {true_label}', size=12)
    ax.set_xlabel(f'Predicted: {pred_label} ({prob_class:.2f}%)',
                  color='g' if pred_label == true_label else 'r')
    ax.set_xticks([])
    ax.set_yticks([])
plt.tight_layout()
plt.show()

In [None]:
# Show up to two misclassified test images.
# A position in y_pred/y_true maps to a (batch, offset) pair in `test_set` only
# because the test iterator serves images in a fixed order (shuffle=False).
misclassified = np.nonzero(y_pred != y_true)[0]
batch_num = misclassified // batch_size
image_num = misclassified % batch_size

# Reuse the probabilities that produced y_pred, so the displayed prediction
# cannot disagree with the misclassification being reported.
stored_probs = y_pred_prob.reshape(-1)

if misclassified.size == 0:
    print('No misclassified test images.')

fig, axes = plt.subplots(1, 2, figsize=(12, 4))
fig.suptitle('Misclassified Test Images', y=0.98, weight='bold', size=14)
panels = axes.ravel()
for ax, idx, bnum, inum in zip(panels, misclassified, batch_num, image_num):
    images, labels = test_set[int(bnum)]
    img = images[inum]
    ax.imshow(img.reshape(*img_size), cmap='gray')
    pred_prob = stored_probs[idx]
    pred_label = class_map[int(pred_prob >= 0.5)]
    true_label = class_map[int(labels[inum])]
    # Report the probability of whichever label was predicted.
    prob_class = 100 * pred_prob if pred_label == 'Defective' else 100 * (1 - pred_prob)
    ax.set_title(f'Actual: {true_label}', size=12)
    ax.set_xlabel(f'Predicted: {pred_label} ({prob_class:.2f}%)',
                  color='g' if pred_label == true_label else 'r')
    ax.set_xticks([])
    ax.set_yticks([])

# Hide panels left empty when fewer images were misclassified than panels exist.
for ax in panels[len(misclassified):]:
    ax.set_visible(False)
plt.tight_layout()
plt.show()