In [1]:
import os
import numpy as np
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [2]:
pip install imbalanced-learn scikit-learn


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip


In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import train_test_split


In [4]:
train_data_dir = '../Dataset/train'
validation_data_dir = '../Dataset/validation'
test_data_dir = '../Dataset/test'

In [5]:
# Define batch size
batch_size = 32

In [6]:
# Image data generators with preprocessing and augmentation for training
train_datagen = ImageDataGenerator(
    rescale=1./255,  # Normalize pixel values
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

In [7]:
# Image data generator for validation and testing (only rescaling)
validation_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

In [8]:
# Flow training images in batches using data generator
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',  # for binary classification
    shuffle=True  # Shuffle the data
)

Found 593 images belonging to 2 classes.


In [9]:
# Flow validation images in batches using data generator
validation_generator = validation_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',  # for binary classification
    shuffle=False  # No need to shuffle for validation
)

Found 74 images belonging to 2 classes.


In [12]:
# Get the X and y data from the generator
X_train, y_train = train_generator.next()  # Assuming you are using a generator

# Split the data into defect and non-defect classes
defect_indices = np.where(y_train == 1)[0]
non_defect_indices = np.where(y_train == 0)[0]

# Oversample the defect class manually by duplicating samples
oversampled_defect_indices = np.random.choice(defect_indices, size=len(non_defect_indices), replace=True)
oversampled_indices = np.concatenate((oversampled_defect_indices, non_defect_indices))

# Shuffle the indices
np.random.shuffle(oversampled_indices)

# Get the oversampled data
X_train_oversampled = X_train[oversampled_indices]
y_train_oversampled = y_train[oversampled_indices]

# Reshape X_train_oversampled back to images
X_train_oversampled_images = X_train_oversampled.reshape(-1, 224, 224, 3)

# Re-create the generator with oversampled data
oversampled_train_generator = train_datagen.flow(X_train_oversampled_images, y_train_oversampled, batch_size=batch_size)

In [13]:
test_generator = test_datagen.flow_from_directory(
    test_data_dir,
    target_size=(224, 224),
    batch_size=batch_size,
    class_mode='binary',  # for binary classification
    shuffle=False  # No need to shuffle for testing
)

Found 71 images belonging to 2 classes.


In [14]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout

In [15]:
# Define the CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=(224, 224, 3)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Binary classification (defect or non-defect)
])

In [16]:
# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [17]:
history = model.fit(
    train_generator,
    steps_per_epoch=len(oversampled_train_generator),
    epochs=10,
    validation_data=validation_generator,
    validation_steps=len(validation_generator)
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [18]:
# Evaluate the model on test data
test_loss, test_accuracy = model.evaluate(test_generator, steps=len(test_generator))
print(f'Test Loss: {test_loss}, Test Accuracy: {test_accuracy}')

Test Loss: 0.8743811249732971, Test Accuracy: 0.2535211145877838


In [23]:
from tensorflow.keras.preprocessing import image
import numpy as np

# Load and preprocess the image
img_path = '../Dataset/test/Open Seam defect dataset/2024_04_02_11_17_IMG_8773.JPG'
img = image.load_img(img_path, target_size=(224, 224))
img_array = image.img_to_array(img)
img_array = np.expand_dims(img_array, axis=0)  # Add batch dimension
img_array /= 255.  # Normalize pixel values

# Make predictions
prediction = model.predict(img_array)
prediction_label = 'Open-Defect' if prediction >= 0.5 else 'Non-defect'
prediction_prob = prediction[0][0] if prediction_label == 'Open-Defect' else 1 - prediction[0][0]

print(f'Prediction: {prediction_label}')
print(f'Probability: {prediction_prob}')


Prediction: Non-defect
Probability: 0.5493984222412109
