In [None]:
import os
import json
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report
import tensorflow as tf
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dense, Dropout, Input, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import to_categorical
import albumentations as A

# --- Configuration ---
# Folder holding the annotation JSON files together with the images they reference.
DATA_FOLDER = '/content/drive/MyDrive/Colab Notebooks/Labeled'
# Side length (in pixels) every image is resized to; also the ResNet50 input size.
IMG_SIZE = 224

# --- Augmentation ---
# Every transform is applied with its own probability `p`, so an "augmented"
# copy can occasionally come out identical to the image it was derived from.
augment = A.Compose(
    transforms=[
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.Rotate(limit=15, p=0.3),
    ]
)

# --- Load and augment images ---
def load_images(data_folder):
    """Load every annotated image in `data_folder` plus one augmented copy of each.

    Each ``*.json`` file is read as an annotation that provides an ``imagePath``
    (resolved relative to `data_folder`) and a ``shapes`` list. The label of the
    first shape is used as the class of the whole image.

    Annotations without shapes or without an ``imagePath``, annotations whose
    image file is missing, and images OpenCV cannot decode are skipped with a
    message instead of aborting the whole load.

    Args:
        data_folder: Directory containing the JSON annotations and their images.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. For every usable image, ``X`` holds the
        ResNet50-preprocessed original immediately followed by its augmented
        copy, and ``y`` holds the matching label twice. Samples ``2*i`` and
        ``2*i + 1`` therefore always come from the same source image.
    """
    X, y = [], []
    # Sorted so the sample order does not depend on the filesystem's listing order.
    for file in sorted(os.listdir(data_folder)):
        if not file.endswith('.json'):
            continue

        with open(os.path.join(data_folder, file), encoding='utf-8') as f:
            label_data = json.load(f)

        shapes = label_data.get('shapes') or []
        image_name = label_data.get('imagePath')
        if not shapes or not image_name:
            print(f"Skipping {file}: annotation has no shapes or no imagePath")
            continue
        label = shapes[0]['label']
        image_path = os.path.join(data_folder, image_name)

        if not os.path.exists(image_path):
            print(f"Skipping {file}: image not found at {image_path}")
            continue

        # cv2.imread signals an unreadable/corrupt file by returning None
        # rather than raising, so it has to be checked before resizing.
        img = cv2.imread(image_path)
        if img is None:
            print(f"Skipping {file}: could not decode {image_path}")
            continue
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        # OpenCV loads BGR; preprocess_input is fed RGB here.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # Original
        X.append(preprocess_input(img))
        y.append(label)

        # Augmented copy, derived from the resized RGB image (not from the
        # preprocessed one) and appended right after its original.
        img_aug = augment(image=img)['image']
        X.append(preprocess_input(img_aug))
        y.append(label)
    return np.array(X), np.array(y)

# Seed Python, NumPy and TensorFlow so weight initialisation, dropout and the
# shuffling done by `fit` are repeatable across "Restart & Run All".
# NOTE(review): whether this also pins the albumentations draws depends on the
# installed albumentations version - confirm if exact reproducibility matters.
tf.keras.utils.set_random_seed(42)

# data loading
X, y = load_images(DATA_FOLDER)
if len(X) == 0:
    raise ValueError(f"No usable annotated images found in {DATA_FOLDER}")
if len(X) % 2 != 0:
    # The pair-wise split below relies on load_images emitting
    # (original, augmented) back to back for every image.
    raise ValueError("Expected an even number of samples (original + augmented per image)")

# label encoding
le = LabelEncoder()
y_encoded = le.fit_transform(y)
y_cat = to_categorical(y_encoded)
num_classes = len(le.classes_)

# Splitting the dataset.
# Samples 2*i and 2*i+1 are the original and the augmented copy of the same
# image. Splitting individual samples would let one of them land in train and
# the other in test (data leakage, optimistic scores), so the split is done on
# image pairs and then expanded back to sample indices.
# Stratification needs at least two source images per class.
pair_ids = np.arange(len(X) // 2)
train_pairs, test_pairs = train_test_split(
    pair_ids, test_size=0.2, stratify=y_encoded[::2], random_state=42
)
train_idx = np.column_stack([2 * train_pairs, 2 * train_pairs + 1]).ravel()
test_idx = np.column_stack([2 * test_pairs, 2 * test_pairs + 1]).ravel()
X_train, X_test = X[train_idx], X[test_idx]
y_train, y_test = y_cat[train_idx], y_cat[test_idx]

# Resnet base model
base_model = ResNet50(include_top=False, weights='imagenet', input_shape=(IMG_SIZE, IMG_SIZE, 3), pooling='avg')

# The backbone is used purely as a fixed feature extractor: it is only ever
# called through `predict` and is never part of a trained model, so no
# fine-tuning happens. Partially unfreezing it had no effect; freeze it
# entirely to make that explicit.
base_model.trainable = False

# Extract features
X_train_feat = base_model.predict(X_train)
X_test_feat = base_model.predict(X_test)

# MLP classifier with batch norm, dropout
model = Sequential([
    Input(shape=(X_train_feat.shape[1],)),
    Dense(256, activation='relu'),
    BatchNormalization(),
    Dropout(0.5),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Early stopping to prevent overfitting
early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model.
# validation_split holds out the tail of the training arrays; because pairs
# are stored back to back, at most the one pair straddling the cut is shared
# between the fitted and the validation portion.
model.fit(
    X_train_feat, y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.1,
    callbacks=[early_stop],
    verbose=1
)

# Evaluate
y_pred_probs = model.predict(X_test_feat)
y_pred = np.argmax(y_pred_probs, axis=1)
y_true = np.argmax(y_test, axis=1)

# Title reflects what is actually trained (frozen ResNet50 features + MLP head).
print("Classification Report (ResNet50 features + MLP):")
# `labels` keeps target_names aligned even if a class is absent from the test set.
print(classification_report(y_true, y_pred, labels=np.arange(num_classes), target_names=le.classes_))


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m94765736/94765736[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 0us/step
[1m22/22[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m132s[0m 6s/step
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m32s[0m 5s/step
Epoch 1/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 31ms/step - accuracy: 0.2965 - loss: 2.1337 - val_accuracy: 0.3913 - val_loss: 2.2712
Epoch 2/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5157 - loss: 1.4309 - val_accuracy: 0.4348 - val_loss: 1.5472
Epoch 3/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - accuracy: 0.5526 - loss: 1.1597 - val_accuracy: 0.4058 - val_loss: 1.3864
Epoch 4/50
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14ms/step - accuracy: 0.5867 - loss: 1.0281 - val_accuracy: 0.4493 - 

In [None]:
import filecmp
import os
import shutil

# --- Configuration ---
SOURCE_FOLDER = '/content/drive/MyDrive/Colab Notebooks/Dataset'  # Root folder containing defect folders
DEST_FOLDER = '/content/drive/MyDrive/Colab Notebooks/All_Images'  # Destination folder for all images

# Create destination folder if it doesn't exist
os.makedirs(DEST_FOLDER, exist_ok=True)

copied = 0
skipped = 0

# Traverse all directories and subdirectories
for root, dirs, files in os.walk(SOURCE_FOLDER):
    for file in files:
        if not file.lower().endswith(('.png', '.jpg', '.jpeg')):  # Image file check
            continue

        source_path = os.path.join(root, file)
        dest_path = os.path.join(DEST_FOLDER, file)

        # Walk the candidate names file, file_1, file_2, ... until a free one
        # is found. If a candidate already holds byte-identical content, this
        # image was copied by an earlier run (or is an exact duplicate), so it
        # is skipped. Without this check every re-run of the cell would add
        # another full set of "_N" copies to DEST_FOLDER.
        base, ext = os.path.splitext(file)
        counter = 1
        already_present = False
        while os.path.exists(dest_path):
            if filecmp.cmp(source_path, dest_path, shallow=False):
                already_present = True
                break
            dest_path = os.path.join(DEST_FOLDER, f"{base}_{counter}{ext}")
            counter += 1

        if already_present:
            skipped += 1
            continue

        shutil.copy(source_path, dest_path)
        copied += 1

# The files are copied, not moved: SOURCE_FOLDER is left untouched.
print(f"✅ All images from nested folders have been copied to: {DEST_FOLDER} "
      f"({copied} copied, {skipped} already present)")


✅ All images from nested folders have been moved to: /content/drive/MyDrive/Colab Notebooks/All_Images


In [None]:
import os
import cv2
import numpy as np
import json
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from skimage.feature import hog
from imblearn.over_sampling import SMOTE
import albumentations as A

# --- Configuration ---
# Side length (in pixels) images are resized to before HOG extraction.
IMG_SIZE = 128
# Annotated images (JSON + image files) and the flat pool of images that the
# synthetic "non_defective" samples are derived from.
LABELED_FOLDER = '/content/drive/MyDrive/Colab Notebooks/Labeled'
UNLABELED_FOLDER = '/content/drive/MyDrive/Colab Notebooks/All_Images'

# Data Augmentation
# Every transform is applied with its own probability `p`, so an "augmented"
# copy can occasionally come out identical to the image it was derived from.
augment = A.Compose(
    transforms=[
        A.HorizontalFlip(p=0.5),
        A.RandomBrightnessContrast(p=0.2),
        A.Rotate(limit=15, p=0.3),
    ]
)

# HOG feature extractor
def preprocess_with_hog(img):
    """Turn an image into a flat HOG descriptor.

    The image is resized to ``IMG_SIZE`` x ``IMG_SIZE``, converted from BGR to
    grayscale, and described with HOG using 8x8-pixel cells grouped into
    2x2-cell blocks.

    Args:
        img: Colour image in BGR channel order (what ``cv2.imread`` produces).

    Returns:
        The HOG features flattened into a single vector.
    """
    gray = cv2.cvtColor(
        cv2.resize(img, (IMG_SIZE, IMG_SIZE)),
        cv2.COLOR_BGR2GRAY,
    )
    return hog(
        gray,
        pixels_per_cell=(8, 8),
        cells_per_block=(2, 2),
        feature_vector=True,
    )

# Load the labeled dataset
def load_labeled_data(data_folder):
    """Build HOG features for every annotated image plus one augmented copy of each.

    Each ``*.json`` file is read as an annotation that provides an ``imagePath``
    (resolved relative to `data_folder`) and a ``shapes`` list. The label of the
    first shape is used as the class of the whole image.

    Annotations without shapes or without an ``imagePath``, annotations whose
    image file is missing, and images OpenCV cannot decode are skipped with a
    message instead of aborting the whole load.

    Args:
        data_folder: Directory containing the JSON annotations and their images.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. For every usable image, ``X`` holds the
        HOG vector of the original immediately followed by the HOG vector of
        its augmented copy, and ``y`` holds the matching label twice. Samples
        ``2*i`` and ``2*i + 1`` therefore always come from the same source image.
    """
    X, y = [], []
    # Sorted so the sample order does not depend on the filesystem's listing order.
    for file in sorted(os.listdir(data_folder)):
        if not file.endswith('.json'):
            continue

        with open(os.path.join(data_folder, file), encoding='utf-8') as f:
            label_data = json.load(f)

        shapes = label_data.get('shapes') or []
        image_name = label_data.get('imagePath')
        if not shapes or not image_name:
            print(f"Skipping {file}: annotation has no shapes or no imagePath")
            continue
        label = shapes[0]['label']
        image_path = os.path.join(data_folder, image_name)

        if not os.path.exists(image_path):
            print(f"Skipping {file}: image not found at {image_path}")
            continue

        # cv2.imread returns None for unreadable files instead of raising.
        img = cv2.imread(image_path)
        if img is None:
            print(f"Skipping {file}: could not decode {image_path}")
            continue
        # Resize before augmenting so the augmentation runs on the same
        # IMG_SIZE x IMG_SIZE image that the HOG features are computed from.
        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))

        # Original image
        X.append(preprocess_with_hog(img))
        y.append(label)

        # Augmented copy, appended right after its original
        img_aug = augment(image=img)['image']
        X.append(preprocess_with_hog(img_aug))
        y.append(label)
    return np.array(X), np.array(y)

# Generate synthetic non-defective samples from folder
def generate_non_defective_samples_from_folder(folder):
    """Create HOG features for synthetic "non-defective" samples.

    Every PNG/JPEG image directly inside `folder` is resized to
    ``IMG_SIZE`` x ``IMG_SIZE``, smoothed with a 5x5 Gaussian blur and turned
    into a HOG vector. Files OpenCV cannot decode are ignored.

    NOTE(review): treating a blurred image as "non-defective" is a modelling
    assumption of this notebook - confirm it is appropriate for the data.

    Args:
        folder: Directory whose image files are used as source material.

    Returns:
        NumPy array with one HOG vector per readable image. If no image could
        be read, the result is an empty 1-D array.
    """
    features = []
    # Sorted so the sample order does not depend on the filesystem's listing order.
    for file in sorted(os.listdir(folder)):
        # Case-insensitive on purpose: the cell that fills this folder also
        # accepts upper-case extensions such as ".JPG".
        if not file.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue

        img = cv2.imread(os.path.join(folder, file))
        if img is None:
            continue

        img = cv2.resize(img, (IMG_SIZE, IMG_SIZE))
        img = cv2.GaussianBlur(img, (5, 5), 0)  # simulate non-defect
        features.append(preprocess_with_hog(img))
    return np.array(features)

# Load labeled data (each image contributes its original and one augmented copy)
X, y = load_labeled_data(LABELED_FOLDER)

# Generate synthetic non-defective samples from the images in UNLABELED_FOLDER
X_non_defect = generate_non_defective_samples_from_folder(UNLABELED_FOLDER)

# Combine defective and synthetic non-defective samples.
# Kept under their own names so the full data set is not overwritten by the
# train split below (makes the cell safe to inspect and re-run).
X_all = np.concatenate([X, X_non_defect])
y_all = np.concatenate([y, ['non_defective'] * len(X_non_defect)])

# Group-aware split.
# Labeled samples 2*i and 2*i+1 are the original and the augmented copy of the
# same image and must stay on the same side of the split; otherwise the test
# set contains near-duplicates of training images and the scores are inflated.
# Every synthetic non-defective sample forms its own group.
groups = np.concatenate([
    np.arange(len(X)) // 2,
    len(X) + np.arange(len(X_non_defect)),
])
group_ids, first_idx = np.unique(groups, return_index=True)
train_groups, test_groups = train_test_split(
    group_ids, test_size=0.2, stratify=y_all[first_idx], random_state=42
)
train_mask = np.isin(groups, train_groups)
X_train, y_train = X_all[train_mask], y_all[train_mask]
X_test, y_test = X_all[~train_mask], y_all[~train_mask]

# SMOTE is fitted on the training portion only, so no synthetic sample is
# interpolated from test data.
smote = SMOTE(random_state=42)
X_train_bal, y_train_bal = smote.fit_resample(X_train, y_train)

# Train initial RF model
rf = RandomForestClassifier(n_estimators=300, random_state=42)
rf.fit(X_train_bal, y_train_bal)

# Evaluate
y_pred = rf.predict(X_test)
print("=== Final Evaluation on Test Set ===")
print(classification_report(y_test, y_pred))


=== Final Evaluation on Test Set ===
               precision    recall  f1-score   support

   exc_solder       0.63      0.53      0.58        32
         good       0.79      0.58      0.67        65
      no_good       0.76      0.53      0.62        47
non_defective       0.86      1.00      0.93       328
  poor_solder       1.00      0.25      0.40        12
        spike       0.67      0.40      0.50        15

     accuracy                           0.83       499
    macro avg       0.78      0.55      0.62       499
 weighted avg       0.83      0.83      0.82       499

