In [1]:
# ============================
# BLOCK 1 — Setup + Dataset
# ============================

!pip -q install gdown

import os, zipfile, numpy as np, tensorflow as tf, random

# Set seeds for stable training
SEED = 42
tf.random.set_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)
os.environ['PYTHONHASHSEED'] = str(SEED)

# Dataset zip path
DATA_ZIP = "/content/dataset_zip/dataset.zip"
URL = "https://drive.google.com/uc?id=1SrhdI_vgvEmhWwYTzzSx0FFTSt1POemk"

# Create folder
os.makedirs("/content/dataset_zip", exist_ok=True)

# Download if not exists
if not os.path.exists(DATA_ZIP):
    import gdown
    print("Downloading dataset...")
    gdown.download(URL, DATA_ZIP, quiet=False)
else:
    print("Dataset zip already exists.")

# Extract dataset
if not os.path.exists("/content/dataset/CAI-SWTB-Dataset"):
    print("Extracting...")
    os.makedirs("/content/dataset", exist_ok=True)
    with zipfile.ZipFile(DATA_ZIP, 'r') as z:
        z.extractall("/content/dataset")
else:
    print("Dataset already extracted.")

# Path check
train_dir = "/content/dataset/CAI-SWTB-Dataset/Train"
test_dir  = "/content/dataset/CAI-SWTB-Dataset/Test"

print("\nTrain folder exists →", os.path.exists(train_dir))
print("Test folder exists →", os.path.exists(test_dir))

print("\nTRAIN SUBFOLDERS:", os.listdir(train_dir))
print("TEST SUBFOLDERS:", os.listdir(test_dir))


Downloading dataset...


Downloading...
From (original): https://drive.google.com/uc?id=1SrhdI_vgvEmhWwYTzzSx0FFTSt1POemk
From (redirected): https://drive.google.com/uc?id=1SrhdI_vgvEmhWwYTzzSx0FFTSt1POemk&confirm=t&uuid=cdf7cd95-9b57-4e65-abf5-95d59f5a15d5
To: /content/dataset_zip/dataset.zip
100%|██████████| 178M/178M [00:02<00:00, 76.4MB/s]


Extracting...

Train folder exists → True
Test folder exists → True

TRAIN SUBFOLDERS: ['normal', 'Fault']
TEST SUBFOLDERS: ['normal', 'Fault']


In [2]:
# ============================
# BLOCK 2 — Data Generators
# ============================

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input
from sklearn.utils.class_weight import compute_class_weight

IMG = 128
BATCH = 16

train_aug = ImageDataGenerator(
    preprocessing_function=preprocess_input,
    rotation_range=20,
    width_shift_range=0.12,
    height_shift_range=0.12,
    zoom_range=0.15,
    brightness_range=[0.8,1.2],
    horizontal_flip=True,
    validation_split=0.18
)

test_aug = ImageDataGenerator(preprocessing_function=preprocess_input)

train_gen = train_aug.flow_from_directory(
    train_dir,
    target_size=(IMG,IMG),
    batch_size=BATCH,
    class_mode='categorical',
    subset='training',
    shuffle=True,
    seed=42
)

val_gen = train_aug.flow_from_directory(
    train_dir,
    target_size=(IMG,IMG),
    batch_size=BATCH,
    class_mode='categorical',
    subset='validation',
    shuffle=False,
    seed=42
)

test_gen = test_aug.flow_from_directory(
    test_dir,
    target_size=(IMG,IMG),
    batch_size=BATCH,
    class_mode='categorical',
    shuffle=False
)

print("\nClass indices:", train_gen.class_indices)

# Class weight (just in case)
labels = train_gen.classes
cw = compute_class_weight(class_weight='balanced', classes=np.unique(labels), y=labels)
class_weights = {i: cw[i] for i in range(len(cw))}
print("Class weights:", class_weights)


Found 3444 images belonging to 2 classes.
Found 756 images belonging to 2 classes.
Found 1200 images belonging to 2 classes.

Class indices: {'Fault': 0, 'normal': 1}
Class weights: {0: np.float64(1.0), 1: np.float64(1.0)}


In [3]:
# ============================
# BLOCK 3 — Train Model
# ============================

from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.layers import Dense, Dropout, GlobalAveragePooling2D, Input
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping

# Build model
base = MobileNetV2(weights='imagenet', include_top=False, input_tensor=Input(shape=(IMG,IMG,3)))
base.trainable = False

x = base.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='relu')(x)
x = Dropout(0.4)(x)
outputs = Dense(2, activation='softmax')(x)

model = Model(inputs=base.input, outputs=outputs)
model.compile(optimizer=Adam(1e-4), loss="categorical_crossentropy", metrics=["accuracy"])

model.summary()

# Callbacks
os.makedirs("/content/checkpoints", exist_ok=True)
ck1 = ModelCheckpoint("/content/checkpoints/stage1.h5", monitor='val_accuracy', save_best_only=True)
reduce = ReduceLROnPlateau(monitor='val_loss', factor=0.3, patience=3)
early = EarlyStopping(monitor='val_accuracy', patience=4, restore_best_weights=True)

print("\n=== TRAINING STAGE 1 ===")
hist1 = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=5,
    class_weight=class_weights,
    callbacks=[ck1, reduce, early]
)

# Unfreeze last layers
N = 40
for layer in base.layers[-N:]:
    layer.trainable = True

model.compile(optimizer=Adam(1e-5), loss="categorical_crossentropy", metrics=["accuracy"])
ck2 = ModelCheckpoint("/content/checkpoints/stage2.h5", monitor='val_accuracy', save_best_only=True)

print("\n=== TRAINING STAGE 2 ===")
hist2 = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=6,
    class_weight=class_weights,
    callbacks=[ck2, reduce, early]
)


  base = MobileNetV2(weights='imagenet', include_top=False, input_tensor=Input(shape=(IMG,IMG,3)))


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step



=== TRAINING STAGE 1 ===


  self._warn_if_super_not_called()


Epoch 1/5
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 253ms/step - accuracy: 0.5891 - loss: 0.8436



[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m75s[0m 318ms/step - accuracy: 0.5894 - loss: 0.8430 - val_accuracy: 0.6151 - val_loss: 0.7457 - learning_rate: 1.0000e-04
Epoch 2/5
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 306ms/step - accuracy: 0.7389 - loss: 0.5518 - val_accuracy: 0.6071 - val_loss: 0.7874 - learning_rate: 1.0000e-04
Epoch 3/5
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m66s[0m 305ms/step - accuracy: 0.7756 - loss: 0.4732 - val_accuracy: 0.6124 - val_loss: 0.7727 - learning_rate: 1.0000e-04
Epoch 4/5
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m65s[0m 302ms/step - accuracy: 0.7972 - loss: 0.4320 - val_accuracy: 0.5714 - val_loss: 0.8335 - learning_rate: 1.0000e-04
Epoch 5/5
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 256ms/step - accuracy: 0.8080 - loss: 0.4130



[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 309ms/step - accuracy: 0.8081 - loss: 0.4129 - val_accuracy: 0.6336 - val_loss: 0.7218 - learning_rate: 3.0000e-05

=== TRAINING STAGE 2 ===
Epoch 1/6
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 358ms/step - accuracy: 0.7424 - loss: 0.5189



[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 420ms/step - accuracy: 0.7424 - loss: 0.5188 - val_accuracy: 0.6349 - val_loss: 0.7125 - learning_rate: 1.0000e-05
Epoch 2/6
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 356ms/step - accuracy: 0.7862 - loss: 0.4422



[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m140s[0m 410ms/step - accuracy: 0.7862 - loss: 0.4422 - val_accuracy: 0.6812 - val_loss: 0.6909 - learning_rate: 1.0000e-05
Epoch 3/6
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 407ms/step - accuracy: 0.8064 - loss: 0.4286 - val_accuracy: 0.6653 - val_loss: 0.7462 - learning_rate: 1.0000e-05
Epoch 4/6
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 355ms/step - accuracy: 0.8190 - loss: 0.3960



[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 405ms/step - accuracy: 0.8190 - loss: 0.3960 - val_accuracy: 0.6905 - val_loss: 0.6943 - learning_rate: 1.0000e-05
Epoch 5/6
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m143s[0m 412ms/step - accuracy: 0.8370 - loss: 0.3639 - val_accuracy: 0.6746 - val_loss: 0.7267 - learning_rate: 1.0000e-05
Epoch 6/6
[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 355ms/step - accuracy: 0.8384 - loss: 0.3642



[1m216/216[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m88s[0m 409ms/step - accuracy: 0.8384 - loss: 0.3642 - val_accuracy: 0.6944 - val_loss: 0.6988 - learning_rate: 3.0000e-06


In [4]:
# ============================
# BLOCK 4 — Evaluation & Outputs
# ============================

from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt
import numpy as np

# Load best checkpoint
best = "/content/checkpoints/stage2.h5"
if not os.path.exists(best):
    best = "/content/checkpoints/stage1.h5"

model = tf.keras.models.load_model(best)
print("Loaded:", best)

# Test evaluation
loss, acc = model.evaluate(test_gen, verbose=1)
print(f"\n🔥 FINAL TEST ACCURACY = {acc*100:.2f}%")
print(f"FINAL TEST LOSS = {loss:.4f}")

# Confusion matrix
preds = model.predict(test_gen)
y_pred = np.argmax(preds, axis=1)
y_true = test_gen.classes

cm = confusion_matrix(y_true, y_pred)
print("\nConfusion Matrix:\n", cm)

# Classification report
inv = {v:k for k,v in train_gen.class_indices.items()}
print("\nClassification Report:\n",
      classification_report(y_true, y_pred, target_names=[inv[i] for i in sorted(inv.keys())]))

# Save model
model.save("/content/mobilenetV2_final_cnn.h5")
print("\nModel saved: mobilenetV2_final_cnn.h5")




Loaded: /content/checkpoints/stage2.h5
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 175ms/step - accuracy: 0.7464 - loss: 0.6035

🔥 FINAL TEST ACCURACY = 82.42%
FINAL TEST LOSS = 0.3929
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 173ms/step





Confusion Matrix:
 [[465 135]
 [ 76 524]]

Classification Report:
               precision    recall  f1-score   support

       Fault       0.86      0.78      0.82       600
      normal       0.80      0.87      0.83       600

    accuracy                           0.82      1200
   macro avg       0.83      0.82      0.82      1200
weighted avg       0.83      0.82      0.82      1200


Model saved: mobilenetV2_final_cnn.h5
