In [None]:
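# Install required packages (the leading "!" is IPython/Jupyter shell-escape syntax,
# so the command below only runs inside a notebook, not as a plain Python script)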

!pip install -q kagglehub scikit-learn pillow matplotlib seaborn pandas tqdm tensorflow


# Download dataset via kagglehub and auto-detect bottle folder
import kagglehub, os
from pathlib import Path
import shutil

# Download (or reuse the cached copy of) the dataset and remember where it lives.
base_path = Path(kagglehub.dataset_download("ipythonx/mvtec-ad"))
print("Base path:", base_path)

# Search for 'bottle' subfolder anywhere under base_path; the first match wins,
# so no particular archive layout is assumed.
bottle_path = None
for root, dirs, files in os.walk(base_path):
    if "bottle" in dirs:
        bottle_path = Path(root) / "bottle"
        break

if bottle_path is None:
    # Show top-level structure for debugging before failing.
    print("Could not find 'bottle' inside the dataset. Top-level contents:")
    for p in base_path.iterdir():
        print(" ", p.name)
    raise FileNotFoundError("Bottle folder not found inside KaggleHub dataset. If using other packaging, upload or set path manually.")

# Reaching this point means the folder was found (the branch above always raises).
data_dir = bottle_path
print("Using bottle dataset at:", data_dir)
# Prepare dataset structure: dataset/train/{Normal,Defective}, dataset/test/{Normal,Defective}
from pathlib import Path
import os, shutil

train_dir = Path("dataset/train")
test_dir = Path("dataset/test")


def _copy_pngs(src_dir, dst_dir, prefix=""):
    """Copy every ``*.png`` directly inside *src_dir* into *dst_dir*.

    Each copy is named ``prefix + <original file name>``.
    """
    for p in sorted(src_dir.glob("*.png")):
        shutil.copy(p, dst_dir / f"{prefix}{p.name}")


def prepare_dataset_from_mvtec(data_dir):
    """Rebuild ``dataset/{train,test}/{Normal,Defective}`` from an MVTec-style folder.

    For each of the ``train`` and ``test`` splits under *data_dir*, PNGs in the
    ``good`` sub-folder are copied to ``Normal`` and PNGs in every other
    sub-folder are copied to ``Defective``.

    Args:
        data_dir: Path to a category folder containing ``train/`` and ``test/``.

    Raises:
        FileNotFoundError: if ``data_dir/train`` or ``data_dir/test`` is missing.

    NOTE(review): if the source ``train`` split contains only a ``good``
    folder, ``dataset/train/Defective`` stays empty - confirm that is
    acceptable for the downstream binary classifier.
    """
    data_dir = Path(data_dir)

    # Start from a clean slate so stale files from a previous run cannot leak in.
    if Path("dataset").exists():
        shutil.rmtree("dataset")
    for split_dir in (train_dir, test_dir):
        for label in ("Normal", "Defective"):
            (split_dir / label).mkdir(parents=True, exist_ok=True)

    # Both splits are processed independently of each other; the test split
    # must be populated even when the train split has no defect folders.
    for split_name, split_dir in (("train", train_dir), ("test", test_dir)):
        src_split = data_dir / split_name
        _copy_pngs(src_split / "good", split_dir / "Normal")
        for defect_type in sorted(src_split.iterdir()):
            if defect_type.is_dir() and defect_type.name != "good":
                # Different defect folders reuse the same file names, so the
                # defect type is prefixed to keep copies from overwriting
                # each other in the flat Defective folder.
                _copy_pngs(
                    defect_type,
                    split_dir / "Defective",
                    prefix=f"{defect_type.name}_",
                )

# Build the dataset/ tree, then report how many files each folder contains.
prepare_dataset_from_mvtec(data_dir)
print("Dataset prepared. Counts:")
for folder, _subdirs, filenames in os.walk("dataset"):
    file_count = len(filenames)
    print(folder, "->", file_count)
# Training configuration shared by the data pipeline and the model.
BATCH_SIZE = 32   # images per training/validation batch
EPOCHS = 25       # upper bound on epochs for the first training phase
IMG_SIZE = 224    # edge length (pixels) of the square images fed to the network
AUTOTUNE = True   # NOTE(review): not referenced by the visible code - confirm it is needed

# Ensure the dataset directory exists
import os
from pathlib import Path
train_dir = Path("dataset/train")
test_dir = Path("dataset/test")

if not train_dir.exists():
    # Fail fast: everything below needs the prepared dataset, and continuing
    # would only surface later as an unrelated NameError.
    raise FileNotFoundError("Dataset directory not found. Please run the cell to prepare the dataset first.")

# Data generators with augmentation (train) and deterministic (val/test)
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import numpy as np

VALIDATION_SPLIT = 0.15

# No `rescale` here: Keras' EfficientNet expects raw pixel values in [0, 255]
# and performs its own rescaling inside the model, so dividing by 255 would
# feed it inputs in the wrong range.
train_datagen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.12,
    height_shift_range=0.12,
    shear_range=0.08,
    zoom_range=0.12,
    brightness_range=(0.75, 1.25),
    horizontal_flip=True,
    fill_mode='reflect',
    validation_split=VALIDATION_SPLIT,
)
# Same split fraction as the training generator (so the two subsets stay
# complementary) but without augmentation, so validation metrics are
# measured on unmodified images.
val_datagen = ImageDataGenerator(validation_split=VALIDATION_SPLIT)
test_datagen = ImageDataGenerator()

# color_mode='rgb' makes the loader yield 3-channel images, which is what the
# ImageNet-pretrained backbone expects, whether the PNG on disk is grayscale
# or colour.
train_generator = train_datagen.flow_from_directory(
    "dataset/train",
    target_size=(IMG_SIZE, IMG_SIZE),
    color_mode='rgb',
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='training',
    shuffle=True,
)
val_generator = val_datagen.flow_from_directory(
    "dataset/train",
    target_size=(IMG_SIZE, IMG_SIZE),
    color_mode='rgb',
    batch_size=BATCH_SIZE,
    class_mode='binary',
    subset='validation',
    shuffle=False,
)
# batch_size=1 and shuffle=False keep predictions aligned with
# test_generator.classes.
test_generator = test_datagen.flow_from_directory(
    "dataset/test",
    target_size=(IMG_SIZE, IMG_SIZE),
    color_mode='rgb',
    batch_size=1,
    class_mode='binary',
    shuffle=False,
)

print("Train samples:", train_generator.samples)
print("Val samples:", val_generator.samples)
print("Test samples:", test_generator.samples)
# Transfer learning using EfficientNetB0 (lightweight & accurate)
import tensorflow as tf
from tensorflow.keras import layers, models, optimizers, callbacks

def build_model_finetune(input_shape=(IMG_SIZE, IMG_SIZE, 3), base_trainable=False):
    """Build and compile a binary classifier on top of EfficientNetB0.

    Args:
        input_shape: (height, width, channels) of the input images.
        base_trainable: whether the pretrained backbone's weights are
            trainable from the start. Defaults to a frozen backbone.

    Returns:
        A ``(model, base)`` tuple: the compiled end-to-end Keras model and the
        EfficientNetB0 backbone, returned separately so that the caller can
        unfreeze it later for fine-tuning.
    """
    base = tf.keras.applications.EfficientNetB0(
        include_top=False, weights='imagenet', input_shape=input_shape, pooling='avg'
    )
    base.trainable = base_trainable  # start with frozen base, then optionally unfreeze

    inputs = layers.Input(shape=input_shape)
    # For EfficientNet this preprocessing call is a pass-through; the network
    # rescales internally and expects pixel values in [0, 255].
    # NOTE(review): make sure the input pipeline does not also rescale to [0, 1].
    x = tf.keras.applications.efficientnet.preprocess_input(inputs)
    # training=False runs the backbone in inference mode, even once its
    # weights are unfrozen.
    x = base(x, training=False)

    # Small classification head on top of the pooled backbone features.
    x = layers.Dropout(0.4)(x)
    x = layers.Dense(256, activation='relu')(x)
    x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.3)(x)
    outputs = layers.Dense(1, activation='sigmoid')(x)

    model = models.Model(inputs, outputs)
    model.compile(
        optimizer=optimizers.Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=['accuracy', tf.keras.metrics.AUC(name='auc')],
    )
    return model, base


model, base_model = build_model_finetune()
model.summary()
# Compute class weights (helps if classes are imbalanced)
from sklearn.utils import class_weight
import numpy as np

# Balanced class weights computed from the labels of the training subset.
y_train_classes = train_generator.classes
present_classes = np.unique(y_train_classes)
balanced_weights = class_weight.compute_class_weight(
    'balanced', classes=present_classes, y=y_train_classes
)
# Key each weight by the label it was computed for. Keying by position would
# assign weights to the wrong labels whenever a label is absent from the
# training subset (e.g. only label 1 present -> weight stored under key 0).
class_weights = {
    int(label): float(weight)
    for label, weight in zip(present_classes, balanced_weights)
}
print("Class weights:", class_weights)

# Callbacks
callbacks_list = [
    # Keep only the weights with the best validation accuracy on disk.
    callbacks.ModelCheckpoint("best_mvtec_model.h5", monitor='val_accuracy', save_best_only=True, mode='max', verbose=1),
    # Halve the learning rate after 3 epochs without val_loss improvement.
    callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, verbose=1),
    # Stop after 7 stagnant epochs and roll back to the best weights seen.
    callbacks.EarlyStopping(monitor='val_loss', patience=7, restore_best_weights=True, verbose=1),
]

# Phase 1: train the model as built above.
history1 = model.fit(
    train_generator,
    validation_data=val_generator,
    epochs=EPOCHS,
    class_weight=class_weights,
    callbacks=callbacks_list,
    verbose=2,
)
# Phase 2: make the backbone trainable again, then re-freeze its earliest
# layers so that only the later part is fine-tuned, at a lower learning rate.
base_model.trainable = True

# The first 60% of the backbone's layers stay frozen.
fine_tune_at = int(len(base_model.layers) * 0.6)
for position, backbone_layer in enumerate(base_model.layers):
    if position < fine_tune_at:
        backbone_layer.trainable = False

# Recompile so the changed trainable flags and the new optimizer take effect.
fine_tune_optimizer = optimizers.Adam(learning_rate=1e-5)
fine_tune_metrics = ['accuracy', tf.keras.metrics.AUC(name='auc')]
model.compile(
    optimizer=fine_tune_optimizer,
    loss='binary_crossentropy',
    metrics=fine_tune_metrics,
)

# A few more epochs for fine-tuning, reusing the same class weights and callbacks.
fine_tune_fit_options = dict(
    validation_data=val_generator,
    epochs=10,
    class_weight=class_weights,
    callbacks=callbacks_list,
    verbose=2,
)
history2 = model.fit(train_generator, **fine_tune_fit_options)

# Load best model (if checkpoint saved)
from tensorflow.keras.models import load_model
# Reload the checkpoint without its training configuration and recompile it
# with the metrics needed for evaluation.
best = load_model("best_mvtec_model.h5", compile=False)
best.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy', tf.keras.metrics.AUC(name='auc')])

# Evaluate
loss, acc, auc = best.evaluate(test_generator, verbose=0)
print(f"Test accuracy: {acc*100:.2f}%, Test AUC: {auc:.4f}")

# Predictions & reports
import numpy as np
from sklearn.metrics import confusion_matrix, classification_report

# One full pass over the test set, whatever the generator's batch size is.
test_steps = len(test_generator)
test_generator.reset()
preds_prob = best.predict(test_generator, steps=test_steps, verbose=0).ravel()
# The sigmoid output is the probability of label 1.
preds = (preds_prob > 0.5).astype(int)
y_true = test_generator.classes

# flow_from_directory assigns label indices in alphanumeric order of the class
# folder names (here 'Defective' -> 0, 'Normal' -> 1), so the display names are
# taken from class_indices instead of being hard-coded in a guessed order.
class_indices = test_generator.class_indices
target_names = sorted(class_indices, key=class_indices.get)
label_ids = [class_indices[name] for name in target_names]
print("Class order:", target_names)

print("Confusion Matrix:")
cm = confusion_matrix(y_true, preds, labels=label_ids)
print(cm)
print("\nClassification Report:")
print(classification_report(y_true, preds, target_names=target_names, labels=label_ids))

