1. Install Dependencies

In [None]:
!pip uninstall -y diffusers transformers huggingface_hub accelerate peft torch torchvision torchaudio xformers bitsandbytes -q

# PyTorch (CUDA 12.1) for Kaggle GPUs
!pip install -q torch==2.1.2 torchvision==0.16.2 torchaudio==2.1.2 --index-url https://download.pytorch.org/whl/cu121

# Modern ecosystem
!pip install -q diffusers==0.26.3 transformers==4.39.3 accelerate==0.28.0 peft==0.10.0 huggingface_hub==0.20.3 bitsandbytes==0.43.0

# Optional xFormers
!pip install -q xformers==0.0.22.post7 -f https://download.pytorch.org/whl/cu121/torch_stable.html

import torch, diffusers, transformers, accelerate, peft, huggingface_hub
print('torch', torch.__version__)
print('diffusers', diffusers.__version__)
print('transformers', transformers.__version__)
print('accelerate', accelerate.__version__)
print('peft', peft.__version__)
print('huggingface_hub', huggingface_hub.__version__)


2. Download DreamBooth LoRA Training Script

In [None]:
!curl -L https://raw.githubusercontent.com/huggingface/diffusers/v0.26.3/examples/dreambooth/train_dreambooth_lora.py -o /kaggle/working/train_dreambooth_lora.py
!sed -n '1,40p' /kaggle/working/train_dreambooth_lora.py | sed -n '1,20p'


3. Prepare MVTec Carpet Dataset

In [None]:
import os, glob, random, shutil, pathlib

# Source folder with defect-free training images and the two working folders
# this cell fills.
MVT_DIR = "/kaggle/input/mvtec-ad/carpet/train/good"
INST_DIR = "/kaggle/working/carpet_instances"
VAL_DIR  = "/kaggle/working/carpet_val"

# Tunables for this cell.
MAX_INSTANCES = 150   # upper bound on the number of instance images copied
NUM_VAL = 4           # number of images copied into the validation folder
VAL_SEED = 0          # fixed seed so the validation sample is the same on every run
IMAGE_EXTS = (".png", ".jpg", ".jpeg", ".bmp", ".tif", ".tiff", ".webp")

os.makedirs(INST_DIR, exist_ok=True)
os.makedirs(VAL_DIR, exist_ok=True)

# Keep only regular files with an image extension so stray files in the
# dataset folder are never copied as training images.
imgs = sorted(
    p for p in glob.glob(f"{MVT_DIR}/*")
    if os.path.isfile(p) and p.lower().endswith(IMAGE_EXTS)
)
assert len(imgs) > 0, "No MVTec 'carpet' images found."
imgs = imgs[:MAX_INSTANCES]

# Copy the instance images under zero-padded sequential names, keeping the
# original file extension.
for i, p in enumerate(imgs):
    ext = os.path.splitext(p)[1]
    shutil.copy2(p, f"{INST_DIR}/{i:05d}{ext}")

# The validation images are drawn from the same pool as the instance images.
# A dedicated, seeded generator keeps the draw reproducible without touching
# the global `random` state.
sample = random.Random(VAL_SEED).sample(imgs, min(NUM_VAL, len(imgs)))
for i, p in enumerate(sample):
    ext = os.path.splitext(p)[1]
    shutil.copy2(p, f"{VAL_DIR}/{i:02d}{ext}")

print('Prepared instance images:', len(imgs))
print('Validation images:', len(sample))


4. Train LoRA on Carpet (DreamBooth)

In [None]:
import pathlib, sys, subprocess

# Base model, the identifier token used in the instance prompt, and the
# output folder for the trained LoRA.
BASE = "runwayml/stable-diffusion-v1-5"
TOKEN = "mvcarpet"
PROMPT = f"photo of {TOKEN} object"
OUT = "/kaggle/working/lora_sd15_carpet"

# Paths of the validation images; collected here for reference only, they are
# not passed to the training command below.
VAL = sorted([str(p) for p in pathlib.Path("/kaggle/working/carpet_val").glob("*")])

# Command line for the downloaded training script, run with the same Python
# interpreter as the notebook kernel.
cmd = [
    sys.executable, "/kaggle/working/train_dreambooth_lora.py",
    "--pretrained_model_name_or_path", BASE,
    "--instance_data_dir", "/kaggle/working/carpet_instances",
    "--instance_prompt", PROMPT,
    "--output_dir", OUT,
    "--resolution", "512",
    "--train_batch_size", "1",
    "--gradient_accumulation_steps", "4",
    "--learning_rate", "1e-4",
    "--rank", "8",
    "--mixed_precision", "fp16",
    "--max_train_steps", "600",
    "--checkpointing_steps", "200",
    "--use_8bit_adam",
    "--pre_compute_text_embeddings",
    "--text_encoder_use_attention_mask",
    "--enable_xformers_memory_efficient_attention",
]

print("Running:\n", " ".join(cmd))
# stderr is merged into stdout so the whole log is printed in order.
out = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
print(out.stdout)

# Fail loudly when the script exits with an error; otherwise the notebook would
# report success and later cells would look for weights that were never written.
if out.returncode != 0:
    raise RuntimeError(
        f"Training failed with exit code {out.returncode}; see the log printed above."
    )
print("Training finished.")


5. Load SD1.5 + LoRA and Generate Images

In [None]:
import torch
from diffusers import StableDiffusionPipeline, DPMSolverMultistepScheduler
from PIL import Image

# Use the GPU with half-precision weights when available, otherwise fall back
# to CPU with float32 weights.
device = "cuda" if torch.cuda.is_available() else "cpu"

pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
)

# The scheduler option that enables Karras sigmas is `use_karras_sigmas`.
# A keyword the scheduler does not define (such as `use_karras`) is not applied,
# so the option has to be spelled exactly like this to take effect.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(
    pipe.scheduler.config, use_karras_sigmas=True
)
pipe = pipe.to(device)

pipe.load_lora_weights("/kaggle/input/sd1-5loracarpet/tensorflow2/default/1",
                       weight_name="pytorch_lora_weights.safetensors")

prompt = "high quality texture photo of mvcarpet object, studio lighting, white background"

# Generate four samples from the same prompt, one pipeline call each.
images = [
    pipe(prompt, guidance_scale=10, num_inference_steps=100).images[0]
    for _ in range(4)
]

# Arrange the samples in a 2x2 grid. The tile size is taken from the generated
# images rather than hard-coded, so the grid stays correct if the output
# resolution changes.
tile_w, tile_h = images[0].size
grid = Image.new("RGB", (2 * tile_w, 2 * tile_h))
for i, im in enumerate(images):
    grid.paste(im, (tile_w * (i % 2), tile_h * (i // 2)))

display(grid)
print("Done.")


6. Img2Img with LoRA for Defect-Free Reconstruction

In [None]:
import torch
from diffusers import StableDiffusionImg2ImgPipeline, DPMSolverMultistepScheduler
from PIL import Image

# NOTE(review): LORA_DIR is defined but not referenced in this cell; the LoRA
# weights are loaded from the Kaggle input path further down. Confirm which
# location is intended.
LORA_DIR = "/kaggle/working/lora_sd15_carpet/checkpoint-200"
TEST_IMG = "/kaggle/input/mvtec-ad/carpet/test/cut/000.png"

# Text conditioning: describe a clean carpet, and steer away from defect terms.
prompt = "photo of mvcarpet object, clean carpet, no defects, realistic texture"
negative_prompt = "holes, stains, defects, damaged, weird textures, distortion, blur"

device = "cuda"
dtype = torch.float16

# Build the img2img pipeline (safety checker disabled) and move it to the GPU.
pipe = StableDiffusionImg2ImgPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=dtype, safety_checker=None
)
pipe = pipe.to(device)

# Swap in the DPM-Solver multistep scheduler, reusing the pipeline's scheduler config.
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)

# Load the LoRA weights and fuse them into the pipeline at full scale.
pipe.load_lora_weights(
    "/kaggle/input/sd1-5loracarpet/tensorflow2/default/1",
    weight_name="pytorch_lora_weights.safetensors",
)
pipe.fuse_lora(lora_scale=1.0)

# Read the test image as RGB at 512x512.
input_image = Image.open(TEST_IMG).convert("RGB")
input_image = input_image.resize((512, 512))

result = pipe(
    image=input_image,
    prompt=prompt,
    negative_prompt=negative_prompt,
    strength=0.2,
    guidance_scale=10,
    num_inference_steps=600,
)

# Show the reconstruction, then write it to the working directory.
clean_img = result.images[0]
display(clean_img)
clean_img.save("/kaggle/working/reconstructed_clean_carpet.png")

print("Saved at /kaggle/working/reconstructed_clean_carpet.png")


7. Install CLIP

In [None]:
!pip install git+https://github.com/openai/CLIP.git

8. Evaluation Loop for LoRA + Img2Img Anomaly Detection

In [None]:
import os
import cv2
import numpy as np
from PIL import Image
from tqdm import tqdm
import torch
import clip
from sklearn.metrics import roc_auc_score, precision_recall_curve, auc
from skimage.metrics import structural_similarity as ssim
import matplotlib.pyplot as plt

# Load the CLIP "ViT-B/32" model together with the preprocessing callable that
# the scoring functions apply to PIL images before encoding.
# `device` is not defined in this cell; it must already exist from an earlier cell.
clip_model, clip_preprocess = clip.load("ViT-B/32", device=device)

9. Scoring Functions

In [None]:
def pixel_mse(img, recon):
    """Return the mean squared error between an image and its reconstruction.

    Both inputs are promoted to a common floating-point dtype (at least
    float32) before subtracting. Without that, unsigned-integer images such as
    uint8 wrap around on subtraction and the squared difference overflows,
    which yields a meaningless error. Float32 inputs are computed in float32,
    exactly as a plain ``np.mean((img - recon) ** 2)`` would.

    Args:
        img: Array-like image.
        recon: Array-like reconstruction, broadcast-compatible with ``img``.

    Returns:
        The mean of the element-wise squared difference as a Python float.
    """
    img_arr = np.asarray(img)
    recon_arr = np.asarray(recon)
    # float32 is the floor; wider float inputs (float64) keep their precision.
    work_dtype = np.result_type(img_arr.dtype, recon_arr.dtype, np.float32)
    diff = img_arr.astype(work_dtype, copy=False) - recon_arr.astype(work_dtype, copy=False)
    return float(np.mean(diff ** 2))

def clip_similarity_error(img, recon):
    """Return ``1 - cosine similarity`` between the CLIP embeddings of two images.

    Relies on the module-level ``clip_model``, ``clip_preprocess`` and ``device``.

    Args:
        img: Image array that is scaled by 255 before conversion, so it is
            expected to hold values in [0, 1].
        recon: Reconstruction array with the same value convention as ``img``.

    Returns:
        A Python float; 0.0 means the two embeddings point in the same direction.
    """
    def _embed(arr):
        # Round and clip before the uint8 cast: a plain cast truncates
        # (254.9999 -> 254) and wraps values outside [0, 255].
        pixels = np.clip(np.rint(np.asarray(arr) * 255.0), 0, 255).astype(np.uint8)
        batch = clip_preprocess(Image.fromarray(pixels)).unsqueeze(0).to(device)
        # The encoder may return half-precision features (e.g. on a GPU). Cast
        # to float32 so the cosine similarity, which sits close to 1.0 for
        # near-identical images, is not quantised by fp16 arithmetic.
        return clip_model.encode_image(batch).float()

    with torch.no_grad():
        sim = torch.cosine_similarity(_embed(img), _embed(recon)).item()
    return float(1.0 - sim)

def hybrid_score(img, recon, alpha=0.7):
    """Blend the pixel-level and CLIP-level reconstruction errors into one score.

    Args:
        img: Original image array.
        recon: Reconstructed image array.
        alpha: Weight of the pixel MSE term; the CLIP similarity error gets
            ``1 - alpha``.

    Returns:
        ``alpha * pixel_mse + (1 - alpha) * clip_similarity_error``.
    """
    pixel_term = alpha * pixel_mse(img, recon)
    semantic_term = (1-alpha) * clip_similarity_error(img, recon)
    return pixel_term + semantic_term

10. Reconstruction Function

In [None]:
def reconstruct(img_np,
                prompt,
                negative_prompt,
                strength,
                guidance_scale,
                num_inference_steps):
    """Run the module-level img2img ``pipe`` on one image and return the result.

    Args:
        img_np: Image array that is scaled by 255 before conversion, so it is
            expected to hold values in [0, 1].
        prompt: Text prompt passed to the pipeline.
        negative_prompt: Negative text prompt passed to the pipeline.
        strength: Img2img ``strength`` value passed to the pipeline.
        guidance_scale: ``guidance_scale`` value passed to the pipeline.
        num_inference_steps: ``num_inference_steps`` value passed to the pipeline.

    Returns:
        The first output image as a float32 array scaled to [0, 1]. The input
        is resized to 512x512 before it is handed to the pipeline.
    """
    # Round and clip before the uint8 cast: a plain cast truncates, so a value
    # that came from uint8/255 can drop by one level (254.9999 -> 254), and any
    # value outside [0, 1] would wrap around.
    pixels = np.clip(np.rint(np.asarray(img_np) * 255.0), 0, 255).astype(np.uint8)
    pil = Image.fromarray(pixels).convert("RGB")
    pil = pil.resize((512, 512))

    with torch.autocast(device_type="cuda", dtype=torch.float16):
        out = pipe(
            prompt=prompt,
            negative_prompt=negative_prompt,
            image=pil,
            strength=strength,
            guidance_scale=guidance_scale,
            num_inference_steps=num_inference_steps,
        )

    rec = np.asarray(out.images[0], dtype=np.float32) / 255.0
    return rec


11. Visualization Helper

In [None]:
def show_result(img, recon, heat, idx):
    """Plot the original image, its reconstruction and an error heatmap overlay.

    Args:
        img: Original image array; scaled by 255 and cast to uint8 for display.
        recon: Reconstructed image array, same value convention as ``img``.
        heat: 2-D error map drawn semi-transparently over the original image.
        idx: Index of the sample, shown in the figure title so each figure can
            be matched to its test image.
    """
    original_u8 = (img*255).astype(np.uint8)
    recon_u8 = (recon*255).astype(np.uint8)

    fig, (ax_orig, ax_recon, ax_heat) = plt.subplots(1, 3, figsize=(12, 3))
    fig.suptitle(f"Sample {idx}", y=1.02)

    ax_orig.imshow(original_u8)
    ax_orig.set_title("Original")

    ax_recon.imshow(recon_u8)
    ax_recon.set_title("Reconstruction")

    # Heatmap panel: original image underneath, error map blended on top.
    ax_heat.imshow(original_u8)
    ax_heat.imshow(heat, cmap='jet', alpha=0.4)
    ax_heat.set_title("Heatmap")

    for ax in (ax_orig, ax_recon, ax_heat):
        ax.axis("off")

    plt.show()
    # Release the figure explicitly; this function is called inside a loop.
    plt.close(fig)


12. Full Evaluation Function

In [None]:
def evaluate(
    category="carpet",
    resize=512,
    limit_test=50,
    prompt="photo of mvcarpet object, clean carpet, no defects, realistic texture",
    negative_prompt="holes, stains, defects, damaged",
    strength=0.25,
    guidance_scale=7.5,
    num_inference_steps=100,
    alpha=0.7,
):
    """Score every test image by reconstruction error and report detection metrics.

    Each test image is reconstructed with ``reconstruct`` and scored with
    ``hybrid_score``; the scores are compared against the folder-derived labels.

    Args:
        category: Sub-folder of ``/kaggle/input/mvtec-ad`` whose ``test`` folder is read.
        resize: Side length (pixels) the test images are resized to.
        limit_test: Maximum number of files taken from EACH defect-type folder
            (after sorting by file name), not a limit on the total.
        prompt: Prompt forwarded to ``reconstruct``.
        negative_prompt: Negative prompt forwarded to ``reconstruct``.
        strength: Img2img strength forwarded to ``reconstruct``.
        guidance_scale: Guidance scale forwarded to ``reconstruct``.
        num_inference_steps: Step count forwarded to ``reconstruct``.
        alpha: Pixel-term weight forwarded to ``hybrid_score``.

    Returns:
        Dict with keys ``auc`` (AUROC), ``auprc``, ``scores`` (array),
        ``labels`` (array; 0 for the "good" folder, 1 otherwise),
        ``ssim`` (array of ``1 - SSIM``, NaN where SSIM failed) and
        ``files`` (list of image paths, aligned with the arrays).
        ``auc`` and ``auprc`` are NaN when the labels contain a single class.

    Raises:
        ValueError: If no readable test image is found.
    """
    test_dir = f"/kaggle/input/mvtec-ad/{category}/test"
    # Only sub-directories are defect classes; ignore any stray files.
    classes = sorted(
        entry for entry in os.listdir(test_dir)
        if os.path.isdir(os.path.join(test_dir, entry))
    )

    test_imgs, test_labels, file_paths = [], [], []

    for defect_type in classes:
        img_dir = os.path.join(test_dir, defect_type)
        files = sorted(os.listdir(img_dir))[:limit_test]

        for fname in files:
            path = os.path.join(img_dir, fname)
            img = cv2.imread(path)
            if img is None:
                # cv2.imread returns None instead of raising on unreadable files.
                print("Skipping unreadable file:", path)
                continue
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, (resize, resize)).astype(np.float32)/255.0
            test_imgs.append(img)
            test_labels.append(0 if defect_type == "good" else 1)
            file_paths.append(path)

    if not test_imgs:
        raise ValueError(f"No readable test images found under {test_dir}")

    test_imgs = np.array(test_imgs)
    test_labels = np.array(test_labels)

    print("Loaded test images:", len(test_imgs))

    scores, ssim_vals = [], []

    for i in tqdm(range(len(test_imgs))):
        img = test_imgs[i]

        recon = reconstruct(
            img, prompt, negative_prompt,
            strength, guidance_scale, num_inference_steps
        )

        # Bring the reconstruction to the evaluation resolution only when it
        # differs, and resize the float array directly so no extra uint8
        # quantisation error is added to the scores.
        if recon.shape[:2] != (resize, resize):
            recon = cv2.resize(recon, (resize, resize))

        s = hybrid_score(img, recon, alpha)
        scores.append(s)

        try:
            # Images are floats in [0, 1]; SSIM needs the value range stated
            # explicitly for floating-point input.
            ssim_val = 1.0 - ssim(img, recon, channel_axis=2, data_range=1.0)
        except ValueError as exc:
            # Keep the run going, but make the failure visible instead of
            # silently recording NaN.
            print(f"SSIM failed for {file_paths[i]}: {exc}")
            ssim_val = np.nan
        ssim_vals.append(ssim_val)

        if i < 5:
            # Per-pixel mean absolute error across channels, normalised to [0, 1].
            heat = np.mean(np.abs(img - recon), axis=2)
            heat = heat / (heat.max()+1e-6)
            show_result(img, recon, heat, i)

    scores, ssim_vals = np.array(scores), np.array(ssim_vals)

    if np.unique(test_labels).size < 2:
        # Ranking metrics are undefined with a single class; report NaN rather
        # than discarding all computed scores with an exception.
        print("Only one class present in the test labels; AUROC/AUPRC are undefined.")
        auc_val = float("nan")
        auprc = float("nan")
    else:
        auc_val = roc_auc_score(test_labels, scores)
        prec, rec, _ = precision_recall_curve(test_labels, scores)
        auprc = auc(rec, prec)

    print(f"\n==== Final Results ({category}) ====")
    print("AUROC:", auc_val)
    print("AUPRC:", auprc)
    print("Mean (1-SSIM):", np.nanmean(ssim_vals))

    return {
        "auc": auc_val,
        "auprc": auprc,
        "scores": scores,
        "labels": test_labels,
        "ssim": ssim_vals,
        "files": file_paths
    }

13. Run Evaluation

In [None]:
# Dataset selection and size limits for the evaluation run.
CATEGORY = "carpet"
RESIZE = 512
LIMIT_TEST = 50

# Text conditioning used for every reconstruction.
PROMPT = "photo of mvcarpet object, clean carpet, no defects, realistic texture"
NEGATIVE_PROMPT = "holes, stains, defects, damaged, distortion"

# Diffusion and scoring hyper-parameters.
STRENGTH = 0.25
GUIDANCE = 7.5
STEPS = 100
ALPHA = 0.7

# Gather the keyword arguments in one place, then run the evaluation.
eval_settings = dict(
    category=CATEGORY,
    resize=RESIZE,
    limit_test=LIMIT_TEST,
    prompt=PROMPT,
    negative_prompt=NEGATIVE_PROMPT,
    strength=STRENGTH,
    guidance_scale=GUIDANCE,
    num_inference_steps=STEPS,
    alpha=ALPHA,
)
results = evaluate(**eval_settings)