## FOLDER RESTRUCTURING

In [16]:
# import os
# import shutil
# from pathlib import Path
# import random

# SOURCE_ROOT = "/Users/mrinalseth13331/Downloads/archive"         # your original dataset root
# TARGET_ROOT = "mvtec_all"     # new folder that ImageFolder expects
# VAL_LIMIT_PER_CLASS = None    # None -> copy all; or set int (e.g., 50) to limit validation images
# SHUFFLE_VAL = True            # if True, pick random images from test/good for val (useful when many)
# OVERWRITE = False             # if True, overwrite existing files in target


# SOURCE_ROOT = Path(SOURCE_ROOT)
# TARGET_ROOT = Path(TARGET_ROOT)

# if not SOURCE_ROOT.exists():
#     raise FileNotFoundError(f"Source root {SOURCE_ROOT} not found")

# # create train/val root
# train_root = TARGET_ROOT / "train"
# val_root = TARGET_ROOT / "val"
# train_root.mkdir(parents=True, exist_ok=True)
# val_root.mkdir(parents=True, exist_ok=True)

# categories = sorted([p.name for p in SOURCE_ROOT.iterdir() if p.is_dir()])
# print("Found categories:", categories)

# summary = {}

# for cat in categories:
#     src_train_good = SOURCE_ROOT / cat / "train" / "good"
#     src_test_good = SOURCE_ROOT / cat / "test" / "good"

#     dst_train_cat = train_root / cat
#     dst_val_cat = val_root / cat
#     dst_train_cat.mkdir(parents=True, exist_ok=True)
#     dst_val_cat.mkdir(parents=True, exist_ok=True)

#     # copy train/good -> mvtec_all/train/<cat>/
#     train_files = []
#     if src_train_good.exists():
#         train_files = sorted([f for f in src_train_good.iterdir() if f.is_file()])
#         for f in train_files:
#             dst = dst_train_cat / f.name
#             if dst.exists() and not OVERWRITE:
#                 continue
#             shutil.copy2(f, dst)
#     else:
#         print(f"Warning: {src_train_good} does not exist for category {cat}")

#     # prepare val files from test/good
#     val_files = []
#     if src_test_good.exists():
#         val_candidates = sorted([f for f in src_test_good.iterdir() if f.is_file()])
#         if SHUFFLE_VAL:
#             random.shuffle(val_candidates)
#         if VAL_LIMIT_PER_CLASS is not None:
#             val_candidates = val_candidates[:VAL_LIMIT_PER_CLASS]
#         for f in val_candidates:
#             dst = dst_val_cat / f.name
#             if dst.exists() and not OVERWRITE:
#                 continue
#             shutil.copy2(f, dst)
#         val_files = val_candidates
#     else:
#         # If test/good missing, optionally use some from train/good for val (fallback)
#         if train_files:
#             fallback = train_files[:min(10, len(train_files))]  # small fallback
#             for f in fallback:
#                 dst = dst_val_cat / f.name
#                 if dst.exists() and not OVERWRITE:
#                     continue
#                 shutil.copy2(f, dst)
#             val_files = fallback
#         else:
#             print(f"Warning: No test/good or train/good found for {cat}")

#     summary[cat] = {
#         "train_copied": len(list(dst_train_cat.iterdir())),
#         "val_copied": len(list(dst_val_cat.iterdir()))
#     }

# # Print summary
# print("\nCopy summary:")
# total_train = total_val = 0
# for cat, info in summary.items():
#     print(f"  {cat:12s} -> train: {info['train_copied']:3d}, val: {info['val_copied']:3d}")
#     total_train += info['train_copied']
#     total_val += info['val_copied']

# print(f"\nTotal images copied: train={total_train}, val={total_val}")
# print(f"New structure ready at: {TARGET_ROOT}/ (use ImageFolder on {TARGET_ROOT}/train and {TARGET_ROOT}/val)")


Found categories: ['bottle', 'cable', 'capsule', 'carpet', 'grid', 'hazelnut', 'leather', 'metal_nut', 'pill', 'screw', 'tile', 'toothbrush', 'transistor', 'wood', 'zipper']

Copy summary:
  bottle       -> train: 209, val:  20
  cable        -> train: 224, val:  58
  capsule      -> train: 219, val:  23
  carpet       -> train: 280, val:  28
  grid         -> train: 264, val:  21
  hazelnut     -> train: 391, val:  40
  leather      -> train: 245, val:  32
  metal_nut    -> train: 220, val:  22
  pill         -> train: 267, val:  26
  screw        -> train: 320, val:  41
  tile         -> train: 230, val:  33
  toothbrush   -> train:  60, val:  12
  transistor   -> train: 213, val:  60
  wood         -> train: 247, val:  19
  zipper       -> train: 240, val:  32

Total images copied: train=3629, val=467
New structure ready at: mvtec_all/ (use ImageFolder on mvtec_all/train and mvtec_all/val)


In [17]:
%pip install torch torchvision pillow tqdm matplotlib scikit-learn gradio



Note: you may need to restart the kernel to use updated packages.


In [1]:
import torch
device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
print("Device:", device)


Device: mps


In [3]:
import torch
import torch.nn.functional as F
from torchvision.models import resnet50, ResNet50_Weights

class ResNetFeatureExtractor(torch.nn.Module):
    def __init__(self, pretrained=True, device=None):
        super().__init__()
        weights = ResNet50_Weights.DEFAULT if pretrained else None
        self.model = resnet50(weights=weights)
        self.model.eval()
        for p in self.model.parameters():
            p.requires_grad = False

        self.features = []
        self.handles = []
        self.handles.append(self.model.layer2[-1].register_forward_hook(self._hook))
        self.handles.append(self.model.layer3[-1].register_forward_hook(self._hook))

        if device is not None:
            self.to(device)

    def _hook(self, module, input, output):
        # detach and keep device (no cpu() here)
        self.features.append(output.detach())

    def forward(self, x, target_spatial=None):
        self.features = []
        with torch.no_grad():
            _ = self.model(x)

        if len(self.features) == 0:
            raise RuntimeError("No features captured by hooks")

        # default target: first fmap size
        if target_spatial is None:
            h, w = self.features[0].shape[-2], self.features[0].shape[-1]
            target_spatial = (h, w)

        resized = []
        for fmap in self.features:
            fmap_smoothed = F.avg_pool2d(fmap, kernel_size=3, stride=1, padding=1)  # keep spatial dims
            if fmap_smoothed.shape[-2:] != target_spatial:
                fmap_resized = F.interpolate(fmap_smoothed, size=target_spatial, mode='bilinear', align_corners=False)
            else:
                fmap_resized = fmap_smoothed
            resized.append(fmap_resized)

        patch = torch.cat(resized, dim=1)  # (B, 512+1024=1536, H, W)
        return patch

    def remove_hooks(self):
        for h in self.handles:
            h.remove()
        self.handles = []


In [4]:
extractor = ResNetFeatureExtractor(pretrained=True, device=device)


In [5]:
import os

root_path = "/Users/mrinalseth13331/Downloads/archive"  # path where all class folders exist

category_list = sorted([d for d in os.listdir(root_path) if os.path.isdir(os.path.join(root_path, d))])

print(category_list)
print(len(category_list))


['bottle', 'cable', 'capsule', 'carpet', 'grid', 'hazelnut', 'leather', 'metal_nut', 'pill', 'screw', 'tile', 'toothbrush', 'transistor', 'wood', 'zipper']
15


In [6]:
from torchvision.models import resnet18, ResNet18_Weights
num_classes = len(category_list)  # e.g., 15

clf = resnet18(weights=ResNet18_Weights.DEFAULT)
# replace final layer
clf.fc = torch.nn.Linear(clf.fc.in_features, num_classes)
clf = clf.to(device)


In [6]:
from torchvision import transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize((224,224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN,
                         std=IMAGENET_STD)
])
train_ds = ImageFolder("mvtec_all/train", transform=transform)  # folder containing class-subfolders
train_loader = DataLoader(train_ds, batch_size=32, shuffle=True, num_workers=4)

val_ds = ImageFolder("mvtec_all/val", transform=transform)
val_loader = DataLoader(val_ds, batch_size=32, shuffle=False, num_workers=4)


In [12]:
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm.auto import tqdm
import numpy as np

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(clf.parameters(), lr=1e-4)

def train_classifier(model, train_loader, val_loader=None, epochs=5, device=device, checkpoint_path="best_classifier.pth"):
    """
    Train a classifier (assumes optimizer & criterion already defined in the outer scope).
    Returns: history dict with train_loss, val_loss, train_acc, val_acc lists.
    """
    history = {
        "train_loss": [],
        "val_loss": [],
        "train_acc": [],
        "val_acc": []
    }

    best_val_loss = float('inf')

    model = model.to(device)

    # Optional scheduler example (uncomment if you like)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

    for epoch in range(1, epochs + 1):
        model.train()
        running_loss = 0.0
        running_corrects = 0
        total = 0

        pbar = tqdm(train_loader, desc=f"Epoch {epoch}/{epochs} [train]", leave=False)
        for imgs, labels in pbar:
            imgs = imgs.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(imgs)                 # logits
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # stats
            running_loss += loss.item() * imgs.size(0)
            preds = outputs.argmax(dim=1)
            running_corrects += (preds == labels).sum().item()
            total += imgs.size(0)

            pbar.set_postfix({"batch_loss": loss.item(), "acc": running_corrects/total})

        epoch_train_loss = running_loss / max(1, total)
        epoch_train_acc = running_corrects / max(1, total)
        history["train_loss"].append(epoch_train_loss)
        history["train_acc"].append(epoch_train_acc)

        # Validation
        if val_loader is not None:
            model.eval()
            val_loss = 0.0
            val_corrects = 0
            val_total = 0
            with torch.no_grad():
                for imgs, labels in tqdm(val_loader, desc=f"Epoch {epoch}/{epochs} [val]", leave=False):
                    imgs = imgs.to(device)
                    labels = labels.to(device)
                    outputs = model(imgs)
                    loss = criterion(outputs, labels)

                    val_loss += loss.item() * imgs.size(0)
                    preds = outputs.argmax(dim=1)
                    val_corrects += (preds == labels).sum().item()
                    val_total += imgs.size(0)

            epoch_val_loss = val_loss / max(1, val_total)
            epoch_val_acc = val_corrects / max(1, val_total)
            history["val_loss"].append(epoch_val_loss)
            history["val_acc"].append(epoch_val_acc)


            # checkpoint best
            if epoch_val_loss < best_val_loss:
                best_val_loss = epoch_val_loss
                torch.save({
                    "epoch": epoch,
                    "model_state_dict": model.state_dict(),
                    "optimizer_state_dict": optimizer.state_dict(),
                    "val_loss": epoch_val_loss
                }, checkpoint_path)

            print(f"Epoch {epoch}/{epochs}  Train loss: {epoch_train_loss:.4f}, Train acc: {epoch_train_acc:.4f}  |  Val loss: {epoch_val_loss:.4f}, Val acc: {epoch_val_acc:.4f}")
        else:
            # no validation loader provided
            history["val_loss"].append(None)
            history["val_acc"].append(None)
            print(f"Epoch {epoch}/{epochs}  Train loss: {epoch_train_loss:.4f}, Train acc: {epoch_train_acc:.4f}")

    print("Training finished.")
    return history



In [13]:
history = train_classifier(
    model=clf,
    train_loader=train_loader,
    val_loader=val_loader,
    epochs=5,                     # change as needed
    device=device,
    checkpoint_path="best_classifier.pth"
)


Epoch 1/5 [train]:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 1/5 [val]:   0%|          | 0/15 [00:00<?, ?it/s]

Epoch 1/5  Train loss: 0.0992, Train acc: 0.9829  |  Val loss: 0.0009, Val acc: 1.0000


Epoch 2/5 [train]:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 2/5 [val]:   0%|          | 0/15 [00:00<?, ?it/s]

Epoch 2/5  Train loss: 0.0032, Train acc: 1.0000  |  Val loss: 0.0005, Val acc: 1.0000


Epoch 3/5 [train]:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 3/5 [val]:   0%|          | 0/15 [00:00<?, ?it/s]

IOStream.flush timed out
IOStream.flush timed out


Epoch 3/5  Train loss: 0.0013, Train acc: 1.0000  |  Val loss: 0.0003, Val acc: 1.0000


Epoch 4/5 [train]:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 4/5 [val]:   0%|          | 0/15 [00:00<?, ?it/s]

Epoch 4/5  Train loss: 0.0009, Train acc: 1.0000  |  Val loss: 0.0002, Val acc: 1.0000


Epoch 5/5 [train]:   0%|          | 0/114 [00:00<?, ?it/s]

Epoch 5/5 [val]:   0%|          | 0/15 [00:00<?, ?it/s]

Epoch 5/5  Train loss: 0.0006, Train acc: 1.0000  |  Val loss: 0.0001, Val acc: 1.0000
Training finished.


In [14]:
correct = 0
total = 0
clf.eval()

with torch.no_grad():
    for imgs, labels in val_loader:
        imgs = imgs.to(device)
        labels = labels.to(device)
        out = clf(imgs)
        preds = out.argmax(1)
        correct += (preds == labels).sum().item()
        total += labels.size(0)

print("Validation Accuracy: ", correct / total)


Validation Accuracy:  1.0


In [None]:
torch.save(clf.state_dict(), "classifier_res18.pth")

In [7]:
train_ds = ImageFolder("mvtec_all/train", transform=transform)
train_ds.classes           # list of class names
train_ds.class_to_idx      # dictionary mapping names -> index
print(train_ds.classes)
category_list = train_ds.classes


NameError: name 'ImageFolder' is not defined

In [18]:
from PIL import Image

# Get class names from ImageFolder
category_list = train_ds.classes
print("Class names:", category_list)

def classify_image(img_path):
    img = Image.open(img_path).convert("RGB")
    x = transform(img).unsqueeze(0).to(device)

    with torch.no_grad():
        logits = clf(x)
        pred_idx = logits.argmax(1).item()

    pred_name = category_list[pred_idx]
    print("Predicted object:", pred_name)

    return pred_name


Class names: ['bottle', 'cable', 'capsule', 'carpet', 'grid', 'hazelnut', 'leather', 'metal_nut', 'pill', 'screw', 'tile', 'toothbrush', 'transistor', 'wood', 'zipper']


In [19]:
classify_image('/Users/mrinalseth13331/Downloads/mvtec_all/train/zipper/238.png')

Predicted object: zipper


'zipper'

#### In this taken help from the generative pre-trained transformer



In [1]:
# Gradio app: predict only the object name
import torch
from PIL import Image
import gradio as gr
from torchvision import transforms
import os

# -------- device --------
device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
print("Device:", device)

# -------- transform (ImageNet normalization) --------
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD  = [0.229, 0.224, 0.225]

transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD)
])

# -------- classifier: use existing `clf` if present, else try to load checkpoint --------
try:
    clf  # if clf exists in the notebook, use it
    print("Using existing classifier 'clf' from notebook.")
except NameError:
    clf = None

if clf is None:
    ckpt_path = "best_classifier.pth"
    if os.path.exists(ckpt_path):
        from torchvision.models import resnet18, ResNet18_Weights
        # we need category_list to know num_classes; try to infer from train_ds if present
        if 'train_ds' in globals():
            category_list = train_ds.classes
            num_classes = len(category_list)
        else:
            raise RuntimeError("No `clf` in memory and no `train_ds` found to infer classes. "
                               "Either define `clf` or place train_ds in the notebook or provide 'best_classifier.pth' and category_list.")
        # rebuild architecture and load
        clf = resnet18(weights=ResNet18_Weights.DEFAULT)
        clf.fc = torch.nn.Linear(clf.fc.in_features, num_classes)
        state = torch.load(ckpt_path, map_location=device)
        clf.load_state_dict(state["model_state_dict"])
        print(f"Loaded classifier from {ckpt_path}.")
    else:
        raise RuntimeError("No classifier `clf` found and no checkpoint at 'best_classifier.pth'.")

# move to device and eval
clf = clf.to(device)
clf.eval()

if 'train_ds' in globals():
    category_list = train_ds.classes
elif 'category_list' in globals():
    pass
else:
    raise RuntimeError("Cannot find `category_list`. Ensure `train_ds` exists or define `category_list` list in the notebook.")

# -------- prediction function for Gradio --------
def predict_object_name(pil_img: Image.Image) -> str:
    """
    pil_img: PIL.Image from Gradio
    returns: predicted class name (string)
    """
    try:
        img = pil_img.convert("RGB")
        x = transform(img).unsqueeze(0).to(device)          # (1,3,224,224)
        with torch.no_grad():
            logits = clf(x)
            pred_idx = int(logits.argmax(dim=1).item())
        pred_name = category_list[pred_idx]
        return str(pred_name)
    except Exception as e:
        return f"Error: {e}"

# -------- Gradio interface --------
iface = gr.Interface(
    fn=predict_object_name,
    inputs=gr.Image(type="pil", label="Upload image"),
    outputs=gr.Textbox(label="Predicted object"),
    title="MVTec Object Classifier (name only)",
    description="Upload an image; the model will predict the object category name (e.g., 'bottle')."
)

# Launch
iface.launch(share=False)


Device: mps


RuntimeError: No `clf` in memory and no `train_ds` found to infer classes. Either define `clf` or place train_ds in the notebook or provide 'best_classifier.pth' and category_list.