In [2]:
# Cell 1 - Installing and importing libraries
!pip install pennylane --quiet
!pip install matplotlib scikit-learn --quiet
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score, f1_score, precision_score, recall_score
from sklearn.utils import resample
import pennylane as qml
import torch
import glob
from collections import defaultdict
from google.colab import drive
drive.mount('/content/drive')
from pennylane import numpy as pnp



Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Cell 2 - Dataset Selection

# Dataset root (one sub-folder per object category) and the categories used
# in this notebook.
mvtec_root = '/content/drive/MyDrive/Colab Notebooks'
selected_categories = ['cable', 'grid', 'metal_nut', 'transistor']

# Per-category image counts, filled in by the loop below.
dataset_info = defaultdict(lambda: {'train_good': 0, 'test_good': 0, 'test_defect': 0})

# 'train' and 'test' hold (path, category) tuples; 'train_labels' and
# 'test_labels' are index-aligned with them (0 = good, 1 = defective).
# 'labels' is kept for backward compatibility only: it is the combined list of
# train and test labels in traversal order, so it lines up with neither
# 'train' nor 'test' on its own.
image_paths = {'train': [], 'test': [], 'labels': [], 'train_labels': [], 'test_labels': []}

for category in selected_categories:
    train_good_dir = os.path.join(mvtec_root, category, 'train', 'good')
    test_dir = os.path.join(mvtec_root, category, 'test')

    # glob/listdir return entries in arbitrary order; sort so that the
    # inventory is identical from one run to the next.
    train_images = sorted(glob.glob(os.path.join(train_good_dir, '*.png')))
    dataset_info[category]['train_good'] = len(train_images)

    # Training folder only contains good images -> label 0
    for img_path in train_images:
        image_paths['train'].append((img_path, category))
        image_paths['train_labels'].append(0)
        image_paths['labels'].append(0)

    # Test images: the 'good' sub-folder is label 0, every other sub-folder is a defect type
    for defect_type in sorted(os.listdir(test_dir)):
        defect_dir = os.path.join(test_dir, defect_type)
        if not os.path.isdir(defect_dir):
            # Stray files next to the class folders are not defect types
            continue

        defect_images = sorted(glob.glob(os.path.join(defect_dir, '*.png')))
        is_good = defect_type == 'good'
        label = 0 if is_good else 1

        key = 'test_good' if is_good else 'test_defect'
        dataset_info[category][key] += len(defect_images)

        for img_path in defect_images:
            image_paths['test'].append((img_path, category))
            image_paths['test_labels'].append(label)
            image_paths['labels'].append(label)

# Summary table: one row per category, plus a row total
df_summary = pd.DataFrame(dataset_info).T
df_summary['Total'] = df_summary.sum(axis=1)
display(df_summary)

Unnamed: 0,train_good,test_good,test_defect,Total
cable,224,58,92,374
grid,264,21,57,342
metal_nut,220,22,93,335
transistor,213,60,40,313


In [4]:
# Cell 3 - Image Preparation 1

# Parameters
img_size = 32  # images are resized to img_size x img_size before flattening
processed_data = {}  # Store per-object arrays

def process_image(path):
    """Load one image file as a flat grayscale vector.

    The file at `path` is opened, converted to mode 'L' (grayscale), resized
    to `img_size` x `img_size` and flattened.

    Returns:
        1-D NumPy array with img_size * img_size entries.
    """
    # The context manager closes the underlying file as soon as the pixel
    # data has been converted, instead of relying on garbage collection.
    with Image.open(path) as img:
        thumb = img.convert('L').resize((img_size, img_size))
    return np.asarray(thumb).flatten()

for category in selected_categories:
    # TRAIN: only 'good' images, all labelled 0.
    # File listings are sorted because glob/listdir order is arbitrary and the
    # row order of these arrays decides which samples a seeded random draw
    # picks later on.
    train_dir = os.path.join(mvtec_root, category, 'train', 'good')
    train_paths = sorted(glob.glob(os.path.join(train_dir, '*.png')))
    X_train_cat = [process_image(img_path) for img_path in train_paths]
    y_train_cat = [0] * len(train_paths)

    # TEST: good (0) and defective (1); each sub-folder is one defect type
    X_test_cat = []
    y_test_cat = []
    test_dir = os.path.join(mvtec_root, category, 'test')
    for defect_type in sorted(os.listdir(test_dir)):
        defect_dir = os.path.join(test_dir, defect_type)
        if not os.path.isdir(defect_dir):
            # Ignore stray files that sit next to the class folders
            continue
        label = 0 if defect_type == 'good' else 1
        for img_path in sorted(glob.glob(os.path.join(defect_dir, '*.png'))):
            X_test_cat.append(process_image(img_path))
            y_test_cat.append(label)

    # Store as numpy arrays
    processed_data[category] = {
        'X_train_raw': np.array(X_train_cat),
        'y_train': np.array(y_train_cat),
        'X_test_raw': np.array(X_test_cat),
        'y_test': np.array(y_test_cat)
    }

# Confirm structure
for cat in selected_categories:
    print(f"{cat}: Train={processed_data[cat]['X_train_raw'].shape}, Test={processed_data[cat]['X_test_raw'].shape}")

cable: Train=(224, 1024), Test=(150, 1024)
grid: Train=(264, 1024), Test=(78, 1024)
metal_nut: Train=(220, 1024), Test=(115, 1024)
transistor: Train=(213, 1024), Test=(100, 1024)


In [5]:
# Cell 4 - Image Preparation 2

# Min-max scale the flattened pixels, one independent scaler per object
scalers = {}  # category -> fitted scaler, kept for optional reuse
for category in selected_categories:
    entry = processed_data[category]
    X_train, X_test = entry['X_train_raw'], entry['X_test_raw']

    # The scaler learns its ranges from the training images only and is then
    # applied unchanged to both splits.
    scaler = MinMaxScaler().fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Keep the scaled arrays next to the raw ones
    entry.update(X_train_scaled=X_train_scaled, X_test_scaled=X_test_scaled)
    scalers[category] = scaler

In [6]:
# Cell 5 - PCA Compression to Four Dimensions

# Compress every category to 4 principal components (one PCA model each)
pca_models = {}

for category in selected_categories:
    X_train = processed_data[category]['X_train_scaled']
    X_test = processed_data[category]['X_test_scaled']

    # Fit PCA only on training data.
    # random_state is fixed because scikit-learn's default 'auto' solver may
    # pick the randomized SVD for wide inputs like these; without a seed the
    # components (and everything downstream) can differ between runs.
    pca = PCA(n_components=4, random_state=42)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test)

    # Store results
    processed_data[category]['X_train_pca'] = X_train_pca
    processed_data[category]['X_test_pca'] = X_test_pca
    pca_models[category] = pca

    # Share of the training variance kept by the 4 components
    print(f"{category}: PCA explained variance = {np.sum(pca.explained_variance_ratio_):.4f}")

cable: PCA explained variance = 0.3361
grid: PCA explained variance = 0.1933
metal_nut: PCA explained variance = 0.5935
transistor: PCA explained variance = 0.6257


Transistor and metal nut retain a strong representation (about 60% of the variance); cable is borderline at 33.6%, and grid is very low (<20%), which suggests heavy information loss after compression. This is expected and acceptable at this stage.

In [7]:
# Cell 6 - Checking class balance in training data

# Print, for each category, how many training samples carry each label
for category in selected_categories:
    train_labels = processed_data[category]['y_train']
    values, freqs = np.unique(train_labels, return_counts=True)
    per_class = {value: freq for value, freq in zip(values, freqs)}
    print(f"{category}: {per_class}")

cable: {np.int64(0): np.int64(224)}
grid: {np.int64(0): np.int64(264)}
metal_nut: {np.int64(0): np.int64(220)}
transistor: {np.int64(0): np.int64(213)}


Only the test set contains defective images; there are none in the training set. For this reason, we will rebuild the train/test split so that the training set also contains defective images.

In [8]:
# Cell 7 - Building a balanced train/test split

# Rebuild balanced_data with capped defect sampling.
# The training folders only hold good images (label 0), so part of the
# defective test images is moved into the training set here.
# NOTE(review): the number of moved defects is capped at 80% of those
# available, so the training set is only class-balanced when a category has
# enough defective images.
balanced_data = {}

for category in selected_categories:
    print(f"\n--- Balancing {category} ---")

    X_train_0 = processed_data[category]['X_train_scaled']
    y_train_0 = processed_data[category]['y_train']

    X_test = processed_data[category]['X_test_scaled']
    y_test = np.asarray(processed_data[category]['y_test'])

    # Separate test set into defect and normal
    defect_indices = np.flatnonzero(y_test == 1)
    normal_indices = np.flatnonzero(y_test == 0)

    # Cap: Only take up to 80% of defects into training
    n_available_defects = len(defect_indices)
    n_to_sample = min(len(y_train_0), int(0.8 * n_available_defects))

    if n_to_sample == 0:
        print(f"[WARNING] Not enough defect samples in {category} to safely split.")
        continue

    # A local generator seeded with 42 draws the same indices as the former
    # np.random.seed(42) + np.random.choice pair, but leaves NumPy's global
    # random state untouched.
    rng = np.random.RandomState(42)
    defect_train_indices = rng.choice(defect_indices, size=n_to_sample, replace=False)
    # Remaining defects stay in the test set (returned in sorted order)
    defect_test_indices = np.setdiff1d(defect_indices, defect_train_indices)

    # Build training set.
    # Labels are created as integers: np.ones defaults to float, which used to
    # turn y_train into a float array while y_test stayed integer.
    X_train_1 = X_test[defect_train_indices]
    y_train_1 = np.ones(len(defect_train_indices), dtype=int)

    X_train = np.vstack([X_train_0, X_train_1])
    y_train = np.concatenate([y_train_0, y_train_1])

    # Build test set: all good test images plus the held-out defects
    keep_test_indices = np.concatenate([normal_indices, defect_test_indices])
    X_test_bal = X_test[keep_test_indices]
    y_test_bal = y_test[keep_test_indices]

    # Refit PCA on the new training set; seeded so that a possibly randomized
    # SVD solver gives the same components on every run.
    pca = PCA(n_components=4, random_state=42)
    X_train_pca = pca.fit_transform(X_train)
    X_test_pca = pca.transform(X_test_bal)

    # Store
    balanced_data[category] = {
        'X_train_pca': X_train_pca,
        'y_train': y_train,
        'X_test_pca': X_test_pca,
        'y_test': y_test_bal
    }

    print(f"Train set: {len(y_train)} (0: {sum(y_train==0)}, 1: {sum(y_train==1)}) | Test set: {len(y_test_bal)} (1s: {sum(y_test_bal==1)})")


--- Balancing cable ---
Train set: 297 (0: 224, 1: 73) | Test set: 77 (1s: 19)

--- Balancing grid ---
Train set: 309 (0: 264, 1: 45) | Test set: 33 (1s: 12)

--- Balancing metal_nut ---
Train set: 294 (0: 220, 1: 74) | Test set: 41 (1s: 19)

--- Balancing transistor ---
Train set: 245 (0: 213, 1: 32) | Test set: 68 (1s: 8)


In [9]:
# Cell 8 - Classical SVM on balanced data

# Per-category metrics of the classical baseline: category -> metric dict
svm_results = {}

for category in selected_categories:
    print(f"\n--- {category.upper()} ---")

    # The split cell skips categories without enough defects; report that
    # instead of failing with a KeyError.
    if category not in balanced_data:
        print(f"[WARNING] No balanced split available for {category}; skipping.")
        continue

    # Get balanced and PCA-transformed data
    X_train = balanced_data[category]['X_train_pca']
    X_test = balanced_data[category]['X_test_pca']
    y_train = balanced_data[category]['y_train']
    y_test = balanced_data[category]['y_test']

    # Z-score normalization (statistics come from the training set only)
    scaler = StandardScaler()
    X_train_std = scaler.fit_transform(X_train)
    X_test_std = scaler.transform(X_test)

    # Train classical linear SVM
    clf = SVC(kernel='linear', C=1.0)
    clf.fit(X_train_std, y_train)

    # Predict and evaluate; the scalar metrics refer to the defect class (label 1)
    y_pred = clf.predict(X_test_std)

    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)

    svm_results[category] = {
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1': f1
    }

    # zero_division=0 matches the scalar metrics above and silences the
    # undefined-metric warnings raised when a class is never predicted.
    print(classification_report(y_test, y_pred, target_names=["Normal", "Defect"], zero_division=0))


--- CABLE ---
              precision    recall  f1-score   support

      Normal       0.75      1.00      0.86        58
      Defect       0.00      0.00      0.00        19

    accuracy                           0.75        77
   macro avg       0.38      0.50      0.43        77
weighted avg       0.57      0.75      0.65        77


--- GRID ---
              precision    recall  f1-score   support

      Normal       0.64      1.00      0.78        21
      Defect       0.00      0.00      0.00        12

    accuracy                           0.64        33
   macro avg       0.32      0.50      0.39        33
weighted avg       0.40      0.64      0.49        33


--- METAL_NUT ---
              precision    recall  f1-score   support

      Normal       0.71      0.23      0.34        22
      Defect       0.50      0.89      0.64        19

    accuracy                           0.54        41
   macro avg       0.61      0.56      0.49        41
weighted avg       0.61   

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [10]:
# Cell 9 - Defining quantum circuit and kernel matrix function

n_qubits = 4  # one qubit per input feature passed to the embedding
dev = qml.device("default.qubit", wires=n_qubits)

@qml.qnode(dev)
def quantum_kernel_circuit(x1, x2, weights):
    """Circuit evaluated for one pair of samples.

    Sequence of operations: Y-rotation angle embedding of `x1`, then for each
    layer in `weights` a `Rot` gate on every qubit followed by a chain of
    CNOTs between neighbouring qubits, then the adjoint of the angle
    embedding of `x2`.

    Args:
        x1, x2: feature vectors used as rotation angles.
        weights: indexed as weights[layer][qubit] -> three Rot angles.

    Returns:
        Probabilities of all computational basis states on the n_qubits wires.

    NOTE(review): the weight layers are applied on the x1 side only and are
    not undone before x2 is un-embedded, so swapping x1 and x2 is not
    obviously guaranteed to give the same value - confirm this is the
    intended kernel.
    """
    all_wires = range(n_qubits)

    # Encode the first sample
    qml.AngleEmbedding(x1, wires=all_wires, rotation='Y')

    # Fixed rotation + entangling layers
    for layer in weights:
        for wire in all_wires:
            qml.Rot(*layer[wire], wires=wire)
        for control in range(n_qubits - 1):
            qml.CNOT(wires=[control, control + 1])

    # Un-encode the second sample
    qml.adjoint(qml.AngleEmbedding)(x2, wires=all_wires, rotation='Y')
    return qml.probs(wires=all_wires)

def compute_kernel_matrix(X1, X2):
    """Evaluate the quantum circuit for every pair of rows of X1 and X2.

    Args:
        X1: sequence of n1 feature vectors (rows of the result).
        X2: sequence of n2 feature vectors (columns of the result).

    Returns:
        NumPy array K of shape (n1, n2) where K[i, j] is the first entry of
        the probability vector returned by quantum_kernel_circuit for
        (X1[i], X2[j]).

    The circuit is run once per pair, i.e. n1 * n2 times, so the runtime
    grows with the product of the two set sizes.
    """
    n1 = len(X1)
    n2 = len(X2)

    # Random but fixed weights for entanglement layers.
    # A local generator seeded with 42 produces the same values as the former
    # np.random.seed(42) + np.random.uniform pair, without resetting NumPy's
    # global random state on every call.
    weights = np.random.RandomState(42).uniform(0, 2*np.pi, size=(1, n_qubits, 3))

    K = np.zeros((n1, n2))
    for i in range(n1):
        for j in range(n2):
            probs = quantum_kernel_circuit(X1[i], X2[j], weights)
            K[i, j] = probs[0]  # Fidelity proxy from overlap with |0000>

    return K


In [None]:
# Cell 10 - Training and evaluating the QSVM

# Per-category metrics of the quantum-kernel SVM: category -> metric dict
qsvm_results = {}

for category in selected_categories:
    print(f"\n--- QSVM: {category.upper()} ---")

    # The split cell skips categories without enough defects; report that
    # instead of failing with a KeyError.
    if category not in balanced_data:
        print(f"[WARNING] No balanced split available for {category}; skipping.")
        continue

    # NOTE(review): the classical SVM cell standardises the PCA scores before
    # fitting, whereas here the raw PCA scores are passed to the circuit as
    # rotation angles - confirm that this difference is intended.
    X_train = balanced_data[category]['X_train_pca']
    X_test = balanced_data[category]['X_test_pca']
    y_train = balanced_data[category]['y_train']
    y_test = balanced_data[category]['y_test']

    # Compute quantum kernel matrices (one circuit run per pair of samples,
    # so this is by far the slowest step of the notebook)
    print("  Computing K_train...")
    K_train = compute_kernel_matrix(X_train, X_train)

    print("  Computing K_test...")
    K_test = compute_kernel_matrix(X_test, X_train)  # Note: test vs train

    # Train classical SVM with precomputed kernel
    clf = SVC(kernel='precomputed', C=1.0)
    clf.fit(K_train, y_train)

    # Predict
    y_pred = clf.predict(K_test)

    # Evaluation; the scalar metrics refer to the defect class (label 1)
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, zero_division=0)
    rec = recall_score(y_test, y_pred, zero_division=0)
    f1 = f1_score(y_test, y_pred, zero_division=0)

    qsvm_results[category] = {
        'Accuracy': acc,
        'Precision': prec,
        'Recall': rec,
        'F1': f1
    }

    # zero_division=0 matches the scalar metrics above and silences the
    # undefined-metric warnings raised when a class is never predicted.
    print(classification_report(y_test, y_pred, target_names=["Normal", "Defect"], zero_division=0))


--- QSVM: CABLE ---
  Computing K_train...
  Computing K_test...
              precision    recall  f1-score   support

      Normal       0.75      0.93      0.83        58
      Defect       0.20      0.05      0.08        19

    accuracy                           0.71        77
   macro avg       0.47      0.49      0.46        77
weighted avg       0.61      0.71      0.65        77


--- QSVM: GRID ---
  Computing K_train...
  Computing K_test...


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


              precision    recall  f1-score   support

      Normal       0.64      1.00      0.78        21
      Defect       0.00      0.00      0.00        12

    accuracy                           0.64        33
   macro avg       0.32      0.50      0.39        33
weighted avg       0.40      0.64      0.49        33


--- QSVM: METAL_NUT ---
  Computing K_train...
  Computing K_test...
              precision    recall  f1-score   support

      Normal       0.54      0.91      0.68        22
      Defect       0.50      0.11      0.17        19

    accuracy                           0.54        41
   macro avg       0.52      0.51      0.43        41
weighted avg       0.52      0.54      0.44        41


--- QSVM: TRANSISTOR ---
  Computing K_train...
  Computing K_test...


After 2 h 15 min, the run was interrupted while the transistor category was still being processed, so no QSVM results are available for it.

In [12]:
# Cell 11 - Recreating the values from the previous cell

# QSVM metrics of the three categories that finished, entered by hand.
# Each tuple follows the order given in _metric_names.
_metric_names = ("Accuracy", "Precision", "Recall", "F1")
_reported_values = {
    "cable": (0.71, 0.20, 0.05, 0.08),
    "grid": (0.64, 0.00, 0.00, 0.00),
    "metal_nut": (0.54, 0.50, 0.11, 0.17),
}

# Rebuild the same nested layout the training cell produces:
# category -> {metric name -> value}
qsvm_results = {
    name: dict(zip(_metric_names, values))
    for name, values in _reported_values.items()
}

In [13]:
# Cell 12 - Comparing the classical SVM against the QSVM

# Merge classical and quantum results into one row per category.
# Categories are taken from the result dictionaries themselves (in the order
# of svm_results) instead of a hard-coded list, so the table automatically
# covers exactly the categories for which both models have results.
comparison = []
metric_names = ("Accuracy", "Precision", "Recall", "F1")

for category in svm_results:
    if category not in qsvm_results:
        # No QSVM result for this category (e.g. the run did not finish)
        continue

    svm = svm_results[category]
    qsvm = qsvm_results[category]

    # Columns alternate SVM / QSVM for each metric
    row = {"Category": category}
    for metric in metric_names:
        row[f"SVM_{metric}"] = svm[metric]
        row[f"QSVM_{metric}"] = qsvm[metric]
    comparison.append(row)

# Display as DataFrame
df_comparison = pd.DataFrame(comparison)
display(df_comparison.round(3))


Unnamed: 0,Category,SVM_Accuracy,QSVM_Accuracy,SVM_Precision,QSVM_Precision,SVM_Recall,QSVM_Recall,SVM_F1,QSVM_F1
0,cable,0.753,0.71,0.0,0.2,0.0,0.05,0.0,0.08
1,grid,0.636,0.64,0.0,0.0,0.0,0.0,0.0,0.0
2,metal_nut,0.537,0.54,0.5,0.5,0.895,0.11,0.642,0.17
