# Layered Clustering: Sequential Defect Separation

- **Layer 1**: Center Intensity -> MISSING pillars
- **Layer 2**: Darkness + Area -> Collapsed/Dark defects
- **Layer 3**: Stitching Detection -> Optical stitching errors (sharp lines)
- **Layer 4**: Contextual Features -> Misshaped/Irregular

In [None]:
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.manifold import TSNE
from sklearn.neighbors import LocalOutlierFactor
from collections import Counter
from scipy import ndimage
from skimage.metrics import structural_similarity as ssim

# Seed NumPy's global RNG so the np.random.shuffle calls in this notebook repeat across runs.
np.random.seed(42)
plt.rcParams['font.size'] = 10
plt.rcParams['figure.dpi'] = 100

# Project root: the working directory, or its parent when launched from "notebooks".
BASE_DIR = Path(os.getcwd())
BASE_DIR = BASE_DIR.parent if BASE_DIR.name == 'notebooks' else BASE_DIR
OUTPUT_DIR = BASE_DIR.joinpath("Meta_Atoms")
# Plot colours, indexed by cluster number.
COLORS = [
    '#2ecc71', '#e74c3c', '#3498db',
    '#9b59b6', '#f39c12', '#1abc9c',
]

## Load Tiles

In [None]:
def segment_array(image_path, output_dir, grid_size=21, tile_size=32):
    """Cut a regular grid of tiles out of one array image and save them as BMPs.

    The image is divided into ``grid_size`` x ``grid_size`` cells.  Around each
    cell centre a window reaching ``tile_size`` pixels in every direction
    (clipped to the image bounds) is written to
    ``<output_dir>/<image stem>/<stem>_<row>,<col>.bmp`` using 1-based
    row/column numbers.

    Returns ``None``.  If the image cannot be read, nothing is written.
    """
    image = cv2.imread(str(image_path))
    if image is None:
        return
    height, width = image.shape[:2]
    stem, _ext = os.path.splitext(os.path.basename(image_path))
    tile_dir = os.path.join(output_dir, stem)
    os.makedirs(tile_dir, exist_ok=True)
    step_x = width / grid_size
    step_y = height / grid_size
    for row in range(grid_size):
        # The vertical extent only depends on the row, so compute it once per row.
        centre_y = int((row + 0.5) * step_y)
        top = max(0, centre_y - tile_size)
        bottom = min(height, centre_y + tile_size)
        for col in range(grid_size):
            centre_x = int((col + 0.5) * step_x)
            left = max(0, centre_x - tile_size)
            right = min(width, centre_x + tile_size)
            out_path = os.path.join(tile_dir, f"{stem}_{row+1},{col+1}.bmp")
            cv2.imwrite(out_path, image[top:bottom, left:right])

OUTPUT_DIR.mkdir(exist_ok=True)
# Segment each source array once: an existing per-array folder is treated as "already tiled".
for arr in ["Array_1Crop.bmp", "Array_2Crop.bmp", "Array_3Crop.bmp"]:
    arr_path = BASE_DIR / arr
    if arr_path.exists() and not (OUTPUT_DIR / arr.replace('.bmp', '')).exists():
        segment_array(str(arr_path), str(OUTPUT_DIR))

# Load every tile as a grayscale image plus its grid position parsed from the file name.
tile_data = []
for array_name in ["Array_1Crop", "Array_2Crop", "Array_3Crop"]:
    array_dir = OUTPUT_DIR / array_name
    if not array_dir.exists():
        continue
    # Sorted so the tile order (and hence every seeded shuffle/clustering step
    # downstream) does not depend on filesystem enumeration order.
    for fpath in sorted(array_dir.glob("*.bmp")):
        img = cv2.imread(str(fpath), cv2.IMREAD_GRAYSCALE)
        if img is None:
            continue
        fname = fpath.stem
        try:
            # File names end in "_<row>,<col>".
            coords = fname.split('_')[-1]
            row, col = map(int, coords.split(','))
        except ValueError:
            # Non-numeric or wrongly shaped coordinates: keep the tile, mark position unknown.
            row, col = -1, -1
        tile_data.append({'array': array_name, 'filename': fpath.name, 'filepath': str(fpath),
                          'row': row, 'col': col, 'image': img, 'defect_type': 'Unknown'})

print(f"Loaded {len(tile_data)} tiles")

## Helper Functions

In [None]:
def cluster_and_visualize(tiles, X, method_name, n_clusters=3, n_samples=80):
    """Cluster tiles with PCA + KMeans, plot the result and return the labels.

    Args:
        tiles: List of tile dicts (each with an 'image' entry).  Every dict
            gains a ``cluster_<method_name>`` key holding its cluster label.
        X: Feature matrix with one row per tile, in the same order as ``tiles``.
        method_name: Name used for the dict key, the progress message and titles.
        n_clusters: Number of KMeans clusters.
        n_samples: Number of example tiles drawn per cluster, rounded down to
            whole rows of 10 (at least one row is always drawn).

    Returns:
        Array of KMeans cluster labels, one per row of ``X``.
    """
    # Non-finite feature values are replaced by 0 before scaling.
    X_clean = np.nan_to_num(X, nan=0, posinf=0, neginf=0)
    X_scaled = StandardScaler().fit_transform(X_clean)
    n_comp = min(5, X_scaled.shape[1])
    pca = PCA(n_components=n_comp)
    X_pca = pca.fit_transform(X_scaled)
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    labels = kmeans.fit_predict(X_pca)

    cluster_key = f'cluster_{method_name}'
    for tile, label in zip(tiles, labels):
        tile[cluster_key] = label

    print(f"Computing t-SNE for {method_name}...")
    # Perplexity is floored at 1 so very small tile sets do not request 0.
    tsne_kwargs = dict(n_components=2, perplexity=max(1, min(30, len(tiles) // 4)), random_state=42)
    try:
        # scikit-learn >= 1.5 names the iteration budget `max_iter`.
        tsne = TSNE(max_iter=500, **tsne_kwargs)
    except TypeError:
        # Older scikit-learn only knows the former name `n_iter`.
        tsne = TSNE(n_iter=500, **tsne_kwargs)
    X_tsne = tsne.fit_transform(X_pca[:, :min(3, n_comp)])

    # Cycle through the palette so more clusters than colours does not raise IndexError.
    colors = [COLORS[c % len(COLORS)] for c in range(n_clusters)]
    counts = [int(np.sum(labels == c)) for c in range(n_clusters)]

    fig, axes = plt.subplots(1, 3, figsize=(15, 4))
    for c in range(n_clusters):
        mask = labels == c
        legend_text = f'C{c} (n={mask.sum()})'
        axes[0].scatter(X_pca[mask, 0], X_pca[mask, 1], c=colors[c], label=legend_text, alpha=0.6, s=15)
        axes[1].scatter(X_tsne[mask, 0], X_tsne[mask, 1], c=colors[c], label=legend_text, alpha=0.6, s=15)
    axes[0].set_xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.1%})')
    axes[0].set_ylabel('PC2')
    axes[0].set_title('PCA')
    axes[0].legend(fontsize=9)
    axes[1].set_xlabel('t-SNE 1')
    axes[1].set_ylabel('t-SNE 2')
    axes[1].set_title('t-SNE')
    axes[1].legend(fontsize=9)
    axes[2].bar(range(n_clusters), counts, color=colors, edgecolor='black')
    axes[2].set_xticks(range(n_clusters))
    axes[2].set_title('Cluster Sizes')
    for i, v in enumerate(counts):
        axes[2].text(i, v + 5, str(v), ha='center', fontweight='bold')
    plt.suptitle(f'{method_name}', fontsize=14, fontweight='bold')
    plt.tight_layout()
    plt.show()

    # One gallery of randomly chosen example tiles per cluster.
    n_cols = 10
    n_rows = max(1, n_samples // n_cols)  # a zero-row grid would make plt.subplots raise
    for c in range(n_clusters):
        cluster_tiles = [t for t in tiles if t[cluster_key] == c]
        np.random.shuffle(cluster_tiles)
        # squeeze=False keeps `axes` two-dimensional even for a single row.
        fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols, n_rows), squeeze=False)
        for i, ax in enumerate(axes.flatten()):
            if i < len(cluster_tiles):
                ax.imshow(cluster_tiles[i]['image'], cmap='gray')
            ax.axis('off')
        plt.suptitle(f'{method_name} - C{c}: {len(cluster_tiles)}', fontsize=12, fontweight='bold', color=colors[c])
        plt.tight_layout()
        plt.show()
    return labels

def show_extracted_defects(tiles, title, n_samples=100):
    """Show a random gallery of up to ``n_samples`` tiles, ten per row.

    Args:
        tiles: Sequence of tile dicts, each with an 'image' entry.
        title: Heading for the figure; the total tile count is appended.
        n_samples: Maximum number of tiles to draw.

    Returns:
        None.  When there is nothing to draw (no tiles, or ``n_samples`` <= 0)
        a one-line message is printed instead of creating a figure.
    """
    n_shown = min(n_samples, len(tiles))
    if n_shown <= 0:
        # plt.subplots cannot build a grid with zero rows, so bail out early.
        print(f'{title}: {len(tiles)} total (nothing to display)')
        return

    samples = list(tiles)  # shuffle a copy so the caller's ordering is untouched
    np.random.shuffle(samples)
    samples = samples[:n_shown]

    n_cols = 10
    n_rows = (n_shown + n_cols - 1) // n_cols
    # squeeze=False keeps `axes` two-dimensional even when only one row is needed.
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols, n_rows), squeeze=False)
    for i, ax in enumerate(axes.flatten()):
        if i < len(samples):
            ax.imshow(samples[i]['image'], cmap='gray')
        ax.axis('off')
    plt.suptitle(f'{title}: {len(tiles)} total', fontsize=14, fontweight='bold', color='red')
    plt.tight_layout()
    plt.show()

---
# LAYER 1: Extract MISSING (Center Intensity)

In [None]:
def extract_center_intensity(tiles):
    """Build centre-versus-surround intensity features for each tile.

    The "centre" is the window starting a quarter of the way in from the
    top-left and spanning half the height and half the width; the "surround"
    is every remaining pixel.

    Args:
        tiles: Iterable of dicts whose 'image' entry is a 2-D array.

    Returns:
        Array with one row per tile and columns
        [centre mean, centre std, centre min, centre max, surround mean,
        centre mean - surround mean].
    """
    rows = []
    for entry in tiles:
        pixels = entry['image']
        height, width = pixels.shape
        top, left = height // 4, width // 4
        bottom, right = top + height // 2, left + width // 2

        core = pixels[top:bottom, left:right]
        # Boolean mask selecting everything outside the central window.
        outside = np.ones_like(pixels, dtype=bool)
        outside[top:bottom, left:right] = False

        core_mean = np.mean(core)
        ring_mean = np.mean(pixels[outside])
        rows.append([
            core_mean,
            np.std(core),
            np.min(core),
            np.max(core),
            ring_mean,
            core_mean - ring_mean,
        ])
    return np.array(rows)

# Layer 1 driver: compute centre-intensity features for every loaded tile and
# cluster them into three groups. cluster_and_visualize also stores each
# tile's label under the 'cluster_Layer1_Missing' key.
print("="*70)
print("LAYER 1: MISSING DETECTION")
print("="*70)
X_layer1 = extract_center_intensity(tile_data)
labels_layer1 = cluster_and_visualize(tile_data, X_layer1, 'Layer1_Missing', n_clusters=3, n_samples=80)

In [None]:
# Heuristic: the least-populated Layer 1 cluster is taken to be the MISSING pillars.
cluster_counts = Counter(labels_layer1)
MISSING_CLUSTER = min(cluster_counts, key=cluster_counts.get)
print("Cluster sizes:")
for c in sorted(cluster_counts):
    print(f"  C{c}: {cluster_counts[c]}{' <-- MISSING' if c == MISSING_CLUSTER else ''}")

# Single pass: tag the MISSING tiles and hand everything else on to Layer 2.
missing_tiles, layer2_tiles = [], []
for t in tile_data:
    if t['cluster_Layer1_Missing'] == MISSING_CLUSTER:
        t['defect_type'] = 'Missing'
        missing_tiles.append(t)
    else:
        layer2_tiles.append(t)
print(f"\nMISSING: {len(missing_tiles)} | Remaining: {len(layer2_tiles)}")
show_extracted_defects(missing_tiles, 'LAYER 1: MISSING', n_samples=100)

---
# LAYER 2: Extract Collapsed/Dark (Darkness + Area)

In [None]:
def extract_darkness_area(tiles):
    """Build darkness and blob-area features for each tile.

    Args:
        tiles: Iterable of dicts whose 'image' entry is a grayscale image.

    Returns:
        Array with one row per tile and columns
        [mean intensity, fraction of pixels < 80, fraction of pixels < 50,
        area of the largest external contour of the inverted Otsu mask,
        that area divided by the pixel count].
    """
    def describe(pixels):
        n_pixels = pixels.size
        # Inverted Otsu threshold: pixels below the threshold become foreground.
        _, mask = cv2.threshold(pixels, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        blobs, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        largest_area = max((cv2.contourArea(blob) for blob in blobs), default=0)
        return [
            np.mean(pixels),
            np.sum(pixels < 80) / n_pixels,
            np.sum(pixels < 50) / n_pixels,
            largest_area,
            largest_area / n_pixels,
        ]

    return np.array([describe(tile['image']) for tile in tiles])

# Layer 2 driver: compute darkness/area features for the tiles that survived
# Layer 1 and cluster them into three groups. Each tile's label is stored
# under the 'cluster_Layer2_Darkness' key by cluster_and_visualize.
print("\n" + "="*70)
print("LAYER 2: DARKNESS + AREA")
print("="*70)
X_layer2 = extract_darkness_area(layer2_tiles)
labels_layer2 = cluster_and_visualize(layer2_tiles, X_layer2, 'Layer2_Darkness', n_clusters=3, n_samples=80)

In [None]:
# Heuristic: the least-populated Layer 2 cluster is labelled with DEFECT2_NAME.
cluster_counts2 = Counter(labels_layer2)
DEFECT2_CLUSTER = min(cluster_counts2, key=cluster_counts2.get)
DEFECT2_NAME = 'Collapsed'

print("Cluster sizes:")
for c in sorted(cluster_counts2):
    print(f"  C{c}: {cluster_counts2[c]}{f' <-- {DEFECT2_NAME}' if c == DEFECT2_CLUSTER else ''}")

# Single pass: tag the defect tiles and hand everything else on to Layer 3.
defect2_tiles, layer3_tiles = [], []
for t in layer2_tiles:
    if t['cluster_Layer2_Darkness'] == DEFECT2_CLUSTER:
        t['defect_type'] = DEFECT2_NAME
        defect2_tiles.append(t)
    else:
        layer3_tiles.append(t)
print(f"\n{DEFECT2_NAME}: {len(defect2_tiles)} | Remaining: {len(layer3_tiles)}")
show_extracted_defects(defect2_tiles, f'LAYER 2: {DEFECT2_NAME.upper()}', n_samples=100)

---
# LAYER 3: Optical Stitching Errors

Detect tiles where the microscope measurement cuts off at the meta-atom boundary.
These have a stark contrast line (horizontal or vertical) from image stitching.

In [None]:
def extract_stitching_features(tiles):
    """
    Detect stitching artifacts: sharp horizontal or vertical lines of contrast.

    Features (one row per tile, in this column order):
    - max_row_jump, max_col_jump, max_jump: largest step between adjacent
      entries of the row-mean / column-mean intensity profiles
    - vertical_split, horizontal_split, max_split: absolute mean difference
      between top/bottom and left/right halves
    - edge_center_diff: largest absolute difference between a 5-pixel border
      strip and the central half-size window
    - max_horiz_edge, max_vert_edge: strongest row-averaged / column-averaged
      absolute Sobel response
    - stitching_score: max_jump + max_split + edge_center_diff

    Side effect: the score is also stored on each tile as
    ``tile['stitching_score']``.
    """
    features = []
    border = 5  # width in pixels of the strips sampled along each tile edge

    for tile in tiles:
        img = tile['image'].astype(float)
        h, w = img.shape

        # 1. Row-wise and column-wise mean intensity profiles
        row_profile = np.mean(img, axis=1)  # Average intensity per row
        col_profile = np.mean(img, axis=0)  # Average intensity per column

        # 2. Gradient of profiles (detect sharp jumps)
        row_grad = np.abs(np.diff(row_profile))
        col_grad = np.abs(np.diff(col_profile))

        # A one-pixel-high/wide tile has an empty gradient; treat that as "no jump".
        max_row_jump = np.max(row_grad) if len(row_grad) > 0 else 0
        max_col_jump = np.max(col_grad) if len(col_grad) > 0 else 0
        max_jump = max(max_row_jump, max_col_jump)

        # 3. Half-image intensity differences
        top_half = np.mean(img[:h//2, :])
        bottom_half = np.mean(img[h//2:, :])
        left_half = np.mean(img[:, :w//2])
        right_half = np.mean(img[:, w//2:])

        vertical_split = np.abs(top_half - bottom_half)
        horizontal_split = np.abs(left_half - right_half)
        max_split = max(vertical_split, horizontal_split)

        # 4. Edge strip analysis (look at border regions)
        top_strip = np.mean(img[:border, :])
        bottom_strip = np.mean(img[-border:, :])
        left_strip = np.mean(img[:, :border])
        right_strip = np.mean(img[:, -border:])
        center_region = np.mean(img[h//4:3*h//4, w//4:3*w//4])

        edge_center_diff = max(
            np.abs(top_strip - center_region),
            np.abs(bottom_strip - center_region),
            np.abs(left_strip - center_region),
            np.abs(right_strip - center_region)
        )

        # 5. Sobel edge detection - look for strong horizontal/vertical edges.
        # Convert back to uint8 once and reuse it for both derivative directions.
        img_u8 = img.astype(np.uint8)
        sobel_h = cv2.Sobel(img_u8, cv2.CV_64F, 0, 1, ksize=3)  # Horizontal edges
        sobel_v = cv2.Sobel(img_u8, cv2.CV_64F, 1, 0, ksize=3)  # Vertical edges

        # Max edge response along rows/columns
        max_horiz_edge = np.max(np.abs(sobel_h).mean(axis=1))
        max_vert_edge = np.max(np.abs(sobel_v).mean(axis=0))

        # Store stitching score
        stitching_score = max_jump + max_split + edge_center_diff
        tile['stitching_score'] = stitching_score

        features.append([
            max_row_jump, max_col_jump, max_jump,
            vertical_split, horizontal_split, max_split,
            edge_center_diff,
            max_horiz_edge, max_vert_edge,
            stitching_score
        ])

    return np.array(features)

# Layer 3 driver: compute stitching features for the tiles that survived Layer 2.
print("\n" + "="*70)
print("LAYER 3: STITCHING ERROR DETECTION")
print("="*70)

X_stitching = extract_stitching_features(layer3_tiles)
print(f"Features shape: {X_stitching.shape}")
# The last feature column is the combined stitching score.
print(f"Stitching scores: min={X_stitching[:,-1].min():.1f}, max={X_stitching[:,-1].max():.1f}, mean={X_stitching[:,-1].mean():.1f}")

In [None]:
# Cluster the stitching features into three groups to find stitching errors;
# each tile's label is stored under the 'cluster_Layer3_Stitching' key.
labels_stitch = cluster_and_visualize(layer3_tiles, X_stitching, 'Layer3_Stitching', n_clusters=3, n_samples=80)

In [None]:
# Heuristic: the least-populated Layer 3 cluster is taken to be the stitching errors.
cluster_counts3 = Counter(labels_stitch)
STITCH_CLUSTER = min(cluster_counts3, key=cluster_counts3.get)

print("Cluster sizes:")
for c in sorted(cluster_counts3):
    print(f"  C{c}: {cluster_counts3[c]}{' <-- STITCHING' if c == STITCH_CLUSTER else ''}")

# Single pass: tag the stitching tiles and hand everything else on to Layer 4.
stitching_tiles, layer4_tiles = [], []
for t in layer3_tiles:
    if t['cluster_Layer3_Stitching'] == STITCH_CLUSTER:
        t['defect_type'] = 'Stitching'
        stitching_tiles.append(t)
    else:
        layer4_tiles.append(t)

print(f"\nSTITCHING errors: {len(stitching_tiles)} | Remaining: {len(layer4_tiles)}")

In [None]:
# Show a random gallery (up to 100) of the tiles tagged as stitching errors.
show_extracted_defects(stitching_tiles, 'LAYER 3: STITCHING ERRORS', n_samples=100)

---
# LAYER 4: Contextual / Relational Features

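Detect misshapen pillars by measuring how far each remaining tile deviates from its fabrication context: its own rotational symmetry, its grid neighbours, and the average tile.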

In [None]:
print(f"\nLAYER 4: Processing {len(layer4_tiles)} remaining tiles with CONTEXTUAL features...")

# Spatial index for neighbour lookup: (array, row, col) -> tile.
# Tiles sharing a key overwrite one another; the last one wins.
tile_index = {(t['array'], t['row'], t['col']): t for t in layer4_tiles}

## Feature 1: Rotation Symmetry

In [None]:
def compute_rotation_symmetry(tiles):
    """Measure how much each tile changes under 90-degree rotations.

    The tile is cropped to its top-left square and compared across all six
    pairs of its 0/90/180/270-degree rotations, using RMS pixel difference
    and SSIM.

    Side effect: stores the mean RMS difference as ``tile['rotation_asymmetry']``.

    Returns:
        Array with one row per tile and columns
        [mean RMS diff, max RMS diff, 1 - mean SSIM, 1 - min SSIM].
    """
    features = []
    for tile in tiles:
        img = tile['image'].astype(float)
        h, w = img.shape
        s = min(h, w)
        img_sq = img[:s, :s]  # rotations only line up on a square crop

        rotations = [img_sq, np.rot90(img_sq, 1), np.rot90(img_sq, 2), np.rot90(img_sq, 3)]

        l2_diffs, ssim_scores = [], []
        for i in range(4):
            for j in range(i+1, 4):
                l2_diffs.append(np.sqrt(np.mean((rotations[i] - rotations[j])**2)))
                try:
                    ssim_scores.append(ssim(rotations[i], rotations[j], data_range=255))
                except ValueError:
                    # SSIM rejects inputs it cannot process (e.g. a crop smaller
                    # than its window); score such pairs as identical, as before.
                    # Only ValueError is caught so interrupts and genuine bugs
                    # are no longer silently swallowed.
                    ssim_scores.append(1.0)

        tile['rotation_asymmetry'] = np.mean(l2_diffs)
        features.append([np.mean(l2_diffs), np.max(l2_diffs), 1 - np.mean(ssim_scores), 1 - np.min(ssim_scores)])
    return np.array(features)

# Rotation-symmetry features for the Layer 4 tiles; column 0 is the mean RMS
# difference between rotations.
print("Computing rotation symmetry...")
X_rotation = compute_rotation_symmetry(layer4_tiles)
print(f"  Rotation asymmetry range: {X_rotation[:,0].min():.2f} - {X_rotation[:,0].max():.2f}")

## Feature 2: Neighbor Deviation

In [None]:
def compute_base_features(img):
    """Return [mean, std, relative blob area, circularity] for one grayscale tile.

    The blob is the largest external contour of the inverted Otsu mask.
    Circularity is 4*pi*area / perimeter**2; it and the area are 0 when no
    contour is found (circularity is also 0 for a zero-length perimeter).
    """
    _, mask = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    blobs, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

    blob_area = 0
    circularity = 0
    if blobs:
        biggest = max(blobs, key=cv2.contourArea)
        blob_area = cv2.contourArea(biggest)
        perimeter = cv2.arcLength(biggest, True)
        if perimeter > 0:
            circularity = (4 * np.pi * blob_area) / (perimeter**2)

    return np.array([np.mean(img), np.std(img), blob_area / img.size, circularity])

def compute_neighbor_deviation(tiles, tile_index):
    """Compare each tile's base features with the mean of its grid neighbours.

    Neighbours are the up-to-eight tiles of the same array at row/col offsets
    of -1, 0 or +1 that are present in ``tiles``.

    Args:
        tiles: List of tile dicts with 'array', 'row', 'col' and 'image'.
        tile_index: Accepted for interface compatibility; not read here.

    Side effect: stores the deviation norm as ``tile['neighbor_deviation']``.

    Returns:
        Array with one row per tile: the four per-feature deviations followed
        by their Euclidean norm (all zeros when a tile has no neighbours).
    """
    offsets = [(dr, dc) for dr in (-1, 0, 1) for dc in (-1, 0, 1) if (dr, dc) != (0, 0)]

    per_tile = {}
    for t in tiles:
        per_tile[(t['array'], t['row'], t['col'])] = compute_base_features(t['image'])

    rows = []
    for t in tiles:
        arr, r, c = t['array'], t['row'], t['col']
        own = per_tile[(arr, r, c)]
        around = [
            per_tile[(arr, r + dr, c + dc)]
            for dr, dc in offsets
            if (arr, r + dr, c + dc) in per_tile
        ]

        if around:
            delta = own - np.mean(around, axis=0)
            magnitude = np.linalg.norm(delta)
        else:
            delta, magnitude = np.zeros(4), 0

        t['neighbor_deviation'] = magnitude
        rows.append(np.concatenate([delta, [magnitude]]))
    return np.array(rows)

# Neighbour-deviation features for the Layer 4 tiles; the last column is the
# norm of the deviation vector.
print("Computing neighbor deviation...")
X_neighbor = compute_neighbor_deviation(layer4_tiles, tile_index)
print(f"  Neighbor deviation range: {X_neighbor[:,-1].min():.2f} - {X_neighbor[:,-1].max():.2f}")

## Feature 3: Anisotropy

In [None]:
def compute_anisotropy(tiles):
    """Quantify the directional bias of each tile's intensity gradients.

    Sobel gradients are multiplied pairwise, Gaussian-smoothed and summed over
    the tile to form a 2x2 symmetric matrix (structure-tensor style); its two
    eigenvalues give the anisotropy (l1 - l2) / (l1 + l2).

    Side effect: stores the anisotropy as ``tile['anisotropy']``.

    Returns:
        Array with one row per tile and columns
        [anisotropy, anisotropy squared, |orientation|, l1 / l2].
    """
    rows = []
    for tile in tiles:
        pixels = tile['image'].astype(float)
        grad_x = cv2.Sobel(pixels, cv2.CV_64F, 1, 0, ksize=3)
        grad_y = cv2.Sobel(pixels, cv2.CV_64F, 0, 1, ksize=3)

        # Smoothed gradient products, each reduced to a single number per tile.
        sxx = np.sum(ndimage.gaussian_filter(grad_x * grad_x, sigma=2))
        syy = np.sum(ndimage.gaussian_filter(grad_y * grad_y, sigma=2))
        sxy = np.sum(ndimage.gaussian_filter(grad_x * grad_y, sigma=2))

        trace = sxx + syy
        det = sxx * syy - sxy**2

        # Defaults apply to tiles with no gradient energy (trace <= 0).
        aniso, coherence, lambda1, lambda2 = 0, 0, 0, 0
        if trace > 0:
            # Clamp at 0 so rounding error cannot feed a negative value to sqrt.
            root = np.sqrt(max(0, trace**2 - 4*det))
            lambda1 = (trace + root) / 2
            lambda2 = (trace - root) / 2
            aniso = (lambda1 - lambda2) / (lambda1 + lambda2 + 1e-10)
            coherence = aniso**2

        orientation = 0.5 * np.arctan2(2 * sxy, sxx - syy)
        tile['anisotropy'] = aniso
        rows.append([aniso, coherence, np.abs(orientation), lambda1/(lambda2+1e-10)])
    return np.array(rows)

# Anisotropy features for the Layer 4 tiles; column 0 is the anisotropy value.
print("Computing anisotropy...")
X_aniso = compute_anisotropy(layer4_tiles)
print(f"  Anisotropy range: {X_aniso[:,0].min():.3f} - {X_aniso[:,0].max():.3f}")

## Feature 4: Curvature Irregularity

In [None]:
def compute_curvature_irregularity(tiles):
    """Describe how irregular the outline of each tile's main blob is.

    The blob is the largest external contour of the inverted Otsu mask.
    Curvature is estimated along the contour points with finite differences.
    Tiles with no contour, or whose largest contour has fewer than 10 points,
    get an all-zero feature row.

    Side effect: stores the kurtosis-style statistic as
    ``tile['curvature_irregularity']``.

    Returns:
        Array with one row per tile and columns
        [kurtosis of |curvature|, max |curvature|, 95th percentile of
        |curvature|, sign changes per contour point, curvature variance].
    """
    min_points = 10
    features = []
    for tile in tiles:
        img = tile['image']
        _, binary = cv2.threshold(img, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
        contours, _ = cv2.findContours(binary, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

        # Pick the largest contour once instead of recomputing it per use.
        largest = max(contours, key=cv2.contourArea) if contours else None
        if largest is None or len(largest) < min_points:
            tile['curvature_irregularity'] = 0
            features.append([0, 0, 0, 0, 0])
            continue

        # Flatten the contour to an (n, 2) array of x, y points.
        cnt = largest.reshape(-1, 2)
        n = len(cnt)

        dx, dy = np.gradient(cnt[:, 0].astype(float)), np.gradient(cnt[:, 1].astype(float))
        ddx, ddy = np.gradient(dx), np.gradient(dy)
        denom = (dx**2 + dy**2)**1.5 + 1e-10  # epsilon guards against stationary points
        kappa = (dx * ddy - dy * ddx) / denom

        kappa_abs = np.abs(kappa)
        kurtosis = np.mean(kappa_abs**4) / (np.mean(kappa_abs**2)**2 + 1e-10) - 3
        max_curv, p95_curv = np.max(kappa_abs), np.percentile(kappa_abs, 95)
        zero_crossings = np.sum(np.diff(np.sign(kappa)) != 0) / n
        curv_var = np.var(kappa)

        tile['curvature_irregularity'] = kurtosis
        features.append([kurtosis, max_curv, p95_curv, zero_crossings, curv_var])
    return np.array(features)

# Curvature features for the Layer 4 tiles; column 0 is the kurtosis statistic.
print("Computing curvature irregularity...")
X_curvature = compute_curvature_irregularity(layer4_tiles)
print(f"  Kurtosis range: {X_curvature[:,0].min():.2f} - {X_curvature[:,0].max():.2f}")

## Feature 5: Radial Deviation

In [None]:
def compute_radial_profile(img, n_bins=8):
    """Return the mean intensity in ``n_bins`` concentric rings around the centre.

    Rings are equal-width in radius, measured from pixel (h // 2, w // 2) out
    to the distance of the top-left corner.  Every ring is half-open
    ``[lower, upper)`` except the outermost one, which is closed so the pixel
    lying exactly at the maximum radius is counted rather than dropped.

    Args:
        img: 2-D array.
        n_bins: Number of rings.

    Returns:
        Array of length ``n_bins``; a ring containing no pixels yields 0.
    """
    h, w = img.shape
    cy, cx = h // 2, w // 2
    y, x = np.ogrid[:h, :w]
    r = np.sqrt((x - cx)**2 + (y - cy)**2)
    max_r = np.sqrt(cx**2 + cy**2)
    bin_edges = np.linspace(0, max_r, n_bins + 1)
    profile = []
    for i in range(n_bins):
        lower, upper = bin_edges[i], bin_edges[i + 1]
        if i == n_bins - 1:
            # Outermost ring includes its upper edge (the farthest corner pixel).
            mask = (r >= lower) & (r <= upper)
        else:
            mask = (r >= lower) & (r < upper)
        profile.append(np.mean(img[mask]) if mask.any() else 0)
    return np.array(profile)

def compute_radial_deviation(tiles):
    """Measure how far each tile's radial profile is from the average profile.

    Side effect: stores the deviation norm as ``tile['radial_deviation']``.

    Returns:
        Array with one row per tile and columns
        [norm of the deviation, largest absolute deviation,
        absolute deviation of the innermost ring, absolute deviation of the
        outermost ring].
    """
    stacked = np.array([compute_radial_profile(t['image'].astype(float)) for t in tiles])
    # Subtract the average profile from every tile's profile in one step.
    residuals = stacked - np.mean(stacked, axis=0)

    rows = []
    for t, resid in zip(tiles, residuals):
        magnitude = np.linalg.norm(resid)
        t['radial_deviation'] = magnitude
        rows.append([magnitude, np.max(np.abs(resid)), np.abs(resid[0]), np.abs(resid[-1])])
    return np.array(rows)

# Radial-deviation features for the Layer 4 tiles; column 0 is the deviation norm.
print("Computing radial deviation...")
X_radial_dev = compute_radial_deviation(layer4_tiles)
print(f"  Radial deviation range: {X_radial_dev[:,0].min():.2f} - {X_radial_dev[:,0].max():.2f}")

## Combine + Compute Inconsistency Score

In [None]:
# Stack the five contextual feature groups side by side (one row per Layer 4 tile).
X_contextual = np.hstack([X_rotation, X_neighbor, X_aniso, X_curvature, X_radial_dev])
print(f"\nCombined contextual features: {X_contextual.shape}")

# Replace non-finite values with 0, then standardise every column.
X_contextual_clean = np.nan_to_num(X_contextual, nan=0, posinf=0, neginf=0)
scaler = StandardScaler()
X_norm = scaler.fit_transform(X_contextual_clean)

print("Computing LOF scores...")
lof = LocalOutlierFactor(n_neighbors=20, contamination='auto')
lof_labels = lof.fit_predict(X_norm)
# Negate so that a larger score means a more inconsistent tile.
lof_scores = -lof.negative_outlier_factor_

for t, score, label in zip(layer4_tiles, lof_scores, lof_labels):
    t['inconsistency_score'] = score
    t['is_outlier'] = label == -1

n_outliers = np.sum(lof_labels == -1)
print(f"\nLOF outliers: {n_outliers} ({100*n_outliers/len(layer4_tiles):.1f}%)")

In [None]:
# Visualize the Layer 4 tiles in a 2-D t-SNE embedding, coloured by LOF result.
print("Computing t-SNE...")
pca = PCA(n_components=min(10, X_norm.shape[1]))
X_pca = pca.fit_transform(X_norm)
tsne_kwargs = dict(n_components=2, perplexity=30, random_state=42)
try:
    # scikit-learn >= 1.5 names the iteration budget `max_iter`.
    tsne = TSNE(max_iter=500, **tsne_kwargs)
except TypeError:
    # Older scikit-learn only knows the former name `n_iter`.
    tsne = TSNE(n_iter=500, **tsne_kwargs)
X_tsne = tsne.fit_transform(X_pca[:, :5])

fig, axes = plt.subplots(1, 3, figsize=(15, 4))
# Panel 1: embedding coloured by the continuous inconsistency score.
sc = axes[0].scatter(X_tsne[:, 0], X_tsne[:, 1], c=lof_scores, cmap='RdYlGn_r', alpha=0.6, s=15)
plt.colorbar(sc, ax=axes[0], label='Inconsistency')
axes[0].set_title('t-SNE by Inconsistency')

# Panel 2: embedding split into LOF inliers (label 1) and outliers (label -1).
inliers, outliers = lof_labels == 1, lof_labels == -1
axes[1].scatter(X_tsne[inliers, 0], X_tsne[inliers, 1], c='green', alpha=0.4, s=10, label=f'Inliers ({inliers.sum()})')
axes[1].scatter(X_tsne[outliers, 0], X_tsne[outliers, 1], c='red', alpha=0.8, s=20, label=f'Outliers ({outliers.sum()})')
axes[1].legend(); axes[1].set_title('LOF Detection')

# Panel 3: score histogram with the 90th percentile marked.
axes[2].hist(lof_scores, bins=50, color='steelblue', edgecolor='black', alpha=0.7)
axes[2].axvline(np.percentile(lof_scores, 90), color='red', linestyle='--', label='90th %')
axes[2].set_xlabel('Score'); axes[2].legend(); axes[2].set_title('Score Distribution')

plt.suptitle('Layer 4: Contextual Inconsistency', fontsize=14, fontweight='bold')
plt.tight_layout(); plt.show()

In [None]:
# Gallery of the tiles with the highest inconsistency scores (worst first).
sorted_tiles = sorted(layer4_tiles, key=lambda t: t['inconsistency_score'], reverse=True)

fig, axes = plt.subplots(10, 10, figsize=(10, 10))
# zip stops at the shorter input, so spare cells stay empty when there are < 100 tiles.
for ax, ranked in zip(axes.flat, sorted_tiles):
    ax.imshow(ranked['image'], cmap='gray')
    ax.set_title(f"{ranked['inconsistency_score']:.1f}", fontsize=7)
for ax in axes.flat:
    ax.axis('off')
plt.suptitle('Top 100 Most Inconsistent', fontsize=14, fontweight='bold', color='red')
plt.tight_layout()
plt.show()

In [None]:
# Gallery of the tiles with the lowest inconsistency scores (good reference),
# walking the descending-sorted list from its end.
fig, axes = plt.subplots(10, 10, figsize=(10, 10))
for ax, ranked in zip(axes.flat, reversed(sorted_tiles)):
    ax.imshow(ranked['image'], cmap='gray')
    ax.set_title(f"{ranked['inconsistency_score']:.1f}", fontsize=7)
for ax in axes.flat:
    ax.axis('off')
plt.suptitle('Top 100 Most Consistent (Good)', fontsize=14, fontweight='bold', color='green')
plt.tight_layout()
plt.show()

---
# Summary

In [None]:
# Final report: how many tiles each layer extracted, plus the LOF outlier
# count among the tiles that reached Layer 4.
print("\n" + "="*70)
print("LAYERED CLUSTERING SUMMARY")
print("="*70)
print(f"Layer 1 - MISSING: {len(missing_tiles)}")
print(f"Layer 2 - {DEFECT2_NAME}: {len(defect2_tiles)}")
print(f"Layer 3 - STITCHING: {len(stitching_tiles)}")
print(f"Layer 4 - Remaining: {len(layer4_tiles)} (LOF outliers: {n_outliers})")