In [1]:
%load_ext autoreload
%autoreload 2
import notebook_setup
from src.config import INTERIM_DATA_DIR, PROCESSED_DATA_DIR, RAW_DATA_DIR, EXTERNAL_DATA_DIR
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
os.environ["OMP_NUM_THREADS"] = "4"
os.environ["MKL_NUM_THREADS"] = "4"

[32m2025-09-16 10:44:50.113[0m | [1mINFO    [0m | [36msrc.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: D:\workspace\projects\freelance\Fusion3DNet[0m


Project root added to path: d:\workspace\projects\freelance\Fusion3DNet


In [2]:
BREPNET_NPZ_DIR = INTERIM_DATA_DIR / "features" / "brepnet"
DINO_NPZ_DIR = INTERIM_DATA_DIR / "features" / "dino"
STEP_DIR = PROCESSED_DATA_DIR / "dataset_129" / "stp"
STATS_BREPNET = INTERIM_DATA_DIR / 'features' / "brepnet_feature_standard.json"

In [3]:
from src.data_loader.custom_dataset import CustomDataset

dataset = CustomDataset(
    step_data=STEP_DIR,
    brepnet_data=BREPNET_NPZ_DIR,
    dino_data=DINO_NPZ_DIR,
    compute_stats=True,
    apply_standardization=True,
)

print(f"Dataset length: {len(dataset)}")


Loading dataset: 100%|██████████| 129/129 [00:03<00:00, 36.15it/s]

Dataset length: 129





In [58]:
query_id = 37

dataset_item = dataset[query_id]
print(f"Dataset item keys: {list(dataset_item.keys())}")
print(f'Model ID: {dataset_item["model_id"]}')
print(f'STEP file path: {dataset_item["step_path"]}')

print(f"BRepNet features shape: {dataset_item['brepnet_features'].shape}")
print(f"DINO features shape: {dataset_item['dino_features'].shape}")


Dataset item keys: ['model_id', 'step_path', 'brepnet_features', 'dino_features']
Model ID: 44. Extractor Pin-04
STEP file path: D:\workspace\projects\freelance\Fusion3DNet\data\processed\dataset_129\stp\44. Extractor Pin-04.prt.stp
BRepNet features shape: (7, 17)
DINO features shape: (8, 384)


In [59]:
from src.visualization.jupyter_segmentation_viewer import JupyterSegmentationViewer
from pathlib import Path

viewer = JupyterSegmentationViewer(Path(dataset_item["step_path"]))

assert dataset_item['brepnet_features'].shape[0] == len(viewer.entity_mapper.face_map), "Embedding size doesn't match solid"

all_face_indices = list(range(dataset_item['brepnet_features'].shape[0]))
viewer.selection_list.extend(all_face_indices)
selected_faces_features = dataset_item['brepnet_features'][viewer.selection_list]

assert len(viewer.selection_list) > 0, "Please select some faces on the solid"

# viewer.view_solid()

print(f"Selected faces indices: {selected_faces_features.shape}")
print(f"Selected faces features: {dataset_item['brepnet_features'][viewer.selection_list].shape}")


Selected faces indices: (7, 17)
Selected faces features: (7, 17)


In [60]:
import numpy as np

def cosine_sim_matrix(A, B):
    """
    A: (N1, D)
    B: (N2, D)
    Возвращает матрицу косинусных сходств (N1, N2)
    """
    A_norm = A / (np.linalg.norm(A, axis=1, keepdims=True) + 1e-8)
    B_norm = B / (np.linalg.norm(B, axis=1, keepdims=True) + 1e-8)
    return np.dot(A_norm, B_norm.T)

def top_k_brepnet_euclidean_similarities(query_faces, dataset, topk=5):
    """
    Возвращает топ-k наиболее похожих моделей из датасета по BRepNet признакам (евклидово расстояние).
    :param query_faces: np.ndarray, shape (N_query_faces, feature_dim)
    :param dataset: список или объект с .__getitem__ и .__len__, где каждый элемент содержит 'brepnet_features' и 'model_id'
    :param topk: int, сколько топовых моделей возвращать
    :return: interval, percent_similarities, indices_of_smallest, model_ids
    """
    sum_min_dists_for_each_solid = []
    min_dists_for_each_face = []

    for item in dataset:
        min_dists = []
        min_dists_for_each_face_in_this_solid = []
        for query_face_feature in query_faces:
            vec = item['brepnet_features'] - query_face_feature  # (N_faces_in_item, feature_dim)
            dist = np.linalg.norm(vec, axis=1)  # (N_faces_in_item,)
            min_dists_for_each_face_in_this_solid.append(dist)
            min_dists.append(np.min(dist))
        sum_dists = np.sum(min_dists)
        sum_min_dists_for_each_solid.append(sum_dists)
        min_dists_for_each_face_in_this_solid = np.min(np.stack(min_dists_for_each_face_in_this_solid), axis=0)
        min_dists_for_each_face.append(min_dists_for_each_face_in_this_solid)

    sum_min_dists_for_each_solid = np.array(sum_min_dists_for_each_solid)
    indices_of_smallest = np.argpartition(sum_min_dists_for_each_solid, kth=range(topk))[:topk]

    # Для отображения диапазона расстояний по топ-k
    all_dists_top_k = [min_dists_for_each_face[i] for i in indices_of_smallest]
    all_dists_top_k = np.concatenate(all_dists_top_k)
    interval = [all_dists_top_k.min(), all_dists_top_k.max()]

    min_dist = sum_min_dists_for_each_solid.min()
    max_dist = sum_min_dists_for_each_solid.max()
    percent_similarities = 1 - (sum_min_dists_for_each_solid - min_dist) / (max_dist - min_dist + 1e-8)
    percent_similarities = percent_similarities * 100  # в проценты

    model_ids = [dataset.data[index]["model_id"] for index in indices_of_smallest]

    return interval, percent_similarities, indices_of_smallest, model_ids, min_dists_for_each_face

def top_k_brepnet_cosine_similarities(query_faces, dataset, topk=5):
    """
    Возвращает топ-k наиболее похожих моделей из датасета по BRepNet признакам (косинусное сходство).
    :param query_faces: np.ndarray, shape (N_query_faces, feature_dim)
    :param dataset: список или объект с .__getitem__ и .__len__, где каждый элемент содержит 'brepnet_features' и 'model_id'
    :param topk: int, сколько топовых моделей возвращать
    :return: interval, percent_similarities, indices_of_largest, model_ids
    """
    mean_max_similarities = []
    all_max_similarities = []

    for item in dataset:
        sim_matrix = cosine_sim_matrix(query_faces, item['brepnet_features'])  # (N_query, N_item)
        max_similarities = sim_matrix.max(axis=1)  # для каждой query-грани — максимум по solid
        mean_max_similarities.append(max_similarities.mean())
        all_max_similarities.append(max_similarities)

    mean_max_similarities = np.array(mean_max_similarities)
    indices_of_largest = np.argpartition(-mean_max_similarities, kth=range(topk))[:topk]

    # Для отображения диапазона по топ-k
    all_max_similarities_top_k = [all_max_similarities[i] for i in indices_of_largest]
    all_max_similarities_top_k = np.concatenate(all_max_similarities_top_k)
    interval = [all_max_similarities_top_k.min(), all_max_similarities_top_k.max()]

    percent_similarities = mean_max_similarities * 100  # в проценты
    model_ids = [dataset.data[index]["model_id"] for index in indices_of_largest]

    return interval, percent_similarities, indices_of_largest, model_ids

def top_k_dino_similarities(query_dino, dataset, topk=5):
    """
    Возвращает топ-k наиболее похожих моделей из датасета по DINO признакам.
    :param query_dino: np.ndarray, shape (8, 384) — признаки запроса
    :param dataset: объект с .__getitem__ и .__len__, где каждый элемент содержит 'dino_features' и 'model_id'
    :param topk: int, сколько топовых моделей возвращать
    :return: similarities, indices_of_largest, model_ids
    """
    similarities = []
    for item in dataset:
        # Косинусное сходство между всеми проекциями (8x8)
        sim_matrix = cosine_sim_matrix(query_dino, item['dino_features'])  # (8, 8)
        sim_score = sim_matrix.max(axis=1).mean()
        similarities.append(sim_score)
    similarities = np.array(similarities)
    indices_of_largest = np.argpartition(-similarities, kth=range(topk))[:topk]
    model_ids = [dataset.data[index]["model_id"] for index in indices_of_largest]
    return similarities, indices_of_largest, model_ids

In [61]:
similarities, indices_of_largest, model_ids = top_k_dino_similarities(
    dataset_item['dino_features'], dataset, topk=20
)
for i, (idx, sim, mid) in enumerate(zip(indices_of_largest, similarities[indices_of_largest], model_ids)):
    print(f"{i}. {mid} | similarity: {sim*100:.1f}%")

0. 44. Extractor Pin-04 | similarity: 100.0%
1. 44. Extractor Pin-05 | similarity: 96.1%
2. 44. Extractor Pin-06 | similarity: 95.7%
3. 44. Extractor Pin-07 | similarity: 95.2%
4. 44. Extractor Pin-02 | similarity: 89.9%
5. 44. Extractor Pin-03 | similarity: 89.9%
6. 44. Extractor Pin | similarity: 89.8%
7. 44. Extractor Pin-01 | similarity: 86.7%
8. 44. Extractor Pin-10 | similarity: 86.5%
9. 44. Extractor Pin-08 | similarity: 86.0%
10. 42. Silencer Fix-09 | similarity: 85.6%
11. 44. Extractor Pin-09 | similarity: 85.2%
12. 42. Silencer Fix-06 | similarity: 83.7%
13. 42. Silencer Fix-07 | similarity: 83.5%
14. 42. Silencer Fix-04 | similarity: 83.2%
15. Кожух 10 | similarity: 83.2%
16. 42. Silencer Fix-05 | similarity: 83.1%
17. Кожух 6 | similarity: 82.9%
18. 42. Silencer Fix-08 | similarity: 82.8%
19. 42. Silencer Fix-10 | similarity: 82.7%


In [None]:
import numpy as np
from functools import lru_cache

def brep_scores_all_for_query(query_idx, dataset):
    """
    Возвращает:
      - sim: np.ndarray, shape (N,) — нормализованный (min->0, max->1) скор сходства BRepNet по всем объектам
      - per_item_face_mins: list[np.ndarray], длина N, для каждого объекта — массив (N_faces_in_item,)
        с минимальными расстояниями от любой query-граня к каждой face кандидата (для теплокарты).
    """
    q_item = dataset[query_idx]
    q_faces = q_item['brepnet_features']  # (Nq, D)

    sum_min_dists_for_each_solid = []
    per_item_face_mins = []  # для визуализации: min по query-граням для каждой face кандидата

    for item in dataset:
        cand_faces = item['brepnet_features']  # (Ni, D)
        # dists: (Nq, Ni)
        dists = np.linalg.norm(cand_faces[None, :, :] - q_faces[:, None, :], axis=2)
        # Для интегрального расстояния: мин по кандидат-фейсам для каждой query-грани и сумма
        min_over_item_for_each_query_face = dists.min(axis=1)  # (Nq,)
        sum_min = min_over_item_for_each_query_face.sum()
        sum_min_dists_for_each_solid.append(sum_min)

        # Для теплокарты: мин по query-граням для каждой face кандидата
        min_over_query_for_each_item_face = dists.min(axis=0)  # (Ni,)
        per_item_face_mins.append(min_over_query_for_each_item_face)

    sum_min_dists_for_each_solid = np.array(sum_min_dists_for_each_solid)
    # Преобразуем расстояние в сходство [0, 1]
    mn, mx = sum_min_dists_for_each_solid.min(), sum_min_dists_for_each_solid.max()
    sim = 1.0 - (sum_min_dists_for_each_solid - mn) / (mx - mn + 1e-8)
    return sim, per_item_face_mins


def dino_scores_all_for_query(query_idx, dataset):
    """
    Возвращает:
      - sims: np.ndarray, shape (N,) — средний максимум косинусного сходства по 8x8 проекциям
    """
    q_item = dataset[query_idx]
    q_dino = q_item['dino_features']  # (8, D)
    sims = []
    for item in dataset:
        sim_matrix = cosine_sim_matrix(q_dino, item['dino_features'])  # (8, 8)
        sims.append(sim_matrix.max(axis=1).mean())
    return np.array(sims)


def normalize_scores(s, method='rank'):
    if method == 'minmax':
        mn, mx = s.min(), s.max()
        return (s - mn) / (mx - mn + 1e-8)
    if method == 'z':
        m, sd = s.mean(), s.std() + 1e-8
        z = (s - m) / sd
        return 1.0 / (1.0 + np.exp(-z))  # в (0,1)
    if method == 'rank':
        ranks = s.argsort().argsort().astype(float)
        return ranks / (len(s) - 1 + 1e-8)
    return s


# Кэш, чтобы не пересчитывать для одного и того же query
_brep_cache = {}
_dino_cache = {}

def get_brep_scores(query_idx, dataset):
    if query_idx not in _brep_cache:
        _brep_cache[query_idx] = brep_scores_all_for_query(query_idx, dataset)
    return _brep_cache[query_idx]

def get_dino_scores(query_idx, dataset):
    if query_idx not in _dino_cache:
        _dino_cache[query_idx] = dino_scores_all_for_query(query_idx, dataset)
    return _dino_cache[query_idx]


def fuse_and_rank(query_idx, dataset, alpha=0.5, norm='rank', exclude_self=True):
    """
    Возвращает:
      - order: индексы кандидатов по убыванию скоров
      - fused: массив фьюжн-скоров
      - sb, sd: нормированные скоры отдельных моделей
      - per_item_face_mins: для теплокарты BRep по top-1 и т.д.
    """
    s_brep_raw, per_item_face_mins = get_brep_scores(query_idx, dataset)
    s_dino_raw = get_dino_scores(query_idx, dataset)

    sb = normalize_scores(s_brep_raw, norm)
    sd = normalize_scores(s_dino_raw, norm)

    fused = alpha * sb + (1 - alpha) * sd

    if exclude_self:
        fused[query_idx] = -np.inf

    order = np.argsort(-fused)
    return order, fused, sb, sd, per_item_face_mins

In [22]:
query_idx = 56
order, fused, sb, sd, per_item_face_mins = fuse_and_rank(query_idx, dataset, alpha=0.8, norm='rank', exclude_self=False)

topk = 20
for i, idx in enumerate(order[:topk]):
    mid = dataset.data[idx]["model_id"]
    print(f"{i}. fused={fused[idx]:.3f} brep={sb[idx]:.3f} dino={sd[idx]:.3f} | {mid}")

# Визуализация топ-1
top1_idx = order[2]
top1_item = dataset[top1_idx]
print(f"Top-1 model ID: {top1_item['model_id']}")

viewer_draw = JupyterSegmentationViewer(top1_item["step_path"])
heat = per_item_face_mins[top1_idx]  # длина = числу граней топ-1 объекта
viewer_draw.display_faces_with_heatmap(heat, [float(heat.min()), float(heat.max())])

0. fused=1.000 brep=1.000 dino=1.000 | Зацеп трубки направляющий 10
1. fused=0.923 brep=0.906 dino=0.992 | Зацеп трубки направляющий 9
2. fused=0.898 brep=0.930 dino=0.773 | Камера газовая 9
3. fused=0.847 brep=0.875 dino=0.734 | Защелка 8
4. fused=0.844 brep=0.867 dino=0.750 | Камера газовая 10
5. fused=0.841 brep=0.961 dino=0.359 | Колодка прицельная 2
6. fused=0.830 brep=0.984 dino=0.211 | Камера газовая 2
7. fused=0.822 brep=0.953 dino=0.297 | Колодка прицельная 3
8. fused=0.820 brep=0.977 dino=0.195 | Камера газовая
9. fused=0.811 brep=0.945 dino=0.273 | Колодка прицельная 1
10. fused=0.808 brep=0.992 dino=0.070 | Камера газовая 3
11. fused=0.806 brep=0.891 dino=0.469 | Колодка прицельная 4
12. fused=0.802 brep=0.836 dino=0.664 | Защелка
13. fused=0.797 brep=0.844 dino=0.609 | Защелка 9
14. fused=0.795 brep=0.820 dino=0.695 | Защелка 7
15. fused=0.794 brep=0.828 dino=0.656 | Защелка 1
16. fused=0.794 brep=0.898 dino=0.375 | Колодка прицельная 5
17. fused=0.791 brep=0.937 dino=0.20

HBox(children=(VBox(children=(HBox(children=(Checkbox(value=True, description='Axes', layout=Layout(height='au…

TraitError: The 'rotation' trait of a GridHelper instance contains an Enum of an Euler which expected any of ['XYZ', 'YZX', 'ZXY', 'XZY', 'YXZ', 'ZYX'], not the str 'xyz'.

In [167]:
dataset[0]['brepnet_features'].shape

(7, 17)

In [159]:
import numpy as np
from sklearn.cluster import KMeans

# -------- Кодбук и гистограммы (глобальная форма) --------
def sample_brep_faces(dataset, per_solid=400, seed=42):
    rng = np.random.default_rng(seed)
    samples = []
    for i in range(len(dataset)):
        X = dataset[i]['brepnet_features']  # (Ni, D=17)
        if len(X) > per_solid:
            idx = rng.choice(len(X), size=per_solid, replace=False)
            X = X[idx]
        samples.append(X)
    return np.vstack(samples)

def train_codebook(dataset, n_clusters=64, per_solid=400, seed=42):
    X = sample_brep_faces(dataset, per_solid=per_solid, seed=seed)
    km = KMeans(n_clusters=n_clusters, random_state=seed, n_init='auto')
    km.fit(X)
    return km

def brep_histogram_for_idx(dataset, idx, kmeans, area=None, l2_normalize=True):
    X = dataset[idx]['brepnet_features']
    labels = kmeans.predict(X)
    K = kmeans.n_clusters
    h = np.zeros(K, dtype=np.float32)
    if area is None:
        w = np.ones(len(labels), dtype=np.float32)
    else:
        w = np.asarray(area, dtype=np.float32)
        if w.shape[0] != len(labels):
            w = np.ones(len(labels), dtype=np.float32)
    np.add.at(h, labels, w)
    h = h / (h.sum() + 1e-8)
    if l2_normalize:
        h = h / (np.linalg.norm(h) + 1e-8)
    return h

def precompute_histograms(dataset, kmeans):
    H = [brep_histogram_for_idx(dataset, i, kmeans, area=None, l2_normalize=True)
         for i in range(len(dataset))]
    return np.stack(H, axis=0)  # (N, K)

def coarse_rank_by_hist(query_idx, H, topM=200):
    q = H[query_idx]
    sims = H @ q  # L2-нормированные гистограммы → косинус
    order = np.argsort(-sims)[:topM]
    return order, sims

# -------- Точный скор (симметричный soft-Chamfer по граням) --------
def l2_normalize_rows(X):
    return X / (np.linalg.norm(X, axis=1, keepdims=True) + 1e-8)

def pairwise_dist(A, B, metric='cosine'):
    if metric == 'cosine':
        A_ = l2_normalize_rows(A)
        B_ = l2_normalize_rows(B)
        S = A_ @ B_.T
        return 1.0 - S
    # euclidean
    aa = (A*A).sum(1, keepdims=True)
    bb = (B*B).sum(1, keepdims=True).T
    d2 = aa + bb - 2*A.dot(B.T)
    d2 = np.maximum(d2, 0.0)
    return np.sqrt(d2 + 1e-8)

def softmin_values(D, axis=1, tau=0.5, top_p=0.8):
    D = np.asarray(D, dtype=np.float32)
    # softmin
    if tau is None or tau <= 0:
        v = D.min(axis=axis)
    else:
        exps = np.exp(-D / max(tau, 1e-6))
        w = exps / (exps.sum(axis=axis, keepdims=True) + 1e-8)
        v = (w * D).sum(axis=axis)
    # усечённое усреднение лучших p
    if top_p is not None and 0 < top_p < 1:
        if axis == 1:
            m = D.shape[1]
            k = max(1, int(np.ceil(m * top_p)))
            vals = []
            for i in range(D.shape[0]):
                row = D[i]
                idx = np.argpartition(row, k-1)[:k]
                vals.append(row[idx].mean())
            v = np.minimum(v, np.array(vals, dtype=np.float32))
        else:
            n = D.shape[0]
            k = max(1, int(np.ceil(n * top_p)))
            vals = []
            for j in range(D.shape[1]):
                col = D[:, j]
                idx = np.argpartition(col, k-1)[:k]
                vals.append(col[idx].mean())
            v = np.minimum(v, np.array(vals, dtype=np.float32))
    return v

def symmetric_soft_chamfer(q_faces, c_faces, metric='cosine', tau=0.4, top_p=0.4):
    D = pairwise_dist(q_faces, c_faces, metric=metric)
    q_to_c = softmin_values(D, axis=1, tau=tau, top_p=top_p).mean()
    c_to_q = softmin_values(D, axis=0, tau=tau, top_p=top_p).mean()
    dist = 0.5 * (q_to_c + c_to_q)
    sim = 1.0 / (1.0 + dist)
    return sim, dist

def rerank_fine_brep(query_idx, dataset, coarse_order, metric='cosine', tau=0.4, top_p=0.4):
    q_faces = dataset[query_idx]['brepnet_features']
    scores = []
    for idx in coarse_order:
        c_faces = dataset[idx]['brepnet_features']
        sim, _ = symmetric_soft_chamfer(q_faces, c_faces, metric=metric, tau=tau, top_p=top_p)
        scores.append(sim)
    scores = np.array(scores, dtype=np.float32)
    reranked = np.array(coarse_order)[np.argsort(-scores)]
    return reranked, scores

def idf_weights(H):
    N, K = H.shape
    df = (H > 0).sum(axis=0)
    idf = np.log((N + 1) / (df + 1)) + 1.0
    return idf.astype(np.float32)

def tfidf_normalize(H, idf=None, l2=True):
    if idf is None:
        idf = idf_weights(H)
    W = H * idf[None, :]
    if l2:
        W = W / (np.linalg.norm(W, axis=1, keepdims=True) + 1e-8)
    return W, idf

def coarse_rank_by_hist(query_idx, H, topM=200, use_tfidf=True, idf=None):
    if use_tfidf:
        Hn, idf = tfidf_normalize(H, idf)
    else:
        Hn, idf = H, None
    q = Hn[query_idx]
    sims = Hn @ q  # косинус с TF‑IDF
    order = np.argsort(-sims)[:topM]
    return order, sims, idf

# -------- Shape-first переупорядочивание с гарантийным минимумом on-shape --------
def shape_first_reorder_strict(query_idx, dataset, H, sims_hist, coarse_order, fine_scores,
                               T=4, shape_thr=None, shape_thr_q=0.80, g_thr=0.60,
                               min_onshape=None,  # None → не добирать насильно
                               lambda_size=0.0,   # можно включить позже
                               scale_on=1.0, scale_off=0.10):
    def top_shape_clusters(hq, T=3):
        T = max(1, int(T))
        return np.argsort(-hq)[:T]

    def shape_score(h_cand, topc):
        return float(h_cand[topc].sum())

    def bbox_diag_from_item(raw_item):
        try:
            if 'bbox_diag' in raw_item:
                val = float(raw_item['bbox_diag'])
                if val > 0:
                    return val
        except Exception:
            pass
        return None

    def size_sim(size_q, size_c, lam=0.25):
        if not size_q or not size_c or size_q <= 0 or size_c <= 0:
            return 1.0
        r = abs(np.log((size_c + 1e-8) / (size_q + 1e-8)))
        return float(np.exp(-lam * r))

    hq = H[query_idx]
    topc = top_shape_clusters(hq, T=T)
    size_q = bbox_diag_from_item(dataset.data[query_idx])

    # shape и global для кандидатов
    sc_vals = {idx: shape_score(H[idx], topc) for idx in coarse_order}
    g_vals = {idx: float(sims_hist[idx]) for idx in coarse_order}

    # автопорог по квантили среди кандидатов, прошедших global g_thr
    pool = [sc_vals[i] for i in coarse_order if g_vals[i] >= g_thr]
    if not pool:
        # если никто не прошёл g_thr, ослабим g_thr до медианы
        g_thr_dyn = float(np.median([g_vals[i] for i in coarse_order]))
        pool = [sc_vals[i] for i in coarse_order if g_vals[i] >= g_thr_dyn]
        g_thr_use = g_thr_dyn
    else:
        g_thr_use = g_thr
    if shape_thr is None:
        q = np.clip(shape_thr_q, 0.0, 1.0)
        shape_thr = float(np.quantile(np.array(pool, dtype=np.float32), q))

    # on-shape: оба условия — global >= g_thr_use и shape_score >= shape_thr
    on_shape = [idx for idx in coarse_order if (g_vals[idx] >= g_thr_use and sc_vals[idx] >= shape_thr)]

    # НИКОГО не добираем насильно вне порога (min_onshape=None)
    if isinstance(min_onshape, int) and len(on_shape) < min_onshape:
        # можно мягко опустить shape_thr, но НЕ g_thr_use
        needed = min_onshape - len(on_shape)
        cand = [j for j in coarse_order if (g_vals[j] >= g_thr_use and j not in on_shape)]
        cand.sort(key=lambda j: sc_vals[j], reverse=True)
        on_shape += cand[:max(0, needed)]
    on_shape_set = set(on_shape)

    rows = []
    for idx, s in zip(coarse_order, fine_scores):
        sz = 1.0
        if lambda_size and lambda_size > 0:
            size_c = bbox_diag_from_item(dataset.data[idx])
            sz = size_sim(size_q, size_c, lam=lambda_size)
        final = (scale_on * s * sz) if idx in on_shape_set else (scale_off * s * sz)
        rows.append((idx, final, s, sc_vals[idx], g_vals[idx], idx in on_shape_set))

    # Сначала все on-shape, внутри — по final; затем off-shape по final
    rows.sort(key=lambda x: (x[5], x[1]), reverse=True)

    order = np.array([r[0] for r in rows], dtype=int)
    final_scores = np.array([r[1] for r in rows], dtype=np.float32)
    debug = [{
        "idx": int(r[0]),
        "final": float(r[1]),
        "fine": float(r[2]),
        "shape_score": float(r[3]),
        "global_sim": float(r[4]),
        "on_shape": bool(r[5]),
    } for r in rows]
    return order, final_scores, debug

In [169]:
# ==== Запуск shape-first → detail (BRep-only) ====
query_idx = 37  # выберите нужный индекс

# 1) Кодбук + гистограммы
kmeans = train_codebook(dataset, n_clusters=128, per_solid=200, seed=42)
H = precompute_histograms(dataset, kmeans)

# 2) Грубый отбор по форме
topM = 300
coarse, sims_hist, idf = coarse_rank_by_hist(query_idx, H, topM=topM, use_tfidf=True)

# 3) Точный скор по граней
fine_order, fine_scores = rerank_fine_brep(query_idx, dataset, coarse, metric='cosine', tau=0.6, top_p=0.75)
fine_map = {idx: fs for idx, fs in zip(coarse, fine_scores)}

# 4) Shape-first переупорядочивание (гарантируем минимум on-shape)
shape_order, final_scores, debug = shape_first_reorder_strict(
    query_idx, dataset, H, sims_hist, coarse, fine_scores,
    T=6, shape_thr=None, shape_thr_q=0.80, g_thr=0.60,
    min_onshape=20, lambda_size=0.0,  # размер выключен
    scale_on=1.0, scale_off=0.0
)


print(f"Query model: {dataset.data[query_idx]['model_id']}")
for i, idx in enumerate(shape_order[:20]):
    mid = dataset.data[idx]['model_id']
    tag = " [QUERY]" if idx == query_idx else ""
    g = float(sims_hist[idx])
    f = float(fine_map.get(idx, 0.0))
    print(f"{i}. BRep-only | global={g:.3f} fine={f:.3f} | {mid}{tag}")

top20_indices = np.array(shape_order[:20], dtype=int)
top20_debug = debug[:20]
print("Saved: top20_indices, top20_debug.")

Query model: 44. Extractor Pin-04
0. BRep-only | global=1.000 fine=0.702 | 44. Extractor Pin-04 [QUERY]
1. BRep-only | global=0.622 fine=0.688 | 44. Extractor Pin
2. BRep-only | global=0.622 fine=0.687 | 44. Extractor Pin-01
3. BRep-only | global=0.644 fine=0.677 | 44. Extractor Pin-07
4. BRep-only | global=0.644 fine=0.676 | 44. Extractor Pin-06
5. BRep-only | global=0.547 fine=0.641 | 44. Extractor Pin-05
6. BRep-only | global=0.490 fine=0.657 | 44. Extractor Pin-08
7. BRep-only | global=0.451 fine=0.655 | 44. Extractor Pin-10
8. BRep-only | global=0.402 fine=0.650 | 44. Extractor Pin-09
9. BRep-only | global=0.356 fine=0.608 | 44. Extractor Pin-03
10. BRep-only | global=0.356 fine=0.609 | 44. Extractor Pin-02
11. BRep-only | global=0.307 fine=0.612 | Камера газовая
12. BRep-only | global=0.292 fine=0.612 | Камера газовая 1
13. BRep-only | global=0.286 fine=0.619 | Камера газовая 2
14. BRep-only | global=0.217 fine=0.623 | Камера газовая 9
15. BRep-only | global=0.212 fine=0.572 | За

In [158]:
from pathlib import Path
from src.visualization.jupyter_segmentation_viewer import JupyterSegmentationViewer

def compute_brep_heat_cache(query_idx, dataset, indices=None):

    sim, per_item_face_mins = brep_scores_all_for_query(query_idx, dataset)  
    if indices is None:
        indices = range(len(dataset))
    mins, maxs = [], []
    for i in indices:
        h = per_item_face_mins[i]
        mins.append(h.min())
        maxs.append(h.max())
    interval = [float(np.min(mins)), float(np.max(maxs))]
    return per_item_face_mins, interval

assert 'top20_indices' in globals(), "Сначала выполните ранжирование и сохраните top20_indices."
pick = 4  
idx = int(top20_indices[pick])

# Общий кэш теплокарт и единый интервал (как раньше из top_k_brepnet_euclidean_similarities)
per_item_face_mins_cache, heat_interval = compute_brep_heat_cache(query_idx, dataset, indices=top20_indices)

print(f"Query: [{query_idx}] {dataset[query_idx]['model_id']}")
print(f"Selected rank={pick}: [{idx}] {dataset[idx]['model_id']}")

# Визуализация запроса
viewer_query = JupyterSegmentationViewer(Path(dataset[query_idx]['step_path']))
viewer_query.view_solid()

# Теплокарта кандидата: min Euclidean distance от любой query-грани к каждой грани кандидата
heat = per_item_face_mins_cache[idx]  # np.ndarray (N_faces_cand,)
viewer_cand = JupyterSegmentationViewer(Path(dataset[idx]['step_path']))
viewer_cand.display_faces_with_heatmap(heat, heat_interval)

Query: [37] 44. Extractor Pin-04
Selected rank=4: [39] 44. Extractor Pin-06


HBox(children=(VBox(children=(HBox(children=(Checkbox(value=True, description='Axes', layout=Layout(height='au…

HBox(children=(VBox(children=(HBox(children=(Checkbox(value=True, description='Axes', layout=Layout(height='au…

TraitError: The 'rotation' trait of a GridHelper instance contains an Enum of an Euler which expected any of ['XYZ', 'YZX', 'ZXY', 'XZY', 'YXZ', 'ZYX'], not the str 'xyz'.

TraitError: The 'rotation' trait of a GridHelper instance contains an Enum of an Euler which expected any of ['XYZ', 'YZX', 'ZXY', 'XZY', 'YXZ', 'ZYX'], not the str 'xyz'.