In [1]:
%load_ext autoreload
%autoreload 2
import notebook_setup
from src.config import INTERIM_DATA_DIR, PROCESSED_DATA_DIR, REPORTS_DIR, EXTERNAL_DATA_DIR, MODELS_DIR
import os
os.environ["KMP_DUPLICATE_LIB_OK"] = "TRUE"
os.environ["OMP_NUM_THREADS"] = "4"
os.environ["MKL_NUM_THREADS"] = "4"

BREPNET_NPZ_DIR = INTERIM_DATA_DIR / "features" / "brepnet"

[32m2025-09-18 19:13:31.879[0m | [1mINFO    [0m | [36msrc.config[0m:[36m<module>[0m:[36m11[0m - [1mPROJ_ROOT path is: D:\workspace\projects\freelance\Fusion3DNet[0m


Project root added to path: d:\workspace\projects\freelance\Fusion3DNet


In [2]:
import numpy as np, pathlib as p
f = next(p.Path(INTERIM_DATA_DIR/'features/brepnet').glob('*.npz'))  # подставьте конкретный файл, на котором падает
with np.load(f) as z:
    print(f, z.files)
    for k in z.files:
        print(k, z[k].shape, z[k].dtype)

D:\workspace\projects\freelance\Fusion3DNet\data\interim\features\brepnet\42. Ejector-01.prt.npz ['face_features', 'face_point_grids', 'edge_features', 'coedge_point_grids', 'coedge_features', 'coedge_lcs', 'coedge_scale_factors', 'coedge_reverse_flags', 'next', 'mate', 'face', 'edge']
face_features (9, 7) float64
face_point_grids (9, 7, 10, 10) float64
edge_features (16, 10) float64
coedge_point_grids (31, 12, 10) float64
coedge_features (31, 1) float64
coedge_lcs (31, 4, 4) float64
coedge_scale_factors (31,) float64
coedge_reverse_flags (31,) float64
next (31,) uint32
mate (31,) uint32
face (31,) uint32
edge (31,) uint32


In [3]:
from pathlib import Path
import torch
from torch.utils.data import DataLoader
from typing import List


# Импортируем классы из вашего файла
from src.modeling.vit_brep_ensemble.data_module.dataset import (
    CADItem,
    FusionCADDataset,
    build_brep_standardizer,
    save_stats,
    load_stats,
)

def get_clean_id(filename: str):
    name = Path(filename).stem
    if name.endswith('.prt'):
        name = name[:-4]
    return name

brep_features_dir = Path(INTERIM_DATA_DIR / "features/brepnet")
dino_features_dir = Path(INTERIM_DATA_DIR / "features/dino")
stats_path = Path(INTERIM_DATA_DIR / "features/pooled_brep.json")

# 2. Собираем объекты CADItem, находя общие файлы
brep_files = {p.stem: p for p in brep_features_dir.glob("*.npz")}
dino_files = {p.stem: p for p in dino_features_dir.glob("*.npz")}

# очистим id от лишних суффиксов
brep_files = {get_clean_id(k): v for k, v in brep_files.items()}

common_ids = sorted(brep_files.keys() & dino_files.keys())

print(f"Всего файлов BREP: {len(brep_files)}")
print(f"Всего файлов DINO: {len(dino_files)}")

all_items: List[CADItem] = []
for item_id in common_ids:
    item = CADItem(
        item_id=item_id,
        brep_npz_path=brep_files[item_id],
        dino_path=dino_files[item_id],
    )
    all_items.append(item)

print(f"Найдено {len(all_items)} общих элементов.")

# 3. Разделяем на обучающую и валидационную выборки (например, 80/20)
train_size = int(0.8 * len(all_items))
train_items = all_items[:train_size]
val_items = all_items[train_size:]

print("Вычисление статистики на основе тренировочных данных...")
standardizer = build_brep_standardizer(train_items)
print(f"Сохранение статистики в {stats_path}...")
save_stats(standardizer, stats_path)

if stats_path.exists():
    print(f"Загрузка статистики из {stats_path}")
    standardizer = load_stats(stats_path)
else:
    print("Создание и сохранение статистики...")
    standardizer = build_brep_standardizer(train_items)
    save_stats(standardizer, stats_path)

# 5. Создаем экземпляры датасета
train_dataset = FusionCADDataset(
    items=train_items,
    standardizer=standardizer
)
val_dataset = FusionCADDataset(
    items=val_items,
    standardizer=standardizer
)

print(f"Размер обучающего датасета: {len(train_dataset)}")
print(f"Размер валидационного датасета: {len(val_dataset)}")

def custom_collate_fn(batch):
    """
    Собирает батч, оставляя тензоры переменной длины (face_matrix) в виде списка.
    """
    views_list = [item['views'] for item in batch]
    face_matrix_list = [item['face_matrix'] for item in batch]
    item_id_list = [item['item_id'] for item in batch]

    
    views_batch = torch.stack(views_list, dim=0)

    return {
        'views': views_batch,
        'face_matrix': face_matrix_list,
        'item_id': item_id_list
    }

# 6. Используем DataLoader для итерации по данным
train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True, collate_fn=custom_collate_fn)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False, collate_fn=custom_collate_fn)

print(f'Shape dino features: {train_dataset[1]["views"].shape}')

batch = next(iter(train_loader))
print("Ключи в батче:", list(batch.keys()))
print("Размер 'views':", batch["views"].shape)
# 'face_matrix' теперь - это список, выведем размер первого элемента
print("'face_matrix' - это список из", len(batch["face_matrix"]), "тензоров.")
print("Размер первого 'face_matrix' в батче:", batch["face_matrix"][0].shape)
print("ID элементов:", batch["item_id"])

Всего файлов BREP: 129
Всего файлов DINO: 129
Найдено 128 общих элементов.
Вычисление статистики на основе тренировочных данных...


Вычисление статистики: 100%|██████████| 102/102 [00:01<00:00, 51.42it/s]


Сохранение статистики в D:\workspace\projects\freelance\Fusion3DNet\data\interim\features\pooled_brep.json...
Загрузка статистики из D:\workspace\projects\freelance\Fusion3DNet\data\interim\features\pooled_brep.json
Размер обучающего датасета: 102
Размер валидационного датасета: 26
Shape dino features: torch.Size([8, 384])
Ключи в батче: ['views', 'face_matrix', 'item_id']
Размер 'views': torch.Size([102, 8, 384])
'face_matrix' - это список из 102 тензоров.
Размер первого 'face_matrix' в батче: torch.Size([8, 7])
ID элементов: ['42. Ejector-09', '43. Extractor-06', '42. Ejector-06', 'Защелка 3', '43. Extractor-03', 'Защелка АК', 'Зацеп трубки направляющий 2', '42. Silencer Fix-07', '42. Silencer Fix-01', 'Камера газовая 3', 'Зацеп трубки направляющий 8', '44. Extractor Pin-10', '42. Silencer Fix-09', '43. Extractor-01', 'Затвор', '44. Extractor Pin-06', 'Защелка 4', 'Зацеп трубки направляющий 4', 'Камера газовая', '44. Extractor Pin-05', '44. Extractor Pin-01', '43. Extractor-10', 'Кам

In [4]:
from src.modeling.vit_brep_ensemble.models.ensemble import ContrastiveFusionModel
from src.modeling.vit_brep_ensemble.data_module.data_loader import FusionDataModule 

data_module = FusionDataModule(
    brep_features_dir=brep_features_dir,
    dino_features_dir=dino_features_dir,
    stats_path=stats_path,
    batch_size=32,    
)
data_module.setup()
batch = next(iter(data_module.train_dataloader()))
model = ContrastiveFusionModel(embed_dim=256, learning_rate=1e-4)

with torch.no_grad():
    embeddings = model(batch)  
    dino_embed = embeddings["dino_embed"]
    brep_embed = embeddings["brep_embed"]
    print("dino:", dino_embed.shape, "brep:", brep_embed.shape)

  warn(


Загрузка статистики из D:\workspace\projects\freelance\Fusion3DNet\data\interim\features\pooled_brep.json
dino: torch.Size([32, 256]) brep: torch.Size([32, 256])


In [5]:
from src.modeling.vit_brep_ensemble import train

train.main()

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
d:\dev\.miniconda\envs\brepnet\Lib\site-packages\pytorch_lightning\callbacks\model_checkpoint.py:701: Checkpoint directory D:\workspace\projects\freelance\Fusion3DNet\models\vit_brep_ensemble exists and is not empty.

  | Name         | Type        | Params | Mode 
-----------------------------------------------------
0 | dino_encoder | DINOEncoder | 362 K  | train
1 | brep_encoder | BRepEncoder | 3.2 M  | train
-----------------------------------------------------
3.5 M     Trainable params
0         Non-trainable params
3.5 M     Total params
14.096    Total estimated model params size (MB)
50        Modules in train mode
0         Modules in eval mode


Загрузка статистики из D:\workspace\projects\freelance\Fusion3DNet\data\interim\features\pooled_brep.json


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

d:\dev\.miniconda\envs\brepnet\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
d:\dev\.miniconda\envs\brepnet\Lib\site-packages\pytorch_lightning\utilities\data.py:79: Trying to infer the `batch_size` from an ambiguous collection. The batch size we found is 26. To avoid any miscalculations, use `self.log(..., batch_size=batch_size)`.
d:\dev\.miniconda\envs\brepnet\Lib\site-packages\pytorch_lightning\trainer\connectors\data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=7` in the `DataLoader` to improve performance.
d:\dev\.miniconda\envs\brepnet\Lib\site-packages\pytorch_lightning\loops\fit_loop.py:310: The number of training batches (4) 

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]


Detected KeyboardInterrupt, attempting graceful shutdown ...


SystemExit: 1

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)
