In [32]:
import os
import sys
module_path = os.path.abspath(os.path.join('..'))
if module_path not in sys.path:
    sys.path.append(module_path)
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [33]:
from src.models.modules import *
from src.models.loss import L1_epsilon_lambda
from dataclasses import dataclass
import torch

torch.manual_seed(42)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

@dataclass
class SDFTransformerConfig:
    dim_context: int = 4
    dim_input: int = 3
    num_outputs: int = 1
    dim_output: int = 1
    delta: float = 0.1
    dim_hidden: int = 128
    num_ctx_seeds: int = 32
    num_x_seeds: int = 32
    num_heads: int = 4

class SDFTransformer(nn.Module):
    def __init__(self, config: SDFTransformerConfig):
        super(SDFTransformer, self).__init__()
        self.config = config
        self.epsilon = None
        self.lambdaa = None
        self.proj_ctx = nn.Linear(config.dim_context, config.dim_hidden)
        self.pool_ctx = PMA(config.dim_hidden, config.num_heads, config.num_ctx_seeds)
        self.pool = nn.AdaptiveAvgPool1d(config.num_outputs)
        mlp_hidden = config.dim_hidden + config.dim_input
        self.mlp = nn.Sequential(
            nn.Linear(mlp_hidden, mlp_hidden),
            nn.SiLU(),
            nn.Linear(mlp_hidden, mlp_hidden),
            nn.SiLU(),
            nn.Linear(mlp_hidden, config.dim_output),
            nn.Tanh()
        )
        self.silu = nn.SiLU()

    def forward(self, context: torch.Tensor, x: torch.Tensor, labels: torch.Tensor = None):
        ctx = self.proj_ctx(context)                        # [B, CTX, H]
        ctx = self.silu(ctx)
        ctx = self.pool_ctx(ctx)                            # [B, Y, H]
        ctx = self.silu(ctx)
        ctx = ctx.permute(0, 2, 1)                          # [B, H, Y]
        ctx = self.pool(ctx)                                # [B, H, O]
        ctx = ctx.permute(0, 2, 1)                          # [B, O, H]
        x = x.expand(-1, self.config.num_outputs, -1)       # [B, O, C]
        x = torch.concat((x, ctx), dim=-1)                  # [B, O, H + C]
        y = self.mlp(x)                                     # [B, O, O]

        loss = None
        if labels is not None:
            loss = L1_epsilon_lambda(y, labels, self.epsilon, self.lambdaa, self.config.delta)
        return {'loss': loss, 'logits': y}

config = SDFTransformerConfig()
model = SDFTransformer(config).to(device)
print(device)

cuda


In [34]:
from src.models.dataset import LazySampleDataset
from pathlib import Path

project_dir = Path(os.path.abspath('')).resolve().parent
procesed_dir = project_dir / 'data' / 'processed'

train_files = list(procesed_dir.rglob('*_train.hdf5'))
val_files = list(procesed_dir.rglob('*_val.hdf5'))

train_dataset = LazySampleDataset(train_files)
val_dataset = LazySampleDataset(val_files)

In [35]:
from src.data.load_data import get_results_dir
from datetime import datetime

notebook_name = '2025_01_17_linear_dec'
current_date = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
folder_name = f"{notebook_name}-{current_date}"
result_dir = get_results_dir() / folder_name
result_dir.mkdir(parents=True, exist_ok=True)
print(result_dir)

C:\_prog\vm_shared\attention-sdf\results\2025_01_17_linear_dec-2025-01-17-21-46-23


In [36]:
from transformers import Trainer, TrainingArguments

batch_size = 64
training_args = TrainingArguments(
    output_dir=result_dir / "results",
    eval_strategy="epoch",
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=1,
    logging_dir=result_dir / "logs",
    logging_steps=10,
    weight_decay=0.01,
    save_total_limit=3,
    seed=42
)

trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

curriculum_schedule = [
    {"epochs": 2, "epsilon": 0.02,   "lambda": 0.0,  'learning_rate': 5e-5, 'resolution': 100},
    {"epochs": 2, "epsilon": 0.0075, "lambda": 0.15, 'learning_rate': 4e-5, 'resolution': 100},
    {"epochs": 2, "epsilon": 0.004,  "lambda": 0.3,  'learning_rate': 3e-5, 'resolution': 100},
    {"epochs": 2, "epsilon": 0.002,  "lambda": 0.4,  'learning_rate': 2e-5, 'resolution': 100},
    {"epochs": 2, "epsilon": 0.0,    "lambda": 0.5,  'learning_rate': 1e-5, 'resolution': 256}
]

In [37]:
from src.visualization.generate_mesh import generate_meshes
from src.data.load_data import get_data_dir

obj_dir = get_data_dir() / 'intermediate'
format_string_base = "{name}-" + current_date + "-curriculum-"

for i, stage in enumerate(curriculum_schedule):
    model.epsilon = stage['epsilon']
    model.lambdaa = stage['lambda']
    trainer.args.num_train_epochs = stage['epochs']
    trainer.args.learning_rate = stage['learning_rate']
    trainer.train()
    format_string = format_string_base + str(i) + ".obj"
    generate_meshes(model, obj_dir, result_dir, format_string, device,
        batch_size, resolution=stage['resolution'], context_size=256)
train_dataset.close()
val_dataset.close()

  0%|          | 0/23438 [00:00<?, ?it/s]

{'loss': 0.0036, 'grad_norm': 0.2062477320432663, 'learning_rate': 4.997866712176807e-05, 'epoch': 0.0}
{'loss': 0.0036, 'grad_norm': 0.06915193796157837, 'learning_rate': 4.995733424353614e-05, 'epoch': 0.0}
{'loss': 0.0031, 'grad_norm': 0.06873846054077148, 'learning_rate': 4.993600136530421e-05, 'epoch': 0.0}
{'loss': 0.0032, 'grad_norm': 0.13737750053405762, 'learning_rate': 4.991466848707228e-05, 'epoch': 0.0}
{'loss': 0.0036, 'grad_norm': 0.018010739237070084, 'learning_rate': 4.9893335608840345e-05, 'epoch': 0.0}
{'loss': 0.0042, 'grad_norm': 0.03516695275902748, 'learning_rate': 4.9872002730608416e-05, 'epoch': 0.01}
{'loss': 0.0025, 'grad_norm': 0.03435738757252693, 'learning_rate': 4.985066985237649e-05, 'epoch': 0.01}
{'loss': 0.0041, 'grad_norm': 0.03460470214486122, 'learning_rate': 4.982933697414455e-05, 'epoch': 0.01}
{'loss': 0.0028, 'grad_norm': 0.06885584443807602, 'learning_rate': 4.980800409591262e-05, 'epoch': 0.01}
{'loss': 0.0042, 'grad_norm': 0.03473550081253052

  0%|          | 0/2344 [00:00<?, ?it/s]

{'eval_loss': 0.0032849684357643127, 'eval_runtime': 117.5632, 'eval_samples_per_second': 1275.91, 'eval_steps_per_second': 19.938, 'epoch': 1.0}
{'loss': 0.0032, 'grad_norm': 0.01746467314660549, 'learning_rate': 2.499786671217681e-05, 'epoch': 1.0}
{'loss': 0.0028, 'grad_norm': 0.06785739958286285, 'learning_rate': 2.4976533833944876e-05, 'epoch': 1.0}
{'loss': 0.003, 'grad_norm': 0.01740686595439911, 'learning_rate': 2.4955200955712947e-05, 'epoch': 1.0}
{'loss': 0.0031, 'grad_norm': 0.050982922315597534, 'learning_rate': 2.4933868077481015e-05, 'epoch': 1.0}
{'loss': 0.0036, 'grad_norm': 0.1358174830675125, 'learning_rate': 2.4912535199249083e-05, 'epoch': 1.0}
{'loss': 0.0029, 'grad_norm': 0.06801114231348038, 'learning_rate': 2.4891202321017154e-05, 'epoch': 1.0}
{'loss': 0.0033, 'grad_norm': 0.004385672509670258, 'learning_rate': 2.4869869442785222e-05, 'epoch': 1.01}
{'loss': 0.0028, 'grad_norm': 0.0849919393658638, 'learning_rate': 2.484853656455329e-05, 'epoch': 1.01}
{'loss'

  0%|          | 0/2344 [00:00<?, ?it/s]

{'eval_loss': 0.003273337846621871, 'eval_runtime': 118.3361, 'eval_samples_per_second': 1267.577, 'eval_steps_per_second': 19.808, 'epoch': 2.0}
{'train_runtime': 1849.9546, 'train_samples_per_second': 810.831, 'train_steps_per_second': 12.669, 'train_loss': 0.00326954601203802, 'epoch': 2.0}


Processing models:   0%|          | 0/3 [00:00<?, ?it/s]

Processing batches:   0%|          | 0/15625 [00:00<?, ?it/s]

ValueError: Surface level must be within volume data range.

In [8]:
import json
from dataclasses import asdict

current_date = datetime.now().strftime("%Y-%m-%d")
model_name = f"{current_date}-model"
config_name = f"{current_date}-config.json"
trainer.save_model(result_dir / model_name)

with open(result_dir / config_name, 'w') as f:
    json.dump(asdict(config), f)