In [56]:
# arranger.py
# ------------
# Build a PyTorch Dataset & DataLoader for jumper‑unity prediction
# Author: ChatGPT • May‑2025
# -------------------------------------------

import glob
import os
from pathlib import Path
from typing import List, Tuple

import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader

# ------------------------------------------------------------------
# 0.  Set the root folders (EDIT if your paths are different)
# ------------------------------------------------------------------
ROOT = Path(r"C:\Users\cyang\Desktop\Chi\functions\Jumper_ML")
INPUT_GLOB   = str(ROOT / "inputs_*.txt")

# Unity_elements_{batch}_{model}.txt

# ------------------------------------------------------------------
# 1.  Small helpers
# ------------------------------------------------------------------
def read_inputs_file(path: Path) -> np.ndarray:
    """Return a (N, 32) float array of global inputs *and* the file’s batch‑id."""
    batch_id = int(path.stem.split("_")[1])          # 'inputs_12.txt' -> 12
    status_dir = ROOT / f"Abaqus_files_{batch_id}\\model_status.txt"


    rows = []
    with open(path, "r") as f:
        for line in f:
            if not line.strip():
                continue
            parts = line.split()
            rows.append([float(x) for x in parts[0:32]])   # skip model_name
    return batch_id, np.asarray(rows, dtype=np.float32)


def read_unity_file(path: Path) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
    """
    Return (elem_type_ids [L], unity_long [L], unity_comb [L])
    """
    et, ul, uc = [], [], []
    with open(path, "r") as f:
        for row in f:
            if not row.strip():
                continue
            cols = row.split()
            # Map element type strings to integer codes
            elem_type = 0 if cols[0] == "PIPE31H" else 1  # PIPE31H -> 0, ELBOW31 -> 1
            et.append(elem_type)
            ul.append(float(cols[1]))
            uc.append(float(cols[2]))
        # print(et)
    return (np.asarray(et, dtype=np.int64),
            np.asarray(ul, dtype=np.float32),
            np.asarray(uc, dtype=np.float32))


# ------------------------------------------------------------------
# 2.  Build memory‑light index of every run(model)
# ------------------------------------------------------------------
class RunIndex:
    """Maps an integer idx -> (global_input, elem_type[], unity_targets[])"""

    def __init__(self):
        self.globals:   List[np.ndarray] = []
        self.elemtypes: List[np.ndarray] = []
        self.targets:   List[np.ndarray] = []

        for batch_id in range(1,65):
            in_path = ROOT/f"inputs_{batch_id}.txt"
            batch_id, g_matrix = read_inputs_file(Path(in_path))
            n_models = g_matrix.shape[0]

            for m_idx in range(n_models):
                ELEM_DIR     = ROOT / f"Abaqus_files_{batch_id}\Step2\elements"
                unity_path = ELEM_DIR / f"Unity_elements_{batch_id}_{m_idx+1}.txt"
                if not unity_path.exists():
                    continue

                et, ul, uc = read_unity_file(unity_path)
                tgt = np.stack([ul, uc], axis=1)          # (L, 2)

                self.globals.append(g_matrix[m_idx])
                self.elemtypes.append(et)
                self.targets.append(tgt)

    def __len__(self):
        return len(self.globals)

    def __getitem__(self, idx: int):
        return (self.globals[idx],     # (29,)
                self.elemtypes[idx],   # (L,)
                self.targets[idx])     # (L, 2)


# ------------------------------------------------------------------
# 3.  Convert RunIndex into a torch Dataset
# ------------------------------------------------------------------
class JumperDataset(Dataset):
    def __init__(self, run_index: RunIndex):
        self.ri = run_index

    def __len__(self):
        return len(self.ri)

    def __getitem__(self, idx):
        g, et, tgt = self.ri[idx]
        return (torch.from_numpy(g),
                torch.from_numpy(et),
                torch.from_numpy(tgt),
                torch.tensor(len(et), dtype=torch.long))


# ------------------------------------------------------------------
# 4.  collate_fn  → pads to L_max and builds length tensor
# ------------------------------------------------------------------
def collate_fn(batch):
    g_list, et_list, tgt_list, len_list = zip(*batch)
    L_max = max(l.item() for l in len_list)

    # Pad element types with 0, targets with 0.0
    et_pad = torch.zeros((len(batch), L_max), dtype=torch.long)
    tgt_pad = torch.zeros((len(batch), L_max, 2), dtype=torch.float32)

    for i, (et, tgt) in enumerate(zip(et_list, tgt_list)):
        L = et.shape[0]
        et_pad[i, :L] = et
        tgt_pad[i, :L, :] = tgt

    g_batch   = torch.stack(g_list, dim=0)               # (B, 29)
    lengths   = torch.stack(len_list, dim=0)             # (B,)
    return g_batch, et_pad, tgt_pad, lengths


# ------------------------------------------------------------------
# 5.  Usage example
# ------------------------------------------------------------------
if __name__ == "__main__":
    runs  = RunIndex()
    ds    = JumperDataset(runs)
    dl    = DataLoader(ds,
                       batch_size=16,
                       shuffle=True,
                       num_workers=0,
                       collate_fn=collate_fn)

    # quick sanity check
    for g, et, tgt, lengths in dl:
        print(f"globals  : {g.shape}")        # (B, 29)
        print(f"elem_type: {et.shape}")       # (B, L_max)
        print(f"targets  : {tgt.shape}")      # (B, L_max, 2)
        print(f"lengths  : {lengths}")        # (B,)
        break



globals  : torch.Size([16, 32])
elem_type: torch.Size([16, 508])
targets  : torch.Size([16, 508, 2])
lengths  : tensor([365, 214, 255, 286, 508, 262, 186, 239, 432, 182, 231, 140, 277, 224,
        237, 473])


In [57]:
def find_model_in_runindex(run_index: RunIndex, batch_id: int, model_idx: int):
    """
    Find a specific model in the RunIndex by batch_id and model_idx.
    Returns (idx, global_inputs, elem_types, targets) if found, or (None, None, None, None) if not found.
    """
    # Read the inputs file to get the global inputs for this batch
    try:
        in_path = ROOT/f"inputs_{batch_id}.txt"
        _, g_matrix = read_inputs_file(Path(in_path))
        target_global = g_matrix[model_idx-1]  # model_idx is 1-based in filenames
        
        # Search through RunIndex to find matching global inputs
        for idx in range(len(run_index)):
            global_inputs, elem_types, targets = run_index[idx]
            # Check if this is the model we're looking for by comparing global inputs
            if np.array_equal(global_inputs, target_global):
                return idx, global_inputs, elem_types, targets
        
        # If we get here, the model wasn't found
        print(f"Model with batch_id={batch_id}, model_idx={model_idx} not found in RunIndex")
        return None, None, None, None
    except Exception as e:
        print(f"Error finding model: {e}")
        return None, None, None, None



In [58]:

# Usage example:
if __name__ == "__main__":
    # Original sanity check
    runs = RunIndex()
    ds = JumperDataset(runs)
    dl = DataLoader(ds,
                   batch_size=16,
                   shuffle=True,
                   num_workers=0,
                   collate_fn=collate_fn)

    # Quick sanity check
    for g, et, tgt, lengths in dl:
        print(f"globals  : {g.shape}")        # (B, 29)
        print(f"elem_type: {et.shape}")       # (B, L_max)
        print(f"targets  : {tgt.shape}")      # (B, L_max, 2)
        print(f"lengths  : {lengths}")        # (B,)
        break
    
    # Find a specific model in the RunIndex
    # Change these values to inspect different models
    idx, g, et, tgt = find_model_in_runindex(runs, batch_id=4, model_idx=1)
    
    if idx is not None:
        # Convert to tensors to verify the exact data that would go into the model
        g_tensor = torch.from_numpy(g)
        et_tensor = torch.from_numpy(et)
        tgt_tensor = torch.from_numpy(tgt)
        length_tensor = torch.tensor(len(et), dtype=torch.long)
        
        print("\nAs tensors (what would go into the model):")
        print(f"  Global tensor: {g_tensor}")
        print(f"  Element types tensor: {et_tensor[:10]}...")
        print(f"  Targets tensor: {tgt_tensor[:5]}")
        print(f"  Length: {length_tensor}")

globals  : torch.Size([16, 32])
elem_type: torch.Size([16, 467])
targets  : torch.Size([16, 467, 2])
lengths  : tensor([218, 231, 210, 272, 177, 385, 266, 172, 467, 199, 266, 165, 284, 204,
        340, 361])

As tensors (what would go into the model):
  Global tensor: tensor([ 4.1598e+01,  1.5302e+01,  4.1554e+01,  6.1456e+01,  4.1553e+01,
         1.5302e+01,  5.1720e+01,  5.6464e+01,  1.6450e+02,  5.0576e+03,
         4.7247e+05,  1.1458e-01,  7.1875e-01,  1.9000e+01, -1.2500e-01,
        -1.2500e-01,  1.2500e-01,  1.7450e-02, -8.7300e-03,  0.0000e+00,
         1.2500e-01, -1.2500e-01,  1.2500e-01,  8.7300e-03, -8.7300e-03,
         0.0000e+00, -2.0786e+00, -1.0772e+00, -4.2591e+00,  1.2198e+00,
        -9.2955e-01, -1.4098e+00])
  Element types tensor: tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])...
  Targets tensor: tensor([[0.3895, 0.4063],
        [0.3916, 0.4082],
        [0.3937, 0.4102],
        [0.3957, 0.4121],
        [0.3976, 0.4139]])
  Length: 362


In [59]:
# train_unity.py
# ---------------
# Train a bi‑LSTM to predict (unity_long, unity_comb) per element
# ---------------------------------------------------------------
import torch
import torch.nn as nn
from torch.utils.data import DataLoader


DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE }")
# ----------------------------------------------------------------------
# 1.  Hyper‑parameters – tweak as you like
# ----------------------------------------------------------------------
BATCH_SIZE      = 32
EMBED_DIM       = 8        # element‑type embedding size
HIDDEN_DIM      = 64       # per‑direction LSTM hidden
NUM_LAYERS      = 2
DROPOUT         = 0.2
LR              = 3e-4
EPOCHS          = 20

# ----------------------------------------------------------------------
# 2.  Model definition
# ----------------------------------------------------------------------
class UnityBiLSTM(nn.Module):
    def __init__(self, n_elem_types: int, n_globals: int = 32):
        super().__init__()
        self.elem_embed = nn.Embedding(n_elem_types, EMBED_DIM, padding_idx=0)

        self.lstm = nn.LSTM(
            input_size=EMBED_DIM + n_globals,
            hidden_size=HIDDEN_DIM,
            num_layers=NUM_LAYERS,
            batch_first=True,
            bidirectional=True,
            dropout=DROPOUT if NUM_LAYERS > 1 else 0.0,
        )

        self.head = nn.Sequential(
            nn.Linear(HIDDEN_DIM * 2, 64),
            nn.ReLU(),
            nn.Linear(64, 2)           # outputs: unity_long, unity_comb
        )

    def forward(self, g, et, lengths):
        """
        g       : (B, 32)         global scalars
        et      : (B, L_max)      element‑type IDs (0=PAD)
        lengths : (B,)            real sequence lengths
        """
        B, L = et.size()

        # 1) embed element types
        et_emb = self.elem_embed(et)                  # (B, L, EMBED_DIM)

        # 2) repeat globals along time axis
        g_rep = g.unsqueeze(1).expand(-1, L, -1)      # (B, L, 32)

        # 3) concat & pack
        x = torch.cat([et_emb, g_rep], dim=-1)        # (B, L, D_in)
        packed = nn.utils.rnn.pack_padded_sequence(
            x, lengths.cpu(), batch_first=True, enforce_sorted=False
        )

        # 4) bi‑LSTM
        packed_out, _ = self.lstm(packed)
        h, _ = nn.utils.rnn.pad_packed_sequence(
            packed_out, batch_first=True, total_length=L
        )                                             # (B, L, 2*HIDDEN_DIM)

        # 5) per‑time‑step head
        y_hat = self.head(h)                          # (B, L, 2)
        return y_hat


# ----------------------------------------------------------------------
# 3.  Utilities – masked MSE so PAD steps don’t count
# ----------------------------------------------------------------------
def masked_mse(pred, target, lengths):
    """
    pred, target : (B, L_max, 2)
    lengths      : (B,)
    """
    B, L, _ = pred.shape
    mask = torch.arange(L, device=lengths.device).expand(B, L) < lengths.unsqueeze(1)
    mask = mask.unsqueeze(-1)                        # (B, L, 1)

    diff2 = (pred - target) ** 2 * mask
    mse = diff2.sum() / mask.sum()                  # mean over real steps
    return mse


# ----------------------------------------------------------------------
# 4.  Data
# ----------------------------------------------------------------------
runs = RunIndex()
ds   = JumperDataset(runs)
dl   = DataLoader(ds,
                  batch_size=BATCH_SIZE,
                  shuffle=True,
                  num_workers=0,
                  collate_fn=collate_fn,
                  pin_memory=True)

# how many element‑type IDs? 0 = PAD plus real ones (here 0/1 → 2)
N_ELEM_TYPES = max([et.max() for et in runs.elemtypes]) + 1

# ----------------------------------------------------------------------
# 5.  Train loop
# ----------------------------------------------------------------------
model = UnityBiLSTM(n_elem_types=N_ELEM_TYPES, n_globals=32).to(DEVICE)
opt   = torch.optim.AdamW(model.parameters(), lr=LR)

for epoch in range(1, EPOCHS + 1):
    model.train()
    running = 0.0
    for g, et, tgt, lengths in dl:
        g, et, tgt, lengths = g.to(DEVICE), et.to(DEVICE), tgt.to(DEVICE), lengths.to(DEVICE)

        opt.zero_grad()
        y_hat = model(g, et, lengths)
        loss  = masked_mse(y_hat, tgt, lengths)
        loss.backward()
        opt.step()

        running += loss.item()

    print(f"Epoch {epoch:02d}   train-loss: {running / len(dl):.4f}")

print("Done.")


Using device: cuda
Epoch 01   train-loss: 0.2347
Epoch 02   train-loss: 0.1983
Epoch 03   train-loss: 0.1905
Epoch 04   train-loss: 0.1883
Epoch 05   train-loss: 0.1860
Epoch 06   train-loss: 0.1852
Epoch 07   train-loss: 0.1828
Epoch 08   train-loss: 0.1778
Epoch 09   train-loss: 0.1770
Epoch 10   train-loss: 0.1770
Epoch 11   train-loss: 0.1757
Epoch 12   train-loss: 0.1762
Epoch 13   train-loss: 0.1755
Epoch 14   train-loss: 0.1726
Epoch 15   train-loss: 0.1696
Epoch 16   train-loss: 0.1693
Epoch 17   train-loss: 0.1685
Epoch 18   train-loss: 0.1686
Epoch 19   train-loss: 0.1682
Epoch 20   train-loss: 0.1679
Done.


In [60]:
MODEL_PATH = "unity_bilstm.pt"
torch.save(model.state_dict(), MODEL_PATH)
print(f"Model saved to {MODEL_PATH}")

Model saved to unity_bilstm.pt


In [82]:
import numpy as np
from typing import List
import numpy as np
import torch

PIPE, ELBOW = 0, 1  # consistent with training labels

def build_elem_types(seg_lengths: List[float],
                     outer_diameter: float,
                     bend_angle_rad: float = np.pi / 2,
                     bend_radius_factor: float = 5.0,
                     min_pipe_elements: int = 1) -> np.ndarray:
    H = outer_diameter
    R = bend_radius_factor * outer_diameter
    L_bend = R * bend_angle_rad
    n_elbow = max(1, int(np.ceil(L_bend / H)))
    actual_L_bend = n_elbow * H  # actual length covered by elbow elements

    types = []
    num_segments = len(seg_lengths)

    for k, Lseg in enumerate(seg_lengths):
        num_bends = 1/2 if (k == 0 or k == num_segments - 1) else 1

        available_pipe_length = Lseg - num_bends * 10*H

        # If too short, force a minimal straight section of at least 1 pipe element
        if available_pipe_length < H * min_pipe_elements:
            n_pipe = min_pipe_elements
        else:
            n_pipe = int(np.ceil(available_pipe_length / H))

        types.extend([PIPE] * n_pipe)

        # Add elbow elements after each segment except the last
        if k != num_segments - 1:
            types.extend([ELBOW] * n_elbow)

    return np.asarray(types, dtype=np.int64)




DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
MODEL_FILE = "unity_bilstm.pt"

g_vec = np.array([28.89045, 14.46195, 20.23866, 70.89006, 19.33562, 14.46195,
                  44.69494, 43.41846, 170.3305, 2623.41446, 1061784.706, 0.10417,
                    1.33333, 19, -0.125, 0.125, 0.125, 0.00873, 0.01745, 0, 0.125,
                      0.125, -0.125, -0.00873, -0.01745, 0, 3.28873, 1.4538, 3.15864,
                        -2.18767, 0.44241, -1.93973], dtype=np.float32)

assert g_vec.size == 32

seg_len = g_vec[:7]
outer_d = g_vec[12]

elem_type = build_elem_types(seg_len, outer_d)
L = elem_type.size

runs = RunIndex()
n_typ = max(et.max() for et in runs.elemtypes) + 1
model = UnityBiLSTM(n_typ, n_globals=32).to(DEVICE)
model.load_state_dict(torch.load(MODEL_FILE, map_location=DEVICE))
model.eval()

g_tensor = torch.from_numpy(g_vec).unsqueeze(0).to(DEVICE)
et_tensor = torch.from_numpy(elem_type).unsqueeze(0).to(DEVICE)
lengths = torch.tensor([L], dtype=torch.long).to(DEVICE)

with torch.no_grad():
    y_hat = model(g_tensor, et_tensor, lengths)

pred = y_hat[0, :L].cpu().numpy()
unity_long, unity_comb = pred[:, 0], pred[:, 1]

print(f"\nGenerated {L} elements (OD={outer_d:.3f}, total pipe length={seg_len.sum():.1f}m)\n")
print(" idx │ elem_type │ unity_long │ unity_comb")
print("─────┼───────────┼────────────┼────────────")
for i in range(L):
    et_label = "PIPE" if elem_type[i] == 0 else "ELBOW"
    print(f"{i:4d} │ {et_label:^9} │ {unity_long[i]:10.4f} │ {unity_comb[i]:10.4f}")



Generated 151 elements (OD=1.333, total pipe length=213.0m)

 idx │ elem_type │ unity_long │ unity_comb
─────┼───────────┼────────────┼────────────
   0 │   PIPE    │     0.7816 │     0.9047
   1 │   PIPE    │     0.7682 │     0.8873
   2 │   PIPE    │     0.7427 │     0.8621
   3 │   PIPE    │     0.7117 │     0.8310
   4 │   PIPE    │     0.6842 │     0.8034
   5 │   PIPE    │     0.6602 │     0.7822
   6 │   PIPE    │     0.6413 │     0.7655
   7 │   PIPE    │     0.6309 │     0.7565
   8 │   PIPE    │     0.6249 │     0.7512
   9 │   PIPE    │     0.6217 │     0.7484
  10 │   PIPE    │     0.6277 │     0.7536
  11 │   PIPE    │     0.6514 │     0.7739
  12 │   PIPE    │     0.6803 │     0.8009
  13 │   PIPE    │     0.7087 │     0.8270
  14 │   PIPE    │     0.7372 │     0.8528
  15 │   PIPE    │     0.7668 │     0.8793
  16 │   PIPE    │     0.7984 │     0.9087
  17 │   ELBOW   │     0.8321 │     0.9399
  18 │   ELBOW   │     0.8663 │     0.9714
  19 │   ELBOW   │     0.8978 │   