## Modeling
파일 병합 필수!!!

In [None]:
# Window-extraction settings shared by both training networks.
n_distance = 2
window_size = 60
step = 60

# window_features_expanded_allP is defined elsewhere in the notebook; it is
# used here only as a producer of an (X, y) pair of NumPy arrays per network.
Xa_all, ya_all = window_features_expanded_allP(
    df=train_a_scaled,
    adjacency=adjacency_list_A,
    n=n_distance,
    window_size=window_size,
    step=step
)
print("Xa_all.shape =", Xa_all.shape, "ya_all.shape =", ya_all.shape)

Xb_all, yb_all = window_features_expanded_allP(
    df=train_b_scaled,
    adjacency=adjacency_list_B,
    n=n_distance,
    window_size=window_size,
    step=step
)
print("Xb_all.shape =", Xb_all.shape, "yb_all.shape =", yb_all.shape)

# Merge the two networks. When only one of them produced windows, use that
# one (previously an empty A silently discarded a non-empty B). X and y are
# always chosen together so features and labels stay aligned.
if Xa_all.size > 0 and Xb_all.size > 0:
    X_train = np.concatenate([Xa_all, Xb_all], axis=0)
    y_train = np.concatenate([ya_all, yb_all], axis=0)
elif Xb_all.size > 0:
    X_train, y_train = Xb_all, yb_all
else:
    X_train, y_train = Xa_all, ya_all
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)

class WaterDataset(Dataset):
    """Map-style dataset pairing each feature window with its label.

    ``X`` and ``y`` are stored exactly as given; each sample is converted to a
    ``float32`` tensor on access rather than up front.
    """

    def __init__(self, X, y):
        self.X, self.y = X, y

    def __len__(self):
        # The number of samples is taken from the feature container alone.
        return len(self.X)

    def __getitem__(self, idx):
        window = torch.tensor(self.X[idx], dtype=torch.float32)
        label = torch.tensor(self.y[idx], dtype=torch.float32)
        return window, label

# Wrap the merged training arrays and serve them in shuffled mini-batches of 32.
train_dataset = WaterDataset(X=X_train, y=y_train)
train_loader = DataLoader(
    dataset=train_dataset,
    batch_size=32,
    shuffle=True,
)


In [None]:
def assoc_discrep(a, b):
    """Return the summed, epsilon-smoothed log-ratio divergence of ``a`` from ``b``.

    Evaluates ``sum(a * log((a + eps) / (b + eps) + eps))`` over every element
    with ``eps = 1e-6``. For non-negative inputs the epsilon terms keep both
    the ratio and the logarithm finite when an entry is zero. The result is a
    scalar tensor (a plain sum, not a mean) and is not symmetric in ``a``/``b``.
    """
    eps = 1e-6
    smoothed_ratio = (a + eps) / (b + eps)
    weighted_log = a * torch.log(smoothed_ratio + eps)
    return weighted_log.sum()

class PosEnc(nn.Module):
    """Fixed sinusoidal positional encoding added to the input.

    A ``(1, max_len, d_model)`` table is precomputed and registered as the
    buffer ``pe``: even feature indices hold sines and odd indices hold
    cosines of position-scaled frequencies.

    Args:
        d_model: Size of the last (feature) dimension of the input.
        max_len: Number of positions to precompute. Defaults to 10000.
    """

    def __init__(self, d_model, max_len=10000):
        super().__init__()
        pe = torch.zeros(max_len, d_model)
        pos = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div = torch.exp(
            torch.arange(0, d_model, 2, dtype=torch.float) * (-math.log(10000.0) / d_model)
        )
        angles = pos * div
        pe[:, 0::2] = torch.sin(angles)
        # With an odd d_model there is one fewer odd-indexed column than
        # even-indexed ones, so trim the cosine block to fit.
        pe[:, 1::2] = torch.cos(angles[:, : d_model // 2])
        self.register_buffer('pe', pe.unsqueeze(0))

    def forward(self, x):
        """Add the first ``x.size(1)`` rows of the table to ``x``."""
        L = x.size(1)
        return x + self.pe[:, :L, :]

class AnomalyBlock(nn.Module):
    """Encoder block with two parallel multi-head self-attention branches.

    Branches A and B each own their query/key/value projections. Their
    attended outputs are averaged, passed through a shared output projection,
    and followed by a residual + LayerNorm and a feed-forward residual +
    LayerNorm. The same ``LayerNorm`` instance is used at both points.
    """

    def __init__(self, d_model, n_heads):
        super().__init__()
        self.n_heads = n_heads
        self.dk = d_model // n_heads
        # Branch A projections.
        self.qA = nn.Linear(d_model, d_model)
        self.kA = nn.Linear(d_model, d_model)
        self.vA = nn.Linear(d_model, d_model)
        # Branch B projections.
        self.qB = nn.Linear(d_model, d_model)
        self.kB = nn.Linear(d_model, d_model)
        self.vB = nn.Linear(d_model, d_model)
        self.out_lin = nn.Linear(d_model, d_model)
        self.norm = nn.LayerNorm(d_model)
        self.ffn = nn.Sequential(
            nn.Linear(d_model, 4 * d_model),
            nn.ReLU(),
            nn.Linear(4 * d_model, d_model),
        )

    def _split_heads(self, t):
        """Reshape ``(B, L, D)`` into ``(B, n_heads, L, dk)``."""
        batch, length, _ = t.shape
        return t.reshape(batch, length, self.n_heads, self.dk).transpose(1, 2)

    def _attend(self, x, q_lin, k_lin, v_lin):
        """Run one scaled dot-product attention branch.

        Returns the merged ``(B, L, D)`` context and the
        ``(B, n_heads, L, L)`` attention weights.
        """
        q = self._split_heads(q_lin(x))
        k = self._split_heads(k_lin(x))
        v = self._split_heads(v_lin(x))
        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.dk)
        weights = F.softmax(scores, dim=-1)
        context = torch.matmul(weights, v).transpose(1, 2).reshape(x.shape)
        return context, weights

    def forward(self, x):
        """Return ``(output, attn_a, attn_b)`` for an input of shape ``(B, L, D)``."""
        ctx_a, attn_a = self._attend(x, self.qA, self.kA, self.vA)
        ctx_b, attn_b = self._attend(x, self.qB, self.kB, self.vB)

        mixed = self.out_lin((ctx_a + ctx_b) / 2.0)
        hidden = self.norm(x + mixed)
        result = self.norm(hidden + self.ffn(hidden))
        return result, attn_a, attn_b

class AnomalyEncoder(nn.Module):
    """Stack of ``AnomalyBlock`` layers that also collects their attention maps."""

    def __init__(self, d_model, n_heads, num_layers=2):
        super().__init__()
        blocks = []
        for _ in range(num_layers):
            blocks.append(AnomalyBlock(d_model, n_heads))
        self.layers = nn.ModuleList(blocks)

    def forward(self, x):
        """Feed ``x`` through every layer in order.

        Returns the final hidden states plus two lists holding, per layer,
        the first and second attention tensors returned by that layer.
        """
        hidden = x
        first_maps, second_maps = [], []
        for block in self.layers:
            hidden, attn_first, attn_second = block(hidden)
            first_maps.append(attn_first)
            second_maps.append(attn_second)
        return hidden, first_maps, second_maps

class AnomalyTransformer(nn.Module):
    """Encoder with a per-step reconstruction head and a per-window classifier.

    The input is linearly projected to ``d_model``, given positional
    encodings, and encoded. ``recon`` maps every time step back to
    ``input_dim``; ``cls_head`` maps the time-averaged encoding to one logit.
    """

    def __init__(self, input_dim=7, d_model=64, n_heads=4, num_layers=2, lambd=0.01):
        super().__init__()
        self.input_fc = nn.Linear(input_dim, d_model)
        self.pos = PosEnc(d_model)
        self.encoder = AnomalyEncoder(d_model, n_heads, num_layers)
        self.recon = nn.Linear(d_model, input_dim)
        self.cls_head = nn.Linear(d_model, 1)
        # Weight applied to the association-discrepancy term of the loss.
        self.lambd = lambd

    def forward(self, x):
        """Return ``(reconstruction, class_logit, A_list, B_list)`` for ``x`` of shape ``(B, L, input_dim)``."""
        embedded = self.pos(self.input_fc(x))
        enc_out, A_list, B_list = self.encoder(embedded)
        rec = self.recon(enc_out)
        # Average over the time axis before classifying the whole window.
        pooled = enc_out.mean(dim=1)
        return rec, self.cls_head(pooled), A_list, B_list

    def compute_loss(self, x, rec, logit, y, A_list, B_list):
        """Combine reconstruction MSE, mean association discrepancy and BCE.

        The discrepancy is averaged over layers and scaled by ``self.lambd``;
        the BCE term compares ``logit`` (last dimension squeezed) with ``y``.
        """
        recon_term = F.mse_loss(rec, x, reduction='mean')
        layer_terms = (assoc_discrep(A, B) for A, B in zip(A_list, B_list))
        discrepancy = sum(layer_terms) / len(A_list)
        cls_term = F.binary_cross_entropy_with_logits(logit.squeeze(-1), y)
        return recon_term + self.lambd * discrepancy + cls_term

### Training

In [None]:

class EarlyStopper:
    """Checkpoint the model on improvement and flag when progress stalls.

    Each call scores the model as ``-val_loss``. The first call, and any call
    whose score is at least ``best_score + delta``, saves the model's
    ``state_dict`` to ``path``. Any other call increments ``counter``;
    ``early_stop`` becomes True once ``counter`` reaches ``patience``.
    """

    def __init__(self, patience=5, delta=1e-4, path="best_anomaly_transformer.pth"):
        self.patience, self.delta, self.path = patience, delta, path
        self.best_score = None
        self.counter = 0
        self.early_stop = False

    def __call__(self, val_loss, model):
        score = -val_loss
        no_baseline = self.best_score is None

        # Not enough improvement over the stored best: count a stalled epoch.
        if not no_baseline and score < self.best_score + self.delta:
            self.counter += 1
            if self.counter >= self.patience:
                self.early_stop = True
            return

        # First observation or a genuine improvement: record and checkpoint.
        self.best_score = score
        torch.save(model.state_dict(), self.path)
        if not no_baseline:
            self.counter = 0


model = AnomalyTransformer(input_dim=7, d_model=64, n_heads=4, num_layers=2, lambd=0.01).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
stopper = EarlyStopper(patience=5, delta=1e-4)
num_epochs=30

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    pbar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}", leave=False)
    for x_batch, y_batch in pbar:
        xb = x_batch.to(device)
        yb = y_batch.to(device)
        rec_out, cls_logit, As, Bs = model(xb)
        loss = model.compute_loss(xb, rec_out, cls_logit, yb, As, Bs)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # Read the scalar once; every .item() call forces a host sync.
        batch_loss = loss.item()
        # Weight by batch size so the epoch average is per-sample even when
        # the final batch is smaller.
        total_loss += batch_loss * len(xb)
        pbar.set_postfix({"loss": f"{batch_loss:.4f}"})
    pbar.close()

    final_loss = total_loss/len(train_loader.dataset)
    print(f"[Epoch {epoch+1}/{num_epochs}] total_loss={final_loss:.5f}")

    # NOTE(review): no validation split is used here, so early stopping
    # monitors the training loss.
    stopper(final_loss, model)
    if stopper.early_stop:
        print("Early stopping triggered.")
        break


# Reload the best checkpoint from the path the stopper actually wrote to,
# mapped onto the active device.
model.load_state_dict(torch.load(stopper.path, map_location=device))
print("Training done.")


### Inference

In [None]:
def _node_columns(frame, nodes, n, available):
    """Build a ``(len(frame), n)`` matrix from the first ``n`` entries of ``nodes``.

    Nodes missing from ``available`` become zero columns, and the result is
    right-padded with zero columns up to width ``n``.
    """
    rows = len(frame)
    cols = [
        frame[node].values.reshape(rows, 1) if node in available else np.zeros((rows, 1))
        for node in nodes[:n]
    ]
    cols.extend(np.zeros((rows, 1)) for _ in range(n - len(cols)))
    return np.concatenate(cols, axis=1) if cols else np.zeros((rows, n))


def _flow_sum(frame, flow_cols):
    """Return the row-wise sum of ``flow_cols`` as a column vector (zeros if none)."""
    rows = len(frame)
    if len(flow_cols) > 0:
        # NOTE(review): unlike node columns, a flow column absent from the
        # frame raises KeyError here - confirm that is intended.
        return frame[flow_cols].sum(axis=1).values.reshape(rows, 1)
    return np.zeros((rows, 1))


def make_test_features_expanded_step(df_in, adjacency, p_node, n=2, window_size=24, step=24):
    """Slice ``df_in`` into fixed-size windows of features centred on ``p_node``.

    Each row's feature vector is laid out as
    ``[inflow sum, n pre-node columns, p_node, n post-node columns, outflow sum]``,
    i.e. ``2n + 3`` columns. The node and flow lists come from
    ``find_pre_post_nodes`` (defined elsewhere in the notebook).

    Args:
        df_in: DataFrame holding the node and flow columns.
        adjacency: Graph description forwarded to ``find_pre_post_nodes``.
        p_node: Column name of the node the windows are built for.
        n: Number of pre/post neighbour columns to include on each side.
        window_size: Number of rows per window.
        step: Row offset between consecutive window starts.

    Returns:
        ``(features, starts)``. ``features`` is an array of shape
        ``(num_windows, window_size, 2n + 3)``, or an empty 1-D array when no
        full window fits. ``starts`` is the list of starting row positions.
    """
    pre_nodes, post_nodes, in_q_list, out_q_list = find_pre_post_nodes(p_node, adjacency, n)

    ln = len(df_in)
    starts = range(0, ln - window_size + 1, step)
    if len(starts) == 0:
        return np.array([]), []

    df_cols = set(df_in.columns)
    if p_node in df_cols:
        p_val = df_in[p_node].values.reshape(ln, 1)
    else:
        p_val = np.zeros((ln, 1))

    # Build the full per-row feature matrix once; every window is then a
    # plain row slice instead of repeating the column lookups per window.
    full = np.concatenate(
        [
            _flow_sum(df_in, in_q_list),
            _node_columns(df_in, pre_nodes, n, df_cols),
            p_val,
            _node_columns(df_in, post_nodes, n, df_cols),
            _flow_sum(df_in, out_q_list),
        ],
        axis=1,
    )

    feats = [full[i:i + window_size] for i in starts]
    return np.array(feats), list(starts)


In [None]:
def collect_test_windows_batch(df_test, adjacency, p_nodes, n=2, window_size=24, step=24):
    """Gather the feature windows of every node in ``p_nodes`` into one batch.

    Args:
        df_test: DataFrame passed through to ``make_test_features_expanded_step``.
        adjacency: Graph description passed through unchanged.
        p_nodes: Iterable of node names to build windows for.
        n: Number of pre/post neighbour columns per side.
        window_size: Number of rows per window.
        step: Row offset between consecutive window starts.

    Returns:
        ``(X_batch, meta_list)``. ``X_batch`` stacks all windows along axis 0
        and ``meta_list[i]`` is the ``(p_node, start_index)`` pair that
        produced ``X_batch[i]``. Returns ``(None, None)`` when no node yields
        any window.
    """
    X_list = []
    meta_list = []

    for p_node in p_nodes:
        fx, idxs = make_test_features_expanded_step(
            df_test, adjacency, p_node,
            n=n, window_size=window_size, step=step
        )
        # fx.shape => (num_windows_for_p, window_size, 2n+3)

        if len(fx) == 0:
            continue

        # Record which node and start index each window row belongs to.
        X_list.append(fx)
        meta_list.extend((p_node, i_idx) for i_idx in idxs)

    if not X_list:
        return None, None

    # Concatenate everything in one go.
    X_batch = np.concatenate(X_list, axis=0)  # (N_total, window_size, 2n+3)
    return X_batch, meta_list

def infer_flags_entire_expanded_batch(
    df_test,
    adjacency,
    p_nodes,
    model,
    min_vals,
    max_vals,
    n=2,
    window_size=60,
    step=60,
    threshold=0.5,
    device=torch.device("cuda")
):
    """Min-max scale ``df_test``, run batched inference, and flag each node.

    Args:
        df_test: Raw test DataFrame.
        adjacency: Graph description forwarded to window collection.
        p_nodes: Node names to evaluate.
        model: Model handed to ``inference_batch`` (defined elsewhere).
        min_vals: Per-column minima, indexed by column name.
        max_vals: Per-column maxima, indexed by column name.
        n: Number of pre/post neighbour columns per side.
        window_size: Number of rows per window.
        step: Row offset between consecutive window starts.
        threshold: Decision threshold forwarded to ``inference_batch``.
        device: Device forwarded to ``inference_batch``.

    Returns:
        Dict mapping every node in ``p_nodes`` to 1 if any of its windows was
        flagged, else 0.
    """
    # Scaling: columns without fitted statistics are zeroed out.
    df_norm = df_test.copy()
    for c in df_norm.columns:
        if c in min_vals.index:
            span = max_vals[c] - min_vals[c]
            if span == 0:
                # A constant column would divide by zero and feed inf/NaN to
                # the model; zero it like a column without statistics.
                # NOTE(review): confirm training-time scaling treats constant
                # columns the same way.
                df_norm[c] = 0.0
            else:
                df_norm[c] = (df_norm[c] - min_vals[c]) / span
        else:
            df_norm[c] = 0.0

    X_batch, meta_list = collect_test_windows_batch(
        df_norm, adjacency, p_nodes, n=n, window_size=window_size, step=step
    )
    result_flags = {pn: 0 for pn in p_nodes}

    if X_batch is None:
        return result_flags

    # Batch inference: one flag per window, in the same order as meta_list.
    flags_bool = inference_batch(X_batch, model, threshold=threshold, device=device)
    for i, (pn, _) in enumerate(meta_list):
        if flags_bool[i]:
            result_flags[pn] = 1

    return result_flags


In [None]:
predicts = []
test_rows = list(test_info.itertuples(index=False))

for row in tqdm(test_rows, desc="Test Inference", leave=True):
    file_id = row.ID
    file_path = row.path
    df_t = pd.read_csv(file_path).sort_values("timestamp").reset_index(drop=True)

    # Files whose ID contains "TEST_C" use network C's topology; all other
    # files use network D's.
    if "TEST_C" in file_id:
        net_adjacency, net_nodes = adjacency_list_C, nodes_C
    else:
        net_adjacency, net_nodes = adjacency_list_D, nodes_D

    # Run all windows of the file through the model in a single batch.
    flg_dict = infer_flags_entire_expanded_batch(
        df_test=df_t,
        adjacency=net_adjacency,
        p_nodes=net_nodes,
        model=model,
        min_vals=min_vals,
        max_vals=max_vals,
        n=2,
        window_size=60,
        step=60,
        threshold=0.5,
        device=device
    )
    flags_ordered = [flg_dict[p] for p in net_nodes]
    predicts.append((file_id, flags_ordered))

# Align predictions with the submission order; IDs without a prediction get [0].
pdic = dict(predicts)
fl = [pdic[sid] if sid in pdic else [0] for sid in sample_submission['ID']]
sample_submission['flag_list'] = fl
sample_submission.to_csv("submission.csv", index=False)
