## 상수도 이상 수압계 감지 - 모델기반
총 3개의 코드로 분할되어 있어 병합 필수!!!

### module import

In [None]:
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from tqdm.auto import tqdm
import math
from collections import deque

In [None]:
# Select the CUDA device when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Bare expression: shows the selected device as the notebook cell output.
device

### data & pipe network load

In [None]:
# Load both training sets ordered by timestamp, plus the test index and the
# submission template.
train_a = pd.read_csv("./train/TRAIN_A.csv")
train_a = train_a.sort_values("timestamp").reset_index(drop=True)
train_b = pd.read_csv("./train/TRAIN_B.csv")
train_b = train_b.sort_values("timestamp").reset_index(drop=True)
test_info = pd.read_csv("./test.csv")
sample_submission = pd.read_csv("./sample_submission.csv")

# Sensor columns are everything except the timestamp, the anomaly label and
# the per-node "*_flag" label columns.
exclude_cols = ["timestamp","anomaly"]
use_cols_a = [
    col for col in train_a.columns
    if not (col in exclude_cols or col.endswith("_flag"))
]
use_cols_b = [
    col for col in train_b.columns
    if not (col in exclude_cols or col.endswith("_flag"))
]
intersect_cols = list(set(use_cols_a) & set(use_cols_b))

# Scaling statistics are computed over the columns shared by both files,
# pooled across A and B.
df_all = pd.concat(
    [train_a[intersect_cols], train_b[intersect_cols]],
    axis=0,
    ignore_index=True,
)
min_vals = df_all.min()
max_vals = df_all.max() + 1e-8  # epsilon keeps the denominator non-zero

train_a_scaled = train_a.copy()
train_b_scaled = train_b.copy()

# Min-max scaling, column by column, applied to both frames.
# NOTE(review): only the shared columns are scaled; columns that exist in
# just one of the two files stay in raw units -- confirm this is intended.
for c in intersect_cols:
    for scaled_frame in (train_a_scaled, train_b_scaled):
        scaled_frame[c] = (scaled_frame[c] - min_vals[c]) / (max_vals[c] - min_vals[c])

print("Train A scaled shape:", train_a_scaled.shape)
print("Train B scaled shape:", train_b_scaled.shape)

In [None]:
# Pipe network structure (network A).
# Each key is a P node; its value is a 3-element list consumed elsewhere in
# this file as:
#   [0] neighbour P nodes followed by bfs_n during traversal,
#   [1] Q columns summed as the node's inflow feature,
#   [2] Q columns summed as the node's outflow feature.
# Presumably P = pressure gauge and Q = flow meter -- confirm against the
# dataset description.
adjacency_list_A = {
    'P1' :  [['P5','P6'], [], ['Q1']],
    'P2' :  [['P4'], [], ['Q3']],
    'P3' :  [['P5','P6'], ['Q2'], ['Q5']],
    'P4' :  [['P5','P6'], ['Q3'], ['Q5']],
    'P5' :  [['P8','P9'], ['Q1','Q2','Q3'], ['Q5']],
    'P6' :  [['P8','P9'], ['Q1','Q2','Q3'], ['Q5']],
    'P7' :  [['P9'], ['Q4'], ['Q5']],
    'P8' :  [['P10','P12','P19','P26'], ['Q1','Q2','Q3'], ['Q5']],
    'P9' :  [['P10','P12','P19','P26'], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P10':  [['P11'], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P11':  [['P14'], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P12':  [['P13'], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P13':  [['P18','P14'], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P14':  [['P13','P15'], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P15':  [['P16'], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P16':  [['P17','P19'], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P17':  [[], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P18':  [['P19','P20'], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P19':  [['P18','P22','P26'], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P20':  [['P21'], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P21':  [['P24'], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P22':  [['P23'], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P23':  [['P25'], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P24':  [[], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P25':  [[], ['Q1','Q2','Q3','Q4'], ['Q5']],
    'P26':  [[], ['Q1','Q2','Q3','Q4'], ['Q5']]
}
# Node order follows the dict's insertion order.
nodes_A = list(adjacency_list_A.keys())

# Pipe network structure (network B).
# Value layout per P node: [neighbour P nodes, inflow Q columns, outflow Q columns].
adjacency_list_B = {
    'P1': [['P2'], [], ['Q1']],
    'P2': [['P3'], ['Q1'], ['Q2','Q3','Q4']],
    'P3': [['P4','P5'], ['Q1'], ['Q2','Q3','Q4']],
    'P4': [[], ['Q1'], ['Q2']],
    'P5': [['P6'], ['Q1'], ['Q3','Q4']],
    'P6': [['P7','P8'], ['Q1'], ['Q3','Q4']],
    'P7': [[], ['Q1'], ['Q3']],
    'P8': [['P9'], ['Q1'], ['Q4']],
    'P9': [['P10'], ['Q1'], ['Q4']],
    'P10': [[], ['Q1'], ['Q4']]
}
# Node order follows the dict's insertion order.
nodes_B = list(adjacency_list_B.keys())

# Pipe network structure (network C).
# Value layout per P node: [neighbour P nodes, inflow Q columns, outflow Q columns].
# NOTE(review): P6 and P7 have no neighbours and are not listed as a neighbour
# of any other node, so they are isolated in the graph -- confirm this matches
# the real network.
adjacency_list_C = {
    'P1':  [['P3'], ['Q1'], ['Q2','Q3','Q4','Q5','Q8']],
    'P2':  [['P4'], ['Q1'], ['Q2','Q3','Q4','Q5','Q8']],
    'P3':  [['P5','P8'], ['Q1'], ['Q2','Q3','Q4','Q5','Q8']],
    'P4':  [['P5','P8'], ['Q1'], ['Q2','Q3','Q4','Q5','Q8']],
    'P5':  [[], ['Q1'], ['Q2','Q3','Q4','Q5']],
    'P6':  [[], ['Q1'], ['Q2','Q3','Q4','Q5']],
    'P7':  [[], ['Q7'], []],
    'P8':  [[], ['Q8'], []]
}
# Node order follows the dict's insertion order.
nodes_C = list(adjacency_list_C.keys())

# Pipe network structure (network D).
# Value layout per P node: [neighbour P nodes, inflow Q columns, outflow Q columns].
# NOTE(review): P5 has no neighbours and is not listed as a neighbour of any
# other node -- confirm this matches the real network.
adjacency_list_D = {
    'P1':  [['P3'], ['Q1'], ['Q2','Q3','Q4','Q5']],
    'P2':  [['P3'], ['Q1'], ['Q2','Q3','Q4','Q5']],
    'P3':  [['P4','P6'], ['Q1'], ['Q3','Q5']],
    'P4':  [[], ['Q3'], []],
    'P5':  [[], ['Q1'], ['Q4']],
    'P6':  [[], ['Q1'], ['Q5']]
}
# Node order follows the dict's insertion order.
nodes_D = list(adjacency_list_D.keys())

### Dataset Define

In [None]:
from collections import deque

def bfs_n(start_p, adjacency, n=2):
    """Return the nodes reachable from ``start_p`` within ``n`` hops.

    The traversal is breadth-first and follows ``adjacency[node][0]`` as the
    neighbour list of each node.

    Args:
        start_p: Name of the node to start from. It is never part of the result.
        adjacency: Mapping ``node -> [neighbour_list, ...]``; only index 0 of
            each value is read.
        n: Maximum hop distance. Nodes at depth ``n`` are reported but not
            expanded further.

    Returns:
        List of node names in discovery (BFS) order, each at most once.

    Raises:
        KeyError: If a visited node that must be expanded is not a key of
            ``adjacency``.
    """
    # No progress bar here: the graphs are tiny and this helper is called
    # repeatedly per node, so a bar only adds overhead and output noise.
    visited = {start_p}
    queue = deque([(start_p, 0)])
    reached = []

    while queue:
        cur, depth = queue.popleft()
        if depth == n:
            # Hop limit reached: keep the node but do not expand it.
            continue
        for nxt in adjacency[cur][0]:
            if nxt in visited:
                continue
            visited.add(nxt)
            reached.append(nxt)
            queue.append((nxt, depth + 1))
    return reached


In [None]:
def find_pre_post_nodes(pnode, adjacency, n=2):
    """Collect the predecessor / successor nodes and Q lists of ``pnode``.

    Successors ("post") are found by following each node's neighbour list
    (index 0 of its adjacency entry) for up to ``n`` hops. Predecessors
    ("pre") are found by walking the same edges backwards, i.e. nodes whose
    neighbour list leads to ``pnode`` within ``n`` hops.

    Previously both lists were produced by the same forward traversal, so the
    "pre" nodes were an exact copy of the "post" nodes.

    Args:
        pnode: Name of the node of interest; must be a key of ``adjacency``.
        adjacency: Mapping ``node -> [neighbours, in_q_list, out_q_list]``.
        n: Maximum hop distance for both traversals.

    Returns:
        Tuple ``(pre_nodes, post_nodes, in_q_list, out_q_list)``. The Q lists
        default to ``[]`` when the adjacency entry is too short to hold them.
    """
    # Build the reversed edge map in the same ``node -> [neighbour_list]``
    # shape that bfs_n expects, so one BFS routine serves both directions.
    reverse_adjacency = {node: [[]] for node in adjacency}
    for node, entry in adjacency.items():
        for nxt in (entry[0] if entry else []):
            reverse_adjacency.setdefault(nxt, [[]])[0].append(node)

    pre_nodes = bfs_n(pnode, reverse_adjacency, n)
    post_nodes = bfs_n(pnode, adjacency, n)

    entry = adjacency[pnode]
    in_q_list = entry[1] if len(entry) > 1 else []
    out_q_list = entry[2] if len(entry) > 2 else []
    return pre_nodes, post_nodes, in_q_list, out_q_list

def _node_columns(df, node_names, n, df_cols):
    """Return a (len(df), n) array of the first n nodes' columns, zero-padded."""
    length = len(df)
    columns = [
        df[name].values.reshape(length, 1) if name in df_cols else np.zeros((length, 1))
        for name in node_names[:n]
    ]
    # Pad with zero columns so the block always has exactly n columns.
    columns.extend(np.zeros((length, 1)) for _ in range(n - len(columns)))
    if not columns:
        return np.zeros((length, n))
    return np.concatenate(columns, axis=1)


def window_features_expanded(
    df,
    adjacency,
    pnode,
    n=2,
    window_size=60,
    step=60,
    flag_prefix="P"
):
    """Build sliding-window features and labels for a single P node.

    Each window row holds ``2 * n + 3`` values in this order: summed inflow Q
    columns, ``n`` pre-node columns, the node's own column, ``n`` post-node
    columns, summed outflow Q columns. P columns that are missing from ``df``
    (or neighbour slots beyond the available nodes) are filled with zeros.

    The full-length feature matrix is assembled once and then sliced per
    window, instead of re-selecting DataFrame columns for every window.

    Args:
        df: Source DataFrame; rows are consumed positionally.
        adjacency: Network mapping passed on to ``find_pre_post_nodes``.
        pnode: Name of the node to build features for.
        n: Hop distance for the neighbour search and number of pre/post
            feature columns.
        window_size: Number of rows per window.
        step: Row offset between consecutive window starts.
        flag_prefix: Unused; kept for backward compatibility. The label
            column is always ``f"{pnode}_flag"``.

    Returns:
        Tuple ``(X_arr, y_arr)``. ``X_arr`` has shape
        ``(num_windows, window_size, 2 * n + 3)`` and ``y_arr`` has shape
        ``(num_windows,)``, holding the maximum of the flag column within each
        window (0 when the flag column is absent). Both are empty 1-D arrays
        when ``df`` is shorter than one window.

    Raises:
        KeyError: If an inflow/outflow Q column named in ``adjacency`` is not
            present in ``df``.
    """
    pre_nodes, post_nodes, in_q_list, out_q_list = find_pre_post_nodes(pnode, adjacency, n)
    pnode_flag = f"{pnode}_flag"

    ln = len(df)
    df_cols = set(df.columns)

    starts = range(0, ln - window_size + 1, step)
    if len(starts) == 0:
        # Not enough rows for a single window: same result as an empty loop.
        return np.array([]), np.array([])

    # Inflow sum
    if len(in_q_list) > 0:
        q_in_sum = df[in_q_list].sum(axis=1).values.reshape(ln, 1)
    else:
        q_in_sum = np.zeros((ln, 1))

    # Pre nodes (n columns)
    pre_cat = _node_columns(df, pre_nodes, n, df_cols)

    # The node itself
    if pnode in df_cols:
        pnode_val = df[pnode].values.reshape(ln, 1)
    else:
        pnode_val = np.zeros((ln, 1))

    # Post nodes (n columns)
    post_cat = _node_columns(df, post_nodes, n, df_cols)

    # Outflow sum
    if len(out_q_list) > 0:
        q_out_sum = df[out_q_list].sum(axis=1).values.reshape(ln, 1)
    else:
        q_out_sum = np.zeros((ln, 1))

    feat_all = np.concatenate([q_in_sum, pre_cat, pnode_val, post_cat, q_out_sum], axis=1)

    # Labels: a window is positive when any row in it is flagged.
    if pnode_flag in df_cols:
        flags_all = df[pnode_flag].values
    else:
        flags_all = np.zeros(ln)

    X_list = []
    y_list = []
    pbar = tqdm(starts, desc=f"window_features({pnode})", leave=False)
    for start_idx in pbar:
        stop_idx = start_idx + window_size
        X_list.append(feat_all[start_idx:stop_idx])
        y_list.append(flags_all[start_idx:stop_idx].max())
    pbar.close()

    X_arr = np.array(X_list)
    y_arr = np.array(y_list)
    return X_arr, y_arr

def window_features_expanded_allP(
    df,
    adjacency,
    pnodes=None,
    n=2,
    window_size=60,
    step=60,
    flag_prefix="P"
):
    """Build windowed features for several P nodes and stack them.

    Runs ``window_features_expanded`` once per node and concatenates the
    per-node results along the first axis, in node order.

    Args:
        df: Source DataFrame shared by all nodes.
        adjacency: Network mapping; its keys are used when ``pnodes`` is None.
        pnodes: Optional iterable of node names to process.
        n, window_size, step, flag_prefix: Forwarded unchanged to
            ``window_features_expanded``.

    Returns:
        Tuple ``(X_all, y_all)`` of concatenated arrays, or two empty arrays
        when there are no nodes to process.
    """
    targets = list(adjacency.keys()) if pnodes is None else pnodes

    per_node = []
    progress = tqdm(targets, desc="All P nodes", leave=True)
    for node_name in progress:
        per_node.append(
            window_features_expanded(
                df=df,
                adjacency=adjacency,
                pnode=node_name,
                n=n,
                window_size=window_size,
                step=step,
                flag_prefix=flag_prefix,
            )
        )
    progress.close()

    if not per_node:
        return np.array([]), np.array([])

    X_all = np.concatenate([features for features, _ in per_node], axis=0)
    y_all = np.concatenate([labels for _, labels in per_node], axis=0)
    return X_all, y_all


In [None]:
# Windowing hyper-parameters: neighbour hop distance, rows per window, and
# stride between window starts (equal to window_size, so windows do not overlap).
n_distance = 2
window_size = 60
step = 60

# Windowed features for every P node of network A.
Xa_all, ya_all = window_features_expanded_allP(
    df=train_a_scaled,
    adjacency=adjacency_list_A,
    n=n_distance,
    window_size=window_size,
    step=step
)
print("Xa_all.shape =", Xa_all.shape, "ya_all.shape =", ya_all.shape)

# Windowed features for every P node of network B.
Xb_all, yb_all = window_features_expanded_allP(
    df=train_b_scaled,
    adjacency=adjacency_list_B,
    n=n_distance,
    window_size=window_size,
    step=step
)
print("Xb_all.shape =", Xb_all.shape, "yb_all.shape =", yb_all.shape)

# Stack A and B. When only one side produced windows, use that side; the old
# fallback always picked A, which silently discarded B when A was empty.
# X and y are chosen together so they can never come from different sides.
if Xa_all.size > 0 and Xb_all.size > 0:
    X_train = np.concatenate([Xa_all, Xb_all], axis=0)
    y_train = np.concatenate([ya_all, yb_all], axis=0)
elif Xb_all.size > 0:
    X_train, y_train = Xb_all, yb_all
else:
    X_train, y_train = Xa_all, ya_all
print("X_train.shape =", X_train.shape)
print("y_train.shape =", y_train.shape)

class WaterDataset(Dataset):
    """Index-addressable dataset pairing feature windows with their labels.

    Items are converted to ``float32`` tensors lazily, one sample at a time,
    when they are requested.
    """

    def __init__(self, X, y):
        """Store the feature container ``X`` and the label container ``y``."""
        self.X, self.y = X, y

    def __len__(self):
        """Return the number of samples, taken from ``X``."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(features, label)`` for sample ``idx`` as float32 tensors."""
        features, label = self.X[idx], self.y[idx]
        return (
            torch.tensor(features, dtype=torch.float32),
            torch.tensor(label, dtype=torch.float32),
        )

# Wrap the windowed training arrays and iterate them in shuffled mini-batches of 32.
train_dataset = WaterDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
