In [1]:
import pandas as pd
import numpy as np

df = pd.read_csv("sbc_counter_result.csv")

# 1. Loại bỏ cột không dùng
df = df.drop(columns=['id', 'counter_option'])

# 2. Chuyển trigger_time -> datetime
df['trigger_time'] = pd.to_datetime(df['trigger_time'], unit='s')

# 3. Pivot: mỗi time step là 1 dòng, cột là (node_id, counter_id)
pivot_df = df.pivot_table(
    index='trigger_time',          # trục thời gian
    columns=['node_id', 'counter_id'],
    values='counter_value'
).sort_index()

# Bỏ multiindex, tạo tên cột dạng "nodeX_counterY"
pivot_df.columns = [f"node{n}_c{c}" for (n, c) in pivot_df.columns]
pivot_df = pivot_df.dropna()   # đơn giản: bỏ missing, hoặc bạn có thể fillna
pivot_df.head()

Unnamed: 0_level_0,node1_c1,node1_c2,node1_c3,node1_c4,node1_c5,node1_c6,node1_c7,node1_c8,node1_c9,node1_c10,...,node6_c11,node6_c12,node6_c13,node6_c14,node6_c15,node6_c16,node6_c17,node6_c18,node6_c19,node6_c20
trigger_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-09-07 18:39:27,22695.0,20238.0,20681.0,19287.0,22703.0,18683.0,19338.0,19689.0,17477.0,18901.0,...,23816.0,18616.0,19922.0,20057.0,18213.0,18327.0,17304.0,19542.0,21080.0,22515.0
2025-09-07 18:44:27,21120.0,19911.0,21249.0,21861.0,18355.0,20104.0,22204.0,20710.0,19171.0,21094.0,...,20085.0,18478.0,21396.0,21944.0,20388.0,18512.0,20991.0,19283.0,18880.0,18020.0
2025-09-07 18:49:27,19024.0,20255.0,19361.0,21907.0,18896.0,19823.0,19579.0,18229.0,21886.0,19680.0,...,20821.0,19749.0,17640.0,22354.0,21744.0,16455.0,18865.0,19036.0,20220.0,17023.0
2025-09-07 18:54:27,19199.0,23258.0,20103.0,19764.0,18378.0,19695.0,18769.0,19945.0,20138.0,18877.0,...,17981.0,19731.0,18713.0,19907.0,21435.0,18740.0,21203.0,20388.0,21997.0,20412.0
2025-09-07 18:59:27,20564.0,22984.0,17187.0,19524.0,21141.0,20346.0,18875.0,20374.0,20394.0,17472.0,...,19279.0,19909.0,21264.0,20723.0,19466.0,23496.0,24569.0,19481.0,19262.0,20204.0


In [2]:
from sklearn.preprocessing import MinMaxScaler

all_features = pivot_df.columns.tolist()

# Ví dụ: tất cả cột có counter_id 1,2,3 là target
target_counter_ids = [1, 2, 3]

# Lọc tên cột target dựa trên pattern "nodeX_cY"
target_cols = [col for col in all_features 
               if any(col.endswith(f"_c{cid}") for cid in target_counter_ids)]

print("Số feature tổng:", len(all_features))
print("Số cột target (n_nodes * n_counter):", len(target_cols))

# Chuẩn hóa theo toàn bộ cột (model học quan hệ trên toàn bộ)
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(pivot_df.values)
data_scaled = pd.DataFrame(data_scaled, index=pivot_df.index, columns=all_features)


Số feature tổng: 120
Số cột target (n_nodes * n_counter): 18


In [3]:
from sklearn.preprocessing import MinMaxScaler

all_features = pivot_df.columns.tolist()

# Ví dụ: tất cả cột có counter_id 1,2,3 là target
target_counter_ids = [1, 2, 3]

# Lọc tên cột target dựa trên pattern "nodeX_cY"
target_cols = [col for col in all_features 
               if any(col.endswith(f"_c{cid}") for cid in target_counter_ids)]

print("Số feature tổng:", len(all_features))
print("Số cột target (n_nodes * n_counter):", len(target_cols))

# Chuẩn hóa theo toàn bộ cột (model học quan hệ trên toàn bộ)
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(pivot_df.values)
data_scaled = pd.DataFrame(data_scaled, index=pivot_df.index, columns=all_features)


Số feature tổng: 120
Số cột target (n_nodes * n_counter): 18


In [4]:
def create_sequences(data_scaled, target_cols, n_past=24, n_future=1):
    """
    data_scaled: DataFrame (T, n_features_total)
    target_cols: list tên cột target
    return:
        X: (N, n_past, n_features_total)
        Y: (N, n_outputs)  # flatten toàn bộ node & counter
        time_index: index thời gian của Y (lúc dự đoán)
    """
    X, Y, time_index = [], [], []
    feature_values = data_scaled.values
    target_idx = [data_scaled.columns.get_loc(c) for c in target_cols]
    
    for i in range(n_past, len(data_scaled) - n_future + 1):
        # input: [i-n_past, i)
        X.append(feature_values[i - n_past:i, :])   # (n_past, n_features_total)
        
        # output tại time i + n_future - 1
        y_step = feature_values[i + n_future - 1, target_idx]  # (n_outputs,)
        Y.append(y_step)
        
        time_index.append(data_scaled.index[i + n_future - 1])
    
    return np.array(X), np.array(Y), np.array(time_index)

n_past = 24   # ví dụ: 24 bước (5p → 2h), bạn thay theo ý
n_future = 1

X, Y, time_index = create_sequences(data_scaled, target_cols, n_past, n_future)

print("X shape:", X.shape)  # (N, n_past, n_features_total)
print("Y shape:", Y.shape)  # (N, n_nodes * n_counter)


X shape: (26185, 24, 120)
Y shape: (26185, 18)


In [5]:
import tensorflow as tf
from tensorflow.keras.layers import (
    Input, Dense, Embedding, LSTM, Dropout, Reshape,
    TimeDistributed, Concatenate, Lambda
)
from tensorflow.keras.models import Model
from tensorflow.keras.layers import MultiHeadAttention


def build_lstm_node_counter_model(
    n_past: int,
    n_nodes: int,
    n_counters_total: int,
    n_counters_target: int,
    n_nodes_total: int,           # tổng số node_id có thể có (để embedding)
    counter_latent_dim: int = 8,  # H: latent dim cho counter
    node_emb_dim: int = 16,       # E: embedding dim cho node
    lstm_units: int = 64,
    dropout_rate: float = 0.2
):
    """
    Trả về model:
      Input 1: counters_input  (batch, T=n_past, N=n_nodes, C=n_counters_total)
      Input 2: node_ids_input  (N=n_nodes,) int32   -- danh sách node_id cố định

      Output:  (batch, N, K=n_counters_target)
    """

    # ------------------------------------------------------
    # 1. Khai báo input
    # ------------------------------------------------------
    # Dữ liệu counter: (batch, T, N, C)
    counters_input = Input(
        shape=(n_past, n_nodes, n_counters_total),
        name="counters_input"
    )

    # node_ids cho N node, cố định theo batch & time
    # Ví dụ: [0,1,2,3,4,5] hoặc [101, 102, ...]
    node_ids_input = Input(
        shape=(n_nodes,),
        dtype="int32",
        name="node_ids_input"
    )

    # ------------------------------------------------------
    # 2. Counter encoder (giảm chiều C -> H)
    #    TimeDistributed(TimeDistributed(Dense))
    # ------------------------------------------------------
    # Mỗi node, mỗi time-step: Dense(C -> H)
    counter_encoder_dense = Dense(
        counter_latent_dim,
        activation="relu",
        name="counter_encoder_dense"
    )

    # Áp dụng theo trục node (N)
    per_node_encoded = TimeDistributed(
        counter_encoder_dense,
        name="counter_encoder_per_node"   # (batch, T, N, H)
    )

    # Áp dụng theo trục thời gian (T)
    encoded_counters = TimeDistributed(
        per_node_encoded,
        name="counter_encoder_per_time"
    )(counters_input)  # (batch, T, N, H)

    # ------------------------------------------------------
    # 3. Node embedding + broadcast ra (batch, T, N, E)
    # ------------------------------------------------------
    node_embedding_layer = Embedding(
        input_dim=n_nodes_total,
        output_dim=node_emb_dim,
        name="node_embedding"
    )
    # node_emb: (N, E)
    node_emb = node_embedding_layer(node_ids_input)

    # Broadcast node_emb theo batch & time
    def broadcast_node_emb(inputs):
        node_emb, x = inputs  # node_emb: (N,E), x: (B,T,N,C)
        B = tf.shape(x)[0]
        T = tf.shape(x)[1]
        N = tf.shape(x)[2]
        E = tf.shape(node_emb)[1]

        # (N,E) -> (1,1,N,E)
        node_emb_exp = tf.reshape(node_emb, (1, 1, N, E))
        # tile ra (B,T,N,E)
        node_emb_exp = tf.tile(node_emb_exp, [B, T, 1, 1])
        return node_emb_exp

    node_emb_broadcast = Lambda(
        broadcast_node_emb,
        name="broadcast_node_embedding"
    )([node_emb, counters_input])  # (B,T,N,E)

    # ------------------------------------------------------
    # 4. Ghép counter latent + node embedding  => (B,T,N,F)
    # ------------------------------------------------------
    node_feature = Concatenate(axis=-1, name="concat_counter_node")(
        [encoded_counters, node_emb_broadcast]
    )  # (B,T,N,H+E)
    feature_dim = counter_latent_dim + node_emb_dim  # F

    # ------------------------------------------------------
    # 5. Self-Attention theo node (trong cùng 1 time-step)
    #    - reshape (B,T,N,F) -> (B*T, N, F)
    #    - MultiHeadAttention
    #    - reshape lại (B,T,N,F)
    # ------------------------------------------------------
    node_mha = MultiHeadAttention(
        num_heads=1,       # đơn giản: 1 head, có thể tăng
        key_dim=feature_dim,
        name="node_mha"
    )

    def apply_node_attention(x):
        # x: (B,T,N,F)
        B = tf.shape(x)[0]
        T = tf.shape(x)[1]
        N = tf.shape(x)[2]
        F = tf.shape(x)[3]

        x_reshaped = tf.reshape(x, (B * T, N, F))  # (B*T, N, F)
        attn_out = node_mha(x_reshaped, x_reshaped)  # (B*T, N, F)
        out = tf.reshape(attn_out, (B, T, N, F))     # (B,T,N,F)
        return out

    attn_out = Lambda(apply_node_attention, name="node_attention")(node_feature)  # (B,T,N,F)

    # ------------------------------------------------------
    # 6. Pooling theo node: mean trên N
    #    (B,T,N,F) -> (B,T,F)
    # ------------------------------------------------------
    def pool_nodes_mean(x):
        # x: (B,T,N,F)
        B = tf.shape(x)[0]
        T = tf.shape(x)[1]
        N = tf.shape(x)[2]
        F = tf.shape(x)[3]
        x_reshaped = tf.reshape(x, (B * T, N, F))       # (B*T,N,F)
        pooled = tf.reduce_mean(x_reshaped, axis=1)     # (B*T,F)
        return tf.reshape(pooled, (B, T, F))            # (B,T,F)

    temporal_features = Lambda(
        pool_nodes_mean,
        name="node_mean_pool"
    )(attn_out)  # (B,T,F)

    # ------------------------------------------------------
    # 7. LSTM theo thời gian (B,T,F) -> (B, hidden)
    # ------------------------------------------------------
    lstm_out = LSTM(
        lstm_units,
        return_sequences=False,
        name="temporal_lstm"
    )(temporal_features)   # (B, lstm_units)

    lstm_out = Dropout(dropout_rate, name="dropout")(lstm_out)

    # ------------------------------------------------------
    # 8. Dense -> (B, N*K) rồi reshape về (B, N, K)
    # ------------------------------------------------------
    hidden = Dense(64, activation="relu", name="dense_hidden")(lstm_out)

    out_flat = Dense(
        n_nodes * n_counters_target,
        activation="linear",
        name="dense_output_flat"
    )(hidden)                         # (B, N*K)

    out = Reshape(
        (n_nodes, n_counters_target),
        name="output_reshape"
    )(out_flat)                       # (B, N, K)

    model = Model(
        inputs=[counters_input, node_ids_input],
        outputs=out,
        name="LSTM_NodeCounter_Model"
    )

    model.compile(optimizer="adam", loss="mse")
    return model




In [6]:
n_past = 24               # số time steps quá khứ
n_nodes = 6               # số node hiện có trong dữ liệu
n_counters_total = 20     # tổng số counter mỗi node
n_counters_target = 3     # số counter muốn dự đoán cho mỗi node
n_nodes_total = 100       # tổng số node_id có thể có trong hệ thống (embedding)

model = build_lstm_node_counter_model(
    n_past=n_past,
    n_nodes=n_nodes,
    n_counters_total=n_counters_total,
    n_counters_target=n_counters_target,
    n_nodes_total=n_nodes_total,
    counter_latent_dim=8,
    node_emb_dim=16,
    lstm_units=64
)

model.summary()




NotImplementedError: Exception encountered when calling Lambda.call().

[1mWe could not automatically infer the shape of the Lambda's output. Please specify the `output_shape` argument for this Lambda layer.[0m

Arguments received by Lambda.call():
  • args=(['<KerasTensor shape=(None, 6, 16), dtype=float32, sparse=False, ragged=False, name=keras_tensor_1>', '<KerasTensor shape=(None, 24, 6, 20), dtype=float32, sparse=False, ragged=False, name=counters_input>'],)
  • kwargs={'mask': ['None', 'None']}

In [None]:
Y_pred = model.predict([X_test, node_ids])  # (B_test, n_nodes, n_counters_target)