In [5]:
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler
# Min-max scale the numeric feature column(s) of `df`, overwriting them in place.
# NOTE(review): `min_max_scaler` is never used in this cell — the transform below goes
# through `scaler`. Confirm nothing later in the notebook needs it before deleting.
min_max_scaler = preprocessing.MinMaxScaler()

num_cols = ['meter_reading'] # "hour" is currently left out of the scaled columns
scaler = MinMaxScaler()
# NOTE(review): the scaler is fit on the whole frame, i.e. before the train/val/test
# split made further down, so the min/max of rows that end up in the test split (and of
# anomalous readings) shape the training features. Consider fitting on training data only.
df[num_cols] = scaler.fit_transform(df[num_cols])

In [6]:
import numpy as np

def sliding_windows_with_labels(df, window_size):
    """Cut `df` into overlapping stride-1 windows and label each window.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain an ``"anomaly"`` column; every other column is treated as a
        feature.
    window_size : int
        Number of consecutive rows per window. Must be at least 1.

    Returns
    -------
    X_windows : numpy.ndarray
        Shape ``(n_windows, window_size, n_features)`` where
        ``n_windows = max(len(df) - window_size + 1, 0)``.
    y_labels : numpy.ndarray
        Shape ``(n_windows,)``, float. ``1.0`` if any row inside the window has a
        truthy ``"anomaly"`` value, otherwise ``0.0``.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    labels = df["anomaly"].values
    data = df.drop(["anomaly"], axis=1).values

    n_windows = len(df) - window_size + 1
    if n_windows <= 0:
        # Fewer rows than one window: return empty arrays that still carry the
        # (window_size, n_features) trailing dimensions so that callers reading
        # X.shape[1] / X.shape[2] keep working.
        empty_X = np.empty((0, window_size, data.shape[1]), dtype=data.dtype)
        return empty_X, np.empty((0,), dtype=float)

    X_windows = []
    y_labels = []

    for start in range(n_windows):
        stop = start + window_size
        X_windows.append(data[start:stop])
        # A window counts as anomalous as soon as a single row in it is flagged.
        y_labels.append(float(any(labels[start:stop])))

    return np.array(X_windows), np.array(y_labels)

In [7]:
# Number of consecutive rows that make up one window.
window_size = 24
X_windows, y_labels = sliding_windows_with_labels(df=df, window_size=window_size)

# Expected layout of X_windows: (number of windows, window_size, number of features)
print("X_windows shape:", X_windows.shape)
# Expected layout of y_labels: (number of windows,)
print("y_labels shape:", y_labels.shape)

X_windows shape: (8735, 24, 1)
y_labels shape: (8735,)


In [8]:
from sklearn.utils import shuffle

# Shuffle the windows and their labels together, with a fixed seed for repeatability.
# NOTE(review): the windows are built with stride 1, so neighbouring windows share
# window_size - 1 rows. Shuffling before the positional split further down means
# near-identical windows can end up in different splits — confirm this is acceptable.
X_shuffled, y_shuffled = shuffle(X_windows, y_labels, random_state=42)

In [9]:
import numpy as np

# 1. Partition the shuffled windows by label (label == 0 first, label == 1 last).
normal_mask = y_shuffled == 0
anomaly_mask = y_shuffled == 1

X_normal = X_shuffled[normal_mask]
y_normal = y_shuffled[normal_mask]

X_anomaly = X_shuffled[anomaly_mask]
y_anomaly = y_shuffled[anomaly_mask]

# Re-assemble in the order normal -> anomalous, so that a positional split puts
# every anomalous window at the tail of the array.
X_sorted = np.concatenate([X_normal, X_anomaly], axis=0)
y_sorted = np.concatenate([y_normal, y_anomaly], axis=0)

# 2. Split 6:2:2 by position.
total = len(X_sorted)
n_train = int(total * 0.6)
n_val = int(total * 0.2)

# The train and validation slices are taken from the front of the sorted array, so
# they are anomaly-free only while the normal windows fill at least those two slices.
# Fail loudly instead of silently training/validating on anomalous windows.
if n_train + n_val > len(X_normal):
    raise ValueError(
        f"Only {len(X_normal)} normal windows available, but train+val need "
        f"{n_train + n_val}; train/val would contain anomalous windows."
    )

X_train = X_sorted[:n_train]
y_train = y_sorted[:n_train]

X_val = X_sorted[n_train:n_train + n_val]
y_val = y_sorted[n_train:n_train + n_val]

# Everything that is left: the remaining normal windows plus all anomalous ones.
X_test = X_sorted[n_train + n_val:]
y_test = y_sorted[n_train + n_val:]

In [10]:
# Shape of each split's window array.
for _caption, _windows in (
    ("X_train shape:", X_train),
    ("X_val shape:", X_val),
    ("X_test shape:", X_test),
):
    print(_caption, _windows.shape)

# Mean of the per-window labels in each split (labels are 0.0 / 1.0, so this is
# the share of anomalous windows).
for _caption, _targets in (
    ("y_train 이상치 비율:", y_train),
    ("y_val 이상치 비율:", y_val),
    ("y_test 이상치 비율:", y_test),
):
    print(_caption, _targets.mean())

# Report whether the validation split contains any anomalous window.
if y_val.sum() != 0:
    print("❌ val에 이상치가 있습니다.")
else:
    print("✅ val에는 이상치가 없습니다.")

X_train shape: (5241, 24, 1)
X_val shape: (1747, 24, 1)
X_test shape: (1747, 24, 1)
y_train 이상치 비율: 0.0
y_val 이상치 비율: 0.0
y_test 이상치 비율: 0.6977676016027475
✅ val에는 이상치가 없습니다.


In [15]:
# Expose the splits under the names used by the loader and training cells below.
# Train / validation windows (normal windows).
windows_normal_train, windows_normal_val = X_train, X_val
# Test windows (normal + anomalous) and their per-window anomaly flags.
windows_attack, labels = X_test, y_test

In [16]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.utils.data as data_utils
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import f1_score
from sklearn.model_selection import train_test_split
import random
from sklearn.metrics import f1_score, recall_score, precision_score


from utils_f1 import *
from usad_lead_f1 import *


# Seed Python's, NumPy's and PyTorch's (CPU and CUDA) random number generators
# with the same value, in that order.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed):
    _seed_fn(42)

# cuDNN: request deterministic algorithms and switch off the benchmark autotuner.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [24]:
import torch.utils.data as data_utils

BATCH_SIZE = 64
N_EPOCHS = 1000
hidden_size = 0.5  # multiplier applied to the window length to get z_size

# w_size: length of one flattened window (axis 1 * axis 2 of the window array).
w_size = windows_normal_train.shape[1] * windows_normal_train.shape[2]
# z_size: window length (axis 1) scaled by hidden_size, truncated to an int.
z_size = int(windows_normal_train.shape[1] * hidden_size)


def _flat_window_loader(windows):
    """Return an unshuffled DataLoader over `windows` flattened to (n_windows, w_size) float tensors."""
    flat = torch.from_numpy(windows).float().view([windows.shape[0], w_size])
    return torch.utils.data.DataLoader(
        data_utils.TensorDataset(flat),
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=0,
    )


train_loader = _flat_window_loader(windows_normal_train)
val_loader = _flat_window_loader(windows_normal_val)
test_loader = _flat_window_loader(windows_attack)

# `UsadModel`, `to_device` and `device` are not defined in this notebook; they are
# expected to come from the star imports above.
model = to_device(UsadModel(w_size, z_size), device)

In [25]:
# Bare expression: shows the model's repr (its layer structure) as the cell output.
model

UsadModel(
  (encoder): Encoder(
    (linear1): Linear(in_features=24, out_features=20, bias=True)
    (linear2): Linear(in_features=20, out_features=17, bias=True)
    (linear3): Linear(in_features=17, out_features=12, bias=True)
    (relu): ReLU(inplace=True)
  )
  (decoder1): Decoder(
    (linear1): Linear(in_features=12, out_features=17, bias=True)
    (linear2): Linear(in_features=17, out_features=20, bias=True)
    (linear3): Linear(in_features=20, out_features=24, bias=True)
    (relu): ReLU(inplace=True)
    (sigmoid): Sigmoid()
  )
  (decoder2): Decoder(
    (linear1): Linear(in_features=12, out_features=17, bias=True)
    (linear2): Linear(in_features=17, out_features=20, bias=True)
    (linear3): Linear(in_features=20, out_features=24, bias=True)
    (relu): ReLU(inplace=True)
    (sigmoid): Sigmoid()
  )
)

In [None]:
# Run training for N_EPOCHS and keep whatever `training` returns in `history`.
# `training` is not defined in this notebook — presumably it comes from the star
# imports (utils_f1 / usad_lead_f1); verify.
# NOTE(review): the test loader and test labels are passed in, and the log below
# reports ROC AUC / F1 / precision / recall after every epoch. Confirm inside
# `training` that these test-set metrics are not used to pick an epoch or checkpoint.
history = training(N_EPOCHS, model, train_loader, val_loader, test_loader, labels, window_size)

Epoch [0], val_loss1: 0.0514, val_loss2: 0.0529, ROC AUC: 0.8094, F1: 0.8097, Precision: 0.8767, Recall: 0.7523
Epoch [1], val_loss1: 0.0372, val_loss2: -0.0019, ROC AUC: 0.7665, F1: 0.7518, Precision: 0.8355, Recall: 0.6833
Epoch [2], val_loss1: 0.0420, val_loss2: -0.0169, ROC AUC: 0.6591, F1: 0.6620, Precision: 0.7653, Recall: 0.5833
Epoch [3], val_loss1: 0.0406, val_loss2: -0.0227, ROC AUC: 0.6959, F1: 0.7100, Precision: 0.8025, Recall: 0.6366
Epoch [4], val_loss1: 0.0542, val_loss2: -0.0378, ROC AUC: 0.2739, F1: 0.4156, Precision: 0.5415, Recall: 0.3372
Epoch [5], val_loss1: 0.0469, val_loss2: -0.0335, ROC AUC: 0.8228, F1: 0.8315, Precision: 0.8912, Recall: 0.7793
Epoch [6], val_loss1: 0.0468, val_loss2: -0.0357, ROC AUC: 0.8333, F1: 0.8434, Precision: 0.9004, Recall: 0.7933
Epoch [7], val_loss1: 0.0468, val_loss2: -0.0375, ROC AUC: 0.8362, F1: 0.8449, Precision: 0.9006, Recall: 0.7957
Epoch [8], val_loss1: 0.0471, val_loss2: -0.0391, ROC AUC: 0.8348, F1: 0.8436, Precision: 0.8996,