In [None]:
import pandas as pd
import numpy as np
from glob import glob
import torch
from sklearn.cluster import KMeans
import os
from tqdm import tqdm
import ast

# File path configuration
# Directory that process_test_files() scans for '*.csv' test files.
test_path = './processed_test/'
# Checkpoint read with torch.load(); the 'kmeans' and 'cluster_stats' entries are used.
model_path = './new_model_save/time_series_cluster_models.pth'
# CSV read into errors_df; its 'ID' and 'error_list' columns are consumed downstream.
error_path = './result/reconstruction_errors.csv'
# Destination of the final submission CSV.
output_path = './cluster_submission_2.csv'

In [None]:
# Model class definition
class TimeSeriesSensorAutoencoder(torch.nn.Module):
    """LSTM autoencoder that compresses a sensor window into a latent vector.

    The encoder is a 2-layer bidirectional LSTM followed by a Linear + ReLU
    projection to ``latent_dim``. The decoder projects the latent vector back
    to ``hidden_dim``, repeats it once per input time step, runs it through a
    second 2-layer bidirectional LSTM and maps the final time step back to
    ``input_dim``.

    Args:
        input_dim: number of features per time step.
        latent_dim: size of the latent vector.
        hidden_dim: hidden size of each LSTM direction.
    """

    def __init__(self, input_dim, latent_dim=32, hidden_dim=256):
        super().__init__()
        nn = torch.nn

        # Both LSTMs share every hyper-parameter except their input width.
        lstm_options = dict(
            hidden_size=hidden_dim,
            num_layers=2,
            batch_first=True,
            dropout=0.3,
            bidirectional=True,
        )
        # A bidirectional LSTM concatenates both directions on the feature axis.
        bidirectional_width = hidden_dim * 2

        # Submodules are created in a fixed order so that seeded parameter
        # initialisation stays reproducible.
        self.encoder_lstm = nn.LSTM(input_size=input_dim, **lstm_options)
        self.encoder_fc = nn.Sequential(
            nn.Linear(bidirectional_width, latent_dim),
            nn.ReLU(),
        )
        self.decoder_fc = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
        )
        self.decoder_lstm = nn.LSTM(input_size=hidden_dim, **lstm_options)
        self.output_layer = nn.Linear(bidirectional_width, input_dim)

    def forward(self, x):
        """Encode ``x`` and reconstruct a single step from the latent vector.

        Args:
            x: batch-first tensor of shape (batch, seq_len, input_dim).

        Returns:
            Tuple ``(z, reconstruction)``: ``z`` has shape (batch, latent_dim)
            and ``reconstruction`` has shape (batch, input_dim); it is computed
            from the last time step of the decoder LSTM output only.
        """
        encoded_seq, _ = self.encoder_lstm(x)
        # Only the last time step of the encoder output feeds the latent projection.
        z = self.encoder_fc(encoded_seq[:, -1, :])

        # Tile the projected latent vector along the time axis (one copy per input step).
        seq_len = x.size(1)
        decoder_input = self.decoder_fc(z).unsqueeze(1).repeat(1, seq_len, 1)

        decoded_seq, _ = self.decoder_lstm(decoder_input)
        reconstruction = self.output_layer(decoded_seq[:, -1, :])
        return z, reconstruction

# Data processing functions
def get_cluster_for_p_sensor(df, p_sensor, kmeans):
    """Assign one sensor column to a cluster based on its mean value.

    Args:
        df: DataFrame containing the column ``p_sensor``.
        p_sensor: name of the column to summarise.
        kmeans: object exposing ``predict``; it is called with a single
            one-feature sample ``[[mean]]``.

    Returns:
        The first element of ``kmeans.predict``'s result.
    """
    sensor_values = df[p_sensor].values
    # The column mean is the only feature handed to the clusterer.
    feature_row = [np.mean(sensor_values)]
    predicted = kmeans.predict([feature_row])
    return predicted[0]

def process_test_files(kmeans):
    """Compute the cluster label of every P-sensor column in each test CSV.

    Every ``*.csv`` file under the module-level ``test_path`` is read in sorted
    order. Columns whose name starts with 'P' and does not end with '_flag'
    are treated as sensors and clustered via ``get_cluster_for_p_sensor``.

    Args:
        kmeans: clusterer forwarded to ``get_cluster_for_p_sensor``.

    Returns:
        Dict mapping the file's base name (text before the first '.') to the
        list of cluster labels, in column order.
    """
    csv_pattern = os.path.join(test_path, '*.csv')
    cluster_info = {}

    for file_path in tqdm(sorted(glob(csv_pattern)), desc="Processing files"):
        df = pd.read_csv(file_path)
        file_name = os.path.basename(file_path).split('.')[0]

        # Collect sensor columns first, skipping anything that is not a P
        # column as well as the companion '*_flag' columns.
        p_sensors = []
        for col in df.columns:
            if not col.startswith('P') or col.endswith('_flag'):
                continue
            p_sensors.append(col)

        cluster_info[file_name] = [
            get_cluster_for_p_sensor(df, sensor_name, kmeans)
            for sensor_name in p_sensors
        ]

    return cluster_info

def create_submission_with_threshold(errors_df, cluster_info, thresholds, prefix_len=6):
    """Turn per-sensor reconstruction errors into 0/1 anomaly flags.

    An entry is flagged (1) only when its cluster has a threshold configured
    for the file's ID prefix and its error is strictly greater than that
    threshold; everything else is 0.

    Args:
        errors_df: DataFrame with an 'ID' column and an 'error_list' column
            whose cells are string literals of a list (parsed with
            ``ast.literal_eval``).
        cluster_info: mapping from file ID to a sequence of cluster labels,
            paired positionally with that file's errors.
        thresholds: mapping from an ID prefix to a ``{cluster: threshold}``
            dict. Prefixes or clusters without an entry never raise a flag.
        prefix_len: number of leading ID characters used as the key into
            ``thresholds`` (default 6, matching keys such as 'TEST_C').

    Returns:
        DataFrame with columns 'ID' and 'flag_list', where 'flag_list' is the
        ``str()`` of the list of integer flags. The columns are present even
        when ``errors_df`` has no rows.

    Raises:
        KeyError: if an ID from ``errors_df`` is missing from ``cluster_info``.
        ValueError: if a file's error list and cluster list differ in length.
    """
    results = []
    for file_id, raw_errors in zip(errors_df['ID'], errors_df['error_list']):
        errors = ast.literal_eval(raw_errors)
        clusters = cluster_info[file_id]

        # zip() would silently drop trailing entries on a mismatch and yield a
        # short or misaligned flag list, so fail loudly instead.
        if len(errors) != len(clusters):
            raise ValueError(
                f"Length mismatch for {file_id}: "
                f"{len(errors)} errors vs {len(clusters)} clusters"
            )

        # Thresholds configured for this file's group (empty if none).
        active_clusters = thresholds.get(file_id[:prefix_len], {})

        # A cluster without a threshold compares against +inf and is never flagged.
        anomaly_flags = [
            1 if error > active_clusters.get(cluster, float('inf')) else 0
            for error, cluster in zip(errors, clusters)
        ]
        results.append({'ID': file_id, 'flag_list': str(anomaly_flags)})

    # Explicit columns keep the CSV header intact even with zero rows.
    return pd.DataFrame(results, columns=['ID', 'flag_list'])


In [None]:
# Main
# The checkpoint is indexed like a dict and holds non-tensor Python objects
# (the 'kmeans' entry is used as a clusterer with .predict(); presumably a fitted
# scikit-learn KMeans). Such objects cannot be restored with weights_only=True,
# which is the default from PyTorch 2.6 on, so it is disabled explicitly.
# WARNING: this unpickles arbitrary objects and can execute code -- only load
# checkpoints from a trusted source.
# map_location='cpu' lets the file load on machines without the device it was saved from.
checkpoint = torch.load(model_path, map_location='cpu', weights_only=False)
kmeans = checkpoint['kmeans']
cluster_stats = checkpoint['cluster_stats']
errors_df = pd.read_csv(error_path)

cluster_info = process_test_files(kmeans)

# User-defined thresholds: ID prefix -> {cluster label: error threshold}
thresholds = {
    'TEST_C': {3: 7.25, 5: 10},  # per-cluster thresholds for TEST_C files
    'TEST_D': {2: 6, 4: 15, 5: 3.75}   # per-cluster thresholds for TEST_D files
}

# Build and write the final submission file
final_submission = create_submission_with_threshold(errors_df, cluster_info, thresholds)
final_submission.to_csv(output_path, index=False)
print(f"Results saved to {output_path}")