# In [5]:
import pandas as pd
import numpy as np
import random

# ----------------------------
#  Basic settings
# ----------------------------
# Number of rows (samples) to simulate.
row_count = 150

# One timestamp per row, spaced 5 minutes apart
# (can be changed to follow the pattern of an existing DataFrame).
timestamps = pd.date_range(
    start="2025-05-16 08:00:00",
    periods=row_count,
    freq="5min",
)

# Every simulated row carries the same module identifier.
module_id = ["moduleE" for _ in range(row_count)]

# ----------------------------
# Helper that generates a value within a normal range
# ----------------------------
def normal_value(low, high):
    """Return one uniform random draw between ``low`` and ``high``, rounded to 3 decimals.

    The draw comes from ``np.random.uniform``, so it follows NumPy's global
    random state.
    """
    draw = np.random.uniform(low, high)
    return round(draw, 3)

# ----------------------------
# Build the baseline (normal) data
# ----------------------------
# Each measurement column holds `row_count` independent draws from
# normal_value(low, high); latency is an integer drawn with random.randint.
df_e = pd.DataFrame({
    "timestamp": timestamps,
    "module_id": module_id,
    "cell_voltage_v": [normal_value(3.4, 4.1) for _ in range(row_count)],
    "cell_temperature_c": [normal_value(22, 29) for _ in range(row_count)],
    "module_current_a": [normal_value(-14, 14) for _ in range(row_count)],
    "module_power_kw": [normal_value(0.95, 1.45) for _ in range(row_count)],
    "converter_command_pct": [normal_value(35, 95) for _ in range(row_count)],
    "soc_pct": [normal_value(35, 80) for _ in range(row_count)],
    "soh_pct": [normal_value(80, 100) for _ in range(row_count)],
    # Seeded as float (0.0, not 0): fractional scores are written into this
    # column later, and an int64 column would need a dtype upcast on that
    # assignment (pandas warns about / rejects such implicit upcasts).
    "anomaly_score_pct": [0.0] * row_count,
    "diagnostic_flag": [0] * row_count,
    "latency_ms": [random.randint(15, 200) for _ in range(row_count)],
})

# ----------------------------
# Randomly pick the rows where anomalies are injected
# ----------------------------
def _write_window(frame, column, start, length, make_value):
    """Overwrite up to ``length`` consecutive rows of ``column`` starting at ``start``.

    The window is clipped to the last row of ``frame`` and exactly one value
    is generated per row actually covered. Assigning a fixed-length list to a
    shorter ``.loc`` slice raises ValueError, which is what happened when a
    window started near the end of the frame.
    """
    # .loc label slices include the end label, hence the "- 1".
    stop = min(start + length - 1, len(frame) - 1)
    frame.loc[start:stop, column] = [make_value() for _ in range(stop - start + 1)]


# Never request more samples than there are rows (random.sample would raise).
abnormal_indexes = random.sample(range(row_count), min(35, row_count))

for idx in abnormal_indexes:

    # Pick one of seven anomaly patterns for this row.
    case = random.choice([1, 2, 3, 4, 5, 6, 7])

    if case == 1:
        # Single-row temperature above the baseline range.
        df_e.loc[idx, "cell_temperature_c"] = normal_value(31, 45)

    elif case == 2:
        # Single-row SOC that is either very low or very high.
        df_e.loc[idx, "soc_pct"] = random.choice([normal_value(5, 20), normal_value(90, 99)])

    elif case == 3:
        # Single-row current above the baseline range.
        df_e.loc[idx, "module_current_a"] = normal_value(23, 40)

    elif case == 4:
        # Run of 5 consecutive low-power rows (power <= 0.83) ending at idx
        # (or starting at row 0 when idx < 4).
        start = max(0, idx - 4)
        _write_window(df_e, "module_power_kw", start, 5, lambda: normal_value(0.3, 0.8))

    elif case == 5:
        # Run of up to 4 consecutive high-power rows starting at idx.
        _write_window(df_e, "module_power_kw", idx, 4, lambda: normal_value(1.55, 2.2))

    elif case == 6:
        # Single-row power spike: either far below or far above baseline.
        df_e.loc[idx, "module_power_kw"] = random.choice([normal_value(0.05, 0.2), normal_value(2.8, 4.5)])

    elif case == 7:
        # Run of up to 10 rows whose current shares one sign: all negative,
        # all zero (sign == 0) or all positive.
        sign = random.choice([-1, 0, 1])
        _write_window(df_e, "module_current_a", idx, 10, lambda: sign * normal_value(10, 20))

# Mark the anomalies.
# NOTE(review): only the rows listed in abnormal_indexes are flagged; the
# neighbouring rows overwritten by the multi-row cases (4, 5, 7) keep
# diagnostic_flag == 0 — confirm this is intended.
df_e.loc[abnormal_indexes, "diagnostic_flag"] = 1

# Make sure the score column is float before writing fractional scores, so
# the assignment below does not depend on an implicit int -> float upcast.
df_e["anomaly_score_pct"] = df_e["anomaly_score_pct"].astype(float)

# Draw an independent score for every anomalous row. Calling
# normal_value(50, 99) once would broadcast the same score to all of them.
df_e.loc[abnormal_indexes, "anomaly_score_pct"] = [
    normal_value(50, 99) for _ in abnormal_indexes
]

df_e.reset_index(drop=True, inplace=True)

df_e.head()


# Write the simulated data set to disk without the index column.
df_e.to_csv("./Data/moduleE_simulated.csv", index=False)


# (notebook output residue) df_e.loc[abnormal_indexes, "anomaly_score_pct"] = normal_value(50, 99)
