In [3]:
import numpy as np
import pandas as pd

# Simulation parameters
num_sensors = 12       # number of sensors (one CSV column each)
t_end = 10000          # total duration (s)
time = np.arange(0, t_end + 1)  # one sample per second: 0 .. t_end inclusive
change_point = 2000    # time at which the response starts to ramp up (s)
peak_time = 240        # time of the response peak (s)
noise_std = 0.1        # standard deviation of the additive Gaussian noise

# NOTE(review): peak_time (240) is earlier than change_point (2000), so the
# ramp-up mask below selects no samples and the decay segment starts right
# after t = 240 s. The values look like leftovers from a shorter run —
# confirm the intended change_point / peak_time before relying on this data.

# Output location. Raw string: the backslashes in the Windows path must not be
# parsed as escape sequences (unknown escapes such as "\I" are deprecated).
output_path = r'E:\INHA\BS\gas_system\synthetic_sensor_data.csv'

# Fix the seed so the generated data is reproducible
np.random.seed(42)

# Generate one noisy trace per sensor
sensor_data = {}
for i in range(num_sensors):
    amp = np.random.uniform(1.0, 2.0)  # per-sensor peak amplitude
    signal = np.zeros_like(time, dtype=float)

    # Ramp-up: linear rise from 0 at change_point to amp at peak_time
    ramp_mask = (time >= change_point) & (time <= peak_time)
    signal[ramp_mask] = amp * (time[ramp_mask] - change_point) / (peak_time - change_point)

    # Decay: linear fall from amp (just after peak_time) to 0 at t_end, floored at 0
    decay_mask = time > peak_time
    signal[decay_mask] = amp * (1 - (time[decay_mask] - peak_time) / (t_end - peak_time))
    signal[decay_mask] = np.clip(signal[decay_mask], 0, None)

    # Add Gaussian noise, then clip so no reading is negative
    noise = np.random.normal(scale=noise_std, size=signal.shape)
    values = np.clip(signal + noise, 0, None)

    sensor_data[f'sensor_{i+1}'] = values

# Assemble the traces into a frame indexed by time
df = pd.DataFrame(sensor_data, index=time)
df.index.name = 'time(s)'

# Write to CSV (the time index is kept as the first column)
df.to_csv(output_path, index=True)

print("Saved synthetic_sensor_data.csv")


Saved synthetic_sensor_data.csv


In [5]:
# Shape check of the generated frame: rows are time samples, columns are sensors.
df.shape

(10001, 12)

# MiniROCKET Trainer

In [None]:
# train_minirocket.py

import os
import numpy as np
import pandas as pd
from joblib import dump
from core.dataset import create_training_windows
from models.load_base_models import minirocket

def load_data_long(file_path: str):
    """
    Read a long-format CSV with columns time, Variable, Value, Label and
    reshape it into a feature matrix plus a label vector.

    Column names are stripped of surrounding whitespace. Each distinct
    (time, Label) pair becomes one row and each distinct Variable becomes one
    column; when a cell has several values the first one is kept. Rows are
    ordered by (time, Label).

    Returns:
        X_raw: float ndarray of shape (n_rows, n_variables).
        y_raw: int ndarray of length n_rows holding the Label of each row.
    """
    long_df = pd.read_csv(file_path)
    long_df.columns = long_df.columns.str.strip()

    # Long -> wide: one row per (time, Label), one column per Variable.
    wide = long_df.pivot_table(
        values="Value",
        index=["time", "Label"],
        columns="Variable",
        aggfunc="first",
    )
    wide = wide.sort_index()

    # The label of each row is the second level of the row index.
    row_labels = wide.index.get_level_values("Label").tolist()
    y_raw = np.array(row_labels, dtype=int)
    X_raw = wide.to_numpy(dtype=float)
    return X_raw, y_raw

def main():
    """Train a MiniRocket classifier on data/Et_H_CO.csv and save it to disk.

    Steps: load the long-format CSV, build sliding windows of 5 samples,
    fit the classifier returned by ``minirocket()``, and dump the fitted
    model to ``models/minirocket/minirocket.pkl``.
    """
    # 1) Load data
    data_path = "data/Et_H_CO.csv"
    X_raw, y_raw = load_data_long(data_path)

    # 2) Build sliding windows (window_size=5; the step is whatever
    #    create_training_windows defaults to — presumably 1, confirm)
    # NOTE(review): load_data_long orders rows by (time, Label); if several
    # labels share a timestamp, consecutive rows belong to different labels.
    # Confirm create_training_windows handles that as intended.
    window_size = 5
    X_train, y_train = create_training_windows(X_raw, y_raw, window_size=window_size)

    # 3) Fit the classifier
    print("▶ MiniRocketClassifier 학습 시작...")
    clf = minirocket()
    clf.fit(X_train, y_train)

    # 4) Save the model. Create the directory the file actually goes into
    #    (models/minirocket), not just its parent, so dump() cannot fail with
    #    FileNotFoundError on a fresh checkout.
    model_path = "models/minirocket/minirocket.pkl"
    os.makedirs(os.path.dirname(model_path), exist_ok=True)
    dump(clf, model_path)
    print(f"✅ 모델 저장 완료: {model_path}")

if __name__ == "__main__":
    main()


  from .autonotebook import tqdm as notebook_tqdm


▶ MiniRocketClassifier 학습 시작...
✅ 모델 저장 완료: models/minirocket/minirocket.pkl


In [2]:
def load_data_long(file_path: str):
    """
    Read a long-format CSV (time, Variable, Value, Label) and reshape it.

    Column names are stripped of surrounding whitespace. Each distinct
    (time, Label) pair becomes one row and each distinct Variable one column;
    the first value wins when a cell has several. Rows are ordered by
    (time, Label).

    Returns:
        X_raw: float ndarray of shape (n_rows, n_variables).
        y_raw: int ndarray with the Label of each row.
        label_mapping: dict mapping every distinct label to itself.
    """
    long_df = pd.read_csv(file_path)
    long_df.columns = long_df.columns.str.strip()

    # Long -> wide: one row per (time, Label), one column per Variable.
    wide = long_df.pivot_table(
        values="Value",
        index=["time", "Label"],
        columns="Variable",
        aggfunc="first",
    )
    wide = wide.sort_index()

    # The label of each row is the second level of the row index.
    row_labels = wide.index.get_level_values("Label").tolist()
    y_raw = np.array(row_labels, dtype=int)
    X_raw = wide.to_numpy(dtype=float)

    # Identity mapping over the sorted distinct labels.
    classes = np.unique(y_raw)
    label_mapping = dict(zip(classes, classes))
    return X_raw, y_raw, label_mapping

In [3]:
# Load the long-format dataset and unpack the feature matrix, the label
# vector and the label mapping returned by load_data_long.
data_path = "data/Et_H_CO.csv"
X_raw, y_raw, label_mapping = load_data_long(data_path)

In [4]:
# Inspect the pivoted feature matrix (one row per (time, Label), one column per Variable).
X_raw

array([[629., 719., 331., ..., 572., 566., 700.],
       [644., 737., 334., ..., 548., 584., 727.],
       [665., 759., 342., ..., 598., 593., 739.],
       ...,
       [721., 815., 359., ..., 705., 675., 853.],
       [730., 829., 359., ..., 656., 686., 865.],
       [742., 843., 363., ..., 707., 694., 872.]])

In [6]:
# (number of (time, Label) rows, number of distinct Variable columns)
X_raw.shape

(11880, 8)

In [5]:
# Label of each row of X_raw, in the same (time, Label) order.
# NOTE(review): if the labels cycle (1, 2, 3, ...), adjacent rows belong to
# different classes — check that any windowing over X_raw accounts for this.
y_raw

array([1, 2, 3, ..., 2, 3, 4])

In [15]:
# NOTE(review): when this cell was last run the load raised "KeyError: 243".
# The cause is not visible here — presumably the file is not readable as a
# joblib pickle in this environment; verify the file, or remove this cell so
# the notebook runs top to bottom. The path is also an absolute local path.
import joblib
scaler = joblib.load(r"E:\INHA\BS\sensor\gpsig\py\std_scaler.pkl")

KeyError: 243

In [19]:
# scripts/train_gpsig_scaler.py

import os, glob, numpy as np, pandas as pd
from sklearn.preprocessing import StandardScaler
import joblib

# Folder that holds the raw CSV files
DATA_FOLDER = r"E:\INHA\BS\gas_system\data"
# Number of trailing (downsampled) rows kept from each file; also the length
# of the normalized time column prepended to every sequence.
LEN_EX      = 297
# Files whose column count (after dropping non-sensor columns) differs from
# this are skipped.
EXPECTED_SENSORS = 8

def load_one_csv(fpath):
    """Read one raw CSV and return its downsampled tail as a 2-D array.

    The columns "Time(s)", "Temperature(oC)" and "Relative_Humidity(%)" are
    removed when present. The remaining rows are averaged in consecutive
    groups of 10 (by integer row index), and only the last LEN_EX averaged
    rows are returned.
    """
    frame = pd.read_csv(fpath)

    # Only ask to drop the columns that actually exist in this file.
    non_sensor = ("Time(s)", "Temperature(oC)", "Relative_Humidity(%)")
    present = [name for name in non_sensor if name in frame.columns]
    sensors = frame.drop(columns=present, errors='ignore')

    # Rows 0-9 -> bucket 0, rows 10-19 -> bucket 1, ...; average each bucket.
    bucket_ids = sensors.index // 10
    averaged = sensors.groupby(bucket_ids).mean()

    # Keep the trailing LEN_EX buckets.
    tail = averaged.iloc[-LEN_EX:, :]
    return tail.values

def main():
    """Fit a StandardScaler on all raw sequences and save it.

    Every file in DATA_FOLDER matching ``NNN_*.csv`` is loaded with
    load_one_csv, prefixed with a normalized time column in [0, 1], and
    stacked; the scaler is fitted on the stacked rows and written to
    ``models/gpsig_scaler.pkl``.

    Raises:
        RuntimeError: if no file matches the pattern, or if every file was
            skipped by the shape checks.
    """
    # 1) Restrict to files named with a three-digit prefix
    pattern = os.path.join(DATA_FOLDER, "[0-9][0-9][0-9]_*.csv")
    files = sorted(glob.glob(pattern))
    if not files:
        raise RuntimeError(f"No raw CSVs found in {DATA_FOLDER}")

    # The time column is identical for every file, so build it once.
    time_col = np.linspace(0, 1, LEN_EX)[:, None]

    seqs = []
    for f in files:
        arr = load_one_csv(f)  # expected shape: (LEN_EX, sensor_count)
        # Skip files with an unexpected number of sensor columns
        if arr.shape[1] != EXPECTED_SENSORS:
            print(f"Skipping {os.path.basename(f)}: sensors={arr.shape[1]}")
            continue
        # Skip files too short to yield LEN_EX downsampled rows; otherwise
        # the hstack with the fixed-length time column would raise.
        if arr.shape[0] != LEN_EX:
            print(f"Skipping {os.path.basename(f)}: rows={arr.shape[0]} (expected {LEN_EX})")
            continue
        seq = np.hstack([time_col, arr])  # (LEN_EX, EXPECTED_SENSORS + 1)
        seqs.append(seq)

    # 2) Make sure something survived the filtering
    if not seqs:
        raise RuntimeError("No sequences collected after filtering.")

    # 3) Fit the scaler on all rows of all sequences and save it
    all_samples = np.vstack(seqs)  # (n_files * LEN_EX, EXPECTED_SENSORS + 1)
    scaler = StandardScaler().fit(all_samples)
    os.makedirs("models", exist_ok=True)
    joblib.dump(scaler, "models/gpsig_scaler.pkl")
    print("Saved new scaler → models/gpsig_scaler.pkl")

if __name__ == "__main__":
    main()


Saved new scaler → models/gpsig_scaler.pkl


In [4]:
# Reload the scaler saved at models/gpsig_scaler.pkl to check it can be read back.
import joblib
SCALER_PATH = "models/gpsig_scaler.pkl"
scaler = joblib.load(SCALER_PATH)

In [5]:
# Display the loaded object's repr.
scaler

StandardScaler()

In [1]:
# split_by_class.py

import os
import pandas as pd

def split_by_class(long_csv, out_dir="data"):
    """Split a long-format CSV into one wide CSV per label.

    The input needs the columns time, Variable, Value and Label (surrounding
    whitespace in column names is stripped). For every distinct Label the
    rows are pivoted to one row per time and one column per Variable, and
    written to ``<out_dir>/class_<label>.csv`` without the row index.
    ``out_dir`` is created if it does not exist. A line is printed for each
    file written.
    """
    # Load the long-format table and normalize its header.
    long_df = pd.read_csv(long_csv)
    long_df.columns = long_df.columns.str.strip()

    os.makedirs(out_dir, exist_ok=True)

    # One output file per label.
    by_label = long_df.groupby("Label")
    for label, rows in by_label:
        wide = rows.pivot(index="time", columns="Variable", values="Value")
        # Turn the time index back into an ordinary first column.
        table = wide.reset_index()
        fname = os.path.join(out_dir, f"class_{label}.csv")
        table.to_csv(fname, index=False)
        print(f"Saved class {label} → {fname}")

# Entry point: split the combined dataset, writing into the default "data" folder.
if __name__ == "__main__":
    split_by_class("data/Et_H_CO.csv")


Saved class 1 → data\class_1.csv
Saved class 2 → data\class_2.csv
Saved class 3 → data\class_3.csv
Saved class 4 → data\class_4.csv


In [8]:
import os
from pathlib import Path
import pandas as pd

# Source folder with the CSVs; the widened copies go into an "expanded" subfolder.
DATA_DIR = Path("wavelet/data_250418")
OUT_DIR  = DATA_DIR / "expanded"
OUT_DIR.mkdir(exist_ok=True)

for csv_path in DATA_DIR.glob("*.csv"):
    df = pd.read_csv(csv_path)

    # The right-most column is the one that gets replicated.
    last_col_name = df.columns[-1]
    last = df[last_col_name]

    # Append four copies named <col>_dup1 .. <col>_dup4.
    for i in (1, 2, 3, 4):
        dup_name = f"{last_col_name}_dup{i}"
        df[dup_name] = last

    # Output file: original stem with "_dup" appended, same extension.
    new_name = f"{csv_path.stem}_dup{csv_path.suffix}"
    out_path = OUT_DIR / new_name

    df.to_csv(out_path, index=False)
    print(f"Saved expanded file to {out_path}")


Saved expanded file to wavelet\data_250418\expanded\000_Et_H_CO_n_dup.csv
Saved expanded file to wavelet\data_250418\expanded\002_Et_H_CO_H_dup.csv
Saved expanded file to wavelet\data_250418\expanded\008_Et_H_CO_L_dup.csv
Saved expanded file to wavelet\data_250418\expanded\028_Et_H_CO_M_dup.csv


In [7]:
# List the contents of the source data folder.
import os
os.listdir('wavelet/data_250418')

['000_Et_H_CO_n.csv',
 '002_Et_H_CO_H.csv',
 '008_Et_H_CO_L.csv',
 '028_Et_H_CO_M.csv']