In [2]:
import os
import re
import io
import contextlib
import time
import glob
import copy
import torch
import random
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.metrics import accuracy_score, f1_score

try:
    from fvcore.nn import FlopCountAnalysis
    FVCORE_AVAILABLE = True
except ImportError:
    FVCORE_AVAILABLE = False

# ------------------------------------------------------------------------------
# 0. Utils & Setup
# ------------------------------------------------------------------------------
def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)

def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

# ------------------------------------------------------------------------------
# 1. WISDMDataset
# ------------------------------------------------------------------------------
class WISDMDataset(Dataset):
    """
    단일 WISDM txt 형식:
    subject,activity,timestamp,x,y,z;
    예) 33,Jogging,49105962326000,-0.6946377,12.680544,0.50395286;
    """
    def __init__(self, file_path: str, window_size: int = 80, step_size: int = 40):
        super().__init__()
        self.file_path = file_path
        self.window_size = window_size
        self.step_size = step_size

        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"WISDM txt file not found: {file_path}")

        df = self._load_file(file_path)
        self.X, self.y, self.subjects = self._create_windows(df)

        self.unique_subjects = sorted(np.unique(self.subjects))

        print("=" * 80)
        print("Loaded WISDM dataset (single txt)")
        print(f"  X shape       : {self.X.shape}  (N, T, C)")
        print(f"  y shape       : {self.y.shape}  (N,)")
        print(f"  subjects shape: {self.subjects.shape} (N,)")
        print(f"  unique subjects: {self.unique_subjects}")
        print("=" * 80)

    def _load_file(self, file_path: str) -> pd.DataFrame:
        """원본 txt 한 개를 통째로 읽어서 DataFrame으로 변환."""
        with open(file_path, "r") as f:
            lines = f.readlines()

        rows = []
        for line in lines:
            line = line.strip()
            if not line:
                continue
            # 끝 세미콜론 제거
            line = line.replace(";", "")
            parts = line.split(",")

            # subject, activity, timestamp, x, y, z → 6개 아니면 스킵
            if len(parts) != 6:
                continue

            subj, act, ts, x, y, z = parts

            # x,y,z 중 하나라도 비어있으면 스킵
            if x.strip() == "" or y.strip() == "" or z.strip() == "":
                continue

            rows.append([subj, act, ts, x, y, z])

        if not rows:
            raise ValueError(f"No valid rows parsed from file: {file_path}")

        df = pd.DataFrame(rows, columns=["subject", "activity", "timestamp", "x", "y", "z"])

        # 문자열 → NaN 처리 후 숫자로 변환
        df = df.replace(["", "NaN", "nan"], np.nan).dropna(subset=["subject", "x", "y", "z"])

        df["subject"] = pd.to_numeric(df["subject"], errors="coerce")
        df["x"] = pd.to_numeric(df["x"], errors="coerce")
        df["y"] = pd.to_numeric(df["y"], errors="coerce")
        df["z"] = pd.to_numeric(df["z"], errors="coerce")

        df = df.dropna(subset=["subject", "x", "y", "z"])

        if df.empty:
            raise ValueError("After cleaning, WISDM DataFrame is empty. Check file format.")

        df["subject"] = df["subject"].astype(int)

        # activity 문자열 → 정수 라벨
        df["activity_id"] = df["activity"].astype("category").cat.codes

        return df

    def _create_windows(self, df: pd.DataFrame):
        """
        subject 별로 나눠서 sliding window 생성.
        X: (N, T, 3), y: (N,), subjects: (N,)
        """
        X_list, y_list, s_list = [], [], []

        for subj_id in sorted(df["subject"].unique()):
            df_sub = df[df["subject"] == subj_id]

            # 필요하면 여기서 activity별로도 끊을 수 있음 (원하면 확장 가능)
            data = df_sub[["x", "y", "z"]].to_numpy(dtype=np.float32)      # (L, 3)
            labels = df_sub["activity_id"].to_numpy(dtype=np.int64)        # (L,)
            L = len(df_sub)

            start = 0
            while start + self.window_size <= L:
                end = start + self.window_size

                window_x = data[start:end]          # (T, 3)
                window_y = labels[end - 1]          # 마지막 타임스텝 라벨

                X_list.append(window_x.T)           # (3, T)
                y_list.append(window_y)
                s_list.append(subj_id)

                start += self.step_size

        if len(X_list) == 0:
            raise ValueError("[WISDMDataset] No windows created. Try smaller window_size or check data.")

        X = np.stack(X_list, axis=0).astype(np.float32)  # (N, 3, T)
        y = np.array(y_list, dtype=np.int64)
        s = np.array(s_list, dtype=np.int64)

        # (N, 3, T) → (N, T, 3)
        X = X.transpose(0, 2, 1)
        return X, y, s

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx: int):
        return (
            torch.FloatTensor(self.X[idx]),          # (T, 3)
            torch.LongTensor([self.y[idx]])[0],
            self.subjects[idx],
        )

# ------------------------------------------------------------------------------
# 2. Baseline Model Components
# ------------------------------------------------------------------------------
# ASF-DCL과 공정한 비교를 위해 동일한 Encoder 구조 사용
class LatentEncoder(nn.Module):
    def __init__(self, input_channels=9, latent_dim=64):
        super().__init__()
        self.conv1 = nn.Conv1d(input_channels, 32, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm1d(32)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=5, padding=2)
        self.bn2 = nn.BatchNorm1d(64)
        self.conv3 = nn.Conv1d(64, latent_dim, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(latent_dim)

    def forward(self, x):
        x = x.transpose(1, 2)
        h = F.relu(self.bn1(self.conv1(x)))
        h = F.relu(self.bn2(self.conv2(h)))
        s = F.relu(self.bn3(self.conv3(h)))
        s = s.transpose(1, 2)
        return s

# Baseline Model: Encoder + Global Average Pooling + Classifier
class StandardCNN(nn.Module):
    def __init__(self, input_channels=9, latent_dim=64, num_classes=6, hidden_dim=64):
        super().__init__()
        self.latent_encoder = LatentEncoder(input_channels, latent_dim)

        # Flow 모듈 없이 바로 분류 (일반적인 CNN 구조)
        self.classifier = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x):
        # 1. Encode: [Batch, Time, Dim]
        s = self.latent_encoder(x)

        # 2. Global Average Pooling (Time 축 평균)
        s_pool = torch.mean(s, dim=1)

        # 3. Classify
        logits = self.classifier(s_pool)
        return logits

# ------------------------------------------------------------------------------
# 3. Train & Evaluate Functions (Baseline용)
# ------------------------------------------------------------------------------
def train_epoch(model, dataloader, optimizer, device):
    model.train()
    total_loss = 0
    all_preds = []
    all_labels = []

    for batch in dataloader:
        x = batch[0].to(device)
        y = batch[1].to(device)

        optimizer.zero_grad()
        logits = model(x)
        loss = F.cross_entropy(logits, y, label_smoothing=0.05)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        preds = torch.argmax(logits, dim=1)
        all_preds.extend(preds.detach().cpu().numpy())
        all_labels.extend(y.detach().cpu().numpy())

    avg_loss = total_loss / len(dataloader)
    f1 = f1_score(all_labels, all_preds, average='macro')
    return avg_loss, f1

def evaluate(model, dataloader, device):
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in dataloader:
            x = batch[0].to(device)
            y = batch[1].to(device)

            logits = model(x)
            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.detach().cpu().numpy())
            all_labels.extend(y.detach().cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='macro')
    return acc, f1

def measure_efficiency(model, input_shape=(1, 128, 9), warmup=10, iters=100):
    """
    모델의 파라미터 수, FLOPs, 추론 속도를 측정합니다.
    CPU 환경에서 측정하여 하드웨어 의존성을 줄인 벤치마크를 수행합니다.
    """
    measure_device = torch.device('cpu')
    model_cpu = copy.deepcopy(model).to(measure_device)
    model_cpu.eval()

    # 더미 입력 데이터 생성 (Batch=1)
    real_input_shape = list(input_shape)
    real_input_shape[0] = 1
    sample_input = torch.randn(tuple(real_input_shape)).to(measure_device)

    # 1) 파라미터 수
    total_params = sum(p.numel() for p in model_cpu.parameters())
    params_m = total_params / 1e6  # million params

    # 2) FLOPs 측정 (fvcore 사용 가능할 때만)
    flops_m = None
    if FVCORE_AVAILABLE:
        try:
            with torch.no_grad():
                # 불필요한 출력 억제
                fake_out = io.StringIO()
                fake_err = io.StringIO()
                with contextlib.redirect_stdout(fake_out), contextlib.redirect_stderr(fake_err):
                    flops = FlopCountAnalysis(model_cpu, (sample_input,))
                    total_flops = flops.total()
                flops_m = total_flops / 1e6  # to millions
        except Exception as e:
            print(f"FLOPs calculation failed: {e}")
            flops_m = None

    # 3) 추론 시간 측정
    with torch.no_grad():
        # Warmup
        for _ in range(warmup):
            _ = model_cpu(sample_input)

        start = time.time()
        for _ in range(iters):
            _ = model_cpu(sample_input)
        end = time.time()

    avg_sec = (end - start) / iters
    inference_ms = avg_sec * 1000.0

    del model_cpu

    return {
        "params_m": params_m,
        "flops_m": flops_m,
        "inference_ms": inference_ms,
    }
# ------------------------------------------------------------------------------
# 4. Main Execution
# ------------------------------------------------------------------------------
def main():
    # 설정 (기존과 동일하게 맞춤)
    SEED = 42
    set_seed(SEED)
    DATA_PATH = '/content/drive/MyDrive/Colab Notebooks/HAR_data/WISDM_ar_v1.1_raw.txt'
    BATCH_SIZE = 64
    NUM_EPOCHS = 50
    LEARNING_RATE = 0.001
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    print(f"Running Standard 1D-CNN Baseline on {DEVICE}")

    # 데이터 로드
    full_dataset = WISDMDataset(DATA_PATH, window_size=80, step_size=40)

    total_size = len(full_dataset)
    train_size = int(total_size * 0.8)
    test_size = total_size - train_size

    train_dataset, test_dataset = random_split(
        full_dataset, [train_size, test_size],
        generator=torch.Generator().manual_seed(SEED)
    )

    g = torch.Generator()
    g.manual_seed(SEED)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True,
                              worker_init_fn=seed_worker, generator=g)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False,
                             worker_init_fn=seed_worker, generator=g)

    # 모델 초기화
    model = StandardCNN(input_channels=3, latent_dim=64, num_classes=6).to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=NUM_EPOCHS
    )

    best_f1 = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    # --- 학습 루프 ---
    print("\nStarting Training (Standard CNN)...")
    for epoch in range(NUM_EPOCHS):
        t_loss, t_f1 = train_epoch(model, train_loader, optimizer, DEVICE)

        v_acc, v_f1 = evaluate(model, test_loader, DEVICE)

        if v_f1 > best_f1:
            best_f1 = v_f1
            best_model_wts = copy.deepcopy(model.state_dict())

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] Train F1: {t_f1:.4f} | Test F1: {v_f1:.4f} (Best: {best_f1:.4f})")

    metrics = measure_efficiency(model, input_shape=(1, 128, 3), warmup=10, iters=100)
    print(f"Inference Time   : {metrics['inference_ms']:.4f} ms / sample")

if __name__ == "__main__":
    main()

Running Standard 1D-CNN Baseline on cuda
Loaded WISDM dataset (single txt)
  X shape       : (27108, 80, 3)  (N, T, C)
  y shape       : (27108,)  (N,)
  subjects shape: (27108,) (N,)
  unique subjects: [np.int64(1), np.int64(2), np.int64(3), np.int64(4), np.int64(5), np.int64(6), np.int64(7), np.int64(8), np.int64(9), np.int64(10), np.int64(11), np.int64(12), np.int64(13), np.int64(14), np.int64(15), np.int64(16), np.int64(17), np.int64(18), np.int64(19), np.int64(20), np.int64(21), np.int64(22), np.int64(23), np.int64(24), np.int64(25), np.int64(26), np.int64(27), np.int64(28), np.int64(29), np.int64(30), np.int64(31), np.int64(32), np.int64(33), np.int64(34), np.int64(35), np.int64(36)]

Starting Training (Standard CNN)...
Epoch [10/50] Train F1: 0.9619 | Test F1: 0.9625 (Best: 0.9625)
Epoch [20/50] Train F1: 0.9720 | Test F1: 0.9658 (Best: 0.9690)
Epoch [30/50] Train F1: 0.9750 | Test F1: 0.9687 (Best: 0.9707)
Epoch [40/50] Train F1: 0.9794 | Test F1: 0.9552 (Best: 0.9730)
Epoch [5