In [1]:
import os
import io
import contextlib
import time
import copy
import torch
import random
import numpy as np
import torch.nn as nn
import pandas as pd
from pathlib import Path
from sklearn.preprocessing import StandardScaler
from torch.utils.data import random_split
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix

try:
    from fvcore.nn import FlopCountAnalysis
    FVCORE_AVAILABLE = True
except ImportError:
    FVCORE_AVAILABLE = False

def set_seed(seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    os.environ['PYTHONHASHSEED'] = str(seed)

def seed_worker(worker_id):
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

# ------------------------------------------------------------------------------
# 1. motion-sense Dataset
# ------------------------------------------------------------------------------
class MotionSenseDataset(Dataset):
    """
    Motion-Sense Dataset Loader
    - 입력: (T, 6) -> (userAcceleration.x, y, z, rotationRate.x, y, z)
    - 라벨: 폴더명(dws, ups, wlk, jog, sit, std)을 파싱하여 인덱스로 매핑
    """

    def __init__(
        self,
        root_dir,
        window_size=128,
        step_size=64,
        normalize=True,
        target_subjects=None,
        scaler=None
    ):
        self.root_dir = Path(root_dir)
        self.window_size = window_size
        self.step_size = step_size
        self.normalize = normalize

        # Motion-Sense의 데이터 폴더 경로 (이미지 기준 A_DeviceMotion_data 폴더)
        self.data_dir = self.root_dir / "A_DeviceMotion_data"

        # 1) 데이터 로드 및 통합
        df_all = self._load_all_data()

        if target_subjects is not None:
            df_all = df_all[df_all['subject_id'].isin(target_subjects)].copy()
            print(f"Dataset initialized with subjects: {target_subjects}")
            print(f"Total rows after filtering: {len(df_all)}")

        # 2) 라벨 -> 인덱스 매핑
        # Motion-Sense의 6개 클래스: dws, ups, wlk, jog, sit, std
        activities = ['dws', 'jog', 'sit', 'std', 'ups', 'wlk']
        self.label2idx = {label: i for i, label in enumerate(activities)}
        self.idx2label = {i: label for label, i in self.label2idx.items()}
        df_all["label_idx"] = df_all["activity"].map(self.label2idx)

        # 3) 정규화 (StandardScaler)
        # MotionSense 컬럼: userAcceleration.x/y/z (acc), rotationRate.x/y/z (gyro)
        feat_cols = [
            "userAcceleration.x", "userAcceleration.y", "userAcceleration.z",
            "rotationRate.x", "rotationRate.y", "rotationRate.z"
        ]
        feats = df_all[feat_cols].values.astype(np.float32)

        if self.normalize:
            if scaler is None:
                # 스케일러가 없으면(Train용) -> 새로 맞춤(fit)
                self.scaler = StandardScaler()
                feats = self.scaler.fit_transform(feats)
            else:
                # 스케일러가 있으면(Test용) -> 기존 것 사용(transform)
                self.scaler = scaler
                feats = self.scaler.transform(feats)
        else:
            self.scaler = None

        df_all[feat_cols] = feats

        # 4) 슬라이딩 윈도우 생성 (Subject, Activity, Trial 별로 그룹화)
        X_list = []
        y_list = []

        # trial_id는 각 csv 파일을 구분하기 위해 _load_all_data에서 생성해야 함
        for _, g in df_all.groupby(["subject_id", "activity", "trial_id"]):
            g = g.sort_values("timestamp_idx").reset_index(drop=True)

            data = g[feat_cols].values
            labels = g["label_idx"].values
            n = len(g)

            if n < window_size:
                continue

            for start in range(0, n - window_size + 1, step_size):
                end = start + window_size
                w_data = data[start:end]
                w_labels = labels[start:end]

                # 윈도우 라벨 (Mode)
                majority_label = np.bincount(w_labels).argmax()

                X_list.append(w_data.astype(np.float32))
                y_list.append(majority_label)

        self.X = np.stack(X_list) if len(X_list) > 0 else np.zeros((0, window_size, 6), dtype=np.float32)
        self.y = np.array(y_list, dtype=np.int64)

        print(f"[MotionSenseDataset] windows: {len(self.X)}, classes: {len(self.label2idx)}")
        print(f"Classes map: {self.label2idx}")

    def _load_all_data(self):
        """
        A_DeviceMotion_data 내부의 모든 폴더를 순회하며 CSV 로드
        """
        all_dfs = []

        # data_dir 내부의 폴더들 (예: dws_1, jog_9 ...)
        if not self.data_dir.exists():
             raise FileNotFoundError(f"Directory not found: {self.data_dir}")

        for folder in os.listdir(self.data_dir):
            folder_path = self.data_dir / folder
            if not folder_path.is_dir():
                continue

            # 폴더명 파싱 (예: dws_1 -> activity=dws, subject=1)
            parts = folder.split('_')
            activity_label = parts[0]
            subject_id = parts[1]

            # 폴더 내 csv 파일 읽기 (보통 sub_1.csv 같은 형태)
            for csv_file in os.listdir(folder_path):
                if not csv_file.endswith(".csv"):
                    continue

                file_path = folder_path / csv_file
                df = pd.read_csv(file_path)

                # Unnamed: 0 컬럼이 타임스탬프 역할(인덱스)
                if "Unnamed: 0" in df.columns:
                    df = df.rename(columns={"Unnamed: 0": "timestamp_idx"})
                else:
                    df["timestamp_idx"] = range(len(df))

                df["activity"] = activity_label
                df["subject_id"] = int(subject_id)
                df["trial_id"] = folder  # 폴더명 자체를 trial 식별자로 사용

                all_dfs.append(df)

        return pd.concat(all_dfs, ignore_index=True)

    def __len__(self):
        return len(self.X)

    def __getitem__(self, idx):
        return torch.from_numpy(self.X[idx]), torch.tensor(self.y[idx], dtype=torch.long)


# ------------------------------------------------------------------------------
# 2. ASF Model Components
# ------------------------------------------------------------------------------

class LatentEncoder(nn.Module):
    def __init__(self, input_channels=9, latent_dim=64):
        super().__init__()
        self.conv1 = nn.Conv1d(input_channels, 32, kernel_size=5, padding=2)
        self.bn1 = nn.BatchNorm1d(32)
        self.conv2 = nn.Conv1d(32, 64, kernel_size=5, padding=2)
        self.bn2 = nn.BatchNorm1d(64)
        self.conv3 = nn.Conv1d(64, latent_dim, kernel_size=3, padding=1)
        self.bn3 = nn.BatchNorm1d(latent_dim)

    def forward(self, x):
        x = x.transpose(1, 2)
        h = F.relu(self.bn1(self.conv1(x)))
        h = F.relu(self.bn2(self.conv2(h)))
        s = F.relu(self.bn3(self.conv3(h)))
        s = s.transpose(1, 2)
        return s

class StandardCNN(nn.Module):
    def __init__(self, input_channels=9, latent_dim=64, num_classes=6, hidden_dim=64):
        super().__init__()

        # 1. ASF-DCL과 동일한 Encoder
        self.latent_encoder = LatentEncoder(input_channels, latent_dim)

        # 2. Flow 모듈 없이 단순한 Classifier (Pooling 후 FC)
        self.classifier = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(hidden_dim, num_classes)
        )

    def forward(self, x):
        # [Batch, Time, Dim]
        s = self.latent_encoder(x)

        # Global Average Pooling (시간 축 평균)
        s_pool = torch.mean(s, dim=1)

        logits = self.classifier(s_pool)
        return logits


# ------------------------------------------------------------------------------
# 5. Train / Evaluation
# ------------------------------------------------------------------------------
def train_epoch(model, dataloader, optimizer, device):
    model.train()
    total_loss = 0
    all_preds = []
    all_labels = []

    for batch in dataloader:
        x = batch[0].to(device)
        y = batch[1].to(device)

        optimizer.zero_grad()
        logits = model(x)
        loss = F.cross_entropy(logits, y, label_smoothing=0.05)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        preds = torch.argmax(logits, dim=1)
        all_preds.extend(preds.detach().cpu().numpy())
        all_labels.extend(y.detach().cpu().numpy())

    avg_loss = total_loss / len(dataloader)
    f1 = f1_score(all_labels, all_preds, average='macro')
    return avg_loss, f1

def evaluate(model, dataloader, device):
    """
    AWGN 노이즈를 주입하여 모델의 견고성을 평가하는 함수
    sigma: 노이즈 강도 (Standard Deviation)
    """
    model.eval()
    all_preds = []
    all_labels = []

    with torch.no_grad():
        for batch in dataloader:
            x = batch[0].to(device)
            y = batch[1].to(device)

            logits = model(x)
            preds = torch.argmax(logits, dim=1)
            all_preds.extend(preds.detach().cpu().numpy())
            all_labels.extend(y.detach().cpu().numpy())

    acc = accuracy_score(all_labels, all_preds)
    f1 = f1_score(all_labels, all_preds, average='macro')
    return acc, f1

def measure_efficiency(model, input_shape=(1, 128, 9), warmup=10, iters=100):
    """
    모델의 파라미터 수, FLOPs, 추론 속도를 측정합니다.
    CPU 환경에서 측정하여 하드웨어 의존성을 줄인 벤치마크를 수행합니다.
    """
    measure_device = torch.device('cpu')
    model_cpu = copy.deepcopy(model).to(measure_device)
    model_cpu.eval()

    # 더미 입력 데이터 생성 (Batch=1)
    real_input_shape = list(input_shape)
    real_input_shape[0] = 1
    sample_input = torch.randn(tuple(real_input_shape)).to(measure_device)

    # 1) 파라미터 수
    total_params = sum(p.numel() for p in model_cpu.parameters())
    params_m = total_params / 1e6  # million params

    # 2) FLOPs 측정 (fvcore 사용 가능할 때만)
    flops_m = None
    if FVCORE_AVAILABLE:
        try:
            with torch.no_grad():
                # 불필요한 출력 억제
                fake_out = io.StringIO()
                fake_err = io.StringIO()
                with contextlib.redirect_stdout(fake_out), contextlib.redirect_stderr(fake_err):
                    flops = FlopCountAnalysis(model_cpu, (sample_input,))
                    total_flops = flops.total()
                flops_m = total_flops / 1e6  # to millions
        except Exception as e:
            print(f"FLOPs calculation failed: {e}")
            flops_m = None

    # 3) 추론 시간 측정
    with torch.no_grad():
        # Warmup
        for _ in range(warmup):
            _ = model_cpu(sample_input)

        start = time.time()
        for _ in range(iters):
            _ = model_cpu(sample_input)
        end = time.time()

    avg_sec = (end - start) / iters
    inference_ms = avg_sec * 1000.0

    del model_cpu

    return {
        "params_m": params_m,
        "flops_m": flops_m,
        "inference_ms": inference_ms,
    }

# ------------------------------------------------------------------------------
# 6. Main Training Loop
# ------------------------------------------------------------------------------
def main():
    SEED = 42
    set_seed(SEED)

    DATA_PATH = '/content/drive/MyDrive/Colab Notebooks/HAR_data/motion-sense'
    BATCH_SIZE = 64
    NUM_EPOCHS = 50
    LEARNING_RATE = 0.001
    DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    full_dataset = MotionSenseDataset(
        root_dir=DATA_PATH,
        target_subjects=None,
        normalize=True
    )

    total_size = len(full_dataset)
    train_size = int(total_size * 0.8)
    test_size = total_size - train_size

    generator = torch.Generator().manual_seed(SEED)
    train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size], generator=generator)
    print(f"Total samples: {total_size} | Train: {len(train_dataset)} | Test: {len(test_dataset)}")

    g = torch.Generator()
    g.manual_seed(SEED)
    train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE,
                              shuffle=True, num_workers=2,
                              worker_init_fn=seed_worker,
                              generator=g)
    test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE,
                             shuffle=False, num_workers=2,
                             worker_init_fn=seed_worker,
                             generator=g)

    model = StandardCNN(
        input_channels=6,
        latent_dim=64,
        num_classes=6,
        hidden_dim=64
    ).to(DEVICE)

    total_params = sum(p.numel() for p in model.parameters())
    print()
    print(f"Total parameters: {total_params:,}")

    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=LEARNING_RATE,
                                 weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(
        optimizer, T_max=NUM_EPOCHS
    )

    best_acc = 0.0
    best_f1 = 0.0
    best_model_wts = copy.deepcopy(model.state_dict())

    # --- 학습 루프 ---
    print("\nStarting Training (Standard CNN)...")
    for epoch in range(NUM_EPOCHS):
        t_loss, t_f1 = train_epoch(model, train_loader, optimizer, DEVICE)

        v_acc, v_f1 = evaluate(model, test_loader, DEVICE)

        if v_f1 > best_f1:
            best_f1 = v_f1
            best_model_wts = copy.deepcopy(model.state_dict())

        if (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{NUM_EPOCHS}] Train F1: {t_f1:.4f} | Test F1: {v_f1:.4f} (Best: {best_f1:.4f})")

    metrics = measure_efficiency(model, input_shape=(1, 128, 6), warmup=10, iters=100)
    print(f"Inference Time   : {metrics['inference_ms']:.4f} ms / sample")


if __name__ == "__main__":
    main()

[MotionSenseDataset] windows: 22053, classes: 6
Classes map: {'dws': 0, 'jog': 1, 'sit': 2, 'std': 3, 'ups': 4, 'wlk': 5}
Total samples: 22053 | Train: 17642 | Test: 4411

Total parameters: 28,518

Starting Training (Standard CNN)...
Epoch [10/50] Train F1: 0.9858 | Test F1: 0.9868 (Best: 0.9879)
Epoch [20/50] Train F1: 0.9948 | Test F1: 0.9857 (Best: 0.9894)
Epoch [30/50] Train F1: 0.9974 | Test F1: 0.9914 (Best: 0.9950)
Epoch [40/50] Train F1: 0.9975 | Test F1: 0.9925 (Best: 0.9967)
Epoch [50/50] Train F1: 0.9986 | Test F1: 0.9909 (Best: 0.9967)
Inference Time   : 0.3804 ms / sample
