# DTW

In [None]:
import os
import pandas as pd
import numpy as np
from dtw import dtw
from tqdm import tqdm

def compute_dtw_distance(seq1: np.ndarray, seq2: np.ndarray):
    """Return the dynamic-time-warping distance between two sequences.

    Both arrays are passed unchanged to ``dtw`` with ``keep_internals=False``
    and only the ``distance`` attribute of the result is returned.

    The callers in this notebook pass 3-axis acceleration arrays. The original
    author's note says Euclidean distance is used as the local cost; no
    distance method is passed here, so that relies on the library default
    (TODO confirm against the installed ``dtw`` package).
    """
    alignment = dtw(seq1, seq2, keep_internals=False)
    return alignment.distance

def _load_accel_xyz(csv_path):
    """Read one CSV file and return its accel_x/accel_y/accel_z columns as a NumPy array."""
    return pd.read_csv(csv_path)[['accel_x', 'accel_y', 'accel_z']].to_numpy()


def print_dtw(original_dir, augmented_dir, seed=None):
    """Print the mean and median DTW distance between augmented and original sequences.

    Every ``.csv`` file in ``augmented_dir`` is compared against ONE randomly
    chosen ``.csv`` file from ``original_dir`` (not its own source recording),
    so the printed statistics depend on the random pairing.

    Args:
        original_dir: directory holding the original sequence CSV files.
        augmented_dir: directory holding the augmented sequence CSV files.
        seed: optional seed. When given, the pairing is drawn from
            ``np.random.default_rng(seed)`` so results are reproducible.
            When ``None`` (default) the global NumPy RNG is used, as before.

    Returns:
        None. Results are printed.

    Raises:
        ValueError: if ``original_dir`` contains no ``.csv`` files.
    """
    # Sorted listings make the pairing independent of the filesystem's
    # arbitrary os.listdir order.
    original_data = {
        fname: _load_accel_xyz(os.path.join(original_dir, fname))
        for fname in sorted(os.listdir(original_dir))
        if fname.endswith('.csv')
    }
    if not original_data:
        raise ValueError(f"No .csv files found in original_dir: {original_dir!r}")

    augmented_files = sorted(
        fname for fname in os.listdir(augmented_dir) if fname.endswith('.csv')
    )
    if not augmented_files:
        # Avoid np.mean([]) -> nan plus a RuntimeWarning.
        print(f"No .csv files found in augmented_dir: {augmented_dir!r}")
        return

    # Hoisted out of the loop: the candidate list never changes.
    original_keys = list(original_data)
    choose = np.random.choice if seed is None else np.random.default_rng(seed).choice

    dtw_distances = []
    for fname in tqdm(augmented_files):
        aug_data = _load_accel_xyz(os.path.join(augmented_dir, fname))
        orig_data = original_data[choose(original_keys)]
        dtw_distances.append(compute_dtw_distance(aug_data, orig_data))

    print(f"平均DTW距离: {np.mean(dtw_distances):.9f}")
    print(f"中位数DTW距离: {np.median(dtw_distances):.9f}")

In [None]:
# DTW statistics: original sequences vs. the ChatGPT-o4-instructed_v2 augmented set.
# print_dtw pairs each augmented file with a randomly chosen original file,
# so the printed numbers vary between runs unless the RNG is seeded.
original_dir = './data_origin/sequence'
augmented_dir = './data_aug/ChatGPT-o4-instructed_v2/sequence'

print_dtw(original_dir, augmented_dir)

100%|██████████| 300/300 [00:06<00:00, 44.45it/s] 

平均DTW距离: 1508.493311894
中位数DTW距离: 85.392676477





In [None]:
# DTW statistics: original sequences vs. the GN augmented set.
# print_dtw pairs each augmented file with a randomly chosen original file,
# so the printed numbers vary between runs unless the RNG is seeded.
original_dir = './data_origin/sequence'
augmented_dir = './data_aug/GN/sequence'

print_dtw(original_dir, augmented_dir)

100%|██████████| 300/300 [00:07<00:00, 39.07it/s]

平均DTW距离: 1604.201465709
中位数DTW距离: 78.188436558





In [None]:
# DTW statistics: original sequences vs. the GS augmented set.
# print_dtw pairs each augmented file with a randomly chosen original file,
# so the printed numbers vary between runs unless the RNG is seeded.
original_dir = './data_origin/sequence'
augmented_dir = './data_aug/GS/sequence'

print_dtw(original_dir, augmented_dir)

100%|██████████| 300/300 [00:06<00:00, 43.05it/s]

平均DTW距离: 1158.065324426
中位数DTW距离: 81.104038711





In [None]:
# DTW statistics: original sequences vs. the CS augmented set.
# print_dtw pairs each augmented file with a randomly chosen original file,
# so the printed numbers vary between runs unless the RNG is seeded.
original_dir = './data_origin/sequence'
augmented_dir = './data_aug/CS/sequence'

print_dtw(original_dir, augmented_dir)

100%|██████████| 300/300 [00:05<00:00, 56.48it/s] 

平均DTW距离: 902.477916448
中位数DTW距离: 88.900963819





# F1-score

In [2]:
import torch
from torch.utils.data import DataLoader
from torch.nn.utils.rnn import pad_sequence

from models.lstm import LSTMClassifier
from tqdm import tqdm

def collate_fn(batch, label2idx):
    """Collate variable-length samples into a single padded batch.

    Args:
        batch: list of samples; element 0 of each sample is the sequence
            tensor and element 1 is its action name.
        label2idx: mapping from action name to integer class index.

    Returns:
        A ``(padded_sequences, lengths, labels)`` tuple: the sequences padded
        by ``pad_sequence(batch_first=True)``, a long tensor with each
        sequence's unpadded length, and a long tensor of class indices.
    """
    seqs, action_names = [], []
    for sample in batch:
        seqs.append(sample[0])
        action_names.append(sample[1])

    # Lengths are recorded before padding so callers can recover the
    # unpadded extent of each sequence.
    seq_lens = torch.tensor(list(map(len, seqs)), dtype=torch.long)
    padded = pad_sequence(seqs, batch_first=True)
    class_ids = torch.tensor([label2idx[name] for name in action_names], dtype=torch.long)
    return padded, seq_lens, class_ids

def get_predictions(model_path, dataset, label2idx, batch_size=32, device='cuda'):
    """Run a saved LSTMClassifier checkpoint over a dataset and collect predictions.

    Args:
        model_path: path to a state-dict checkpoint loadable by ``torch.load``.
        dataset: dataset whose samples ``collate_fn`` can batch (element 0 is
            the sequence tensor, element 1 the action name).
        label2idx: mapping from action name to class index; its size sets the
            number of output classes of the model.
        batch_size: evaluation batch size.
        device: device the model and inputs are moved to.

    Returns:
        ``(y_true, y_pred)``: two lists of integer class indices in dataset
        order (the loader is not shuffled).
    """
    # Derive the class count from the label map instead of hard-coding 16, so
    # the model head always matches the labels used for evaluation.
    num_classes = len(label2idx)
    # NOTE(review): input_dim/hidden_dim/num_layers/dropout are hard-coded and
    # presumably must match the training configuration of the checkpoint.
    model = LSTMClassifier(input_dim=3, hidden_dim=64, num_layers=2, num_classes=num_classes, dropout=0.3)
    # NOTE(review): torch.load may unpickle arbitrary objects; only load
    # checkpoints from trusted sources.
    model.load_state_dict(torch.load(model_path, map_location=device))
    model.to(device)
    model.eval()

    loader = DataLoader(
        dataset,
        batch_size=batch_size,
        shuffle=False,
        collate_fn=lambda batch: collate_fn(batch, label2idx)
    )

    y_true, y_pred = [], []

    with torch.no_grad():
        for x, lengths, labels in tqdm(loader, desc="Predicting"):
            # Lengths are deliberately kept on the CPU (presumably the model
            # packs the padded sequences - confirm in models/lstm.py).
            logits = model(x.to(device), lengths.cpu())
            preds = torch.argmax(logits, dim=1)

            # Labels are only collected, so they never need to leave the CPU.
            y_true.extend(labels.tolist())
            y_pred.extend(preds.cpu().tolist())

    return y_true, y_pred


from sklearn.metrics import accuracy_score, f1_score, classification_report

def eval_F1score(y_true, y_pred, label2idx):
    """Print accuracy, macro/weighted F1 and a per-class report; return the summary metrics.

    Args:
        y_true: ground-truth class indices.
        y_pred: predicted class indices.
        label2idx: mapping from class name to class index, used to label the
            rows of the classification report.

    Returns:
        dict with keys ``'accuracy'``, ``'macro_f1'`` and ``'weighted_f1'``.
    """
    # Pair names with their indices explicitly. Without ``labels=`` the report
    # matches target_names positionally to whichever classes happen to appear,
    # which breaks (or mislabels rows) when a class is absent from the data or
    # when the dict is not written in index order.
    ordered = sorted(label2idx.items(), key=lambda item: item[1])
    class_names = [name for name, _ in ordered]
    class_ids = [idx for _, idx in ordered]

    acc = accuracy_score(y_true, y_pred)
    # zero_division=0 yields the same values as the default ("warn" scores
    # undefined metrics as 0) but without the UndefinedMetricWarning noise.
    f1_macro = f1_score(y_true, y_pred, average='macro', zero_division=0)
    f1_weighted = f1_score(y_true, y_pred, average='weighted', zero_division=0)

    print(f"Accuracy: {acc:.9f}")
    print(f"Macro F1-score: {f1_macro:.9f}")
    print(f"Weighted F1-score: {f1_weighted:.9f}")

    print("\nClassification Report:")
    print(classification_report(
        y_true,
        y_pred,
        labels=class_ids,
        target_names=class_names,
        zero_division=0,
    ))

    return {
        'accuracy': acc,
        'macro_f1': f1_macro,
        'weighted_f1': f1_weighted
    }

In [3]:
from utils.dataset import BabyMotionDataset

# Data root passed to BabyMotionDataset as origin_dir (presumably the
# un-augmented recordings - confirm against utils.dataset).
origin_dir = "./data_origin"

# Action name -> integer class index (16 entries, indices 0-15).
# The evaluation helpers use this both to encode labels and to name report rows.
label2idx = {
    'crawl': 0, 'walk': 1,
    'sit-floor': 2, 'sit-high-chair': 3, 'sit-low-chair': 4, 'stand': 5, 
    'hold-horizontal': 6, 'hold-vertical': 7, 'piggyback': 8, 
    'baby-food': 9, 'bottle': 10, 'breast': 11, 
    'face-down': 12, 'face-side': 13, 'face-up':14, 'roll-over': 15
}

# Shared evaluation dataset for every checkpoint below: no augmentation
# directories and is_train=False.
# NOTE(review): max_len/min_len look like sequence-length bounds; their exact
# semantics live in utils.dataset - confirm.
val_dataset = BabyMotionDataset(
    origin_dir=origin_dir,
    aug_dirs=None,
    max_len=100,
    min_len=10,
    is_train=False
)

## origin

In [17]:
# Evaluate the checkpoint stored under the `origin` run directory on val_dataset.
# NOTE(review): absolute, machine-specific path - this cell will not run elsewhere as-is.
model_path = '/fast/workspace/robinson/CodeSource/babycare/logs/lstm/origin/20250625_004247/checkpoints/best_model_epoch211.pt'
y_true, y_pred = get_predictions(model_path, val_dataset, label2idx)
eval_F1score(y_true, y_pred, label2idx)

Predicting:   0%|          | 0/4 [00:00<?, ?it/s]

Predicting: 100%|██████████| 4/4 [00:00<00:00, 10.17it/s]

Accuracy: 0.786324786
Macro F1-score: 0.688659695
Weighted F1-score: 0.782997196

Classification Report:
                 precision    recall  f1-score   support

          crawl       1.00      0.83      0.91         6
           walk       0.75      0.75      0.75         8
      sit-floor       0.83      0.91      0.87        11
 sit-high-chair       1.00      0.33      0.50         3
  sit-low-chair       0.00      0.00      0.00         1
          stand       1.00      0.33      0.50         3
hold-horizontal       0.81      0.93      0.87        14
  hold-vertical       0.62      0.71      0.67        14
      piggyback       0.78      0.78      0.78         9
      baby-food       0.78      0.86      0.82        21
         bottle       1.00      0.71      0.83         7
         breast       0.00      0.00      0.00         1
      face-down       1.00      1.00      1.00         3
      face-side       0.57      1.00      0.73         4
        face-up       1.00      1.00   


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'accuracy': 0.7863247863247863,
 'macro_f1': 0.6886596947738253,
 'weighted_f1': 0.7829971960406743}

## GN

In [15]:
# Evaluate the checkpoint stored under the `GN` run directory on val_dataset.
# NOTE(review): absolute, machine-specific path - this cell will not run elsewhere as-is.
model_path = '/fast/workspace/robinson/CodeSource/babycare/logs/lstm/GN/20250618_013831/checkpoints/best_model_epoch453.pt'
y_true, y_pred = get_predictions(model_path, val_dataset, label2idx)
eval_F1score(y_true, y_pred, label2idx)

Predicting: 100%|██████████| 4/4 [00:00<00:00,  9.92it/s]

Accuracy: 0.794871795
Macro F1-score: 0.690995667
Weighted F1-score: 0.787260756

Classification Report:
                 precision    recall  f1-score   support

          crawl       1.00      0.83      0.91         6
           walk       0.75      0.38      0.50         8
      sit-floor       0.69      0.82      0.75        11
 sit-high-chair       0.50      1.00      0.67         3
  sit-low-chair       0.00      0.00      0.00         1
          stand       1.00      0.33      0.50         3
hold-horizontal       1.00      0.86      0.92        14
  hold-vertical       0.69      0.79      0.73        14
      piggyback       0.75      0.67      0.71         9
      baby-food       0.86      0.86      0.86        21
         bottle       0.70      1.00      0.82         7
         breast       0.00      0.00      0.00         1
      face-down       1.00      1.00      1.00         3
      face-side       0.80      1.00      0.89         4
        face-up       0.75      1.00   


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'accuracy': 0.7948717948717948,
 'macro_f1': 0.6909956669147845,
 'weighted_f1': 0.7872607561144516}

## GS

In [13]:
# Evaluate the checkpoint stored under the `GS` run directory on val_dataset.
# NOTE(review): absolute, machine-specific path - this cell will not run elsewhere as-is.
model_path = '/fast/workspace/robinson/CodeSource/babycare/logs/lstm/GS/20250618_054720/checkpoints/best_model_epoch486.pt'
y_true, y_pred = get_predictions(model_path, val_dataset, label2idx)
eval_F1score(y_true, y_pred, label2idx)

Predicting: 100%|██████████| 4/4 [00:00<00:00,  9.97it/s]

Accuracy: 0.760683761
Macro F1-score: 0.671004020
Weighted F1-score: 0.749190254

Classification Report:
                 precision    recall  f1-score   support

          crawl       1.00      0.83      0.91         6
           walk       0.67      0.25      0.36         8
      sit-floor       0.54      0.64      0.58        11
 sit-high-chair       0.75      1.00      0.86         3
  sit-low-chair       0.00      0.00      0.00         1
          stand       0.25      0.33      0.29         3
hold-horizontal       1.00      0.93      0.96        14
  hold-vertical       0.60      0.43      0.50        14
      piggyback       0.69      1.00      0.82         9
      baby-food       0.85      0.81      0.83        21
         bottle       0.88      1.00      0.93         7
         breast       0.00      0.00      0.00         1
      face-down       1.00      1.00      1.00         3
      face-side       0.80      1.00      0.89         4
        face-up       0.75      1.00   


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'accuracy': 0.7606837606837606,
 'macro_f1': 0.6710040201976979,
 'weighted_f1': 0.7491902538472214}

## CS

In [14]:
# Evaluate the checkpoint stored under the `CS` run directory on val_dataset.
# NOTE(review): absolute, machine-specific path - this cell will not run elsewhere as-is.
model_path = '/fast/workspace/robinson/CodeSource/babycare/logs/lstm/CS/20250618_094700/checkpoints/best_model_epoch3191.pt'
y_true, y_pred = get_predictions(model_path, val_dataset, label2idx)
eval_F1score(y_true, y_pred, label2idx)

Predicting: 100%|██████████| 4/4 [00:00<00:00, 10.13it/s]

Accuracy: 0.777777778
Macro F1-score: 0.674743880
Weighted F1-score: 0.767736005

Classification Report:
                 precision    recall  f1-score   support

          crawl       1.00      0.83      0.91         6
           walk       0.80      0.50      0.62         8
      sit-floor       0.80      0.73      0.76        11
 sit-high-chair       0.50      1.00      0.67         3
  sit-low-chair       0.00      0.00      0.00         1
          stand       0.50      0.33      0.40         3
hold-horizontal       0.81      0.93      0.87        14
  hold-vertical       0.64      0.50      0.56        14
      piggyback       0.69      1.00      0.82         9
      baby-food       0.86      0.90      0.88        21
         bottle       0.86      0.86      0.86         7
         breast       0.00      0.00      0.00         1
      face-down       1.00      1.00      1.00         3
      face-side       0.67      1.00      0.80         4
        face-up       0.75      1.00   


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'accuracy': 0.7777777777777778,
 'macro_f1': 0.674743880150857,
 'weighted_f1': 0.7677360048021944}

## ChatGPT-o4

In [6]:
# Evaluate the checkpoint stored under the `ChatGPT-o4-instructed` run directory on val_dataset.
# NOTE(review): absolute, machine-specific path - this cell will not run elsewhere as-is.
# NOTE(review): the DTW section of this notebook reads `ChatGPT-o4-instructed_v2` data,
# while this run directory has no `_v2` suffix - confirm both refer to the same augmentation.
model_path = '/fast/workspace/robinson/CodeSource/babycare/logs/lstm/ChatGPT-o4-instructed/20250627_090546/checkpoints/best_model_epoch418.pt'
y_true, y_pred = get_predictions(model_path, val_dataset, label2idx)
eval_F1score(y_true, y_pred, label2idx)

Predicting: 100%|██████████| 4/4 [00:00<00:00, 10.02it/s]

Accuracy: 0.846153846
Macro F1-score: 0.786136960
Weighted F1-score: 0.839650837

Classification Report:
                 precision    recall  f1-score   support

          crawl       1.00      0.83      0.91         6
           walk       1.00      0.75      0.86         8
      sit-floor       0.85      1.00      0.92        11
 sit-high-chair       1.00      1.00      1.00         3
  sit-low-chair       0.33      1.00      0.50         1
          stand       0.50      0.33      0.40         3
hold-horizontal       0.81      0.93      0.87        14
  hold-vertical       0.80      0.57      0.67        14
      piggyback       0.90      1.00      0.95         9
      baby-food       0.81      0.81      0.81        21
         bottle       0.78      1.00      0.88         7
         breast       0.00      0.00      0.00         1
      face-down       1.00      1.00      1.00         3
      face-side       0.80      1.00      0.89         4
        face-up       1.00      1.00   


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


{'accuracy': 0.8461538461538461,
 'macro_f1': 0.7861369597679582,
 'weighted_f1': 0.8396508366838604}