### Global Training Normalization

In [11]:
import os
import json
import numpy as np
from glob import glob

def normalize_train_set_from_split(input_root, output_root, split_json, stats_path='norm_stats.json', revised_split_path='data_split_revised.json'):
    """Fit normalization statistics on the train split and normalize every data file.

    Steps, in order:

    1. Load ``split_json`` (a mapping ``split name -> list of paths relative to
       input_root``), drop entries whose file does not exist, and write the
       filtered mapping to ``revised_split_path``.
    2. Read the surviving ``'train'`` files and gather the values stored under
       ``data['landmarks']`` for the keys ``pose_keypoints_3d``,
       ``hand_left_keypoints_3d`` and ``hand_right_keypoints_3d``. Each flat
       list is reshaped into rows of three values.
    3. Compute the per-column mean/std of all pose rows and the per-column
       min/max of all hand rows (left and right pooled), and save them to
       ``stats_path``. The stored ``pose_std`` already includes the ``1e-6``
       offset.
    4. Apply z-score normalization to pose rows and min-max normalization to
       hand rows in every ``*.json`` found recursively under ``input_root``
       (not only the train files) and write the results under ``output_root``,
       preserving relative paths.

    Args:
        input_root: Directory containing the raw keypoint JSON files.
        output_root: Directory that receives the normalized copies.
        split_json: Path to the JSON file describing the data split.
        stats_path: Where the normalization statistics are written.
        revised_split_path: Where the split with missing files removed is written.

    Raises:
        ValueError: If the valid train files contain no pose keypoints or no
            hand keypoints, so no statistics can be computed. Raised before
            ``stats_path`` is written.
        KeyError: If a data file has no ``'landmarks'`` entry.
    """
    eps = 1e-6  # keeps both divisors non-zero when a column is constant

    os.makedirs(output_root, exist_ok=True)

    # --- load split json ---
    with open(split_json, 'r', encoding='utf-8') as f:
        split_data = json.load(f)

    # --- validate files: keep only entries that exist on disk ---
    revised_split = {}
    for split_name, files in split_data.items():
        valid_files = []
        for rel_path in files:
            full_path = os.path.join(input_root, rel_path)
            if os.path.exists(full_path):
                valid_files.append(rel_path)
            else:
                print(f'⚠️ File not found and skipped: {rel_path}')
        revised_split[split_name] = valid_files

    # --- save revised split ---
    with open(revised_split_path, 'w', encoding='utf-8') as f:
        json.dump(revised_split, f, indent=2, ensure_ascii=False)
    print(f'Saved revised split to "{revised_split_path}"')

    train_files = revised_split.get('train', [])

    def to_np(key, people):
        """Return people[key] as an (N, 3) array; a missing/empty entry gives (0, 3)."""
        arr = np.array(people.get(key, []))
        if arr.size == 0:
            return np.zeros((0, 3))
        return arr.reshape(-1, 3)

    pose_all = []
    hand_all = []

    # --- collect keypoints: only from train files that exist ---
    print(f'Collecting stats from {len(train_files)} valid training files')
    for rel_path in train_files:
        file_path = os.path.join(input_root, rel_path)
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        people = data['landmarks']
        pose = to_np('pose_keypoints_3d', people)
        hand_l = to_np('hand_left_keypoints_3d', people)
        hand_r = to_np('hand_right_keypoints_3d', people)

        if pose.size > 0:
            pose_all.append(pose)
        if hand_l.size > 0:
            hand_all.append(hand_l)
        if hand_r.size > 0:
            hand_all.append(hand_r)

    # Fail fast with an actionable message: reducing an empty array would
    # otherwise give NaN statistics (mean/std) or an opaque NumPy error (min/max).
    if not pose_all:
        raise ValueError(
            f'No pose keypoints found in the {len(train_files)} valid training '
            f'file(s); cannot compute pose normalization statistics.'
        )
    if not hand_all:
        raise ValueError(
            f'No hand keypoints found in the {len(train_files)} valid training '
            f'file(s); cannot compute hand normalization statistics.'
        )

    # --- compute statistics ---
    pose_all_np = np.concatenate(pose_all, axis=0)
    hand_all_np = np.concatenate(hand_all, axis=0)

    pose_mean = pose_all_np.mean(axis=0)
    pose_std = pose_all_np.std(axis=0) + eps
    hand_min = hand_all_np.min(axis=0)
    hand_max = hand_all_np.max(axis=0)
    hand_range = hand_max - hand_min + eps  # loop-invariant, computed once

    stats = {
        'pose_mean': pose_mean.tolist(),
        'pose_std': pose_std.tolist(),
        'hand_min': hand_min.tolist(),
        'hand_max': hand_max.tolist()
    }

    with open(stats_path, 'w', encoding='utf-8') as f:
        json.dump(stats, f, indent=2)
    print(f'Saved normalization stats to "{stats_path}"')

    # --- normalization helpers ---
    def normalize_pose(pose):
        return (pose - pose_mean) / pose_std if pose.size > 0 else pose

    def normalize_hand(hand):
        return (hand - hand_min) / hand_range if hand.size > 0 else hand

    def to_list(x):
        return x.flatten().tolist() if x.size > 0 else []

    def canonical(path):
        """Absolute, case-normalized form used only for path comparisons."""
        return os.path.normcase(os.path.abspath(path))

    # The recursive glob below picks up *every* JSON under input_root. Skip the
    # bookkeeping files this function reads/writes (they have no 'landmarks')
    # and anything already inside output_root (would be normalized twice when
    # output_root is nested in input_root and the function is re-run).
    metadata_paths = {canonical(split_json), canonical(stats_path), canonical(revised_split_path)}
    output_prefix = os.path.join(canonical(output_root), '')

    # --- normalize all files ---
    print('Applying normalization to all .json files...')
    all_files = glob(os.path.join(input_root, '**', '*.json'), recursive=True)
    for file_path in all_files:
        canon_path = canonical(file_path)
        if canon_path in metadata_paths or canon_path.startswith(output_prefix):
            continue

        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        people = data['landmarks']

        pose = to_np('pose_keypoints_3d', people)
        hand_l = to_np('hand_left_keypoints_3d', people)
        hand_r = to_np('hand_right_keypoints_3d', people)

        # Written back as flat lists, mirroring the input layout.
        people['pose_keypoints_3d'] = to_list(normalize_pose(pose))
        people['hand_left_keypoints_3d'] = to_list(normalize_hand(hand_l))
        people['hand_right_keypoints_3d'] = to_list(normalize_hand(hand_r))

        rel_path = os.path.relpath(file_path, input_root)
        output_file_path = os.path.join(output_root, rel_path)
        os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

        with open(output_file_path, 'w', encoding='utf-8') as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

    print('Finished applying normalization to all files.')

In [None]:
# Fit normalization statistics on the 'train' entries of data_split.json and
# write normalized copies of every JSON under `input_root` to `output_root`.
# The statistics and the revised split (missing files removed) are saved next
# to the original split file.
# NOTE(review): all paths are absolute and specific to one machine — adjust
# them (or move them to a config cell) before running elsewhere.
normalize_train_set_from_split(
    input_root = 'C:/Users/이예지학부휴학통계학과/Downloads/raw',
    output_root = 'C:/Users/이예지학부휴학통계학과/Downloads/normalized',
    split_json='C:/Users/이예지학부휴학통계학과/Downloads/conference/.venv/NIA-1-69 재난안전정보 수어영상 데이터_AI 모델소스/data/data_split.json',
    stats_path='C:/Users/이예지학부휴학통계학과/Downloads/conference/.venv/NIA-1-69 재난안전정보 수어영상 데이터_AI 모델소스/data/norm_stats.json',
    revised_split_path='C:/Users/이예지학부휴학통계학과/Downloads/conference/.venv/NIA-1-69 재난안전정보 수어영상 데이터_AI 모델소스/data/data_split_revised.json'
)

⚠️ File not found and skipped: NIA_SL_G2_FIRE001747_1_KU02.json
⚠️ File not found and skipped: NIA_SL_G2_FIRE001747_3_KU02.json
⚠️ File not found and skipped: NIA_SL_G2_FIRE000879_3_KU02.json
⚠️ File not found and skipped: NIA_SL_G2_FIRE004347_1_KU02.json
⚠️ File not found and skipped: NIA_SL_G2_FIRE002390_2_TW03.json
⚠️ File not found and skipped: NIA_SL_G2_FIRE004113_1_CROWD1075744.json
⚠️ File not found and skipped: NIA_SL_G2_FIRE003237_1_CROWD1075776.json
⚠️ File not found and skipped: NIA_SL_G2_FIRE003131_1_CROWD1075776.json
⚠️ File not found and skipped: NIA_SL_G2_FIRE004292_2_KU02.json
⚠️ File not found and skipped: NIA_SL_G2_FIRE002305_2_TW03.json
⚠️ File not found and skipped: NIA_SL_G2_FIRE001098_2_KU02.json
⚠️ File not found and skipped: NIA_SL_G2_FIRE002888_2_TW30.json
⚠️ File not found and skipped: NIA_SL_G2_FIRE002131_2_KU02.json
⚠️ File not found and skipped: NIA_SL_G2_FIRE002387_1_TW03.json
⚠️ File not found and skipped: NIA_SL_G2_FIRE002700_2_KU02.json
⚠️ File not foun

### Global Validation Normalization

In [None]:
import os
import json
import numpy as np
from glob import glob

def normalize_validation_set(input_root, output_root, keywords, stats_path='norm_stats.json'):
    """Normalize validation files with previously saved training statistics.

    Loads ``pose_mean``, ``pose_std``, ``hand_min`` and ``hand_max`` from
    ``stats_path``. For every keyword it then finds the files matching
    ``*{keyword}*.json`` recursively under ``input_root``, applies z-score
    normalization to ``pose_keypoints_3d`` and min-max normalization to
    ``hand_left_keypoints_3d`` / ``hand_right_keypoints_3d`` (all read from
    ``data['landmarks']`` as flat lists reshaped into rows of three values),
    and writes the result under ``output_root`` with the same relative path.

    The stored ``pose_std`` is used as the divisor unchanged:
    ``normalize_train_set_from_split`` saves it with the ``1e-6`` offset
    already included, so adding the offset again would make validation data
    use a different divisor than the training data. Non-positive entries
    (possible only in stats produced elsewhere) are replaced by ``1e-6`` to
    avoid dividing by zero.

    Args:
        input_root: Directory searched recursively for validation JSON files.
        output_root: Directory that receives the normalized copies.
        keywords: Iterable of substrings that select files by name. A single
            string is treated as one keyword.
        stats_path: JSON file holding the normalization statistics.

    Raises:
        FileNotFoundError: If ``stats_path`` does not exist.
        KeyError: If the stats file lacks a statistic or a data file has no
            ``'landmarks'`` entry.
    """
    os.makedirs(output_root, exist_ok=True)

    # A bare string would otherwise be iterated character by character.
    if isinstance(keywords, str):
        keywords = [keywords]

    # --- load statistics ---
    with open(stats_path, 'r', encoding='utf-8') as f:
        stats = json.load(f)

    pose_mean = np.array(stats['pose_mean'])
    pose_std = np.array(stats['pose_std'], dtype=float)
    # Use the stored std as-is (epsilon already baked in by the training step);
    # only guard against a degenerate, non-positive value.
    pose_std = np.where(pose_std > 0, pose_std, 1e-6)
    hand_min = np.array(stats['hand_min'])
    hand_max = np.array(stats['hand_max'])
    hand_range = hand_max - hand_min + 1e-6  # same divisor as the training step

    def to_np(key, people):
        """Return people[key] as an (N, 3) array; a missing/empty entry gives (0, 3)."""
        arr = np.array(people.get(key, []))
        if arr.size == 0:
            return np.zeros((0, 3))
        return arr.reshape(-1, 3)

    def normalize_pose(pose):
        if pose.size == 0:
            return pose
        return (pose - pose_mean) / pose_std

    def normalize_hand(hand):
        if hand.size == 0:
            return hand
        return (hand - hand_min) / hand_range

    def to_list(x):
        return x.flatten().tolist() if x.size > 0 else []

    processed = set()  # a file matching several keywords is handled only once

    for keyword in keywords:
        print(f'Normalizing validation keyword: {keyword}')
        files = glob(os.path.join(input_root, '**', f'*{keyword}*.json'), recursive=True)
        for file_path in files:
            if file_path in processed:
                continue
            processed.add(file_path)

            with open(file_path, 'r', encoding='utf-8') as f:
                data = json.load(f)
            people = data['landmarks']

            pose = to_np('pose_keypoints_3d', people)
            hand_l = to_np('hand_left_keypoints_3d', people)
            hand_r = to_np('hand_right_keypoints_3d', people)

            # Written back as flat lists, mirroring the input layout.
            people['pose_keypoints_3d'] = to_list(normalize_pose(pose))
            people['hand_left_keypoints_3d'] = to_list(normalize_hand(hand_l))
            people['hand_right_keypoints_3d'] = to_list(normalize_hand(hand_r))

            rel_path = os.path.relpath(file_path, input_root)
            output_file_path = os.path.join(output_root, rel_path)
            os.makedirs(os.path.dirname(output_file_path), exist_ok=True)

            with open(output_file_path, 'w', encoding='utf-8') as f:
                json.dump(data, f, ensure_ascii=False, indent=2)

        print(f'Finished normalizing validation keyword: {keyword}')


In [None]:
# Normalize the tact_morpheme_val/1_1 files whose names contain one of the
# listed keywords, using the statistics saved by the training step.
# NOTE(review): `stats_path` is relative here, while the training cell wrote
# its stats to an absolute path ending in .../data/norm_stats.json — confirm
# the kernel's working directory makes these the same file.
normalize_validation_set(
    input_root='C:/Users/이예지학부휴학통계학과/Downloads/tact_morpheme_val/1_1',
    output_root='C:/Users/이예지학부휴학통계학과/Downloads/normalized',
    keywords=['TW03', 'TW87', 'TW104'],
    stats_path='norm_stats.json'
)

In [None]:
# Normalize the tact_morpheme_val/1_2 files whose names contain one of the
# listed keywords, using the statistics saved by the training step.
# NOTE(review): `stats_path` is relative here, while the training cell wrote
# its stats to an absolute path ending in .../data/norm_stats.json — confirm
# the kernel's working directory makes these the same file.
normalize_validation_set(
    input_root='C:/Users/이예지학부휴학통계학과/Downloads/tact_morpheme_val/1_2',
    output_root='C:/Users/이예지학부휴학통계학과/Downloads/normalized',
    keywords=['TW03', 'TW114', 'TW04', 'TW30'],
    stats_path='norm_stats.json'
)

In [None]:
# Normalize the tact_morpheme_val/1_3 files whose names contain 'KU02',
# using the statistics saved by the training step.
# NOTE(review): `stats_path` is relative here, while the training cell wrote
# its stats to an absolute path ending in .../data/norm_stats.json — confirm
# the kernel's working directory makes these the same file.
normalize_validation_set(
    input_root='C:/Users/이예지학부휴학통계학과/Downloads/tact_morpheme_val/1_3',
    output_root='C:/Users/이예지학부휴학통계학과/Downloads/normalized',
    keywords=['KU02'],
    stats_path='norm_stats.json'
)

In [None]:
# Normalize the untact_morpheme_val/1_1 files whose names contain one of the
# listed keywords, using the statistics saved by the training step.
# Keywords are substring matches, so e.g. 'CROWD1075' selects every file name
# containing that text.
# NOTE(review): `stats_path` is relative here, while the training cell wrote
# its stats to an absolute path ending in .../data/norm_stats.json — confirm
# the kernel's working directory makes these the same file.
normalize_validation_set(
    input_root='C:/Users/이예지학부휴학통계학과/Downloads/untact_morpheme_val/1_1',
    output_root='C:/Users/이예지학부휴학통계학과/Downloads/normalized',
    keywords=['CROWD04', 'CROWD1067644', 'CROWD1075', 'CROWD128', 'CROWD87', 'CROWD112'],
    stats_path='norm_stats.json'
)