In [None]:
"""
Creates database from converted MediaPipe output files (should now be in .txt format with _mp33 suffix)
for use with RNN for Human Activity Recognition - 2D Pose Input (now 33*2D = 66 features)

Adapted from original OpenPose script by Stuart Eiffert 13/12/2017
Modifications for MediaPipe (33 points) by Your Name/AI DATE

All code is provided under the MIT License (assuming original license applies)

"""

import glob
import os
import numpy as np
# pandas is used to load the keypoint files and interpolate missing values.
import pandas as pd

# --- Output file names ---
# These files are created inside data_path; sequences go to the X files and
# one label line per sequence goes to the matching Y file.
train_file_X, train_file_Y = "X_train.txt", "Y_train.txt"
test_file_X, test_file_Y = "X_test.txt", "Y_test.txt"

# --- Input location (MediaPipe setup) ---
# Root folder that holds one sub-folder per activity.
data_path = r"C:\Users\admin\Downloads\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\MP_Data_JSON"

# Activity sub-folder names. Order matters: the label written for an activity
# is its position in this list plus one.
# Earlier label set, kept for reference:
#   ['heel_hook', 'deadpoint', 'dyno', 'cross_move']
activity_list = [
    'ageru',
    'understand',
    'annsinnsuru',
    'heavy',
]

# --- RNN sequence (sliding window) settings ---
# Number of consecutive frames (lines) written per sequence.
num_steps = 30
# Fraction of a window shared with the next one; window starts advance by
# num_steps * (1 - overlap) frames (truncated to an integer per window).
overlap = 0.8125
# A file goes to the train set when a uniform random draw in [0, 1) is below
# this value; at 0.0 every file is therefore assigned to the test set.
test_train_split = 0.0
# Enables the optional second-stage split at the end of the script.
split = False

# --- Ensure data_path exists ---
if not os.path.isdir(data_path):
    print(f"Error: data_path '{data_path}' not found. Please check the path.")
    # exit() is the interactive-shell helper installed by the site module and
    # is not guaranteed to abort a running notebook cell (the captured run of
    # this script printed the error above and then kept processing).
    # Raising SystemExit stops execution both as a script and in a notebook,
    # and reports a non-zero status because this is an error exit.
    raise SystemExit(1)

# --- Clean up old train/test files if they exist ---
# The processing loop opens the output files in append mode, so results left
# over from a previous run must be removed first to avoid duplicated data.
files_to_remove = [test_file_X, test_file_Y, train_file_X, train_file_Y]
for f_name in files_to_remove:
    f_path = os.path.join(data_path, f_name)
    if os.path.exists(f_path):
        print(f"Removing existing file: {f_path}")
        os.remove(f_path)
print("-" * 30)

# Remember where we started and resolve data_path once: the processing loop
# changes the working directory, so every later path is built from the
# absolute form.
initial_cwd = os.getcwd()
abs_data_path = os.path.abspath(data_path)

# Process each activity.
# For every "*_mp33.txt" file found in an activity folder:
#   1. load it as headerless CSV, treating 0.0 as a missing value,
#   2. fill the gaps by linear interpolation along the frame axis,
#   3. cut the frames into overlapping windows of num_steps frames,
#   4. append the windows to the X file and one label per window to the Y file.
# The label is the activity's 1-based position in activity_list.
for activity_idx, activity_name in enumerate(activity_list):
    current_activity_full_path = os.path.join(abs_data_path, activity_name)

    print(f"Processing activity: {activity_name} in {current_activity_full_path}")

    if not os.path.isdir(current_activity_full_path):
        print(f"Warning: Directory not found for activity {activity_name}, skipping: {current_activity_full_path}")
        continue

    # The file glob below is relative, so work from inside the activity folder.
    try:
        os.chdir(current_activity_full_path)
        print(f"  Changed CWD to: {os.getcwd()}")
    except FileNotFoundError:
        print(f"  Error: Could not change directory to {current_activity_full_path}, skipping activity.")
        os.chdir(abs_data_path)
        continue

    # sorted() gives a deterministic processing order across runs.
    for file_name_in_activity_dir in sorted(glob.glob("*_mp33.txt")):
        print(f"    Processing file: {file_name_in_activity_dir}")

        # The whole file is assigned to a single set, so windows from one
        # recording never end up in both train and test.
        is_train = np.random.rand() < test_train_split
        print(f"      Assigning to {'train' if is_train else 'test'} set.")

        # Load the text file with pandas. There is no header row, and 0.0 is
        # read as NaN so that it can be interpolated below.
        try:
            data_df = pd.read_csv(file_name_in_activity_dir, header=None, na_values=0.0)
        except FileNotFoundError:
            print(f"      Error: File not found {file_name_in_activity_dir} within {os.getcwd()}, skipping.")
            continue
        except pd.errors.EmptyDataError:
            print(f"      Warning: File {file_name_in_activity_dir} is empty, skipping.")
            continue
        except pd.errors.ParserError as e:
            # A malformed file should not abort the whole database build.
            print(f"      Warning: File {file_name_in_activity_dir} could not be parsed ({e}), skipping.")
            continue

        # Make sure the DataFrame actually contains data.
        if data_df.empty:
            print(f"      Warning: File {file_name_in_activity_dir} is empty or could not be parsed, skipping.")
            continue

        # Linearly interpolate missing values along the frame (row) axis;
        # limit_direction='both' also fills gaps at the start and end.
        data_df.interpolate(method='linear', axis=0, inplace=True, limit_direction='both')

        # Any NaN still left (e.g. a column that is missing in every frame)
        # is replaced by 0.0.
        data_df.fillna(0.0, inplace=True)

        # Convert each frame back to one comma-separated text line. Joining
        # the values explicitly avoids a trailing comma at the end of a line.
        file_text = [
            ",".join(row.astype(str).tolist()) + '\n'
            for _, row in data_df.iterrows()
        ]

        num_frames = len(file_text)
        if num_frames < num_steps:
            print(f"      Warning: File {file_name_in_activity_dir} has {num_frames} frames, less than num_steps ({num_steps}). Skipping.")
            continue

        # An overlap of (almost) 1 would give a zero stride and a division by
        # zero in the window count, so it is rejected.
        if 1 - overlap <= 1e-6:
            print(f"      Error: Invalid overlap value ({overlap}) results in zero or negative step. Skipping file.")
            num_framesets = 0
        else:
            num_framesets = int((num_frames - num_steps) / (num_steps * (1 - overlap))) + 1

        print(f"      Total frames: {num_frames}, Num sequences possible: {num_framesets}")

        if num_framesets <= 0:
            print(f"      Not enough frames in {file_name_in_activity_dir} to create any sequences with current settings. Skipping.")
            continue

        if is_train:
            output_file_X_basename = train_file_X
            output_file_Y_basename = train_file_Y
        else:
            output_file_X_basename = test_file_X
            output_file_Y_basename = test_file_Y

        # Output files live in the data root, not in the activity folder.
        x_output_full_path = os.path.join(abs_data_path, output_file_X_basename)
        y_output_full_path = os.path.join(abs_data_path, output_file_Y_basename)

        try:
            # Count the windows that are really written so the number of
            # labels always matches the number of sequences in the X file.
            num_written = 0
            with open(x_output_full_path, 'a') as x_file:
                for frameset_idx in range(num_framesets):
                    # The stride may be fractional; it is truncated per window.
                    start_frame = int(frameset_idx * num_steps * (1 - overlap))
                    end_frame = start_frame + num_steps

                    if start_frame < 0 or end_frame > num_frames or start_frame >= end_frame :
                        print(f"      Warning: Invalid frame slice [{start_frame}:{end_frame}] for num_frames {num_frames}. Skipping frameset_idx {frameset_idx}.")
                        continue

                    x_file.writelines(file_text[start_frame:end_frame])
                    num_written += 1
            print(f"      Appended {num_written} sequences to {x_output_full_path}")

            with open(y_output_full_path, 'a') as y_file:
                y_file.write((str(activity_idx + 1) + "\n") * num_written)
            print(f"      Appended {num_written} labels to {y_output_full_path}")

        except IOError as e:
            print(f"      Error writing to output files: {e}")

    # Go back to the data root before handling the next activity.
    try:
        os.chdir(abs_data_path)
        print(f"  Returned CWD to: {os.getcwd()} (after processing {activity_name})")
    except OSError as e:
        print(f"  Error on os.chdir back to base data_path ({abs_data_path}) "
              f"from within {activity_name} directory structure: {e}")
        print(f"  Current CWD is: {os.getcwd()}. Subsequent paths might be incorrect.")

# Visual separator between the per-activity log and the final messages.
print("-" * 30)

# Optional second-stage split, executed only when the `split` flag is True.
# (The split=True block is unchanged from the previous version of the script.)
if split:
    print(f"Performing second stage split (if split=True) in CWD: {os.getcwd()}")
    # The train files are selected as the source of the re-split.
    source_X_to_resplit = train_file_X 
    source_Y_to_resplit = train_file_Y 
    # ... (the remaining code of this branch is also unchanged and is not
    # shown in this listing)

print("Database creation process finished.")

Error: data_path 'C:\Users\admin\Downloads\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\MP_Data_JSON' not found. Please check the path.
------------------------------
Processing activity: ageru in C:\Users\admin\Downloads\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\MP_Data_JSON\ageru
Processing activity: understand in C:\Users\admin\Downloads\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\MP_Data_JSON\understand
Processing activity: annsinnsuru in C:\Users\admin\Downloads\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\MP_Data_JSON\annsinnsuru
Processing activity: heavy in C:\Users\admin\Downloads\RNN-for-Human-Activity-Recognition-using-2D-Pose-Input-master\MP_Data_JSON\heavy
------------------------------
Database creation process finished.


: 