In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
import os
import re
from typing import Dict, List, Tuple
import numpy as np # Added numpy just in case it's used elsewhere

# Suppress warnings that may arise during plotting/stitching
import warnings
warnings.filterwarnings("ignore")


A module that was compiled using NumPy 1.x cannot be run in
NumPy 2.2.6 as it may crash. To support both 1.x and 2.x
versions of NumPy, modules must be compiled with NumPy 2.0.
Some module may need to rebuild instead e.g. with 'pybind11>=2.12'.

If you are a user of the module, the easiest solution will be to
downgrade to 'numpy<2' or try to upgrade the affected module.
We expect that some modules will need time to support NumPy 2.

Traceback (most recent call last):  File "/Users/khangphan/.pyenv/versions/3.10.5/lib/python3.10/runpy.py", line 196, in _run_module_as_main
    return _run_code(code, main_globals, None,
  File "/Users/khangphan/.pyenv/versions/3.10.5/lib/python3.10/runpy.py", line 86, in _run_code
    exec(code, run_globals)
  File "/Users/khangphan/.pyenv/versions/3.10.5/lib/python3.10/site-packages/ipykernel_launcher.py", line 18, in <module>
    app.launch_new_instance()
  File "/Users/khangphan/.pyenv/versions/3.10.5/lib/python3.10/site-packages/traitlets/config/appl

AttributeError: _ARRAY_API not found

1. Global Definitions

In [2]:
# --- Global Configurations ---
# Define directory paths ( Để lấy dữ liệu của participant và save vào mục file chứa file xử lý png của participants)
DATA_DIR: str = '../Processed_data_shrink'
OUTPUT_BASE_DIR: str = './image list'
ACTIVITY_OUTPUT_FILE: str = 'overlaid_chronological_activities.png'

# Define Activity Labels 
LABEL_MAP: Dict[int, str] = {
    1: 'jogging', 2: 'jogging (rotating arms)', 3: 'jogging (skipping)',
    4: 'jogging (sidesteps)', 5: 'jogging (butt-kicks)', 6: 'stretching (triceps)',
    7: 'stretching (lunging)', 8: 'stretching (shoulders)', 9: 'stretching (hamstrings)',
    10: 'stretching (lumbar rotation)', 11: 'push-ups', 12: 'push-ups (complex)',
    13: 'sit-ups', 14: 'sit-ups (complex)', 15: 'burpees',
    16: 'lunges', 17: 'lunges (complex)', 18: 'bench-dips',
    19: 'Not recognized'
}
# Creates the inverse map: Activity Name -> Code
INVERTED_LABEL_MAP: Dict[str, int] = {v: k for k, v in LABEL_MAP.items()}

2. Activity Plotting

In [3]:
# --- Generic Helper Functions ---

def _prepare_activity_data(df: pd.DataFrame) -> pd.DataFrame:
    """Helper function to clean and prepare the 'label' column for plotting."""
    if df['label'].dtype == object:
        df['label'] = df['label'].map(INVERTED_LABEL_MAP)
    df['label'] = pd.to_numeric(df['label'], errors='coerce')
    return df.dropna(subset=['label'])


def _plot_overlaid_activity(df_train: pd.DataFrame, df_test: pd.DataFrame) -> str:
    """Generates a single plot showing training and test activities overlaid (sensor-agnostic)."""
    
    df_train_plot = _prepare_activity_data(df_train.copy())
    df_test_plot = _prepare_activity_data(df_test.copy())

    fig_act, ax_act = plt.subplots(1, 1, figsize=(15, 6))
    
    # Plot Training Activity
    ax_act.plot(df_train_plot.index, df_train_plot['label'], drawstyle='steps-post', label='Training Activity', linewidth=1.5, color='C2')
    # Plot Test Activity (Overlay)
    ax_act.plot(df_test_plot.index, df_test_plot['label'], drawstyle='steps-post', label='Test Activity', linewidth=1.5, color='C3', alpha=0.7) 

    activity_labels = list(LABEL_MAP.values())
    activity_values = list(LABEL_MAP.keys())

    ax_act.set_yticks(activity_values)
    ax_act.set_yticklabels(activity_labels)
    ax_act.set_xlabel('Time Step (Index) - Chronological Order')
    ax_act.set_ylabel('Activity')
    ax_act.set_title('Chronological Activities: Training and Test Overlaid', fontsize=14)
    ax_act.grid(True, linestyle='--', alpha=0.6)
    ax_act.legend(loc='upper right')

    ax_act.set_ylim(min(activity_values) - 0.5, max(activity_values) + 0.5)
    max_len = max(len(df_train_plot), len(df_test_plot))
    ax_act.set_xlim(0, max_len)

    plt.tight_layout()
    fig_act.savefig(ACTIVITY_OUTPUT_FILE)
    plt.close(fig_act)
    return ACTIVITY_OUTPUT_FILE

3. Image Stitching and Cleanup
Tác dụng: gộp 3 graph acceleration x,y,z của tay (và chân) với activities graph thành 1 file png

In [4]:
def _stitch_images(file_list: List[str], final_output_file: str, output_dir: str) -> str:
    """Stitches multiple PNG images, saves the final file to the specified directory, and cleans up the intermediary files."""
    
    final_output_path = os.path.join(output_dir, final_output_file)
    
    if not file_list or any(not os.path.exists(f) for f in file_list):
        for f in file_list:
            if os.path.exists(f):
                os.remove(f)
        raise FileNotFoundError("One or more generated plot files are missing. Stitching aborted.")
        
    images = [Image.open(f) for f in file_list]
    widths, heights = zip(*(i.size for i in images))
    max_width = max(widths)
    total_height = sum(heights)

    combined_img = Image.new('RGB', (max_width, total_height), color='white')

    y_offset = 0
    for img in images:
        x_offset = int((max_width - img.size[0]) / 2)
        combined_img.paste(img, (x_offset, y_offset))
        y_offset += img.size[1]

    # Create directory if it doesn't exist and save the final image
    os.makedirs(output_dir, exist_ok=True)
    combined_img.save(final_output_path)
    
    # Delete individual plot files from the current working directory
    for f in file_list:
        if os.path.exists(f):
            os.remove(f)
    
    return final_output_path

3.1. Arm Acceleration Plotting
Plotting arm acceleration data của training và test của cùng 1 participant và xuất file png (bằng function _stitch_images)

In [5]:
# --- Arm Acceleration Functions ---

def _plot_separate_arm_acceleration(df_train: pd.DataFrame, df_test: pd.DataFrame) -> List[str]:
    """Generates three temporary figures for X, Y, Z ARM acceleration."""
    
    acc_cols = ["arm_acc_x", "arm_acc_y", "arm_acc_z"]
    generated_files = []
    
    for col in acc_cols:
        fig, ax = plt.subplots(1, 1, figsize=(15, 6))
        ax.plot(df_train.index, df_train[col], label='Training Data', linewidth=0.5, color='C0')
        ax.plot(df_test.index, df_test[col], label='Test Data', linewidth=0.5, color='C1', alpha=0.7)

        axis = col.split("_")[-1].upper()
        ax.set_title(f'Arm Acceleration Component: {axis} Axis - Training vs. Test (Overlay)', fontsize=14)
        ax.set_ylabel(r'Acceleration ($\mathrm{m/s^2}$)')
        ax.set_xlabel('Time Step (Index) - Chronological Order')
        ax.grid(True, linestyle='--', alpha=0.6)
        ax.legend(loc='upper right')
        
        plt.tight_layout()
        output_filename = f'combined_arm_acc_{axis.lower()}.png'
        fig.savefig(output_filename)
        plt.close(fig)
        generated_files.append(output_filename)
        
    return generated_files


def generate_combined_arm_acceleration_plots(df_train: pd.DataFrame, df_test: pd.DataFrame, output_dir: str) -> str:
    """Generates and saves the final combined PNG for ARM acceleration."""
    
    all_generated_files = []
    
    all_generated_files.extend(_plot_separate_arm_acceleration(df_train, df_test))
    
    # We must include the activity plot path to stitch it and ensure it's deleted.
    if not os.path.exists(ACTIVITY_OUTPUT_FILE):
        all_generated_files.append(_plot_overlaid_activity(df_train.copy(), df_test.copy()))
    else:
        all_generated_files.append(ACTIVITY_OUTPUT_FILE)

    final_file = "all_graphs_combined_arm.png"
    return _stitch_images(all_generated_files, final_file, output_dir)

3.2 Leg Acceleration Plotting
Plotting leg acceleration data của training và test của cùng 1 participant và xuất file png (bằng function _stitch_images)

In [6]:
# --- Leg Acceleration Functions ---

def _plot_separate_leg_acceleration(df_train: pd.DataFrame, df_test: pd.DataFrame) -> List[str]:
    """Generates three temporary figures for X, Y, Z LEG acceleration."""
    
    acc_cols = ["leg_acc_x", "leg_acc_y", "leg_acc_z"]
    generated_files = []
    
    for col in acc_cols:
        fig, ax = plt.subplots(1, 1, figsize=(15, 6))
        ax.plot(df_train.index, df_train[col], label='Training Data', linewidth=0.5, color='C0')
        ax.plot(df_test.index, df_test[col], label='Test Data', linewidth=0.5, color='C1', alpha=0.7)

        axis = col.split("_")[-1].upper()
        ax.set_title(f'Leg Acceleration Component: {axis} Axis - Training vs. Test (Overlay)', fontsize=14)
        ax.set_ylabel(r'Acceleration ($\mathrm{m/s^2}$)')
        ax.set_xlabel('Time Step (Index) - Chronological Order')
        ax.grid(True, linestyle='--', alpha=0.6)
        ax.legend(loc='upper right')
        
        plt.tight_layout()
        output_filename = f'combined_leg_acc_{axis.lower()}.png'
        fig.savefig(output_filename)
        plt.close(fig)
        generated_files.append(output_filename)
        
    return generated_files


def generate_combined_leg_acceleration_plots(df_train: pd.DataFrame, df_test: pd.DataFrame, output_dir: str) -> str:
    """Generates and saves the final combined PNG for LEG acceleration."""
    
    all_generated_files = []
    
    all_generated_files.extend(_plot_separate_leg_acceleration(df_train, df_test))
    all_generated_files.append(_plot_overlaid_activity(df_train.copy(), df_test.copy()))

    final_file = "all_graphs_combined_leg.png"
    return _stitch_images(all_generated_files, final_file, output_dir)

4. Thực hiện quả trình gộp, xuất file png cho từng participant và save trong folder cá nhân  của participant

In [7]:
# --- Master Execution Function ---

def process_all_participants() -> str:
    """
    Finds all participant data pairs, creates an output folder for each,
    and generates the Arm and Leg combined plots, saving them to the participant's folder.
    """
    
    try:
        if not os.path.exists(DATA_DIR):
            return f"\nERROR: Data directory not found: '{DATA_DIR}'. Please ensure it exists."

        # --- 1. Identify and Group Files ---
        all_files = os.listdir(DATA_DIR)
        
        # Regex captures data type (1 or 2) and subject code (e.g., 0, 1, 0_2)
        pattern = re.compile(r'([12])_sbj_([0-9_]+)\.csv')
        
        file_groups: Dict[str, Dict[str, str]] = {}

        for filename in all_files:
            match = pattern.match(filename)
            if match:
                data_type, subj_code = match.groups()
                if subj_code not in file_groups:
                    file_groups[subj_code] = {}
                # '1' is training data path, '2' is test data path
                file_groups[subj_code][data_type] = os.path.join(DATA_DIR, filename)

        # Filter for only subjects that have both a '1' (train) and '2' (test) file
        complete_groups = {
            subj: paths for subj, paths in file_groups.items() 
            if '1' in paths and '2' in paths
        }
        
        if not complete_groups:
            return f"\nNo complete participant pairs ('1_sbj_X.csv' and '2_sbj_X.csv') found in '{DATA_DIR}'. Found {len(all_files)} files."

        # --- 2. Process Each Participant ---
        print(f"Found {len(complete_groups)} complete participant pairs to process.")
        summary_results: List[str] = []
        
        for subj_code, paths in complete_groups.items():
            print(f"\n--- Processing Participant Code: {subj_code} ---")
            train_path = paths['1']
            test_path = paths['2']
            
            # The folder name is the participant's code name
            output_folder = os.path.join(OUTPUT_BASE_DIR, subj_code)

            # Load Data
            df_train = pd.read_csv(train_path)
            df_test = pd.read_csv(test_path)
            
            # Generate and save LEG analysis
            print(f"  -> Generating LEG analysis for {subj_code}...")
            leg_file_path = generate_combined_leg_acceleration_plots(df_train, df_test, output_folder)
            summary_results.append(f"✅ Leg plots for {subj_code} saved to: {leg_file_path}")

            # Generate and save ARM analysis
            print(f"  -> Generating ARM analysis for {subj_code}...")
            arm_file_path = generate_combined_arm_acceleration_plots(df_train, df_test, output_folder)
            summary_results.append(f"✅ Arm plots for {subj_code} saved to: {arm_file_path}")

        return "\n" + "\n".join(summary_results) + "\n\n**Processing complete for all participants.**"

    except KeyError as e:
        return f"\nERROR: Missing required data column: {e}. Please check the CSV file structure."
    except Exception as e:
        return f"\nAn unexpected error occurred: {e}"

5. Chạy code

In [8]:
# Execute the master function
result = process_all_participants()
print(result)

Found 4 complete participant pairs to process.

--- Processing Participant Code: 0_2 ---
  -> Generating LEG analysis for 0_2...
  -> Generating ARM analysis for 0_2...

--- Processing Participant Code: 2 ---
  -> Generating LEG analysis for 2...
  -> Generating ARM analysis for 2...

--- Processing Participant Code: 0 ---
  -> Generating LEG analysis for 0...
  -> Generating ARM analysis for 0...

--- Processing Participant Code: 1 ---
  -> Generating LEG analysis for 1...
  -> Generating ARM analysis for 1...

✅ Leg plots for 0_2 saved to: ./image list/0_2/all_graphs_combined_leg.png
✅ Arm plots for 0_2 saved to: ./image list/0_2/all_graphs_combined_arm.png
✅ Leg plots for 2 saved to: ./image list/2/all_graphs_combined_leg.png
✅ Arm plots for 2 saved to: ./image list/2/all_graphs_combined_arm.png
✅ Leg plots for 0 saved to: ./image list/0/all_graphs_combined_leg.png
✅ Arm plots for 0 saved to: ./image list/0/all_graphs_combined_arm.png
✅ Leg plots for 1 saved to: ./image list/1/all_g