For the ETRI dataset we only use recordings that contain a single body.
1. Move every CSV file that contains more than one body (more than one unique trackingID) out of the input folder.

In [None]:
import pandas as pd
import os
import shutil
from concurrent.futures import ThreadPoolExecutor

# Folder holding the raw CSV files, and the folder that receives every file
# in which more than one body was tracked.
input_folder, output_folder = "csv data", "2bodynum"

# Idempotent: succeeds whether or not the folder is already there.
os.makedirs(output_folder, exist_ok=True)

def process_file(file_name):
    """Move one CSV file to ``output_folder`` when it tracks several bodies.

    The file is read from ``input_folder``. It is moved only when it has a
    'trackingID' column holding more than one distinct value. Names that do
    not end in ".csv" are ignored.
    """
    if not file_name.endswith(".csv"):
        return

    source_path = os.path.join(input_folder, file_name)
    frame_table = pd.read_csv(source_path)

    # No 'trackingID' column, or a single distinct ID: leave the file in place.
    if 'trackingID' not in frame_table.columns:
        return
    if frame_table['trackingID'].nunique() <= 1:
        return

    target_path = os.path.join(output_folder, file_name)
    shutil.move(source_path, target_path)
    print(f"Moved: {source_path} to {target_path}")

# Get list of files in the input folder
file_list = [f for f in os.listdir(input_folder) if f.endswith(".csv")]

# Process the files concurrently. Each file is submitted individually and its
# future is kept: with a bare executor.map() whose results are never read, an
# exception raised inside process_file (unreadable CSV, failed move, ...) is
# dropped without any message.
with ThreadPoolExecutor() as executor:
    pending = {name: executor.submit(process_file, name) for name in file_list}

# Leaving the with-block waits for every worker, so all futures are done here.
# Failures are reported per file; one bad file does not stop the others.
for name, future in pending.items():
    error = future.exception()
    if error is not None:
        print(f"Failed to process '{name}': {error}")

print("Finished moving files.")


2. Data processing: convert each CSV file into a `.skeleton` text file.

In [1]:
import pandas as pd
import os
import numpy as np
from concurrent.futures import ThreadPoolExecutor

# Folder with the CSV files to convert and folder for the converted files
input_folder = "csv data"
output_folder = "skeletons"

# Make sure the output folder is there before it is listed below
os.makedirs(output_folder, exist_ok=True)

# Collect the names, with the extension stripped, of the .skeleton files that
# already exist in the output folder
processed_files = set()
for existing_name in os.listdir(output_folder):
    if existing_name.endswith(".skeleton"):
        processed_files.add(os.path.splitext(existing_name)[0])

# Convert a single CSV file into a skeleton text file
def process_csv(file_path, output_path):
    """Convert one joint-per-column CSV file into a plain-text skeleton file.

    Reads *file_path* with pandas and reshapes the 25 ``joint<i>_<field>``
    column groups into one row per (CSV row, joint). It then writes
    *output_path*: a first line holding ``frameNum.max() - 1`` (0 when that
    column is absent), followed, for every CSV row, by a body-count line, a
    body-info line, a joint-count line and 25 joint lines.

    Nothing is written when the CSV is empty or lacks any expected joint
    column. Every exception is caught and reported with ``print``, so the
    function does not raise. It always returns None.
    """
    joint_columns = ['3dX', '3dY', '3dZ', 'depthX', 'depthY', 'orientationW', 'orientationX', 'orientationY', 'orientationZ', 'trackingState']
    # Per-body fields; all of them are written as constant zeros.
    body_fields = ['bodyID', 'clipedEdges', 'handLeftConfidence', 'handLeftState',
                   'handRightConfidence', 'handRightState', 'isRestricted', 'leanX',
                   'leanY']
    # Column order of one joint line (the tracking state is appended last).
    joint_fields = ['3dX_Value', '3dY_Value', '3dZ_Value',
                    'depthX_Value', 'depthY_Value', 'colorX', 'colorY',
                    'orientationW_Value', 'orientationX_Value', 'orientationY_Value',
                    'orientationZ_Value']

    try:
        df = pd.read_csv(file_path)

        if df.empty:
            print(f"Warning: File '{os.path.basename(file_path)}' is empty. Skipped processing.")
            return

        # Every joint<i>_<field> column must exist for joints 1..25.
        missing_columns = []
        for coord in joint_columns:
            for joint_no in range(1, 26):
                column_name = f'joint{joint_no}_{coord}'
                if column_name not in df.columns:
                    missing_columns.append(column_name)

        if missing_columns:
            print(f"Warning: File '{os.path.basename(file_path)}' is missing columns: {missing_columns}. Skipped processing.")
            return

        # One row per (CSV row, joint): ravel() walks each CSV row's 25 joints
        # in order before moving on to the next CSV row.
        total_rows = len(df) * 25
        joint_matrix = np.empty((total_rows, len(joint_columns)))
        for col_idx, coord in enumerate(joint_columns):
            wanted = [f'joint{joint_no}_{coord}' for joint_no in range(1, 26)]
            joint_matrix[:, col_idx] = df[wanted].values.ravel()

        combined_df = pd.DataFrame(joint_matrix, columns=[f'{coord}_Value' for coord in joint_columns])
        combined_df['colorX'] = 0
        combined_df['colorY'] = 0
        # NOTE(review): the first output line is frameNum.max() - 1, while one
        # frame block is written per CSV row - confirm the two always agree.
        if 'frameNum' in df.columns:
            combined_df['frameNumber'] = df['frameNum'].max() - 1
        else:
            combined_df['frameNumber'] = 0
        if 'bodyindexID' in df.columns:
            combined_df['bodyindexID'] = df['bodyindexID'].unique()[0]
        else:
            combined_df['bodyindexID'] = 0
        combined_df['num_joints'] = 25
        combined_df['bodies_num'] = 1
        for field in body_fields:
            combined_df[field] = 0

        if combined_df.empty:
            print(f"Warning: File '{os.path.basename(file_path)}' has no valid data after processing. Skipped.")
            return

        tracking = combined_df['trackingState_Value']
        bodies_line = f"{combined_df['bodies_num'].iloc[0]}\n"
        joints_line = f"{combined_df['num_joints'].iloc[0]}\n"

        def row_line(row_idx, fields):
            # Selected fields of one row, then that row's tracking state as int.
            values = combined_df.iloc[row_idx][fields]
            return ' '.join(map(str, values)) + f" {int(tracking.iloc[row_idx])}\n"

        # File header plus the header of the first frame.
        lines = [
            f"{combined_df['frameNumber'].iloc[0]}\n",
            bodies_line,
            row_line(0, body_fields),
            joints_line,
        ]

        for row_idx in range(total_rows):
            lines.append(row_line(row_idx, joint_fields))

            # After the 25th joint of a frame, emit the next frame's header.
            # NOTE(review): that header takes its tracking state from the
            # last joint of the frame just written - confirm this is intended.
            frame_finished = (row_idx + 1) % 25 == 0
            if frame_finished and (row_idx + 1) < total_rows:
                lines.append(bodies_line)
                lines.append(row_line(row_idx, body_fields))
                lines.append(joints_line)

        # Single write at the end, so no file is created on an early return.
        with open(output_path, "w") as txt_file:
            txt_file.writelines(lines)

    except Exception as e:
        print(f"Error processing file '{file_path}': {e}")

# Function to handle the processing of each file
def handle_file(file_name):
    """Convert one CSV file into a ``.skeleton`` file unless it already exists.

    *file_name* is expected to look like ``<label>_<person>_<gesture>_<camera>.csv``.
    The four parts are concatenated without underscores to form the output
    name. Names that do not end in ".csv" are ignored, and names with fewer
    than four underscore-separated parts are reported and skipped.

    Returns None; the conversion itself is delegated to ``process_csv``.
    """
    if not file_name.endswith(".csv"):  # Ensure it's a CSV file
        return

    parts = file_name.split('_')
    if len(parts) < 4:
        print(f"Warning: File name '{file_name}' does not match expected pattern. Skipped.")
        return

    label, person, gesture = parts[0], parts[1], parts[2]
    camera = parts[3].split('.')[0]  # drop the ".csv" extension

    output_stem = f"{label}{person}{gesture}{camera}"
    output_file_name = f"{output_stem}.skeleton"

    # processed_files is built with os.path.splitext(...)[0], so it holds names
    # WITHOUT the ".skeleton" extension. Comparing the full file name against
    # it never matched, which re-processed every file on each run. The stem is
    # checked here; the full name is also accepted in case the set holds those.
    if output_stem in processed_files or output_file_name in processed_files:
        print(f"File '{output_file_name}' already processed. Skipping.")
        return

    input_file_path = os.path.join(input_folder, file_name)
    output_file_path = os.path.join(output_folder, output_file_name)

    process_csv(input_file_path, output_file_path)

# Collect the CSV file names found in the input folder
file_list = []
for entry in os.listdir(input_folder):
    if entry.endswith(".csv"):
        file_list.append(entry)

# Hand every file to a worker thread; leaving the with-block waits for all of
# them to finish. The results are not inspected.
with ThreadPoolExecutor() as executor:
    for csv_name in file_list:
        executor.submit(handle_file, csv_name)

print("Processing completed.")

Updated statistics folder

camera.txt

In [8]:
import os
# Folder with the generated .skeleton files; its entries are kept in sorted
# order so that every statistics file below lists them in the same order.
input_folder = "nturgb+d_skeletons"
etri_files = os.listdir(input_folder)
etri_files.sort()

In [9]:
# Function to extract the number after "C" from the filename
def extract_number_after_C(filename):
    """Return the integer that follows the first 'C' in *filename*.

    The number is the text between the first 'C' and the next '.', e.g.
    ``'A001P001G001C001.skeleton'`` gives ``1``.

    Returns None when the name has no 'C' or when that text is not a valid
    integer. Previously the second case raised ValueError, so one stray file
    in the folder aborted the whole listing.
    """
    if 'C' not in filename:
        return None

    camera_text = filename.split('C')[1].split('.')[0]
    try:
        return int(camera_text)
    except ValueError:
        return None

# Camera number of every file, in the same order as etri_files
camera_numbers = list(map(extract_number_after_C, etri_files))

# Write one number per line to 'camera.txt'; files without a valid number
# (None) are left out
with open('camera.txt', 'w') as file:
    file.writelines(f"{number}\n" for number in camera_numbers if number is not None)

label.txt

In [10]:
# Display the first file name to check the naming pattern that the cells below parse.
etri_files[0]

'A001P001G001C001.skeleton'

In [11]:
# Function to extract the number after 'A' and before 'P'
def extract_label(filename):
    """Return the integer between the first 'A' and the first 'P' of *filename*.

    Example: ``'A001P001G001C001.skeleton'`` gives ``1``.

    Returns None when either marker is missing or when the text between them
    is not a valid integer.
    """
    marker = filename.find('A')
    end = filename.find('P')
    # Test the raw find() results: the old check compared find('A') + 1 with
    # -1, which can never be equal, so a missing 'A' went undetected.
    if marker == -1 or end == -1:
        return None
    try:
        return int(filename[marker + 1:end])
    except ValueError:
        return None

# Action label of every file
labels = list(map(extract_label, etri_files))

# Order the labels by their file name
sorted_labels = [pair[1] for pair in sorted(zip(etri_files, labels))]

# Write one label per line to 'label.txt'; None entries are left out
with open('label.txt', 'w') as file:
    file.writelines(f"{label}\n" for label in sorted_labels if label is not None)

performer.txt

In [12]:
# Function to extract the number after 'P' and before 'G'
def extract_label(filename):
    """Return the integer between the first 'P' and the first 'G' of *filename*.

    Example: ``'A001P007G001C001.skeleton'`` gives ``7``.

    Returns None when either marker is missing or when the text between them
    is not a valid integer.
    """
    marker = filename.find('P')
    end = filename.find('G')
    # Test the raw find() results: the old check compared find('P') + 1 with
    # -1, which can never be equal, so a missing 'P' went undetected.
    if marker == -1 or end == -1:
        return None
    try:
        return int(filename[marker + 1:end])
    except ValueError:
        return None

# Performer number of every file
labels = list(map(extract_label, etri_files))

# Order the numbers by their file name
sorted_labels = [pair[1] for pair in sorted(zip(etri_files, labels))]

# Write one number per line to 'performer.txt'; None entries are left out
with open('performer.txt', 'w') as file:
    file.writelines(f"{label}\n" for label in sorted_labels if label is not None)

statistic/replication.txt (value is 0)

In [13]:
# Function to extract the number after 'A' and before 'P'
def extract_label(filename):
    """Return the integer between the first 'A' and the first 'P' of *filename*.

    Example: ``'A001P001G001C001.skeleton'`` gives ``1``.

    Returns None when either marker is missing or when the text between them
    is not a valid integer.
    """
    marker = filename.find('A')
    end = filename.find('P')
    # Test the raw find() results: the old check compared find('A') + 1 with
    # -1, which can never be equal, so a missing 'A' went undetected.
    if marker == -1 or end == -1:
        return None
    try:
        return int(filename[marker + 1:end])
    except ValueError:
        return None

# Action label of every file, in the same order as etri_files
labels = list(map(extract_label, etri_files))

# Write 'label.txt' and 'replication.txt' side by side: each valid label gets
# one line in the first file and a constant "0" line in the second
with open('label.txt', 'w') as label_file, open('replication.txt', 'w') as replication_file:
    for label in labels:
        if label is None:
            continue
        label_file.write(f"{label}\n")
        replication_file.write("0\n")

setup.txt

In [14]:
# Function to extract the number after 'G' and before 'C'
def extract_label(filename):
    """Return the integer between the first 'G' and the first 'C' of *filename*.

    Example: ``'A001P001G003C001.skeleton'`` gives ``3``.

    Returns None when either marker is missing or when the text between them
    is not a valid integer.
    """
    marker = filename.find('G')
    end = filename.find('C')
    # Test the raw find() results: the old check compared find('G') + 1 with
    # -1, which can never be equal, so a missing 'G' went undetected.
    if marker == -1 or end == -1:
        return None
    try:
        return int(filename[marker + 1:end])
    except ValueError:
        return None

# Setup number of every file
labels = list(map(extract_label, etri_files))

# Order the numbers by their file name
sorted_labels = [pair[1] for pair in sorted(zip(etri_files, labels))]

# Write one number per line to 'setup.txt'; None entries are left out
with open('setup.txt', 'w') as file:
    file.writelines(f"{label}\n" for label in sorted_labels if label is not None)

setup.txt (this cell repeats the previous one and regenerates the same file)

In [15]:
# Function to extract the number after 'G' and before 'C'
def extract_label(filename):
    """Return the integer between the first 'G' and the first 'C' of *filename*.

    Example: ``'A001P001G003C001.skeleton'`` gives ``3``.

    Returns None when either marker is missing or when the text between them
    is not a valid integer.
    """
    marker = filename.find('G')
    end = filename.find('C')
    # Test the raw find() results: the old check compared find('G') + 1 with
    # -1, which can never be equal, so a missing 'G' went undetected.
    if marker == -1 or end == -1:
        return None
    try:
        return int(filename[marker + 1:end])
    except ValueError:
        return None

# Setup number of every file
labels = list(map(extract_label, etri_files))

# Order the numbers by their file name
sorted_labels = [pair[1] for pair in sorted(zip(etri_files, labels))]

# Write one number per line to 'setup.txt'; None entries are left out
with open('setup.txt', 'w') as file:
    file.writelines(f"{label}\n" for label in sorted_labels if label is not None)

skes_available_name.txt

In [16]:
# Strip every underscore and the '.skeleton' extension from each file name
formatted_names = []
for file_name in etri_files:
    formatted_names.append(file_name.replace('_', '').replace('.skeleton', ''))

# Write one name per line to 'skes_available_name.txt'
with open('skes_available_name.txt', 'w') as file:
    file.writelines(name + '\n' for name in formatted_names)