In [None]:
import pandas as pd
import numpy as np

# Function to extract features from Actigraphy data
def extract_features(data: pd.DataFrame) -> pd.DataFrame:
    """Summarise an actigraphy recording as a single row of features.

    Args:
        data: Frame holding the columns ``X``, ``Y``, ``Z`` and ``enmo``.
            A ``non-wear_flag`` column is used when present.

    Returns:
        A one-row DataFrame with per-axis mean/std, ``enmo`` mean/std/max/min,
        mean/std of the vector magnitude ``sqrt(X**2 + Y**2 + Z**2)``, the
        most frequent activity-level bin of that magnitude, the percentage
        of samples whose ``enmo`` is exactly 0, and the percentage of samples
        whose ``non-wear_flag`` equals 1 (``None`` if that column is absent).

    Raises:
        ValueError: If any of the required columns is missing.
    """
    required_columns = {'X', 'Y', 'Z', 'enmo'}
    if not required_columns.issubset(data.columns):
        raise ValueError(f"Missing required columns: {required_columns - set(data.columns)}")

    # Statistical features of the raw axes
    mean_x = data['X'].mean()
    mean_y = data['Y'].mean()
    mean_z = data['Z'].mean()
    std_x = data['X'].std()
    std_y = data['Y'].std()
    std_z = data['Z'].std()

    # Statistical features of the enmo column
    mean_enmo = data['enmo'].mean()
    std_enmo = data['enmo'].std()
    max_enmo = data['enmo'].max()
    min_enmo = data['enmo'].min()

    # Euclidean norm of the three axes, per sample
    magnitude = np.sqrt(data['X']**2 + data['Y']**2 + data['Z']**2)
    mean_magnitude = magnitude.mean()
    std_magnitude = magnitude.std()

    # Most frequent activity bin. Samples outside [0, 2.0] (and NaNs) fall in
    # no bin, so the mode can be empty even for a non-empty recording; in that
    # case report 'Unknown' instead of failing on an empty lookup.
    activity_level = 'Unknown'
    if len(magnitude) > 0:
        binned = pd.cut(
            magnitude,
            bins=[0, 0.5, 1.0, 1.5, 2.0],
            labels=['Very Low', 'Low', 'Medium', 'High'],
            include_lowest=True
        )
        modal_levels = binned.mode()
        if not modal_levels.empty:
            activity_level = modal_levels.iloc[0]

    # Share of samples with enmo exactly equal to zero
    inactivity_percentage = (data['enmo'] == 0).mean() * 100

    # Share of samples flagged as non-wear; None when the flag column is absent
    non_wear_percentage = (data['non-wear_flag'] == 1).mean() * 100 if 'non-wear_flag' in data.columns else None

    features = {
        'mean_x': mean_x,
        'mean_y': mean_y,
        'mean_z': mean_z,
        'std_x': std_x,
        'std_y': std_y,
        'std_z': std_z,
        'mean_enmo': mean_enmo,
        'std_enmo': std_enmo,
        'max_enmo': max_enmo,
        'min_enmo': min_enmo,
        'mean_magnitude': mean_magnitude,
        'std_magnitude': std_magnitude,
        'activity_level': activity_level,
        'inactivity_percentage': inactivity_percentage,
        'non_wear_percentage': non_wear_percentage
    }
    return pd.DataFrame([features])

# Path for the participant file
participant_file = "./id=0a418b57.parquet"

# Loading participant data
participant_data = pd.read_parquet(participant_file)

# Extracting features for the participant
participant_features = extract_features(participant_data)

# Checking if features were extracted successfully
if participant_features is not None and not participant_features.empty:
    # Participant id is the part of the file name between "id=" and the extension
    participant_id = participant_file.split('id=')[-1].split('.')[0]

    # Dynamically creating the output file path and writing the features to it,
    # so that the success message below reports something that actually happened
    output_file = f"./features_id={participant_id}.parquet"
    participant_features.to_parquet(output_file, index=False)

    print(f"Features extracted and saved successfully for participant {participant_id}.")
    print(participant_features)
else:
    print("No valid features extracted.")

Features extracted and saved successfully for participant 0a418b57.
     mean_x    mean_y    mean_z     std_x     std_y     std_z  mean_enmo  \
0  0.006772 -0.134328 -0.925798  0.137918  0.175017  0.133737    0.02247   

   std_enmo  max_enmo  min_enmo  mean_magnitude  std_magnitude activity_level  \
0  0.031348  0.079348  0.000162        0.966236       0.057239            Low   

   inactivity_percentage  non_wear_percentage  
0                    0.0                  0.0  
