# First analysis: calculation of the FuncUseRatio
Author: Marion Granier  
Date: 2025-03-17 
  
This script uses part of Victor Fernando Lopes De Souza's script.

In [1]:
# Load the autoreload extension
%load_ext autoreload

In [2]:
# Set extension to autoreload all modules every time before executing the Python code
%autoreload 2

# Importing the necessary libraries
import matplotlib as mpl
mpl.rcParams['agg.path.chunksize'] = 10000
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd
import sys
import os

notebook_dir = os.getcwd()
sys.path.append(os.path.join(notebook_dir,"..", "sources"))

import handle_data, functional_metrics

## Check if all csv files are present

In [9]:
participants_info = pd.read_csv('../data/participants_1.csv', sep=';')
participants_info.head()

Unnamed: 0,folder_name,is_patient,parent_folder,paretic_side,start_day,end_day,start_month,end_month,start_year,end_year,age,FMScore,freq,time_stroke,laterality,barthel,bbt_paretic,bbt_non_paretic
0,C1P20_M1,True,data_actimetry,right,29,7,11,12,2024,2024,34,56,50,115.0,right,100,42,62
1,C1P30_M1,True,data_actimetry,right,29,7,11,12,2024,2024,67,45,50,36.0,right,95,17,46
2,C1P31_M1,True,data_actimetry,left,10,18,1,1,2025,2025,75,49,50,123.0,right,80,35,52
3,C1P32_M1,True,data_actimetry,right,29,7,11,12,2024,2024,78,45,50,,right,90,20,50
4,C1P33_M1,True,data_actimetry,left,29,7,11,12,2024,2024,81,54,50,,right,85,18,38


In [None]:
# Definition of the base path and the list of patients
base_path = "../data/data_actimetry"
patients = participants_info['folder_name'].str.split('_M').str[0].tolist()
# patients = ["C1P20", "C1P30", "C1P31", "C1P32", "C1P33"]
print(patients)
months = [f"M{i}" for i in range(1, 7)]  # M1 to M6

# Function to check if the files are present
def check_files():
    """
    Check if the files are present in the base path for each patient and each month.
    """
    for patient in patients:
        for month in months:
            folder_name = f"{patient}_{month}"
            folder_path = os.path.join(base_path, folder_name)
            
            if not os.path.exists(folder_path):
                print(f"***** Missing file: {folder_name} *****")
                continue
            
            left_file = os.path.join(folder_path, "left.csv")
            right_file = os.path.join(folder_path, "right.csv")
            
            missing_files = []
            if not os.path.isfile(left_file):
                missing_files.append("left.csv")
            if not os.path.isfile(right_file):
                missing_files.append("right.csv")
            
            if missing_files:
                print(f"***** Missing files in {folder_name} : {', '.join(missing_files)} *****")
#            else:
#                print(f"All files are present in {folder_name}.")

check_files()

['C1P20', 'C1P30', 'C1P31', 'C1P32', 'C1P33']
***** Missing file: C1P20_M4 *****
***** Missing file: C1P20_M5 *****
***** Missing file: C1P20_M6 *****
***** Missing file: C1P30_M4 *****
***** Missing file: C1P30_M5 *****
***** Missing file: C1P30_M6 *****
***** Missing file: C1P31_M3 *****
***** Missing file: C1P31_M4 *****
***** Missing file: C1P31_M5 *****
***** Missing file: C1P31_M6 *****
***** Missing file: C1P32_M4 *****
***** Missing file: C1P32_M5 *****
***** Missing file: C1P32_M6 *****
***** Missing file: C1P33_M4 *****
***** Missing file: C1P33_M5 *****
***** Missing file: C1P33_M6 *****


## FuncUseRatioPerDay for all patients and all months

In [None]:
import pandas as pd
import numpy as np

# Resampling
resampling_freq = 50 # Hz

# Low pass filter
filter_butter_cutoff = 1 # Hz

# Windowing
### VOIR SI ON LA PASSE A 2 SECONDES COMME LEUENBERGER ET GAEL ONT FAIT
seconds_per_window = 0.5 # s 

# To calculate the jerk ratio we ignore the moments where one arm is not moving
treshold_removal_JR = 0.01 # m/s^3

def get_functional_uses_per_day(folder_name, month):
    """
    Get functional use counts per day for the given patient and month.
    """
    try:
        # get data
        time_index, acceleration_xyzn, is_patient, FM = handle_data.extract_data(folder_name, filter_butter_cutoff=filter_butter_cutoff, resampling_freq=resampling_freq, month=month)
        
        # convert time index to seconds
        time_index_values = (time_index.values - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's')
        
        # partition data
        windows_small, time_indexes_small = handle_data.partition(acceleration_xyzn, time_index_values, seconds_per_window=seconds_per_window)

        # get angles
        alphas = functional_metrics.get_alphas(windows_small[:, :, :, 1], windows_small[:, :, :, 3])

        # get if window contains a functional movement for different (symmetric) angle thresholds (30)
        test_range = [30]
        is_functional = {i: functional_metrics.test_functional(alphas=alphas, treshold_symregion_degrees=i, treshold_amp_degrees=30) for i in test_range}
        
        # Get functional count per day
        functional_count_per_day = functional_metrics.get_functional_count_per_day(is_functional[30], time_indexes_small)
        functional_uses_per_day_df = pd.DataFrame(functional_count_per_day)

        # Add patient info to the dataframe
        ID = folder_name.split('_')[0]
        functional_uses_per_day_df.insert(0, 'ID', ID)
        functional_uses_per_day_df.insert(1, 'month', month)
        functional_uses_per_day_df.insert(2, 'day', range(1, functional_uses_per_day_df.shape[0] + 1))
        functional_uses_per_day_df.columns = ['ID', 'month', 'day', 'FuncUse_non_paretic_day', 'FuncUse_paretic_day']
        functional_uses_per_day_df['FuncUseRatio_day'] = functional_uses_per_day_df['FuncUse_paretic_day'] / (functional_uses_per_day_df['FuncUse_paretic_day'] + functional_uses_per_day_df['FuncUse_non_paretic_day'])

        return functional_uses_per_day_df

    except Exception as e:
        print(f"Error processing functional uses per day for {folder_name} in month {month}: {e}")
        return None


# Define months you want to process
months = [1, 2]

# Initialize an empty list to store all daily results
all_daily_results = []

# Loop over each month
for month in months:
    # Load participants info for the current month
    participants_info = pd.read_csv(f'../data/participants_{month}.csv', sep=';')
    
    # Process each participant in the current month
    for _, row in participants_info.iterrows():
        folder_name = row['folder_name']
        print(f'-----\nProcessing {folder_name} for month {month}')
        try:
            # Get the functional uses per day for this folder_name and month
            daily_result = get_functional_uses_per_day(folder_name, month)
            
            # If result is not None, append it to the global list
            if daily_result is not None:
                all_daily_results.append(daily_result)
        
        except Exception as e:
            print(f'Error processing {folder_name}: {e}')

# Concatenate all daily results into a single DataFrame
all_daily_results_df = pd.concat(all_daily_results, axis=0)

# Sort the dataframe by patient (ID), then by month, and finally by day
all_daily_results_df = all_daily_results_df.sort_values(by=['ID', 'month', 'day']).reset_index(drop=True)

# Save the final daily results to a CSV file
all_daily_results_df.to_csv('../results/results_FuncUsePerDay_all_patients.csv', index=False)

print("Processing complete. Results saved to 'results_FuncUsePerDay_all_patients.csv'.")


-----
Processing C1P20_M1 for month 1
-----
Processing C1P30_M1 for month 1
-----
Processing C1P31_M1 for month 1
-----
Processing C1P32_M1 for month 1
-----
Processing C1P33_M1 for month 1
-----
Processing C1P20_M2 for month 2
-----
Processing C1P30_M2 for month 2
-----
Processing C1P31_M2 for month 2
-----
Processing C1P32_M2 for month 2
-----
Processing C1P33_M2 for month 2
Processing complete. Results saved to 'results_FuncUsePerDay_all_patients.csv'.


## FuncUseRatio per month for all patients and all months

In [None]:
# Resampling
resampling_freq = 50 # Hz

# Low pass filter
filter_butter_cutoff = 1 # Hz

# Windowing
### VOIR SI ON LA PASSE A 2 SECONDES COMME LEUENBERGER ET GAEL ONT FAIT
seconds_per_window = 0.5 # s 

# To calculate the jerk ratio we ignore the moments where one arm is not moving
treshold_removal_JR = 0.01 # m/s^3

In [None]:
import pandas as pd

def get_participant_metrics(folder_name, month):
    """ 
    folder_name: name of the folder containing the actimetric files 
    month: a vector containing the months to be analyzed

    return: An overall dataframe with all the variables of interest for each patient and each month
    """
    try:
        # get data
        time_index, acceleration_xyzn, is_patient, FM = handle_data.extract_data(folder_name, filter_butter_cutoff=filter_butter_cutoff, resampling_freq=resampling_freq, month=month)
        # convert time index to seconds
        time_index_values = (time_index.values - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's')
        # partition data
        windows_small, time_indexes_small = handle_data.partition(acceleration_xyzn, time_index_values, seconds_per_window=seconds_per_window)

        # get angles
        alphas = functional_metrics.get_alphas(windows_small[:, :, :, 1], windows_small[:, :, :, 3])

        # get if window contains a functional movement for different (symmetric) angle thresholds (30)
        test_range = [30]
        is_functional = {i: functional_metrics.test_functional(alphas=alphas, treshold_symregion_degrees=i, treshold_amp_degrees=30) for i in test_range}

        # get functional use counts
        functional_uses = {i: is_functional[i].sum(axis=0) for i in test_range}
        functional_uses_df = pd.DataFrame(functional_uses).T
        functional_uses_df.columns = ['functional_non_paretic', 'functional_paretic']
        functional_uses_df['functional_ratio'] = functional_metrics.get_ratio(functional_uses_df['functional_non_paretic'], functional_uses_df['functional_paretic'])

        # get use hours (considering 30 degrees as threshold)
        use_hours = functional_metrics.get_use_hours(is_functional[30], time_indexes_small)
        # get use hours ratio
        use_hours_ratio = functional_metrics.get_ratio(use_hours[0], use_hours[1])

        # save results
        patient_id = folder_name.split('_')[0]

        # Return results for the current patient for the specified month
        results_global = pd.DataFrame({'month': month,
                                      'FuncUse_non_paretic_month': functional_uses_df.iloc[0, 0],
                                      'FuncUse_paretic_month': functional_uses_df.iloc[0, 1],
                                      'FuncUserRatio_month': functional_uses_df.iloc[0, 1] / (functional_uses_df.iloc[0, 1] + functional_uses_df.iloc[0, 0]),
                                      'UseHours_non_paretic_month': use_hours[0],
                                      'UseHours_paretic_month': use_hours[1],
                                      'UseHoursRatio_month': use_hours_ratio / 2}, index=[patient_id])

        return results_global

    except Exception as e:
        print(f"Error processing {folder_name} for month {month}: {e}")
        return None


# Define months you want to process
months = [1, 2]

# Initialize an empty DataFrame to store all results
all_results = []

# Loop over each month
for month in months:
    # Load participants info for the current month
    participants_info = pd.read_csv(f'../data/participants_{month}.csv', sep=';')
    
    # Process each participant in the current month
    for _, row in participants_info.iterrows():
        folder_name = row['folder_name']
        print(f'-----\nProcessing {folder_name}')
        try:
            # Get participant metrics for this folder_name and month
#            result = functional_metrics.get_participant_metrics(folder_name, month)
            result = get_participant_metrics(folder_name, month)
            
            # If result is not None, append it to the global results list
            if result is not None:
                all_results.append(result)
        
        except Exception as e:
            print(f'Error processing {folder_name}: {e}')

# Concatenate all the results by patient
all_results_df = pd.concat(all_results, axis=0)

all_results_df = pd.read_csv('../results/results_FuncUse_all_patients.csv')
all_results_df.rename(columns={all_results_df.columns[0]: 'ID'}, inplace=True)

# Sort the dataframe by patient (ID), then by month, and finally by day
all_results_df = all_results_df.sort_values(by=['ID', 'month']).reset_index(drop=True)

# Save the final results to a CSV file
all_results_df.to_csv('../results/results_FuncUse_all_patients.csv', index=False)

print("Processing complete. Results saved to 'results_FuncUse_all_patients.csv'.")

-----
Processing C1P20_M1
-----
Processing C1P30_M1
-----
Processing C1P31_M1
-----
Processing C1P32_M1
-----
Processing C1P33_M1
-----
Processing C1P20_M2
-----
Processing C1P30_M2
-----
Processing C1P31_M2
-----
Processing C1P32_M2
-----
Processing C1P33_M2


KeyError: 'ID'

## For a single patient and a single month

### Enter the data information

In [3]:
# Patient's code (CXPXX)
ID = "C1P33"

# Number of the record (X in range of 1 to 6)
month = "1"

# Resampling
resampling_freq = 50 # Hz

# Low pass filter
filter_butter_cutoff = 1 # Hz

# Windowing
seconds_per_window = 0.5 # s

# To calculate the jerk ratio we ignore the moments where one arm is not moving
treshold_removal_JR = 0.01 # m/s^3

### Extract Data

In [None]:
FileName = ID + "_M" + month
FileName

# Load the data
time_index, acceleration_xyzn, is_patient, FM = handle_data.extract_data(FileName, filter_butter_cutoff=filter_butter_cutoff, resampling_freq=resampling_freq, month=month)

In [10]:
acceleration_xyzn.shape

(32399681, 2, 4)

### Implementing Metrics

In [11]:
time_index_values = (time_index.values - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's')
windows_small, time_indexes_small = handle_data.partition(acceleration_xyzn, time_index_values, seconds_per_window=0.5)

In [12]:
# get jerk
# jerk = functional_metrics.get_jerk(acceleration_xyzn, time_index_values)
# get jerk ratio
# jerk_ratio = functional_metrics.get_jerk_ratio(jerk, treshold_removal=treshold_removal_JR)
# get jerk ratio mean
# jerk_ratio_mean = np.mean(jerk_ratio)

# get angles
alphas = functional_metrics.get_alphas(windows_small[:, :, :, 1], windows_small[:, :, :, 3])

# get if window contains a functional movement for different (symmetric) angle thresholds (10, 20, ..., 80)
test_range = range(10, 90, 10)
is_functional = {i : functional_metrics.test_functional(alphas=alphas, treshold_symregion_degrees=i, treshold_amp_degrees=30) for i in test_range}

# get functional use counts
functional_uses = {i : is_functional[i].sum(axis=0) for i in test_range}
functional_uses_df = pd.DataFrame(functional_uses).T
functional_uses_df.columns = ['non_paretic', 'paretic']

# get use hours (considering 30 degrees as threshold)
use_hours = functional_metrics.get_use_hours(is_functional[30], time_indexes_small)
use_hours
# get use hours ratio
use_hours_ratio = functional_metrics.get_ratio(use_hours[0], use_hours[1])

### FuncUse per month (one patient, one month)

In [None]:
# create a dataframe with the results
results_global = pd.DataFrame({'month' : month,
                        'FuncUse_non_paretic_month' : functional_uses_df.iloc[2, 0],
                        'FuncUse_paretic_month' : functional_uses_df.iloc[2, 1],  
                        'FuncUserRatio_month' : functional_uses_df.iloc[2, 1] / (functional_uses_df.iloc[2, 1] + functional_uses_df.iloc[2, 0]),
                        'UseHours_non_paretic_month' : use_hours[0],
                        'UseHours_paretic_month' : use_hours[1],
                        'UseHoursRatio_month' : use_hours_ratio / 2}, index=[ID])
results_global

# save the results per month
results_global.to_csv('../results/results_FuncUse' + ID + '_M' + month + '.csv', index_label="ID")

### Concatenate individual reports to make a global report of all patients

In [97]:
import os
import pandas as pd
import re

# Define the folder path and the output file
folder_path = '../results/'
output_file = os.path.join(folder_path, 'all_global_results.csv')

# Identify the pattern of the files
file_pattern = re.compile(r'results_C(\w+)_M(\d+)\.csv')

# Recover the list of files
csv_files = [f for f in os.listdir(folder_path) if file_pattern.match(f)]

# Sort the files by patient ID and month
csv_files.sort(key=lambda x: (file_pattern.match(x).group(1), int(file_pattern.match(x).group(2))))

# Load the first file to get the columns
first_file = os.path.join(folder_path, csv_files[0])
df_example = pd.read_csv(first_file)
columns = df_example.columns.tolist()

# Load the global DataFrame if it exists
if os.path.exists(output_file):
    global_df = pd.read_csv(output_file)
else:
    global_df = pd.DataFrame(columns=columns)

# Traits each file
for file in csv_files:
    file_path = os.path.join(folder_path, file)

    try:
        # read the file
        df = pd.read_csv(file_path, skiprows=1, header=None, names=columns)
        if df.empty:
            print(f"***** Warning: {file} is empty and will be skipped. *****")
            continue

        # Recover the patient ID and month
        match = file_pattern.match(file)
        patient_id, month = match.groups()
        month = int(month)

        # Check if the data is already in the global
        if not global_df.empty and ((global_df['ID'] == patient_id) & (global_df['month'] == month)).any():
            print(f"Skipping {file}: data for {patient_id} month {month} is already in {output_file}.")
            continue

        # Add to the global DataFrame
        df.to_csv(output_file, mode='a', header=not os.path.exists(output_file), index=False)

        print(f"Added {file} to {output_file}.")

    except Exception as e:
        print(f"***** Error: Failed to process {file}. Reason: {str(e)} *****")

print("Merging process completed.")


Added results_C1P30_M1.csv to ../results/all_global_results.csv.
Added results_C1P30_M2.csv to ../results/all_global_results.csv.
Added results_C1P30_M3.csv to ../results/all_global_results.csv.
Added results_C1P31_M1.csv to ../results/all_global_results.csv.
Added results_C1P31_M2.csv to ../results/all_global_results.csv.
Added results_C1P32_M1.csv to ../results/all_global_results.csv.
Added results_C1P32_M2.csv to ../results/all_global_results.csv.
Added results_C1P32_M3.csv to ../results/all_global_results.csv.
Added results_C1P33_M1.csv to ../results/all_global_results.csv.
Added results_C1P33_M2.csv to ../results/all_global_results.csv.
Added results_C1P33_M3.csv to ../results/all_global_results.csv.
Merging process completed.


### FuncUsePerDay (one patient, one month)

In [None]:
# get functional count per day
functional_count_per_day = functional_metrics.get_functional_count_per_day(is_functional[30], time_indexes_small)
functional_count_per_day
functional_uses_per_day_df = pd.DataFrame(functional_count_per_day)

# create the dataframe of FuncUse per day for the patient
functional_uses_per_day_df.insert(0, 'ID', ID)
functional_uses_per_day_df.insert(1, 'month', month)
functional_uses_per_day_df.insert(2, 'day', range(1, functional_uses_per_day_df.shape[0] + 1))
functional_uses_per_day_df.columns = ['ID', 'month', 'day', 'FuncUse_non_paretic_day', 'FuncUse_paretic_day']
functional_uses_per_day_df['FuncUseRatio_day'] = functional_uses_per_day_df['FuncUse_paretic_day'] / (functional_uses_per_day_df['FuncUse_paretic_day'] + functional_uses_per_day_df['FuncUse_non_paretic_day'])
print(functional_uses_per_day_df)

# save the results per day
functional_uses_per_day_df.to_csv('../results/results_FuncUsePerDay_' + ID + '_M' + month + '.csv', index=False)

      ID month  day  FuncUse_non_paretic_day  FuncUse_paretic_day  \
0  C1P33     1    1                    201.0                 49.0   
1  C1P33     1    2                    149.0                 34.0   
2  C1P33     1    3                    156.0                 46.0   
3  C1P33     1    4                    187.0                 31.0   
4  C1P33     1    5                    197.0                 22.0   
5  C1P33     1    6                    211.0                 33.0   
6  C1P33     1    7                    196.0                 29.0   
7  C1P33     1    8                      1.0                  1.0   

   FuncUseRatio_day  
0          0.196000  
1          0.185792  
2          0.227723  
3          0.142202  
4          0.100457  
5          0.135246  
6          0.128889  
7          0.500000  
