# First analysis
Author: Marion Granier  
Date: 2025-03-04 
  
This script uses part of Victor Fernando Lopes De Souza's script.

In [None]:
# Load the autoreload extension
%load_ext autoreload

In [None]:
# Set extension to autoreload all modules every time before executing the Python code
%autoreload 2

# Importing the necessary libraries
import matplotlib as mpl
mpl.rcParams['agg.path.chunksize'] = 10000
import matplotlib.pyplot as plt

import numpy as np
import pandas as pd

import handle_data, functional_metrics
import os

  """
  """
  """


## Check if all files are present

In [None]:
# Definition of the base path and the list of patients
# base_path is the folder in which check_files() looks for one "<patient>_<month>" sub-folder per recording
base_path = "../data/data_actimetry_copy"
# Participant table, read as a semicolon-separated CSV
participants_info = pd.read_csv('../data/participants_1.csv', sep=';')
# The patient code is the part of each 'folder_name' entry that precedes the first "_M"
# NOTE(review): the codes are not de-duplicated; if the table holds one row per recording, a patient appears several times -- confirm
patients = participants_info['folder_name'].str.split('_M').str[0].tolist()
# patients = ["C1P20", "C1P30", "C1P31", "C1P32", "C1P33"]
print(patients)
months = [f"M{i}" for i in range(1, 7)]  # M1 to M6

# Function to check if the files are present
def check_files(root=None, patient_ids=None, month_labels=None):
    """
    Check that every "<patient>_<month>" folder exists and holds both recordings.

    For each patient/month combination the folder ``<root>/<patient>_<month>`` is
    expected to contain ``left.csv`` and ``right.csv``. Exactly one line is printed
    per combination: the folder is missing, some files are missing, or everything
    is present. Nothing is returned.

    Parameters
    ----------
    root : str, optional
        Folder to scan. Defaults to the notebook-level ``base_path``.
    patient_ids : iterable of str, optional
        Patient codes to check. Defaults to the notebook-level ``patients``.
    month_labels : iterable of str, optional
        Month labels to check. Defaults to the notebook-level ``months``.
    """
    # Fall back to the notebook globals so that a bare check_files() call keeps working.
    root = base_path if root is None else root
    patient_ids = patients if patient_ids is None else patient_ids
    # Materialised because the labels are iterated once per patient.
    month_labels = list(months if month_labels is None else month_labels)

    expected_files = ("left.csv", "right.csv")

    for patient in patient_ids:
        for month_label in month_labels:
            folder_name = f"{patient}_{month_label}"
            folder_path = os.path.join(root, folder_name)

            # isdir (rather than exists) so that a stray file with the folder's
            # name is not mistaken for the recording folder.
            if not os.path.isdir(folder_path):
                print(f"Missing folder: {folder_name}")
                continue

            missing_files = [
                file_name
                for file_name in expected_files
                if not os.path.isfile(os.path.join(folder_path, file_name))
            ]

            if missing_files:
                print(f"***** Missing files in {folder_name} : {', '.join(missing_files)} *****")
            else:
                print(f"All files are present in {folder_name}.")

# Check if the files are present
check_files()

['C1P20', 'C1P30', 'C1P31', 'C1P32', 'C1P33']
Fichiers manquants dans C1P20_M1 : right.csv
Dossier manquant : C1P20_M2
Dossier manquant : C1P20_M3
Dossier manquant : C1P20_M4
Dossier manquant : C1P20_M5
Dossier manquant : C1P20_M6
Dossier manquant : C1P30_M1
Dossier manquant : C1P30_M2
Dossier manquant : C1P30_M3
Dossier manquant : C1P30_M4
Dossier manquant : C1P30_M5
Dossier manquant : C1P30_M6
Dossier manquant : C1P31_M1
Dossier manquant : C1P31_M2
Dossier manquant : C1P31_M3
Dossier manquant : C1P31_M4
Dossier manquant : C1P31_M5
Dossier manquant : C1P31_M6
Dossier manquant : C1P32_M1
Dossier manquant : C1P32_M2
Dossier manquant : C1P32_M3
Dossier manquant : C1P32_M4
Dossier manquant : C1P32_M5
Dossier manquant : C1P32_M6
Dossier manquant : C1P33_M1
Dossier manquant : C1P33_M2
Dossier manquant : C1P33_M3
Dossier manquant : C1P33_M4
Dossier manquant : C1P33_M5
Dossier manquant : C1P33_M6


## First analysis for a single patient

### Enter the data information

In [None]:
# Patient's code (CXPXX); combined with the month to build the recording name
ID = "C0P00"

# Number of the recording (1 to 6), kept as a string because it is concatenated into names and paths
month = "1"

# Resampling frequency passed to the data loader
resampling_freq = 50 # Hz

# Cutoff frequency of the low-pass filter passed to the data loader (Butterworth, judging by the name)
filter_butter_cutoff = 1 # Hz

# Length of one analysis window
seconds_per_window = 0.5 # s

# To calculate the jerk ratio we ignore the moments where one arm is not moving
# NOTE(review): only referenced by the jerk-ratio code, which is commented out in the visible cells
treshold_removal_JR = 0.01 # m/s^3

### Extract Data

In [None]:
# Recording name built as "<ID>_M<month>"
FileName = ID + "_M" + month
FileName

# Load the data, passing the low-pass cutoff and the resampling frequency chosen for this run
# NOTE(review): is_patient and FM are returned here but not used in the visible cells -- confirm whether they are still needed
time_index, acceleration_xyzn, is_patient, FM = handle_data.extract_data(FileName, filter_butter_cutoff=filter_butter_cutoff, resampling_freq=resampling_freq)

In [10]:
acceleration_xyzn.shape

(12959783, 2, 4)

### Implementing Metrics

In [11]:
# Express the timestamps as float seconds elapsed since 1970-01-01T00:00:00
time_index_values = (time_index.values - np.datetime64('1970-01-01T00:00:00')) / np.timedelta64(1, 's')
# Cut the recording into windows; the length comes from the configured seconds_per_window
# (previously a duplicated 0.5 literal, so editing the setting had no effect)
windows_small, time_indexes_small = handle_data.partition(acceleration_xyzn, time_index_values, seconds_per_window=seconds_per_window)

In [None]:
# Jerk-based metrics are currently disabled; the calls are kept for reference.
# get jerk
# jerk = functional_metrics.get_jerk(acceleration_xyzn, time_index_values)
# get jerk ratio
# jerk_ratio = functional_metrics.get_jerk_ratio(jerk, treshold_removal=treshold_removal_JR)
# get jerk ratio mean
# jerk_ratio_mean = np.mean(jerk_ratio)

# get angles from components 1 and 3 of the last axis of every window
# NOTE(review): given the name acceleration_xyzn these are presumably the y component and the norm -- confirm in functional_metrics.get_alphas
alphas = functional_metrics.get_alphas(windows_small[:, :, :, 1], windows_small[:, :, :, 3])

# get if window contains a functional movement for different (symmetric) angle thresholds (10, 20, ..., 80)
# the amplitude threshold is held at 30 degrees for every tested angle threshold
test_range = range(10, 90, 10)
is_functional = {i : functional_metrics.test_functional(alphas=alphas, treshold_symregion_degrees=i, treshold_amp_degrees=30) for i in test_range}

# get functional use counts: the flags are summed over axis 0 (presumably the windows -- confirm),
# and after the transpose there is one row per tested threshold
functional_uses = {i : is_functional[i].sum(axis=0) for i in test_range}
functional_uses_df = pd.DataFrame(functional_uses).T
# NOTE(review): assumes entry 0 is the non-paretic arm and entry 1 the paretic arm -- confirm against handle_data.extract_data
functional_uses_df.columns = ['non_paretic', 'paretic']

# get use hours (considering 30 degrees as threshold)
use_hours = functional_metrics.get_use_hours(is_functional[30], time_indexes_small)
use_hours
# get use hours ratio, computed from the two entries of use_hours
use_hours_ratio = functional_metrics.get_ratio(use_hours[0], use_hours[1])

In [None]:
def get_functional_count_per_day(is_functional, time_indexes):
    """
    Count, for each 24 h period of the recording, the windows flagged as functional.

    Days are counted from the start of the recording (day 0 is the first 24 h),
    not from calendar midnight. Windows are treated as evenly spaced: the spacing
    is the mean difference between consecutive window start times, and window
    ``i`` is placed ``i * spacing`` seconds after the beginning.

    Parameters
    ----------
    is_functional : array-like, shape (n_windows, n_arms)
        Flag (bool or 0/1) telling whether each window holds a functional movement.
    time_indexes : array-like, shape (n_windows, samples_per_window)
        Time stamps, in seconds, of the samples of each window; only the first
        column (the window start) is used.

    Returns
    -------
    numpy.ndarray of float, shape (n_days, n_arms)
        Number of functional windows that started in each day, per arm.
        Shape is ``(0, n_arms)`` when there is no window at all.
    """
    is_functional = np.asarray(is_functional)

    # get first element of each window
    start = np.asarray(time_indexes)[:, 0]
    n_windows = start.shape[0]
    n_arms = is_functional.shape[1]

    # nothing recorded: no day to report
    if n_windows == 0:
        return np.zeros((0, n_arms))

    # mean spacing between window starts; a lone window has no spacing to
    # measure (np.diff would be empty and its mean NaN)
    delta_windowing = np.diff(start).mean() if n_windows > 1 else 0.0

    # whole seconds elapsed when each window starts; the first window starts at
    # 0 s, so the offsets are i * delta (a cumulative sum would begin at delta
    # and push the last window of every day into the following one)
    second_of_measurement = np.floor(delta_windowing * np.arange(n_windows)).astype(int)

    # convert seconds to days
    day_of_measurement = second_of_measurement // (24 * 3600)

    # one row per day, one column per arm
    functional_count_day = np.zeros((day_of_measurement[-1] + 1, n_arms))

    # the functional count in each day is the number of functional movements
    # that started in that day; np.add.at accumulates correctly even though
    # the same day index is repeated many times
    np.add.at(functional_count_day, day_of_measurement, is_functional.astype(float))

    return functional_count_day

# Daily functional-use counts for the 30-degree threshold
functional_count_per_day = get_functional_count_per_day(is_functional[30], time_indexes_small)

# One row per day; the two count columns receive their final names right away
functional_uses_per_day_df = pd.DataFrame(
    functional_count_per_day,
    columns=['FuncUse_non_paretic_day', 'FuncUse_paretic_day'],
)

# Put the identification columns in front: patient code, month, 1-based day number
functional_uses_per_day_df.insert(0, 'ID', ID)
functional_uses_per_day_df.insert(1, 'month', month)
functional_uses_per_day_df.insert(2, 'day', range(1, len(functional_uses_per_day_df) + 1))

# Share of the paretic arm in the total number of functional uses of the day
paretic_per_day = functional_uses_per_day_df['FuncUse_paretic_day']
non_paretic_per_day = functional_uses_per_day_df['FuncUse_non_paretic_day']
functional_uses_per_day_df['FuncUseRatio_day'] = paretic_per_day / (paretic_per_day + non_paretic_per_day)

print(functional_uses_per_day_df)

# Save the per-day results, without the row index
per_day_csv = '../results/results_FuncUsePerDay_' + ID + '_M' + month + '.csv'
functional_uses_per_day_df.to_csv(per_day_csv, index=False)

      ID month  day  FuncUse_non_paretic_day  FuncUse_paretic_day  \
0  C0P00     1    1                    435.0                368.0   
1  C0P00     1    2                    288.0                298.0   
2  C0P00     1    3                    381.0                382.0   

   FuncUseRatio_day  
0          0.458281  
1          0.508532  
2          0.500655  


### Make the report for the patient

In [None]:
# Row 2 of functional_uses_df is the third tested threshold, i.e. 30 degrees
non_paretic_count = functional_uses_df.iloc[2, 0]
paretic_count = functional_uses_df.iloc[2, 1]

# Single-row summary of this recording, indexed by the patient code
# NOTE(review): 'FuncUserRatio_month' is spelled differently from 'FuncUseRatio_day';
# it is kept as is because the key becomes a column name in the saved CSV
monthly_summary = {
    'month': month,
    'FuncUse_non_paretic_month': non_paretic_count,
    'FuncUse_paretic_month': paretic_count,
    'FuncUserRatio_month': paretic_count / (paretic_count + non_paretic_count),
    'UseHours_non_paretic_month': use_hours[0],
    'UseHours_paretic_month': use_hours[1],
    'UseHoursRatio_month': use_hours_ratio / 2,
}
results_global = pd.DataFrame(monthly_summary, index=[ID])

# Save the monthly results; the index is written as the "ID" column
monthly_csv = '../results/results_' + ID + '_M' + month + '.csv'
results_global.to_csv(monthly_csv, index_label="ID")

### Make the global report of all patients

In [None]:
import os
import pandas as pd
import re

# Define the folder path and the output file
folder_path = '../results/'
output_file = os.path.join(folder_path, 'all_global_results.csv')

# Per-recording result files are named "results_<patient>_M<month>.csv".
# The patient group keeps the leading "C" so that it equals the value stored in
# the "ID" column of the files (e.g. "C0P00"); without it the duplicate check
# below can never match and every run appends the same rows again.
file_pattern = re.compile(r'results_(C\w+)_M(\d+)\.csv')

# Recover the matching files together with their patient ID and month.
# fullmatch rejects names with trailing characters (backup copies and the like).
parsed_files = []
for candidate in os.listdir(folder_path):
    name_match = file_pattern.fullmatch(candidate)
    if name_match:
        parsed_files.append((name_match.group(1), int(name_match.group(2)), candidate))

# Sort the files by patient ID and month (numeric, so M10 comes after M2)
parsed_files.sort()
csv_files = [candidate for _, _, candidate in parsed_files]

if not csv_files:
    # Nothing to merge: avoid the IndexError that csv_files[0] would raise
    print(f"***** Warning: no result files found in {folder_path}. *****")
else:
    # Load the first file to get the columns
    first_file = os.path.join(folder_path, csv_files[0])
    df_example = pd.read_csv(first_file)
    columns = df_example.columns.tolist()

    # Load the global DataFrame if it exists; it is only used to detect rows
    # merged by a previous run (file names are unique within a run)
    if os.path.exists(output_file):
        global_df = pd.read_csv(output_file)
    else:
        global_df = pd.DataFrame(columns=columns)

    # Process each file.
    # The month parsed from the file name is stored in file_month so that the
    # notebook-level `month` string used by the earlier cells is not overwritten.
    for patient_id, file_month, file in parsed_files:
        file_path = os.path.join(folder_path, file)

        # Deliberately broad: one unreadable file must not stop the whole merge
        try:
            # read the file, skipping its header row and applying the reference columns
            df = pd.read_csv(file_path, skiprows=1, header=None, names=columns)
            if df.empty:
                print(f"***** Warning: {file} is empty and will be skipped. *****")
                continue

            # Check if the data is already in the global file
            already_merged = (
                not global_df.empty
                and ((global_df['ID'] == patient_id) & (global_df['month'] == file_month)).any()
            )
            if already_merged:
                print(f"Skipping {file}: data for {patient_id} month {file_month} is already in {output_file}.")
                continue

            # Append to the global file; the header is written only when the file is created
            df.to_csv(output_file, mode='a', header=not os.path.exists(output_file), index=False)

            print(f"Added {file} to {output_file}.")

        except Exception as e:
            print(f"***** Error: Failed to process {file}. Reason: {str(e)} *****")

print("Merging process completed.")


✅ Added results_C0P00_M1.csv to ../results/dataOut4.csv.
✅ Added results_C0P00_M2.csv to ../results/dataOut4.csv.
✅ Merging process completed.
