In [1]:
import numpy as np
import pandas as pd
import zipfile

In [2]:
# Inclusive ID ranges to scan. np.arange excludes its stop value, hence the "+ 1".
subjects = np.arange(6, 38 + 1)    # subject IDs 6..38
tasks = np.arange(20, 34 + 1)      ## FALL: task IDs 20..34
trials = np.arange(1, 11 + 1)      # trial IDs 1..11

In [3]:
# Zip archives read by the loaders below: label workbooks and sensor CSV recordings.
label_zip_path = 'label_data.zip'
sensor_zip_path = 'sensor_data.zip'

In [4]:
# Accumulator for the labelled sensor rows (one entry per sensor frame)
sensor_data_list = list()

In [5]:
# Function to read an Excel file from a zip archive
def read_excel_from_zip(zip_path, file_name):
    """Load one Excel member of a zip archive into a DataFrame.

    Parameters
    ----------
    zip_path : path of the zip archive to open.
    file_name : exact member name inside the archive.

    Returns
    -------
    The DataFrame produced by ``pd.read_excel``, or ``None`` (after printing a
    message) when the archive has no member called ``file_name``.
    """
    with zipfile.ZipFile(zip_path, 'r') as archive:
        # Guard clause: report and bail out when the member is absent.
        if file_name not in archive.namelist():
            print(f"File {file_name} not found in {zip_path}")
            return None
        with archive.open(file_name) as member:
            return pd.read_excel(member)

In [6]:
# Function to read a CSV file from a zip archive
def read_csv_from_zip(zip_path, file_name):
    """Load one CSV member of a zip archive into a DataFrame.

    Parameters
    ----------
    zip_path : path of the zip archive to open.
    file_name : exact member name inside the archive.

    Returns
    -------
    The DataFrame produced by ``pd.read_csv``, or ``None`` (after printing a
    message) when the archive has no member called ``file_name``.
    """
    with zipfile.ZipFile(zip_path, 'r') as archive:
        member_exists = file_name in archive.namelist()
        if member_exists:
            with archive.open(file_name) as member:
                frame = pd.read_csv(member)
        else:
            print(f"File {file_name} not found in {zip_path}")
            frame = None
    return frame

In [7]:
# Function to process label data
def process_label_data(label_file, subject_id):
    """Read one label workbook and add 'Subject', 'Task' and 'Trial' columns.

    Parameters
    ----------
    label_file : member name of the workbook inside ``label_zip_path``.
    subject_id : value written to every row of the new 'Subject' column.

    Returns
    -------
    The augmented DataFrame, or ``None`` when the workbook could not be read.
    'Task' holds the number found between parentheses in 'Task Code (Task ID)'
    (rows without such a number stay NaN); 'Trial' is a copy of 'Trial ID'.
    Either column is all-NaN when its source column is absent.
    """
    label_data = read_excel_from_zip(label_zip_path, label_file)
    if label_data is None:
        return label_data

    label_data['Subject'] = subject_id

    has_task_code = 'Task Code (Task ID)' in label_data.columns
    if not has_task_code:
        label_data['Task'] = np.nan
    else:
        # Pull the digits out of codes such as "F01 (20)"; unmatched rows are dropped
        # here and reappear as NaN through index alignment on assignment.
        extracted = label_data['Task Code (Task ID)'].str.extract(r'\((\d+)\)')
        matched = extracted[0].dropna()
        label_data['Task'] = matched.astype(int)

    label_data['Trial'] = label_data['Trial ID'] if 'Trial ID' in label_data.columns else np.nan

    return label_data

In [8]:
# Read every subject's label workbook, keeping only the ones that were found
loaded_labels = (process_label_data(f'SA{sid:02d}_label.xlsx', sid) for sid in subjects)
label_data_list = [frame for frame in loaded_labels if frame is not None]

# Stack the per-subject tables into one DataFrame (empty when nothing was loaded)
label_data = pd.concat(label_data_list, ignore_index=True) if label_data_list else pd.DataFrame()

File SA34_label.xlsx not found in label_data.zip


In [9]:
# Preview the first 15 rows of the combined label table
label_data.head(15)

Unnamed: 0,Task Code (Task ID),Description,Trial ID,Fall_onset_frame,Fall_impact_frame,Subject,Task,Trial
0,F01 (20),Forward fall when trying to sit down,1,130,208,6,20.0,1
1,,,2,184,272,6,,2
2,,,3,184,260,6,,3
3,,,4,151,231,6,,4
4,,,5,128,223,6,,5
5,F02 (21),Backward fall when trying to sit down,1,122,161,6,21.0,1
6,,,2,128,167,6,,2
7,,,3,147,187,6,,3
8,,,4,130,167,6,,4
9,F03 (22),lateral fall when trying to sit down,1,128,194,6,22.0,1


In [10]:
# In the label sheets only the first trial row of a task carries the task code and
# description (see the preview above), so propagate those values down to the trial rows
# that follow. The fill is done per subject so that a value can never be carried over
# from the last rows of one subject into the first rows of the next one.
fill_columns = ['Task Code (Task ID)', 'Description', 'Task']
label_data[fill_columns] = label_data.groupby('Subject')[fill_columns].ffill()

In [11]:
# Preview the first 20 label rows again, now that the forward-fill cell has run
label_data.head(20)

Unnamed: 0,Task Code (Task ID),Description,Trial ID,Fall_onset_frame,Fall_impact_frame,Subject,Task,Trial
0,F01 (20),Forward fall when trying to sit down,1,130,208,6,20.0,1
1,F01 (20),Forward fall when trying to sit down,2,184,272,6,20.0,2
2,F01 (20),Forward fall when trying to sit down,3,184,260,6,20.0,3
3,F01 (20),Forward fall when trying to sit down,4,151,231,6,20.0,4
4,F01 (20),Forward fall when trying to sit down,5,128,223,6,20.0,5
5,F02 (21),Backward fall when trying to sit down,1,122,161,6,21.0,1
6,F02 (21),Backward fall when trying to sit down,2,128,167,6,21.0,2
7,F02 (21),Backward fall when trying to sit down,3,147,187,6,21.0,3
8,F02 (21),Backward fall when trying to sit down,4,130,167,6,21.0,4
9,F03 (22),lateral fall when trying to sit down,1,128,194,6,22.0,1


In [12]:
# Collect every sensor frame recorded between fall onset and fall impact (both inclusive)
# for each subject/task/trial recording, tagging each collected row with Fall = 1.

# Start from an empty list so that re-running this cell does not append duplicate rows.
sensor_data_list = []

# Columns copied from each recording, in the order the rows are stored.
sensor_columns = ['TimeStamp(s)', 'FrameCounter', 'AccX', 'AccY', 'AccZ', 'GyrX', 'GyrY', 'GyrZ', 'EulerX', 'EulerY', 'EulerZ']

for i in subjects:
    for j in tasks:
        for k in trials:
            filename = f'S{i:02d}T{j:02d}R{k:02d}.csv'
            try:
                data = read_csv_from_zip(sensor_zip_path, f'SA{i:02d}/{filename}')
                if data is None:
                    # The helper has already reported the missing file.
                    continue

                sensor_data = data[sensor_columns]

                label_subset = label_data[(label_data['Subject'] == i) & (label_data['Task'] == j) & (label_data['Trial'] == k)]
                for _, label_row in label_subset.iterrows():
                    if 'Fall_onset_frame' not in label_row or 'Fall_impact_frame' not in label_row:
                        continue

                    frame_onset = label_row['Fall_onset_frame']
                    frame_impact = label_row['Fall_impact_frame']
                    in_fall_window = (sensor_data['FrameCounter'] >= frame_onset) & (sensor_data['FrameCounter'] <= frame_impact)
                    fall_rows = sensor_data[in_fall_window].values

                    # Append the Fall = 1 flag to all rows at once instead of one np.append per row.
                    fall_flag = np.ones((len(fall_rows), 1), dtype=fall_rows.dtype)
                    sensor_data_list.extend(np.hstack([fall_rows, fall_flag]))

            except Exception as e:
                # Keep the best-effort behaviour (skip this recording) but report the failure
                # instead of discarding it silently.
                print(f"Skipping {filename}: {type(e).__name__}: {e}")
                continue

File SA06/S06T20R06.csv not found in sensor_data.zip
File SA06/S06T20R07.csv not found in sensor_data.zip
File SA06/S06T20R08.csv not found in sensor_data.zip
File SA06/S06T20R09.csv not found in sensor_data.zip
File SA06/S06T20R10.csv not found in sensor_data.zip
File SA06/S06T20R11.csv not found in sensor_data.zip
File SA06/S06T21R05.csv not found in sensor_data.zip
File SA06/S06T21R06.csv not found in sensor_data.zip
File SA06/S06T21R07.csv not found in sensor_data.zip
File SA06/S06T21R08.csv not found in sensor_data.zip
File SA06/S06T21R09.csv not found in sensor_data.zip
File SA06/S06T21R10.csv not found in sensor_data.zip
File SA06/S06T21R11.csv not found in sensor_data.zip
File SA06/S06T22R06.csv not found in sensor_data.zip
File SA06/S06T22R07.csv not found in sensor_data.zip
File SA06/S06T22R08.csv not found in sensor_data.zip
File SA06/S06T22R09.csv not found in sensor_data.zip
File SA06/S06T22R10.csv not found in sensor_data.zip
File SA06/S06T22R11.csv not found in sensor_da

In [13]:
# Column names for the collected rows: the 11 sensor fields followed by the fall flag
labelled_sensor_columns = [
    'TimeStamp(s)', 'FrameCounter',
    'AccX', 'AccY', 'AccZ',
    'GyrX', 'GyrY', 'GyrZ',
    'EulerX', 'EulerY', 'EulerZ',
    'Fall',
]
df_sensor_data = pd.DataFrame(sensor_data_list, columns=labelled_sensor_columns)

In [19]:
# Display the assembled table of fall frames (the rendering below is truncated in the middle)
df_sensor_data

Unnamed: 0,TimeStamp(s),FrameCounter,AccX,AccY,AccZ,GyrX,GyrY,GyrZ,EulerX,EulerY,EulerZ,Fall
0,1.30,130.0,-0.043,-0.729,-0.159,25.439335,0.401071,-0.802141,19.033665,-12.163898,51.995939,1.0
1,1.31,131.0,-0.043,-0.719,-0.142,27.731167,0.171887,-1.718874,19.314414,-12.152439,51.978750,1.0
2,1.32,132.0,-0.042,-0.709,-0.136,29.965703,2.750198,-2.005353,19.623811,-12.118062,51.967291,1.0
3,1.33,133.0,-0.049,-0.711,-0.157,31.971056,2.750198,-0.974029,19.944668,-12.083684,51.967291,1.0
4,1.34,134.0,-0.062,-0.726,-0.151,33.575339,2.005353,0.401071,20.282713,-12.060766,51.978750,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
173333,4.67,467.0,0.895,-3.884,1.057,200.019638,-69.671693,41.825934,124.853278,6.978628,-72.490646,1.0
173334,4.68,468.0,-0.045,-3.997,1.733,173.434387,-47.269035,49.732754,126.543504,6.858307,-73.132359,1.0
173335,4.69,469.0,-0.452,-3.324,1.912,123.930815,-25.324744,-38.159003,127.809741,7.316674,-73.075063,1.0
173336,4.70,470.0,-0.637,-3.173,2.092,164.209763,-18.105473,-81.302740,129.528615,8.061519,-72.685452,1.0


In [20]:
# Summarise the table: row count, per-column non-null counts and dtypes, memory usage
df_sensor_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 173338 entries, 0 to 173337
Data columns (total 12 columns):
 #   Column        Non-Null Count   Dtype  
---  ------        --------------   -----  
 0   TimeStamp(s)  173338 non-null  float64
 1   FrameCounter  173338 non-null  float64
 2   AccX          173338 non-null  float64
 3   AccY          173338 non-null  float64
 4   AccZ          173338 non-null  float64
 5   GyrX          173338 non-null  float64
 6   GyrY          173338 non-null  float64
 7   GyrZ          173338 non-null  float64
 8   EulerX        173338 non-null  float64
 9   EulerY        173338 non-null  float64
 10  EulerZ        173338 non-null  float64
 11  Fall          173338 non-null  float64
dtypes: float64(12)
memory usage: 15.9 MB


In [21]:
# Inspect the first 80 collected rows
df_sensor_data.head(80)

Unnamed: 0,TimeStamp(s),FrameCounter,AccX,AccY,AccZ,GyrX,GyrY,GyrZ,EulerX,EulerY,EulerZ,Fall
0,1.30,130.0,-0.043,-0.729,-0.159,25.439335,0.401071,-0.802141,19.033665,-12.163898,51.995939,1.0
1,1.31,131.0,-0.043,-0.719,-0.142,27.731167,0.171887,-1.718874,19.314414,-12.152439,51.978750,1.0
2,1.32,132.0,-0.042,-0.709,-0.136,29.965703,2.750198,-2.005353,19.623811,-12.118062,51.967291,1.0
3,1.33,133.0,-0.049,-0.711,-0.157,31.971056,2.750198,-0.974029,19.944668,-12.083684,51.967291,1.0
4,1.34,134.0,-0.062,-0.726,-0.151,33.575339,2.005353,0.401071,20.282713,-12.060766,51.978750,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
75,2.05,205.0,0.655,0.588,-2.426,-327.731976,0.859437,31.569986,-43.779721,-9.150139,50.827104,1.0
76,2.06,206.0,0.205,1.331,-3.199,-278.113813,-26.470660,23.090207,-46.661700,-9.178787,51.216716,1.0
77,2.07,207.0,-0.477,2.461,-3.613,-215.718687,-40.164356,-1.031324,-48.896236,-9.465266,51.531843,1.0
78,2.08,208.0,-0.380,3.418,-3.463,-96.543423,-43.372921,-26.757139,-49.898912,-9.958010,51.692271,1.0


In [22]:
# Inspect the last 80 collected rows
df_sensor_data.tail(80)

Unnamed: 0,TimeStamp(s),FrameCounter,AccX,AccY,AccZ,GyrX,GyrY,GyrZ,EulerX,EulerY,EulerZ,Fall
173258,4.96,496.0,0.125,-0.869,0.089,101.127087,-7.448454,26.184181,105.263844,-2.589770,-78.976531,1.0
173259,4.97,497.0,0.010,-0.870,0.024,97.059085,-6.130651,25.783110,106.260791,-2.813224,-79.091122,1.0
173260,4.98,498.0,-0.043,-0.818,0.014,95.111028,-6.302538,25.267448,107.229090,-3.030948,-79.211444,1.0
173261,4.99,499.0,-0.088,-0.749,0.086,98.205001,-6.417130,26.699843,108.237496,-3.260131,-79.337494,1.0
173262,5.00,500.0,-0.040,-0.673,0.180,101.757341,-1.718874,28.705196,109.274550,-3.517962,-79.434897,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...
173333,4.67,467.0,0.895,-3.884,1.057,200.019638,-69.671693,41.825934,124.853278,6.978628,-72.490646,1.0
173334,4.68,468.0,-0.045,-3.997,1.733,173.434387,-47.269035,49.732754,126.543504,6.858307,-73.132359,1.0
173335,4.69,469.0,-0.452,-3.324,1.912,123.930815,-25.324744,-38.159003,127.809741,7.316674,-73.075063,1.0
173336,4.70,470.0,-0.637,-3.173,2.092,164.209763,-18.105473,-81.302740,129.528615,8.061519,-72.685452,1.0
