In [27]:
import numpy as np
import pandas as pd
import zipfile

In [28]:
# Id ranges used to build archive member names of the form SAxx/SxxTxxRxx.csv.
# Upper bounds are written as "last id + 1" so the inclusive range is obvious.
subjects = np.arange(1, 39 + 1)    # subject ids 1..39
tasks = np.arange(20, 34 + 1)      # task ids 20..34; these tasks are labelled as falls
trials = np.arange(1, 11 + 1)      # trial ids 1..11

In [29]:
# Zip archive containing the sensor CSV files, read as members named SAxx/SxxTxxRxx.csv
sensor_zip_path = 'sensor_data.zip'

In [30]:
# Accumulator for labelled samples: the loading loops append one array per sensor
# row (9 sensor values followed by the fall label). Re-run this cell to reset it
# before re-running the loading loops, otherwise rows are appended twice.
sensor_data_list = []

In [31]:
def read_file_from_zip(zip_path, file_name):
    """Read one CSV member of a zip archive into a DataFrame.

    Parameters
    ----------
    zip_path : str or path-like
        Location of the zip archive.
    file_name : str
        Name of the member inside the archive (e.g. 'SA01/S01T20R01.csv').

    Returns
    -------
    pandas.DataFrame or None
        The parsed CSV, or None when the archive has no member with that name.

    Raises
    ------
    Whatever ``zipfile.ZipFile`` or ``pandas.read_csv`` raise for an
    unreadable archive or a malformed CSV.
    """
    with zipfile.ZipFile(zip_path, 'r') as z:
        # getinfo() is a direct lookup by name; the previous
        # `file_name in z.namelist()` built and scanned a full list of member
        # names on every call.
        try:
            member = z.getinfo(file_name)
        except KeyError:
            return None
        # Parse outside the try so a KeyError raised while reading the CSV is
        # not mistaken for "member not found".
        with z.open(member) as f:
            return pd.read_csv(f)

In [32]:
# Load every fall trial (task ids in `tasks`) and label each sample row with 1.
# NOTE: this cell appends to sensor_data_list; re-running it without resetting
# the list first adds the same rows a second time.
fall_load_errors = []  # (archive member, error repr) for files that failed to load
for i in subjects:
    for j in tasks:
        for k in trials:
            filename = f'S{i:02d}T{j:02d}R{k:02d}.csv'
            member = f'SA{i:02d}/{filename}'
            try:
                # sensor data
                data = read_file_from_zip(sensor_zip_path, member)
                if data is None:
                    continue  # this subject/task/trial combination is not in the archive

                # sensor data columns
                sensor_data = data[['AccX', 'AccY', 'AccZ', 'GyrX', 'GyrY', 'GyrZ', 'EulerX', 'EulerY', 'EulerZ']].values

                # Attach the label column (1 = fall) to all rows at once, then add
                # one array per row to the list, instead of calling np.append per row.
                labels = np.full(len(sensor_data), 1)
                sensor_data_list.extend(np.column_stack((sensor_data, labels)))

            except Exception as e:
                # Still best-effort (a bad file is skipped), but the failure is
                # recorded rather than silently discarded.
                fall_load_errors.append((member, repr(e)))
                continue

# One summary line instead of one message per file keeps the output readable.
if fall_load_errors:
    print(f'Skipped {len(fall_load_errors)} fall file(s) due to errors; first: {fall_load_errors[0]}')

----------------------------------------ADL DATA------------------------------------

In [33]:
# Task ids treated as ADL (non-fall): 1..19 plus the two extra ids 35 and 36.
range_array = np.arange(1, 19 + 1)
additional_numbers = np.asarray([35, 36])
# Join the two 1-D id arrays end to end
adl_task = np.hstack([range_array, additional_numbers])

In [34]:
# Display the combined ADL task ids as a quick check
adl_task

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
       18, 19, 35, 36])

In [35]:
# Load every ADL trial (task ids in `adl_task`) and label each sample row with 0.
# NOTE: this cell appends to sensor_data_list; re-running it without resetting
# the list first adds the same rows a second time.
adl_load_errors = []  # (archive member, error repr) for files that failed to load
for i in subjects:
    for j in adl_task:
        for k in trials:
            filename = f'S{i:02d}T{j:02d}R{k:02d}.csv'
            member = f'SA{i:02d}/{filename}'
            try:
                # sensor data
                data = read_file_from_zip(sensor_zip_path, member)
                if data is None:
                    continue  # this subject/task/trial combination is not in the archive

                # sensor data columns
                sensor_data = data[['AccX', 'AccY', 'AccZ', 'GyrX', 'GyrY', 'GyrZ', 'EulerX', 'EulerY', 'EulerZ']].values

                # Attach the label column (0 = non-fall) to all rows at once, then add
                # one array per row to the list, instead of calling np.append per row.
                labels = np.full(len(sensor_data), 0)
                sensor_data_list.extend(np.column_stack((sensor_data, labels)))

            except Exception as e:
                # Still best-effort (a bad file is skipped), but the failure is
                # recorded rather than silently discarded.
                adl_load_errors.append((member, repr(e)))
                continue

# One summary line instead of one message per file keeps the output readable.
if adl_load_errors:
    print(f'Skipped {len(adl_load_errors)} ADL file(s) due to errors; first: {adl_load_errors[0]}')

In [36]:
# Turn the accumulated per-sample arrays into one frame: nine sensor columns
# followed by the 'Fall' label column.
df_sensor_data = pd.DataFrame(
    data=sensor_data_list,
    columns=[
        'AccX', 'AccY', 'AccZ',
        'GyrX', 'GyrY', 'GyrZ',
        'EulerX', 'EulerY', 'EulerZ',
        'Fall',
    ],
)

In [37]:
# Class balance: number of sample rows per label (1 = fall, 0 = non-fall)
df_sensor_data['Fall'].value_counts()

Fall
0.0    2269693
1.0    1725407
Name: count, dtype: int64

In [41]:
# Preview the assembled frame (pandas truncates the display to the first and last rows)
df_sensor_data

Unnamed: 0,AccX,AccY,AccZ,GyrX,GyrY,GyrZ,EulerX,EulerY,EulerZ,Fall
0,-0.250,-0.809,-0.188,-35.122325,14.954204,4.755551,40.657100,-14.782316,56.952025,1.0
1,-0.070,-1.023,0.056,-2.578311,-0.171887,-0.114592,40.628452,-14.782316,56.946296,1.0
2,-0.070,-1.019,0.056,-3.781523,0.859437,-0.286479,40.588345,-14.776587,56.952025,1.0
3,-0.070,-1.019,0.044,-5.385805,1.948057,-0.458366,40.531049,-14.759398,56.957755,1.0
4,-0.069,-1.025,0.028,-6.531721,2.578311,-0.687550,41.310272,-14.656266,56.539495,1.0
...,...,...,...,...,...,...,...,...,...,...
3995095,-0.035,-1.004,0.138,-0.229183,-1.661578,2.578311,98.033114,-3.420559,-29.742250,0.0
3995096,-0.030,-1.000,0.135,-0.630254,-1.375099,2.463719,98.027384,-3.443478,-29.759439,0.0
3995097,-0.024,-0.997,0.131,-1.203212,-1.546987,2.349128,97.947170,-3.374723,-29.776627,0.0
3995098,-0.019,-0.997,0.126,-1.776170,-1.776170,2.349128,97.929981,-3.391911,-29.799546,0.0


In [39]:
# Column dtypes, row count and memory footprint of the assembled frame
df_sensor_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3995100 entries, 0 to 3995099
Data columns (total 10 columns):
 #   Column  Dtype  
---  ------  -----  
 0   AccX    float64
 1   AccY    float64
 2   AccZ    float64
 3   GyrX    float64
 4   GyrY    float64
 5   GyrZ    float64
 6   EulerX  float64
 7   EulerY  float64
 8   EulerZ  float64
 9   Fall    float64
dtypes: float64(10)
memory usage: 304.8 MB


In [40]:
# Sanity check: count missing values per column
df_sensor_data.isnull().sum()

AccX      0
AccY      0
AccZ      0
GyrX      0
GyrY      0
GyrZ      0
EulerX    0
EulerY    0
EulerZ    0
Fall      0
dtype: int64