## Kids Data frame 

In [1]:
#Librairie Importation:
import os
import pandas as pd


In [8]:
import os
import pandas as pd

def load_and_concat_sensor_data(root_dir, i,label):
    concatenated_data_frames = []

    # Get the dynamic folder that matches the pattern *_kid
    dynamic_folder = None
    for d in os.listdir(root_dir):
        if os.path.isdir(os.path.join(root_dir, d)) and '_kid' in d:
            dynamic_folder = d
            break

    if dynamic_folder is None:
        print("No folder matching the pattern '*_kid' found.")
        return concatenated_data_frames
    else:
        # Construct the full path to the dynamic folder
        dynamic_folder_path = os.path.join(root_dir, dynamic_folder).replace("\\", "/")

        # Sensor folders to process
        sensor_folders = ['Gyroscope','rotation', 'LinearAcceleration']

        # Construct paths to the sensor folders
        sensor_paths = {sensor: os.path.join(dynamic_folder_path, sensor).replace("\\", "/") for sensor in sensor_folders}

        # Check if sensor folders exist
        if not all(os.path.exists(sensor_path) for sensor_path in sensor_paths.values()):
            print(f"One or more sensor folders not found: {sensor_paths}")
            return concatenated_data_frames

        # Get the list of .txt files in each sensor folder (assuming they have the same number of files)
        sensor_files = {sensor: sorted([f for f in os.listdir(sensor_paths[sensor]) if f.endswith('.txt')]) for sensor in sensor_folders}

        # Iterate through the files and process them
        for Gyroscope_file,rotation_file, linear_acc_file in zip(sensor_files['Gyroscope'],sensor_files['rotation'], sensor_files['LinearAcceleration']):
            # Construct the full file paths
            Gyroscope_file_path = os.path.join(sensor_paths['Gyroscope'], Gyroscope_file).replace("\\", "/")
            rotation_file_path = os.path.join(sensor_paths['rotation'], rotation_file).replace("\\", "/")
            linear_acc_file_path = os.path.join(sensor_paths['LinearAcceleration'], linear_acc_file).replace("\\", "/")
             # Read the Gyroscope file into a DataFrame
            try:
                Gyroscope_df = pd.read_csv(Gyroscope_file_path, delimiter='\t', header=None, usecols=[0, 1, 2, 3, 4, 5])
                Gyroscope_df.columns = ['gy_timestamp', 'gy_internal_ts', 'gy_x', 'gy_y', 'gy_z', 'gy_app_id']
                # Calculate magnitude of gyroscope vector
                Gyroscope_df['gy_mag'] = (Gyroscope_df['gy_x']**2 + Gyroscope_df['gy_y']**2 + Gyroscope_df['gy_z']**2)**0.5
            except Exception as e:
                print(f"Error reading {Gyroscope_file_path}: {e}")
                continue
            
            # Read the rotation file into a DataFrame
            try:
                rotation_df = pd.read_csv(rotation_file_path, delimiter='\t', header=None, usecols=[0, 1, 2, 3, 4, 5])
                rotation_df.columns = ['ro_timestamp', 'ro_internal_ts', 'ro_x', 'ro_y', 'ro_z', 'ro_app_id']
                # Calculate magnitude of rotation vector
                rotation_df['ro_mag'] = (rotation_df['ro_x']**2 + rotation_df['ro_y']**2 + rotation_df['ro_z']**2)**0.5
            except Exception as e:
                print(f"Error reading {rotation_file_path}: {e}")
                continue
            
            # Read the linear acceleration file into a DataFrame
            try:
                linear_acc_df = pd.read_csv(linear_acc_file_path, delimiter='\t', header=None, usecols=[0, 1, 2, 3, 4, 5])
                linear_acc_df.columns = ['la_timestamp', 'la_internal_ts', 'la_x', 'la_y', 'la_z', 'la_app_id']
                # Calculate magnitude of linear acceleration vector
                linear_acc_df['la_mag'] = (linear_acc_df['la_x']**2 + linear_acc_df['la_y']**2 + linear_acc_df['la_z']**2)**0.5
            except Exception as e:
                print(f"Error reading {linear_acc_file_path}: {e}")
                continue

            # Concatenate the two DataFrames along columns
            concatenated_df = pd.concat([Gyroscope_df,rotation_df, linear_acc_df], axis=1)
            
            # Add 'id' and 'label' columns
            concatenated_df['id'] = i
            concatenated_df['label'] = label
            
            # Append the concatenated DataFrame to the list
            concatenated_data_frames.append(concatenated_df)

    return concatenated_data_frames

# Example usage to load data for Kid1 to Kid25
root_base_dir = 'D:/internship/code/KidsOnThePhone_dataset/Kids'
kids_sensors = {}
kid_label=[1,1,1,1,1,0,1,1,0,0,1,0,0,1,1,1,1,1,1,0,0,0,0,0,0]
for i in range(1, 26):
    root_dir = os.path.join(root_base_dir, f'Kid{i}', 'Sensors')
    kids_sensors[f"kid{i}_sensors"] = load_and_concat_sensor_data(root_dir, i,kid_label[i-1])

# Display the first concatenated DataFrame for kid1_sensors
if "kid1_sensors" in kids_sensors and kids_sensors["kid1_sensors"]:
    print(kids_sensors["kid1_sensors"][23].head(1))


   gy_timestamp  gy_internal_ts     gy_x      gy_y      gy_z  gy_app_id  \
0  1.498776e+12    5.338068e+14 -0.00087 -0.046463  0.036911      105.0   

     gy_mag  ro_timestamp  ro_internal_ts      ro_x  ...    ro_mag  \
0  0.059346  1.498776e+12    5.338069e+14  0.309998  ...  0.349349   

    la_timestamp   la_internal_ts      la_x      la_y      la_z  la_app_id  \
0  1498776153671  533806775477233  0.177094 -0.224823 -0.130081      105.0   

    la_mag  id  label  
0  0.31437   1      1  

[1 rows x 23 columns]


In [16]:
print(kids_sensors["kid25_sensors"][22].head(1))


    gy_timestamp  gy_internal_ts      gy_x      gy_y      gy_z  gy_app_id  \
0  1504801180245    828221169497 -0.048706 -0.081284  0.026337       95.0   

     gy_mag  ro_timestamp  ro_internal_ts      ro_x  ...    ro_mag  \
0  0.098351  1.504801e+12    8.282212e+11  0.223831  ...  0.334444   

   la_timestamp  la_internal_ts      la_x      la_y      la_z  la_app_id  \
0  1.504801e+12    8.282413e+11 -0.017242  0.178253  0.183762       95.0   

     la_mag  id  label  
0  0.256593  25      0  

[1 rows x 23 columns]


In [17]:
print(len(kids_sensors["kid1_sensors"]))


23


## Data Preprocessing : 

In [18]:
# Example usage assuming `kids_sensors` dictionary is populated as before

for key, data_frames in kids_sensors.items():
    print(f"Processing {key}:")
    for df_idx, df in enumerate(data_frames):
        print(f"DataFrame {df_idx + 1}:")
        
        # Count duplicates
        num_duplicates = df.duplicated().sum()
        print(f"Number of duplicates: {num_duplicates}")
        
        # Count null values
        num_null_values = df.isnull().sum().sum()
        print(f"Number of null values: {num_null_values}")
        
        print("-" * 30)


Processing kid1_sensors:
DataFrame 1:
Number of duplicates: 0
Number of null values: 56
------------------------------
DataFrame 2:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 3:
Number of duplicates: 0
Number of null values: 7
------------------------------
DataFrame 4:
Number of duplicates: 0
Number of null values: 7
------------------------------
DataFrame 5:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 6:
Number of duplicates: 0
Number of null values: 7
------------------------------
DataFrame 7:
Number of duplicates: 0
Number of null values: 28
------------------------------
DataFrame 8:
Number of duplicates: 0
Number of null values: 28
------------------------------
DataFrame 9:
Number of duplicates: 0
Number of null values: 28
------------------------------
DataFrame 10:
Number of duplicates: 0
Number of null values: 7
------------------------------
DataFrame 11:
Number of duplicates: 0
Nu

In [19]:
# Example usage assuming `kids_sensors` dictionary is populated as before

for key, data_frames in kids_sensors.items():
    print(f"Processing {key}:")
    for df_idx, df in enumerate(data_frames):
        print(f"DataFrame {df_idx + 1}:")
        
        # Drop duplicates and null values inplace
        df.drop_duplicates(inplace=True)
        df.dropna(inplace=True)
        
        # Count after dropping
        num_rows_after_cleaning = len(df)
        print(f"Number of rows after cleaning: {num_rows_after_cleaning}")
        
        print("-" * 30)
    
    # No need to append, directly modify the original list in kids_sensors[key]

    print(f"Finished processing {key}")
    print("=" * 40)


Processing kid1_sensors:
DataFrame 1:
Number of rows after cleaning: 1730
------------------------------
DataFrame 2:
Number of rows after cleaning: 1738
------------------------------
DataFrame 3:
Number of rows after cleaning: 1738
------------------------------
DataFrame 4:
Number of rows after cleaning: 1737
------------------------------
DataFrame 5:
Number of rows after cleaning: 1738
------------------------------
DataFrame 6:
Number of rows after cleaning: 1738
------------------------------
DataFrame 7:
Number of rows after cleaning: 1737
------------------------------
DataFrame 8:
Number of rows after cleaning: 1738
------------------------------
DataFrame 9:
Number of rows after cleaning: 1737
------------------------------
DataFrame 10:
Number of rows after cleaning: 1738
------------------------------
DataFrame 11:
Number of rows after cleaning: 1738
------------------------------
DataFrame 12:
Number of rows after cleaning: 1737
------------------------------
DataFrame 13

In [20]:
# Example usage assuming `kids_sensors` dictionary is populated as before

for key, data_frames in kids_sensors.items():
    print(f"Processing {key}:")
    for df_idx, df in enumerate(data_frames):
        print(f"DataFrame {df_idx + 1}:")
        
        # Count duplicates
        num_duplicates = df.duplicated().sum()
        print(f"Number of duplicates: {num_duplicates}")
        
        # Count null values
        num_null_values = df.isnull().sum().sum()
        print(f"Number of null values: {num_null_values}")
        
        print("-" * 30)


Processing kid1_sensors:
DataFrame 1:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 2:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 3:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 4:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 5:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 6:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 7:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 8:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 9:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 10:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 11:
Number of duplicates: 0
Number

In [21]:
print(kids_sensors["kid2_sensors"][2].head(30))

     gy_timestamp   gy_internal_ts      gy_x      gy_y      gy_z  gy_app_id  \
0   1499123805645  881458752269552  0.135696  0.019409  0.059326       99.0   
1   1499123805664  881458772411154  0.088272 -0.037933  0.053543       99.0   
2   1499123805680  881458792552756  0.033981 -0.040024  0.022568       99.0   
3   1499123805696  881458812694357  0.009003  0.007797  0.048767       99.0   
4   1499123805733  881458832835959  0.035065  0.145264  0.123367       99.0   
5   1499123805749  881458852977560  0.032501  0.191391  0.232819       99.0   
6   1499123805767  881458873119162  0.018036  0.181213  0.283493       99.0   
7   1499123805790  881458893260763 -0.032227  0.183075  0.268661       99.0   
8   1499123805802  881458913402365 -0.062943  0.154221  0.222885       99.0   
9   1499123805822  881458933543967 -0.063919  0.112686  0.182983       99.0   
10  1499123805839  881458953685568 -0.127884  0.112762  0.191696       99.0   
11  1499123805856  881458973827170 -0.159531  0.1268

In [22]:
import numpy as np
import pandas as pd

# Initialize the dictionary to store extracted features
extracted_features = {}

# Iterate through each key (e.g., 'kid1_sensors', 'kid2_sensors', etc.) in kids_sensors
for key, data_frames in kids_sensors.items():
    # Initialize lists to store feature values for each DataFrame
    ids = []
    ro_x_means = []
    ro_y_means = []
    ro_z_means = []
    ro_x_maxs = []
    ro_y_maxs = []
    ro_z_maxs = []
    ro_x_mins = []
    ro_y_mins = []
    ro_z_mins = []
    ro_x_rmse = []
    ro_y_rmse = []
    ro_z_rmse = []
    ro_x_stds = []
    ro_y_stds = []
    ro_z_stds = []
    la_x_means = []
    la_y_means = []
    la_z_means = []
    la_x_maxs = []
    la_y_maxs = []
    la_z_maxs = []
    la_x_mins = []
    la_y_mins = []
    la_z_mins = []
    la_x_rmse = []
    la_y_rmse = []
    la_z_rmse = []
    la_x_stds = []
    la_y_stds = []
    la_z_stds = []
    ro_mag_means = []
    ro_mag_maxs = []
    ro_mag_mins = []
    ro_mag_rmse = []
    ro_mag_stds = []
    la_mag_means = []
    la_mag_maxs = []
    la_mag_mins = []
    la_mag_rmse = []
    la_mag_stds = []
    
    gy_x_means = []
    gy_y_means = []
    gy_z_means = []
    gy_x_maxs = []
    gy_y_maxs = []
    gy_z_maxs = []
    gy_x_mins = []
    gy_y_mins = []
    gy_z_mins = []
    gy_x_rmse = []
    gy_y_rmse = []
    gy_z_rmse = []
    gy_x_stds = []
    gy_y_stds = []
    gy_z_stds = []
    gy_mag_means = []
    gy_mag_maxs = []
    gy_mag_mins = []
    gy_mag_rmse = []
    gy_mag_stds = []
    labels = []
    
    # Iterate through each DataFrame in the current key's list of DataFrames
    for df in data_frames:
        # Extract id and label (assuming they are the same for rotation and linear acceleration)
        ids.append(df['id'].iloc[0])
        
        
        #Extract features for gyroscope:
        gy_x_means.append(df['gy_x'].mean())
        gy_y_means.append(df['gy_y'].mean())
        gy_z_means.append(df['gy_z'].mean())
        gy_x_maxs.append(df['gy_x'].max())
        gy_y_maxs.append(df['gy_y'].max())
        gy_z_maxs.append(df['gy_z'].max())
        gy_x_mins.append(df['gy_x'].min())
        gy_y_mins.append(df['gy_y'].min())
        gy_z_mins.append(df['gy_z'].min())
        gy_x_rmse.append(np.sqrt(np.mean(df['gy_x']**2)))
        gy_y_rmse.append(np.sqrt(np.mean(df['gy_y']**2)))
        gy_z_rmse.append(np.sqrt(np.mean(df['gy_z']**2)))
        gy_x_stds.append(df['gy_x'].std())
        gy_y_stds.append(df['gy_y'].std())
        gy_z_stds.append(df['gy_z'].std())

        gy_mag_means.append(df['gy_mag'].mean())
        gy_mag_maxs.append(df['gy_mag'].max())
        gy_mag_mins.append(df['gy_mag'].min())
        gy_mag_rmse.append(np.sqrt(np.mean(df['gy_mag']**2)))
        gy_mag_stds.append(df['gy_mag'].std())

        # Extract features for rotation
        ro_x_means.append(df['ro_x'].mean())
        ro_y_means.append(df['ro_y'].mean())
        ro_z_means.append(df['ro_z'].mean())
        ro_x_maxs.append(df['ro_x'].max())
        ro_y_maxs.append(df['ro_y'].max())
        ro_z_maxs.append(df['ro_z'].max())
        ro_x_mins.append(df['ro_x'].min())
        ro_y_mins.append(df['ro_y'].min())
        ro_z_mins.append(df['ro_z'].min())
        ro_x_rmse.append(np.sqrt(np.mean(df['ro_x']**2)))
        ro_y_rmse.append(np.sqrt(np.mean(df['ro_y']**2)))
        ro_z_rmse.append(np.sqrt(np.mean(df['ro_z']**2)))
        ro_x_stds.append(df['ro_x'].std())
        ro_y_stds.append(df['ro_y'].std())
        ro_z_stds.append(df['ro_z'].std())
        ro_mag_means.append(df['ro_mag'].mean())
        ro_mag_maxs.append(df['ro_mag'].max())
        ro_mag_mins.append(df['ro_mag'].min())
        ro_mag_rmse.append(np.sqrt(np.mean(df['ro_mag']**2)))
        ro_mag_stds.append(df['ro_mag'].std())
        
        # Extract features for linear acceleration
        la_x_means.append(df['la_x'].mean())
        la_y_means.append(df['la_y'].mean())
        la_z_means.append(df['la_z'].mean())
        la_x_maxs.append(df['la_x'].max())
        la_y_maxs.append(df['la_y'].max())
        la_z_maxs.append(df['la_z'].max())
        la_x_mins.append(df['la_x'].min())
        la_y_mins.append(df['la_y'].min())
        la_z_mins.append(df['la_z'].min())
        la_x_rmse.append(np.sqrt(np.mean(df['la_x']**2)))
        la_y_rmse.append(np.sqrt(np.mean(df['la_y']**2)))
        la_z_rmse.append(np.sqrt(np.mean(df['la_z']**2)))
        la_x_stds.append(df['la_x'].std())
        la_y_stds.append(df['la_y'].std())
        la_z_stds.append(df['la_z'].std())

        # Compute la_mag and extract features
        #la_mag = np.sqrt(df['la_x']**2 + df['la_y']**2 + df['la_z']**2)
        la_mag_means.append(df['la_mag'].mean())
        la_mag_maxs.append(df['la_mag'].max())
        la_mag_mins.append(df['la_mag'].min())
        la_mag_rmse.append(np.sqrt(np.mean(df['la_mag']**2)))
        la_mag_stds.append(df['la_mag'].std())

        # Extract label (assuming it's the same for all rows in this DataFrame)
        labels.append(df['label'].iloc[0])
    
    # Create a DataFrame for extracted features for the current key
    extracted_df = pd.DataFrame({
        'id': ids,
        'gy_x_mean': gy_x_means,
        'gy_y_mean': gy_y_means,
        'gy_z_mean': gy_z_means,
        'gy_x_max': gy_x_maxs,
        'gy_y_max': gy_y_maxs,
        'gy_z_max': gy_z_maxs,
        'gy_x_min': gy_x_mins,
        'gy_y_min': gy_y_mins,
        'gy_z_min': gy_z_mins,
        'gy_x_rmse': gy_x_rmse,
        'gy_y_rmse': gy_y_rmse,
        'gy_z_rmse': gy_z_rmse,
        'gy_x_std': gy_x_stds,
        'gy_y_std': gy_y_stds,
        'gy_z_std': gy_z_stds,
        'gy_mag_mean': gy_mag_means,
        'gy_mag_max': gy_mag_maxs,
        'gy_mag_min': gy_mag_mins,
        'gy_mag_rmse': gy_mag_rmse,
        'gy_mag_std': gy_mag_stds,
        'ro_x_mean': ro_x_means,
        'ro_y_mean': ro_y_means,
        'ro_z_mean': ro_z_means,
        'ro_x_max': ro_x_maxs,
        'ro_y_max': ro_y_maxs,
        'ro_z_max': ro_z_maxs,
        'ro_x_min': ro_x_mins,
        'ro_y_min': ro_y_mins,
        'ro_z_min': ro_z_mins,
        'ro_x_rmse': ro_x_rmse,
        'ro_y_rmse': ro_y_rmse,
        'ro_z_rmse': ro_z_rmse,
        'ro_x_std': ro_x_stds,
        'ro_y_std': ro_y_stds,
        'ro_z_std': ro_z_stds,
        'ro_mag_mean': ro_mag_means,
        'ro_mag_max': ro_mag_maxs,
        'ro_mag_min': ro_mag_mins,
        'ro_mag_rmse': ro_mag_rmse,
        'ro_mag_std': ro_mag_stds,
        'la_x_mean': la_x_means,
        'la_y_mean': la_y_means,
        'la_z_mean': la_z_means,
        'la_x_max': la_x_maxs,
        'la_y_max': la_y_maxs,
        'la_z_max': la_z_maxs,
        'la_x_min': la_x_mins,
        'la_y_min': la_y_mins,
        'la_z_min': la_z_mins,
        'la_x_rmse': la_x_rmse,
        'la_y_rmse': la_y_rmse,
        'la_z_rmse': la_z_rmse,
        'la_x_std': la_x_stds,
        'la_y_std': la_y_stds,
        'la_z_std': la_z_stds,
        'la_mag_mean': la_mag_means,
        'la_mag_max': la_mag_maxs,
        'la_mag_min': la_mag_mins,
        'la_mag_rmse': la_mag_rmse,
        'la_mag_std': la_mag_stds,
        'label': labels
    })
    
    # Assign the DataFrame to the corresponding key in extracted_features dictionary
    extracted_features[key] = extracted_df

# Example usage to print the first few rows of the extracted DataFrame for 'kid1_sensors'
if "kid1_sensors" in extracted_features and not extracted_features["kid1_sensors"].empty:
    print(extracted_features["kid1_sensors"].head())


   id  gy_x_mean  gy_y_mean  gy_z_mean  gy_x_max  gy_y_max  gy_z_max  \
0   1  -0.007581  -0.009005   0.018674  1.575409  1.336761  3.386673   
1   1   0.005044  -0.002437   0.011505  1.215485  1.590378  0.622864   
2   1   0.007418   0.002964   0.013290  1.426544  2.188156  0.709427   
3   1  -0.000427   0.003145   0.003816  1.370193  1.543365  0.587189   
4   1   0.010251   0.003180   0.009504  0.791412  0.803726  1.021011   

   gy_x_min  gy_y_min  gy_z_min  ...  la_z_rmse  la_x_std  la_y_std  la_z_std  \
0 -1.983490 -1.281906 -1.291245  ...   0.371903  0.366693  0.304626  0.371865   
1 -1.333755 -2.080521 -1.022186  ...   0.361042  0.196294  0.133895  0.357994   
2 -1.054611 -2.016098 -0.473877  ...   0.368192  0.210187  0.225796  0.365996   
3 -2.198212 -1.418472 -0.691360  ...   0.396760  0.212553  0.179639  0.393470   
4 -1.390442 -1.324173 -0.644333  ...   0.251352  0.233390  0.208717  0.244713   

   la_mag_mean  la_mag_max  la_mag_min  la_mag_rmse  la_mag_std  label  
0     0

In [25]:
print(extracted_features["kid8_sensors"].head())

   id  gy_x_mean  gy_y_mean  gy_z_mean  gy_x_max  gy_y_max  gy_z_max  \
0   8  -0.005500   0.005205   0.055907  0.997223  1.498016  2.276337   
1   8  -0.001523  -0.003691  -0.001113  0.805313  1.780502  0.591125   
2   8  -0.001715  -0.001927   0.002367  0.703400  1.032822  0.729340   
3   8  -0.001867  -0.007003   0.001063  0.608200  0.744308  0.479218   
4   8  -0.001428  -0.000211   0.004525  1.409531  2.029037  0.828094   

   gy_x_min  gy_y_min  gy_z_min  ...  la_z_rmse  la_x_std  la_y_std  la_z_std  \
0 -2.110809 -2.275970 -0.918625  ...   0.296802  0.303986  0.268829  0.291944   
1 -0.922028 -2.026611 -0.633469  ...   0.265139  0.215658  0.168513  0.261145   
2 -0.809280 -0.741623 -0.739899  ...   0.365585  0.315706  0.195085  0.361765   
3 -0.663300 -0.535416 -0.770569  ...   0.288237  0.240977  0.150637  0.283328   
4 -1.011047 -2.015610 -0.611298  ...   0.301442  0.240488  0.154079  0.296027   

   la_mag_mean  la_mag_max  la_mag_min  la_mag_rmse  la_mag_std  label  
0     0

In [26]:
print(extracted_features["kid1_sensors"].info())


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 23 entries, 0 to 22
Data columns (total 62 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   id           23 non-null     int64  
 1   gy_x_mean    23 non-null     float64
 2   gy_y_mean    23 non-null     float64
 3   gy_z_mean    23 non-null     float64
 4   gy_x_max     23 non-null     float64
 5   gy_y_max     23 non-null     float64
 6   gy_z_max     23 non-null     float64
 7   gy_x_min     23 non-null     float64
 8   gy_y_min     23 non-null     float64
 9   gy_z_min     23 non-null     float64
 10  gy_x_rmse    23 non-null     float64
 11  gy_y_rmse    23 non-null     float64
 12  gy_z_rmse    23 non-null     float64
 13  gy_x_std     23 non-null     float64
 14  gy_y_std     23 non-null     float64
 15  gy_z_std     23 non-null     float64
 16  gy_mag_mean  23 non-null     float64
 17  gy_mag_max   23 non-null     float64
 18  gy_mag_min   23 non-null     float64
 19  gy_mag_rms

In [27]:
# Concatenate all data frames into a single data frame
kids_data = pd.concat(extracted_features.values(), ignore_index=True)


In [28]:
print(kids_data.head(5))
print(kids_data.info())

   id  gy_x_mean  gy_y_mean  gy_z_mean  gy_x_max  gy_y_max  gy_z_max  \
0   1  -0.007581  -0.009005   0.018674  1.575409  1.336761  3.386673   
1   1   0.005044  -0.002437   0.011505  1.215485  1.590378  0.622864   
2   1   0.007418   0.002964   0.013290  1.426544  2.188156  0.709427   
3   1  -0.000427   0.003145   0.003816  1.370193  1.543365  0.587189   
4   1   0.010251   0.003180   0.009504  0.791412  0.803726  1.021011   

   gy_x_min  gy_y_min  gy_z_min  ...  la_z_rmse  la_x_std  la_y_std  la_z_std  \
0 -1.983490 -1.281906 -1.291245  ...   0.371903  0.366693  0.304626  0.371865   
1 -1.333755 -2.080521 -1.022186  ...   0.361042  0.196294  0.133895  0.357994   
2 -1.054611 -2.016098 -0.473877  ...   0.368192  0.210187  0.225796  0.365996   
3 -2.198212 -1.418472 -0.691360  ...   0.396760  0.212553  0.179639  0.393470   
4 -1.390442 -1.324173 -0.644333  ...   0.251352  0.233390  0.208717  0.244713   

   la_mag_mean  la_mag_max  la_mag_min  la_mag_rmse  la_mag_std  label  
0     0

In [29]:
kids_data = kids_data.sample(frac=1, random_state=42)
print(kids_data.head())

     id  gy_x_mean  gy_y_mean  gy_z_mean  gy_x_max  gy_y_max  gy_z_max  \
634  25   0.000894   0.001702  -0.001758  0.153488  0.172836  0.043320   
220  11   0.000230  -0.002235  -0.000820  1.273605  1.601685  0.680222   
426  19  -0.004458   0.002680  -0.002684  0.882263  0.614456  0.896729   
428  19  -0.002085   0.004735  -0.007323  1.269821  0.630142  0.704071   
72    4   0.006371  -0.014016   0.014806  1.701492  2.086624  0.731125   

     gy_x_min  gy_y_min  gy_z_min  ...  la_z_rmse  la_x_std  la_y_std  \
634 -0.126099 -0.243820 -0.075119  ...   0.146363  0.051765  0.048712   
220 -1.312424 -1.283035 -0.634354  ...   0.505715  0.246228  0.235078   
426 -0.863815 -0.630234 -0.487061  ...   0.539835  0.214278  0.245038   
428 -1.125656 -0.795486 -0.688461  ...   0.268936  0.178997  0.176665   
72  -3.823990 -2.307922 -0.955780  ...   0.954466  0.447212  0.369620   

     la_z_std  la_mag_mean  la_mag_max  la_mag_min  la_mag_rmse  la_mag_std  \
634  0.086643     0.166585    0.47922

In [31]:
# Define the output file path
output_file = 'kids_3c.csv'

# Save the concatenated DataFrame to a CSV file
kids_data.to_csv(output_file, index=False)

print(f"Concatenated features saved to {output_file}")


Concatenated features saved to kids_3c.csv


In [32]:
print(kids_data.columns)

Index(['id', 'gy_x_mean', 'gy_y_mean', 'gy_z_mean', 'gy_x_max', 'gy_y_max',
       'gy_z_max', 'gy_x_min', 'gy_y_min', 'gy_z_min', 'gy_x_rmse',
       'gy_y_rmse', 'gy_z_rmse', 'gy_x_std', 'gy_y_std', 'gy_z_std',
       'gy_mag_mean', 'gy_mag_max', 'gy_mag_min', 'gy_mag_rmse', 'gy_mag_std',
       'ro_x_mean', 'ro_y_mean', 'ro_z_mean', 'ro_x_max', 'ro_y_max',
       'ro_z_max', 'ro_x_min', 'ro_y_min', 'ro_z_min', 'ro_x_rmse',
       'ro_y_rmse', 'ro_z_rmse', 'ro_x_std', 'ro_y_std', 'ro_z_std',
       'ro_mag_mean', 'ro_mag_max', 'ro_mag_min', 'ro_mag_rmse', 'ro_mag_std',
       'la_x_mean', 'la_y_mean', 'la_z_mean', 'la_x_max', 'la_y_max',
       'la_z_max', 'la_x_min', 'la_y_min', 'la_z_min', 'la_x_rmse',
       'la_y_rmse', 'la_z_rmse', 'la_x_std', 'la_y_std', 'la_z_std',
       'la_mag_mean', 'la_mag_max', 'la_mag_min', 'la_mag_rmse', 'la_mag_std',
       'label'],
      dtype='object')
