## Parents Data frame

In [2]:
# Library imports
import os
import pandas as pd


In [2]:
import os
import pandas as pd

def _read_sensor_file(file_path, prefix):
    """Read one tab-separated sensor file and append its vector magnitude.

    Parameters
    ----------
    file_path : str
        Path of a headerless, tab-delimited text file. Only its first six
        columns are read.
    prefix : str
        Short sensor tag (e.g. ``'gy'``) prepended to every column name.

    Returns
    -------
    pandas.DataFrame
        Columns ``<prefix>_timestamp``, ``<prefix>_internal_ts``,
        ``<prefix>_x``, ``<prefix>_y``, ``<prefix>_z``, ``<prefix>_app_id``
        and ``<prefix>_mag`` (Euclidean norm of the x/y/z components).
    """
    frame = pd.read_csv(file_path, delimiter='\t', header=None, usecols=[0, 1, 2, 3, 4, 5])
    frame.columns = [
        f'{prefix}_timestamp', f'{prefix}_internal_ts',
        f'{prefix}_x', f'{prefix}_y', f'{prefix}_z',
        f'{prefix}_app_id',
    ]
    frame[f'{prefix}_mag'] = (
        frame[f'{prefix}_x']**2 + frame[f'{prefix}_y']**2 + frame[f'{prefix}_z']**2
    )**0.5
    return frame


def load_and_concat_sensor_data(root_dir, i, id_offset=25, label=0):
    """Load the gyroscope, rotation and linear-acceleration files of one subject.

    The first sub-directory of ``root_dir`` whose name contains ``'parent'`` is
    searched for the folders ``Gyroscope``, ``rotation`` and
    ``LinearAcceleration``. The ``.txt`` files of the three folders are sorted
    by name and paired by position; each triple is read and concatenated
    column-wise (rows are aligned by position, not by timestamp) into one
    DataFrame.

    Parameters
    ----------
    root_dir : str
        Directory that contains the ``*parent*`` folder.
    i : int
        Subject number; the ``id`` column is set to ``i + id_offset``.
    id_offset : int, default 25
        Value added to ``i`` to build the ``id`` column.
    label : int, default 0
        Constant written to the ``label`` column.

    Returns
    -------
    list of pandas.DataFrame
        One DataFrame per successfully read file triple. The list is empty
        when the ``*parent*`` folder or one of the sensor folders is missing.
        A triple in which any file cannot be read is reported and skipped.
    """
    concatenated_data_frames = []

    # First directory entry whose name contains 'parent' (listing order).
    dynamic_folder = next(
        (d for d in os.listdir(root_dir)
         if os.path.isdir(os.path.join(root_dir, d)) and 'parent' in d),
        None,
    )
    if dynamic_folder is None:
        print("No folder matching the pattern '*_parent' found.")
        return concatenated_data_frames

    dynamic_folder_path = os.path.join(root_dir, dynamic_folder).replace("\\", "/")

    # Sensor folder name -> column prefix; insertion order fixes the column order.
    sensor_prefixes = {'Gyroscope': 'gy', 'rotation': 'ro', 'LinearAcceleration': 'la'}
    sensor_paths = {
        sensor: os.path.join(dynamic_folder_path, sensor).replace("\\", "/")
        for sensor in sensor_prefixes
    }

    if not all(os.path.exists(sensor_path) for sensor_path in sensor_paths.values()):
        print(f"One or more sensor folders not found: {sensor_paths}")
        return concatenated_data_frames

    sensor_files = {
        sensor: sorted(f for f in os.listdir(path) if f.endswith('.txt'))
        for sensor, path in sensor_paths.items()
    }

    # zip() stops at the shortest list, so report unmatched files instead of
    # dropping them silently.
    file_counts = {sensor: len(files) for sensor, files in sensor_files.items()}
    if len(set(file_counts.values())) > 1:
        print(
            f"Warning: sensor folders hold different numbers of .txt files {file_counts}; "
            f"only the first {min(file_counts.values())} of each are paired."
        )

    for file_names in zip(*(sensor_files[sensor] for sensor in sensor_prefixes)):
        frames = []
        for (sensor, prefix), file_name in zip(sensor_prefixes.items(), file_names):
            file_path = os.path.join(sensor_paths[sensor], file_name).replace("\\", "/")
            try:
                frames.append(_read_sensor_file(file_path, prefix))
            except Exception as e:
                # Best effort: one unreadable file discards the whole triple
                # but does not abort the remaining files.
                print(f"Error reading {file_path}: {e}")
                break

        if len(frames) != len(sensor_prefixes):
            continue

        # Side-by-side concatenation of the three sensor frames.
        concatenated_df = pd.concat(frames, axis=1)
        concatenated_df['id'] = i + id_offset
        concatenated_df['label'] = label
        concatenated_data_frames.append(concatenated_df)

    return concatenated_data_frames

# Load the sensor recordings of Parent1 .. Parent25.
# The Windows path is a raw string so that its backslashes are never parsed as
# escape sequences (the non-raw form raises SyntaxWarning on Python 3.12+).
root_base_dir = r'D:\internship\code\KidsOnThePhone_dataset\Parents'
parents_sensors = {}

for i in range(1, 26):
    root_dir = os.path.join(root_base_dir, f'Parent{i}', 'Sensors')
    parents_sensors[f"parent{i}_sensors"] = load_and_concat_sensor_data(root_dir, i)

# Show the first row of the first recording loaded for parent 1, if any.
if "parent1_sensors" in parents_sensors and parents_sensors["parent1_sensors"]:
    print(parents_sensors["parent1_sensors"][0].head(1))


    gy_timestamp   gy_internal_ts      gy_x      gy_y      gy_z  gy_app_id  \
0  1498777468259  535121377667630 -0.004578  0.010895 -0.000671      105.0   

     gy_mag  ro_timestamp  ro_internal_ts      ro_x  ...    ro_mag  \
0  0.011836  1.498777e+12    5.351215e+14  0.256882  ...  0.424786   

    la_timestamp   la_internal_ts      la_x      la_y      la_z  la_app_id  \
0  1498777468262  535121377667630 -0.027222  0.030685  0.206451      105.0   

     la_mag  id  label  
0  0.210487  26      0  

[1 rows x 23 columns]


In [6]:
# Number of recordings (DataFrames) loaded for parent 3.
parent3_recordings = parents_sensors["parent3_sensors"]
print(len(parent3_recordings))


17


## Data Preprocessing

In [7]:
# Data-quality report on the loaded `parents_sensors` dictionary: for every
# recording of every parent, count fully duplicated rows and missing cells.
separator = "-" * 30

for key, data_frames in parents_sensors.items():
    print(f"Processing {key}:")
    for position, frame in enumerate(data_frames, start=1):
        print(f"DataFrame {position}:")

        # Rows that are exact copies of an earlier row
        duplicate_rows = frame.duplicated().sum()
        print(f"Number of duplicates: {duplicate_rows}")

        # Missing cells summed over all columns
        missing_cells = frame.isnull().sum().sum()
        print(f"Number of null values: {missing_cells}")

        print(separator)


Processing parent1_sensors:
DataFrame 1:
Number of duplicates: 0
Number of null values: 28
------------------------------
DataFrame 2:
Number of duplicates: 0
Number of null values: 14
------------------------------
DataFrame 3:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 4:
Number of duplicates: 0
Number of null values: 7
------------------------------
DataFrame 5:
Number of duplicates: 0
Number of null values: 14
------------------------------
DataFrame 6:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 7:
Number of duplicates: 0
Number of null values: 7
------------------------------
DataFrame 8:
Number of duplicates: 0
Number of null values: 14
------------------------------
DataFrame 9:
Number of duplicates: 0
Number of null values: 7
------------------------------
DataFrame 10:
Number of duplicates: 0
Number of null values: 7
------------------------------
DataFrame 11:
Number of duplicates: 0

In [8]:

# Remove duplicated rows and rows with any missing value from every recording.
# The DataFrames are modified in place, so the lists stored in
# `parents_sensors` hold the cleaned data afterwards.
for key, data_frames in parents_sensors.items():
    print(f"Processing {key}:")
    for position, frame in enumerate(data_frames, start=1):
        print(f"DataFrame {position}:")

        frame.drop_duplicates(inplace=True)
        frame.dropna(inplace=True)

        # Row count that survived the cleaning
        print(f"Number of rows after cleaning: {frame.shape[0]}")

        print("-" * 30)

    print(f"Finished processing {key}")
    print("=" * 40)


Processing parent1_sensors:
DataFrame 1:
Number of rows after cleaning: 1732
------------------------------
DataFrame 2:
Number of rows after cleaning: 1737
------------------------------
DataFrame 3:
Number of rows after cleaning: 1739
------------------------------
DataFrame 4:
Number of rows after cleaning: 1738
------------------------------
DataFrame 5:
Number of rows after cleaning: 1738
------------------------------
DataFrame 6:
Number of rows after cleaning: 1738
------------------------------
DataFrame 7:
Number of rows after cleaning: 1737
------------------------------
DataFrame 8:
Number of rows after cleaning: 1738
------------------------------
DataFrame 9:
Number of rows after cleaning: 1737
------------------------------
DataFrame 10:
Number of rows after cleaning: 1738
------------------------------
DataFrame 11:
Number of rows after cleaning: 1738
------------------------------
DataFrame 12:
Number of rows after cleaning: 1737
------------------------------
DataFrame

In [9]:
# Sanity check after cleaning: every recording in `parents_sensors` should now
# report zero duplicated rows and zero missing cells.
for key, data_frames in parents_sensors.items():
    print(f"Processing {key}:")
    for number, recording in enumerate(data_frames, start=1):
        print(f"DataFrame {number}:")

        remaining_duplicates = recording.duplicated().sum()
        remaining_nulls = recording.isnull().sum().sum()

        print(f"Number of duplicates: {remaining_duplicates}")
        print(f"Number of null values: {remaining_nulls}")

        print("-" * 30)


Processing parent1_sensors:
DataFrame 1:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 2:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 3:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 4:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 5:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 6:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 7:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 8:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 9:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 10:
Number of duplicates: 0
Number of null values: 0
------------------------------
DataFrame 11:
Number of duplicates: 0
Num

In [10]:
# Preview the first 30 rows of the third recording of parent 2.
parent2_third_recording = parents_sensors["parent2_sensors"][2]
print(parent2_third_recording.head(30))

     gy_timestamp  gy_internal_ts      gy_x      gy_y      gy_z  gy_app_id  \
0   1499364820286  67771092179883  0.007370  0.013000 -0.004272       95.0   
1   1499364820308  67771112321485  0.004196  0.003281 -0.001831       95.0   
2   1499364820321  67771132463087  0.000351  0.000778  0.002838       95.0   
3   1499364820339  67771152604688  0.003647 -0.000076 -0.004837       95.0   
4   1499364820357  67771172746290  0.006683 -0.005646 -0.001480       95.0   
5   1499364820374  67771192887891 -0.004929 -0.004913 -0.008347       95.0   
6   1499364820391  67771213029493  0.004547  0.013824 -0.005630       95.0   
7   1499364820427  67771233171094 -0.012421  0.019547 -0.001587       95.0   
8   1499364820444  67771253312696  0.004608  0.010529 -0.013657       95.0   
9   1499364820462  67771273454298  0.012787 -0.002823  0.003860       95.0   
10  1499364820480  67771293595899  0.014999 -0.010025  0.006866       95.0   
11  1499364820497  67771313737501  0.005737  0.000290  0.002502 

In [13]:
import numpy as np
import pandas as pd

# Feature extraction: one row of summary statistics per recording.
#
# The output columns are generated from the three tables below instead of being
# spelled out one by one. Their order is, for each sensor prefix:
#   <prefix>_{x,y,z}_mean, _max, _min, _rmse, _std   (statistic-major, axis-minor)
#   <prefix>_mag_mean, _max, _min, _rmse, _std
# preceded by 'id' and followed by 'label' (62 columns in total).
SENSOR_PREFIXES = ('gy', 'ro', 'la')
AXES = ('x', 'y', 'z')
STATISTICS = {
    'mean': lambda signal: signal.mean(),
    'max': lambda signal: signal.max(),
    'min': lambda signal: signal.min(),
    # NOTE: despite the column name, this is the root-mean-square of the raw
    # signal (no reference value is subtracted). The name is kept so the
    # produced columns stay unchanged.
    'rmse': lambda signal: np.sqrt(np.mean(signal**2)),
    # pandas default, i.e. sample standard deviation (ddof=1)
    'std': lambda signal: signal.std(),
}


def _summarize_recording(df):
    """Reduce one cleaned recording to a single dict of summary features.

    Parameters
    ----------
    df : pandas.DataFrame
        Non-empty recording holding ``id``, ``label`` and, for each prefix in
        ``SENSOR_PREFIXES``, the columns ``<prefix>_x``, ``<prefix>_y``,
        ``<prefix>_z`` and ``<prefix>_mag``.

    Returns
    -------
    dict
        Feature name -> value, inserted in output-column order. ``id`` and
        ``label`` are taken from the first row.
    """
    features = {'id': df['id'].iloc[0]}
    for prefix in SENSOR_PREFIXES:
        for stat_name, stat in STATISTICS.items():
            for axis in AXES:
                features[f'{prefix}_{axis}_{stat_name}'] = stat(df[f'{prefix}_{axis}'])
        for stat_name, stat in STATISTICS.items():
            features[f'{prefix}_mag_{stat_name}'] = stat(df[f'{prefix}_mag'])
    features['label'] = df['label'].iloc[0]
    return features


# Full, ordered list of output columns; used so that a subject without any
# usable recording still yields an (empty) table with the expected columns.
FEATURE_COLUMNS = (
    ['id']
    + [
        column
        for prefix in SENSOR_PREFIXES
        for column in (
            [f'{prefix}_{axis}_{stat_name}' for stat_name in STATISTICS for axis in AXES]
            + [f'{prefix}_mag_{stat_name}' for stat_name in STATISTICS]
        )
    ]
    + ['label']
)

# Key of `parents_sensors` (e.g. 'parent1_sensors') -> per-recording feature table
extracted_features = {}

for key, data_frames in parents_sensors.items():
    records = []
    for df_idx, df in enumerate(data_frames):
        # A recording emptied by the cleaning step has no first row to read
        # 'id'/'label' from and no meaningful statistics; skip it.
        if df.empty:
            print(f"Skipping empty DataFrame {df_idx + 1} of {key}")
            continue
        records.append(_summarize_recording(df))

    extracted_features[key] = pd.DataFrame(records, columns=FEATURE_COLUMNS)

# Preview the feature table of parent 1, if it has any rows
if "parent1_sensors" in extracted_features and not extracted_features["parent1_sensors"].empty:
    print(extracted_features["parent1_sensors"].head())


   id  gy_x_mean  gy_y_mean  gy_z_mean  gy_x_max  gy_y_max  gy_z_max  \
0  26   0.005382   0.006229   0.051065  1.174347  3.238022  3.445114   
1  26   0.008800   0.004438   0.004971  0.307037  0.216614  0.101822   
2  26   0.005707   0.003812   0.002290  0.432327  0.328430  0.272812   
3  26  -0.002370   0.002397  -0.000705  0.368118  0.332596  0.101059   
4  26   0.000099  -0.001993   0.002131  0.360596  0.184464  0.239471   

   gy_x_min  gy_y_min  gy_z_min  ...  la_z_rmse  la_x_std  la_y_std  la_z_std  \
0 -1.057114 -2.686401 -0.277649  ...   0.272019  0.252229  0.212338  0.269916   
1 -0.237183 -0.220490 -0.073425  ...   0.143083  0.073080  0.085827  0.142348   
2 -0.384491 -0.300980 -0.203369  ...   0.174780  0.126461  0.119795  0.172402   
3 -0.306839 -0.269043 -0.113281  ...   0.167808  0.081594  0.075217  0.159663   
4 -0.318146 -0.254562 -0.250671  ...   0.152900  0.085408  0.116846  0.138199   

   la_mag_mean  la_mag_max  la_mag_min  la_mag_rmse  la_mag_std  label  
0     0

In [14]:
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
extracted_features["parent1_sensors"].info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 18 entries, 0 to 17
Data columns (total 62 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   id           18 non-null     int64  
 1   gy_x_mean    18 non-null     float64
 2   gy_y_mean    18 non-null     float64
 3   gy_z_mean    18 non-null     float64
 4   gy_x_max     18 non-null     float64
 5   gy_y_max     18 non-null     float64
 6   gy_z_max     18 non-null     float64
 7   gy_x_min     18 non-null     float64
 8   gy_y_min     18 non-null     float64
 9   gy_z_min     18 non-null     float64
 10  gy_x_rmse    18 non-null     float64
 11  gy_y_rmse    18 non-null     float64
 12  gy_z_rmse    18 non-null     float64
 13  gy_x_std     18 non-null     float64
 14  gy_y_std     18 non-null     float64
 15  gy_z_std     18 non-null     float64
 16  gy_mag_mean  18 non-null     float64
 17  gy_mag_max   18 non-null     float64
 18  gy_mag_min   18 non-null     float64
 19  gy_mag_rms

In [16]:
# List every subject key that has a feature table, followed by their count.
print("Keys:")
for subject_key in extracted_features:
    print(subject_key)
print(len(extracted_features))

Keys:
parent1_sensors
parent2_sensors
parent3_sensors
parent4_sensors
parent5_sensors
parent6_sensors
parent7_sensors
parent8_sensors
parent9_sensors
parent10_sensors
parent11_sensors
parent12_sensors
parent13_sensors
parent14_sensors
parent15_sensors
parent16_sensors
parent17_sensors
parent18_sensors
parent19_sensors
parent20_sensors
parent21_sensors
parent22_sensors
parent23_sensors
parent24_sensors
parent25_sensors
25


In [17]:
# Stack the per-parent feature tables into one table with a fresh 0..n-1 index.
per_parent_tables = list(extracted_features.values())
parents_data = pd.concat(per_parent_tables, ignore_index=True)



    

In [18]:
print(parents_data.head(5))
# info() prints its own report and returns None; calling it directly avoids
# the stray "None" line that print(parents_data.info()) would emit.
parents_data.info()

   id  gy_x_mean  gy_y_mean  gy_z_mean  gy_x_max  gy_y_max  gy_z_max  \
0  26   0.005382   0.006229   0.051065  1.174347  3.238022  3.445114   
1  26   0.008800   0.004438   0.004971  0.307037  0.216614  0.101822   
2  26   0.005707   0.003812   0.002290  0.432327  0.328430  0.272812   
3  26  -0.002370   0.002397  -0.000705  0.368118  0.332596  0.101059   
4  26   0.000099  -0.001993   0.002131  0.360596  0.184464  0.239471   

   gy_x_min  gy_y_min  gy_z_min  ...  la_z_rmse  la_x_std  la_y_std  la_z_std  \
0 -1.057114 -2.686401 -0.277649  ...   0.272019  0.252229  0.212338  0.269916   
1 -0.237183 -0.220490 -0.073425  ...   0.143083  0.073080  0.085827  0.142348   
2 -0.384491 -0.300980 -0.203369  ...   0.174780  0.126461  0.119795  0.172402   
3 -0.306839 -0.269043 -0.113281  ...   0.167808  0.081594  0.075217  0.159663   
4 -0.318146 -0.254562 -0.250671  ...   0.152900  0.085408  0.116846  0.138199   

   la_mag_mean  la_mag_max  la_mag_min  la_mag_rmse  la_mag_std  label  
0     0

In [19]:
# Shuffle the rows with a fixed seed. The original index labels are kept, and
# the name is rebound, so re-running this cell alone shuffles the already
# shuffled table again.
parents_data = parents_data.sample(frac=1, random_state=42)
print(parents_data.head())
# info() prints its own report and returns None; do not wrap it in print().
parents_data.info()

     id  gy_x_mean  gy_y_mean  gy_z_mean  gy_x_max  gy_y_max  gy_z_max  \
492  48   0.000348   0.002910  -0.000850  0.418884  0.459259  0.210815   
73   30   0.002272  -0.012039  -0.007248  1.880066  1.704865  2.950195   
347  41   0.003260  -0.000773  -0.006397  1.411987  1.252014  0.956192   
86   30  -0.001534  -0.010550  -0.004989  1.679184  2.591125  0.811890   
455  47   0.002003   0.005221   0.002339  0.644150  0.716812  0.144119   

     gy_x_min  gy_y_min  gy_z_min  ...  la_z_rmse  la_x_std  la_y_std  \
492 -0.233932 -0.460510 -0.166946  ...   0.221468  0.101556  0.084982   
73  -1.254150 -2.425674 -2.551514  ...   0.339864  0.284197  0.270438   
347 -1.080002 -1.137497 -2.412109  ...   0.509941  0.386451  0.270622   
86  -1.801025 -1.770065 -1.156448  ...   0.334336  0.296876  0.241595   
455 -0.370361 -1.000702 -0.126953  ...   0.258236  0.097325  0.086917   

     la_z_std  la_mag_mean  la_mag_max  la_mag_min  la_mag_rmse  la_mag_std  \
492  0.152481     0.247567    1.63642

In [20]:
# Destination CSV for the shuffled parent feature table
output_file = 'parents_3.csv'

# Write the table without the index column
parents_data.to_csv(output_file, index=False)

print(f"Concatenated features saved to {output_file}")


Concatenated features saved to parents_3.csv


Concatenate the two CSV files to obtain the whole dataset:

In [21]:
import pandas as pd

# Read the two per-group feature tables
parents_df, kids_df = (pd.read_csv(csv_name) for csv_name in ('parents_3.csv', 'kids_3.csv'))

# Stack them with a fresh index, then shuffle the rows with a fixed seed
data = (
    pd.concat([parents_df, kids_df], ignore_index=True)
    .sample(frac=1, random_state=42)
)

# Persist the shuffled table without the index column
data.to_csv('data_3.csv', index=False)



In [22]:
# Preview the first five rows of the combined, shuffled dataset.
first_rows = data.head(5)
print(first_rows)

      id  gy_x_mean  gy_y_mean  gy_z_mean  gy_x_max  gy_y_max  gy_z_max  \
319   40   0.001830   0.001090  -0.001095  0.665161  1.021057  0.416946   
956   19  -0.004149   0.002964  -0.012397  0.870743  0.551285  0.375229   
1094  20   0.069002  -0.206461  -0.346723  1.366318  0.643494  0.407059   
86    45   0.002946   0.000428   0.000202  0.713074  0.638000  0.266541   
990   22   0.019549  -0.004408   0.005758  1.593323  1.885162  1.472916   

      gy_x_min  gy_y_min  gy_z_min  ...  la_z_rmse  la_x_std  la_y_std  \
319  -0.791153 -0.527039 -0.190201  ...   0.165692  0.093031  0.098044   
956  -0.869904 -0.697311 -0.947021  ...   0.355238  0.246884  0.221454   
1094 -1.117706 -3.888367 -5.620560  ...   0.496602  0.377996  0.656716   
86   -0.384033 -0.560074 -0.208130  ...   0.317568  0.120684  0.147029   
990  -2.631653 -2.599777 -1.240189  ...   0.339852  0.204047  0.167584   

      la_z_std  la_mag_mean  la_mag_max  la_mag_min  la_mag_rmse  la_mag_std  \
319   0.157816     0.184