# Gutenberg Gait Database | Script to merge the data with GaitRec data 

In [None]:
# Copyright (C)  2021  Djordje Slijepcevic
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

### In order to use this notebook, please download the GaitRec dataset from: [GaitRec dataset](https://figshare.com/collections/GaitRec_A_large-scale_ground_reaction_force_dataset_of_healthy_and_impaired_gait/4788012)
### For further information regarding the GaitRec dataset, refer to: [GaitRec paper](https://www.nature.com/articles/s41597-020-0481-z)

In [None]:
import pandas as pd
import os
import numpy as np

### Utility functions

In [None]:
def add_dataset_id(data):
    """Tag every row of ``data`` with the dataset ID used for GaitRec.

    The GaitRec data is identified by the dataset ID 0.  The column
    ``DATASET_ID`` is written directly into ``data`` (created if missing,
    overwritten otherwise) and the same object is returned.
    """
    gaitrec_dataset_id = 0
    data['DATASET_ID'] = gaitrec_dataset_id
    return data

def make_unique_id(data1, data2):
    """Shift the IDs of ``data2`` so they cannot collide with those of ``data1``.

    The largest ``SUBJECT_ID`` found in ``data1`` is added to every
    ``SUBJECT_ID`` of ``data2``; the same is done for ``SESSION_ID``.
    ``data2`` is modified in place and also returned, ``data1`` is only read.

    When ``data1`` holds no IDs there is nothing to collide with, so the
    corresponding column of ``data2`` is left untouched (``np.max`` would
    otherwise raise a ``ValueError`` on the zero-length array).

    NOTE(review): the offset is taken from whichever ``data1`` is passed in,
    so one ``data2`` record only receives the same new ID in several tables
    if all of those ``data1`` tables share the same maximum IDs -- confirm
    this holds for the files being merged.
    """
    for column in ('SUBJECT_ID', 'SESSION_ID'):
        existing_ids = data1[column].values
        if len(existing_ids) == 0:
            # nothing to be unique against -> keep the IDs of data2 as they are
            continue
        data2[column] = data2[column] + np.max(existing_ids)

    return data2

def merge_data(data1, data2):
    """Append ``data2`` to ``data1`` after tagging and re-numbering ``data2``.

    Before the two tables are stacked, ``data2`` receives its DATASET_ID and
    its SUBJECT_ID / SESSION_ID values are shifted so that they stay unique
    next to the IDs already present in ``data1``.  The result gets a fresh
    running index and the column order is not sorted.
    """
    prepared = make_unique_id(data1, add_dataset_id(data2))
    return pd.concat([data1, prepared], ignore_index=True, sort=False)

### Gutenberg Gait Database

In [None]:
# Path to data
path = 'GutenbergGaitDatabase/'


def _read_gutenberg(file_name, folder=path):
    """Load one CSV file from the Gutenberg Gait Database folder."""
    return pd.read_csv(os.path.join(folder, file_name))


# Left lower extremity
GRF_F_V_PRO_left = _read_gutenberg('GRF_F_V_PRO_left.csv')
GRF_F_V_RAW_left = _read_gutenberg('GRF_F_V_RAW_left.csv')

GRF_F_AP_PRO_left = _read_gutenberg('GRF_F_AP_PRO_left.csv')
GRF_F_AP_RAW_left = _read_gutenberg('GRF_F_AP_RAW_left.csv')

GRF_F_ML_PRO_left = _read_gutenberg('GRF_F_ML_PRO_left.csv')
GRF_F_ML_RAW_left = _read_gutenberg('GRF_F_ML_RAW_left.csv')

GRF_COP_AP_PRO_left = _read_gutenberg('GRF_COP_AP_PRO_left.csv')
GRF_COP_AP_RAW_left = _read_gutenberg('GRF_COP_AP_RAW_left.csv')

GRF_COP_ML_PRO_left = _read_gutenberg('GRF_COP_ML_PRO_left.csv')
GRF_COP_ML_RAW_left = _read_gutenberg('GRF_COP_ML_RAW_left.csv')

# Right lower extremity
GRF_F_V_PRO_right = _read_gutenberg('GRF_F_V_PRO_right.csv')
GRF_F_V_RAW_right = _read_gutenberg('GRF_F_V_RAW_right.csv')

GRF_F_AP_PRO_right = _read_gutenberg('GRF_F_AP_PRO_right.csv')
GRF_F_AP_RAW_right = _read_gutenberg('GRF_F_AP_RAW_right.csv')

GRF_F_ML_PRO_right = _read_gutenberg('GRF_F_ML_PRO_right.csv')
GRF_F_ML_RAW_right = _read_gutenberg('GRF_F_ML_RAW_right.csv')

GRF_COP_AP_PRO_right = _read_gutenberg('GRF_COP_AP_PRO_right.csv')
GRF_COP_AP_RAW_right = _read_gutenberg('GRF_COP_AP_RAW_right.csv')

GRF_COP_ML_PRO_right = _read_gutenberg('GRF_COP_ML_PRO_right.csv')
GRF_COP_ML_RAW_right = _read_gutenberg('GRF_COP_ML_RAW_right.csv')

# Walking Speed
GRF_walking_speed = _read_gutenberg('GRF_walking_speed.csv')

# Metadata
GRF_metadata = _read_gutenberg('GRF_metadata.csv')


### GaitRec Dataset

In [None]:
# Path to data
path = 'GaitRec/'


def _append_gaitrec(merged, file_name, folder=path):
    """Read one GaitRec CSV file and merge it onto the table loaded so far."""
    return merge_data(merged, pd.read_csv(os.path.join(folder, file_name)))


# NOTE: every statement below overwrites its variable with the merged table,
# so executing this cell a second time appends the GaitRec rows once more.

# Left lower extremity
GRF_F_V_PRO_left = _append_gaitrec(GRF_F_V_PRO_left, 'GRF_F_V_PRO_left.csv')
GRF_F_V_RAW_left = _append_gaitrec(GRF_F_V_RAW_left, 'GRF_F_V_RAW_left.csv')

GRF_F_AP_PRO_left = _append_gaitrec(GRF_F_AP_PRO_left, 'GRF_F_AP_PRO_left.csv')
GRF_F_AP_RAW_left = _append_gaitrec(GRF_F_AP_RAW_left, 'GRF_F_AP_RAW_left.csv')

GRF_F_ML_PRO_left = _append_gaitrec(GRF_F_ML_PRO_left, 'GRF_F_ML_PRO_left.csv')
GRF_F_ML_RAW_left = _append_gaitrec(GRF_F_ML_RAW_left, 'GRF_F_ML_RAW_left.csv')

GRF_COP_AP_PRO_left = _append_gaitrec(GRF_COP_AP_PRO_left, 'GRF_COP_AP_PRO_left.csv')
GRF_COP_AP_RAW_left = _append_gaitrec(GRF_COP_AP_RAW_left, 'GRF_COP_AP_RAW_left.csv')

GRF_COP_ML_PRO_left = _append_gaitrec(GRF_COP_ML_PRO_left, 'GRF_COP_ML_PRO_left.csv')
GRF_COP_ML_RAW_left = _append_gaitrec(GRF_COP_ML_RAW_left, 'GRF_COP_ML_RAW_left.csv')

# Right lower extremity
GRF_F_V_PRO_right = _append_gaitrec(GRF_F_V_PRO_right, 'GRF_F_V_PRO_right.csv')
GRF_F_V_RAW_right = _append_gaitrec(GRF_F_V_RAW_right, 'GRF_F_V_RAW_right.csv')

GRF_F_AP_PRO_right = _append_gaitrec(GRF_F_AP_PRO_right, 'GRF_F_AP_PRO_right.csv')
GRF_F_AP_RAW_right = _append_gaitrec(GRF_F_AP_RAW_right, 'GRF_F_AP_RAW_right.csv')

GRF_F_ML_PRO_right = _append_gaitrec(GRF_F_ML_PRO_right, 'GRF_F_ML_PRO_right.csv')
GRF_F_ML_RAW_right = _append_gaitrec(GRF_F_ML_RAW_right, 'GRF_F_ML_RAW_right.csv')

GRF_COP_AP_PRO_right = _append_gaitrec(GRF_COP_AP_PRO_right, 'GRF_COP_AP_PRO_right.csv')
GRF_COP_AP_RAW_right = _append_gaitrec(GRF_COP_AP_RAW_right, 'GRF_COP_AP_RAW_right.csv')

GRF_COP_ML_PRO_right = _append_gaitrec(GRF_COP_ML_PRO_right, 'GRF_COP_ML_PRO_right.csv')
GRF_COP_ML_RAW_right = _append_gaitrec(GRF_COP_ML_RAW_right, 'GRF_COP_ML_RAW_right.csv')


# Walking Speed: is not specified for the GaitRec dataset (the merged rows of
# the GaitRec data will contain NaNs). Only the first three columns of a
# GaitRec file are needed here (presumably the ID columns -- verify against
# the file header), so just those are parsed via `usecols` instead of loading
# the whole file and slicing it afterwards. This also hands merge_data an
# independent frame rather than a slice of a larger one.
GRF_walking_speed = merge_data(
    GRF_walking_speed,
    pd.read_csv(os.path.join(path, 'GRF_F_V_PRO_left.csv'), usecols=[0, 1, 2]),
)

# Metadata
GRF_metadata = merge_data(GRF_metadata, pd.read_csv(os.path.join(path,'GRF_metadata.csv')))


## Processed Data

### Left Lower Extremity

In [None]:
GRF_F_V_PRO_left

In [None]:
GRF_F_AP_PRO_left

In [None]:
GRF_F_ML_PRO_left

In [None]:
GRF_COP_AP_PRO_left

In [None]:
GRF_COP_ML_PRO_left

### Right Lower Extremity

In [None]:
GRF_F_V_PRO_right

In [None]:
GRF_F_AP_PRO_right

In [None]:
GRF_F_ML_PRO_right

In [None]:
GRF_COP_AP_PRO_right

In [None]:
GRF_COP_ML_PRO_right

## Raw Data

### Left Lower Extremity


In [None]:
GRF_F_V_RAW_left

In [None]:
GRF_F_AP_RAW_left

In [None]:
GRF_F_ML_RAW_left

In [None]:
GRF_COP_AP_RAW_left

In [None]:
GRF_COP_ML_RAW_left

### Right Lower Extremity

In [None]:
GRF_F_V_RAW_right

In [None]:
GRF_F_AP_RAW_right

In [None]:
GRF_F_ML_RAW_right

In [None]:
GRF_COP_AP_RAW_right

In [None]:
GRF_COP_ML_RAW_right

## Walking speed

In [None]:
GRF_walking_speed

## Metadata

In [None]:
GRF_metadata