In [None]:
import os, csv, json
import pandas as pd

from IPython.display import display

In [None]:
# load metadata for data cleaning 

def load_metadata(participant_number, left_or_right, sample_number):
    """Read and parse the ``meta.json`` file of a single sample.

    Args:
        participant_number: Name of the participant folder under ``data2`` (string).
        left_or_right: Name of the foot folder (callers pass "left" or "right").
        sample_number: Name of the sample folder (string).

    Returns:
        The decoded JSON content of ``meta.json``.
    """
    sample_path = os.path.normpath(
        "".join(("data2/", participant_number, "/feet/", left_or_right, "/samples/", sample_number))
    )

    # Parse directly from the open handle instead of reading the text first.
    with open(os.path.join(sample_path, "meta.json"), 'r') as handle:
        return json.load(handle)

# This part is for the primary task

In [None]:
def load_sample(participant_number, left_or_right, sample_number):
    """Build a one-row DataFrame for a sample from ``pressure.csv`` and ``points.json``.

    Args:
        participant_number: Name of the participant folder under ``data2`` (string).
        left_or_right: Name of the foot folder (callers pass "left" or "right").
        sample_number: Name of the sample folder (string).

    Returns:
        A DataFrame with a single row. ``input_features`` holds the first row of
        ``pressure.csv`` converted to floats; every entry of ``points.json``
        contributes one ``pointType_<pointType>`` column holding its ``points``.
    """
    sample_path = os.path.normpath(
        "".join(("data2/", participant_number, "/feet/", left_or_right, "/samples/", sample_number))
    )

    # Only the first CSV row is used; any further rows are ignored.
    with open(os.path.join(sample_path, "pressure.csv"), mode='r') as pressure_handle:
        first_row = next(csv.reader(pressure_handle))
    features = list(map(float, first_row))

    with open(os.path.join(sample_path, "points.json"), 'r') as points_handle:
        annotations = json.load(points_handle)

    # Every value is wrapped in a one-element list so that the complete list
    # ends up inside a single cell of the single row.
    row = {'input_features': [features]}
    for annotation in annotations:
        kind = annotation['pointType']
        row[f'pointType_{kind}'] = [annotation['points']]

    return pd.DataFrame(row)


In [None]:
def load_sample_rawAdjusted(participant_number, left_or_right, sample_number):
    """Build a one-row DataFrame for a sample from ``raw_pressure.csv`` and ``points.json``.

    ``raw_pressure.csv`` is read as a semicolon-delimited file with a comma
    decimal separator, skipping its first four lines. The first column is
    discarded, rows whose values sum to zero are removed, and the column-wise
    mean of the remaining rows becomes the feature vector.

    Args:
        participant_number: Name of the participant folder under ``data2`` (string).
        left_or_right: Name of the foot folder (callers pass "left" or "right").
        sample_number: Name of the sample folder (string).

    Returns:
        A DataFrame with a single row. ``input_features`` holds the list of
        column means; every entry of ``points.json`` contributes one
        ``pointType_<pointType>`` column holding its ``points``.
    """
    sample_path = os.path.normpath(
        "".join(("data2/", participant_number, "/feet/", left_or_right, "/samples/", sample_number))
    )

    raw = pd.read_csv(
        os.path.join(sample_path, "raw_pressure.csv"),
        delimiter=";",
        decimal=",",
        header=None,
        skiprows=4,
    )
    # Drop the first column by position; the remaining columns are the readings.
    readings = raw.iloc[:, 1:]
    # Keep only the rows whose row sum is non-zero before averaging.
    active_rows = readings[readings.sum(axis=1) != 0]
    features = active_rows.mean().tolist()

    with open(os.path.join(sample_path, "points.json"), 'r') as points_handle:
        annotations = json.load(points_handle)

    # Every value is wrapped in a one-element list so that the complete list
    # ends up inside a single cell of the single row.
    row = {'input_features': [features]}
    for annotation in annotations:
        kind = annotation['pointType']
        row[f'pointType_{kind}'] = [annotation['points']]

    return pd.DataFrame(row)

In [None]:
# df = load_sample_rawAdjusted("12", "left","230")
# df = load_sample("12", "left","230")

In [None]:
# Assemble the primary-task dataset: one row per sample, combining the averaged
# raw-pressure features with the annotated points of that sample.

# When True, only samples whose meta "pressure_type" contains "walk" are kept.
remove_sway_flag = True

# Per-sample frames are collected here and concatenated once at the end.
# Growing df_final with pd.concat inside the loop copies every previously
# collected row again on each iteration.
sample_frames = []

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the exported files reproducible between runs and machines.
for participant_number in sorted(os.listdir("data2")):
    if not os.path.isdir(os.path.join("data2", participant_number)):
        # Stray files (e.g. .DS_Store) next to the participant folders are not participants.
        continue
    print("Participant number: " + participant_number)
    for left_or_right in ["left", "right"]:
        print(left_or_right)
        samples_dir = os.path.join("data2", participant_number, "feet", left_or_right, "samples")
        for sample_number in sorted(os.listdir(samples_dir)):
            if not os.path.isdir(os.path.join(samples_dir, sample_number)):
                # Only sample folders can be loaded; skip stray files.
                continue
            meta_data1 = load_metadata(participant_number, left_or_right, sample_number)
            if remove_sway_flag and "walk" not in meta_data1["pressure_type"]:
                continue
            print(sample_number)

            # load the original data called pressure.csv given by Felix
            # df = load_sample(participant_number, left_or_right,sample_number)
            # load manipulated data applied on raw pressure (currently it is the average of non-zero values)
            df = load_sample_rawAdjusted(participant_number, left_or_right, sample_number)
            df['participant_number'] = participant_number
            df['left_or_right'] = left_or_right
            df['sample_number'] = sample_number
            sample_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# sample was selected. ignore_index=True yields a fresh 0..n-1 row index.
df_final = pd.concat(sample_frames, ignore_index=True) if sample_frames else pd.DataFrame()

# df_final.to_pickle("data_latest.pkl")
# df_final.to_csv("data_latest.csv", index=False)

df_final.to_pickle("data_latest_average_non_zero_raw.pkl")
df_final.to_csv("data_latest_average_non_zero_raw.csv", index=False)

In [None]:
# Number of rows (one per loaded sample) in the assembled dataset.
df_final.shape[0]

# This part is for the secondary task

In [None]:
# loading data for the secondary task

def load_sample_rawAdjusted_secondary_task(participant_number, left_or_right, sample_number):
    """Build a one-row DataFrame pairing a sample's pressure features with insole settings.

    ``raw_pressure.csv`` of the sample is read as a semicolon-delimited file with
    a comma decimal separator, skipping its first four lines. The first column
    is discarded, rows whose values sum to zero are removed, and the column-wise
    mean of the remaining rows becomes the feature vector. The insole settings
    come from ``insole.json`` in the foot folder (not the sample folder), so all
    samples of the same foot share them.

    Args:
        participant_number: Name of the participant folder under ``data2`` (string).
        left_or_right: Name of the foot folder (callers pass "left" or "right").
        sample_number: Name of the sample folder (string).

    Returns:
        A DataFrame with ``input_features`` followed by the selected insole
        columns, in the fixed order listed below.
    """
    sample_path = os.path.normpath(
        "".join(("data2/", participant_number, "/feet/", left_or_right, "/samples/", sample_number))
    )

    raw = pd.read_csv(
        os.path.join(sample_path, "raw_pressure.csv"),
        delimiter=";",
        decimal=",",
        header=None,
        skiprows=4,
    )
    # Drop the first column by position; the remaining columns are the readings.
    readings = raw.iloc[:, 1:]
    # Keep only the rows whose row sum is non-zero before averaging.
    active_rows = readings[readings.sum(axis=1) != 0]
    features = active_rows.mean().tolist()

    foot_path = os.path.normpath(
        "".join(("data2/", participant_number, "/feet/", left_or_right, "/"))
    )
    with open(os.path.join(foot_path, "insole.json"), 'r') as insole_handle:
        insole_settings = json.load(insole_handle)

    # Flatten the insole JSON into columns, then attach the feature vector
    # as a single cell.
    combined = pd.json_normalize(insole_settings)
    combined['input_features'] = [features]

    # Keep the features plus the insole fields used as targets, in this order.
    selected_columns = [
        'input_features',
        'mfk_1_entlasten', 'mfk_2_entlasten', 'mfk_3_entlasten',
        'mfk_4_entlasten', 'mfk_5_entlasten',
        'zehe_1_entlasten', 'zehe_2_entlasten', 'zehe_3_entlasten',
        'zehe_4_entlasten', 'zehe_5_entlasten',
        'pelotten_hoehe', 'pelotten_form', 'laengsgewoelbe_hoehe',
        'basis_5_entlasten', 'aussenrand_anheben', 'innenrand_anheben',
    ]
    return combined[selected_columns]

In [None]:
# Assemble the secondary-task dataset: one row per sample, combining the
# averaged raw-pressure features with the insole settings of the sample's foot.

# When True, only samples whose meta "pressure_type" contains "walk" are kept.
remove_sway_flag = True

# Per-sample frames are collected here and concatenated once at the end.
# Growing df_final with pd.concat inside the loop copies every previously
# collected row again on each iteration.
sample_frames = []

# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the exported files reproducible between runs and machines.
for participant_number in sorted(os.listdir("data2")):
    if not os.path.isdir(os.path.join("data2", participant_number)):
        # Stray files (e.g. .DS_Store) next to the participant folders are not participants.
        continue
    print("Participant number: " + participant_number)
    for left_or_right in ["left", "right"]:
        print(left_or_right)
        samples_dir = os.path.join("data2", participant_number, "feet", left_or_right, "samples")
        for sample_number in sorted(os.listdir(samples_dir)):
            if not os.path.isdir(os.path.join(samples_dir, sample_number)):
                # Only sample folders can be loaded; skip stray files.
                continue
            meta_data1 = load_metadata(participant_number, left_or_right, sample_number)
            if remove_sway_flag and "walk" not in meta_data1["pressure_type"]:
                continue
            print(sample_number)

            # Features are the column means of the raw pressure rows with a
            # non-zero sum; the insole settings are repeated for every sample
            # of the same foot.
            df = load_sample_rawAdjusted_secondary_task(participant_number, left_or_right, sample_number)
            df['participant_number'] = participant_number
            df['left_or_right'] = left_or_right
            df['sample_number'] = sample_number
            sample_frames.append(df)

# pd.concat raises on an empty list, so fall back to an empty frame when no
# sample was selected. ignore_index=True yields a fresh 0..n-1 row index.
df_final = pd.concat(sample_frames, ignore_index=True) if sample_frames else pd.DataFrame()

# df_final.to_pickle("data_latest.pkl")
# df_final.to_csv("data_latest.csv", index=False)

df_final.to_pickle("data_latest_average_non_zero_raw_secondary_task_repeated_insole.pkl")
df_final.to_csv("data_latest_average_non_zero_raw_secondary_task_repeated_insole.csv", index=False)

In [None]:
# Number of rows (one per loaded sample) in the assembled dataset.
df_final.shape[0]