In [None]:
import numpy as np
from pprint import pprint
import os, csv, json, re

import random, pickle 
import pandas as pd

from sklearn.datasets import make_regression
# from autosklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split

import autosklearn

### This part predicts the point coordinates and saves them to `predictedPoints.json`

Inputs are the pickled model file (`.pkl`; set its full path below — currently it is in the same folder as this notebook) and `raw_pressure.csv`, which is read from the folder given below (currently also the same folder as this notebook).

Output is the JSON file `predictedPoints.json` containing the predicted data points (currently it is written to the same folder).

In [None]:
# Model used for the coordinate prediction.
# 'data_averaged_time5hours_perRun300_model.pkl' is about 1 GB in size because of
# data augmentation. For a smaller but less accurate model, use the no-augmentation
# variant 'data_averaged_time5hours_perRun300_model_no_augmentation.pkl' instead.
model_file_full_path = 'data_averaged_time5hours_perRun300_model.pkl'  # put the full path of the model file here

# Folder holding the pressure recording. The recording is expected to be called
# raw_pressure.csv; that file name is set inside load_sample_xtest.
pressure_folder_path = "."  # currently the same location as this notebook

# Folder that receives the generated JSON output.
json_output_path = "."  # currently the same folder as this notebook

In [None]:
def load_sample_xtest(test_path, csv_name="raw_pressure.csv"):
    """Build the single-row feature matrix for one pressure recording.

    Reads ``csv_name`` from ``test_path`` (semicolon-delimited, comma as the
    decimal mark, no header row, first four lines skipped), discards the first
    column, ignores every row whose remaining values sum to zero, and averages
    the remaining rows column by column.

    Args:
        test_path: Folder that contains the recording.
        csv_name: File name of the recording inside ``test_path``. Defaults to
            ``"raw_pressure.csv"``, the name that was previously hard coded.

    Returns:
        numpy.ndarray of shape ``(1, n_columns - 1)`` with the per-column mean
        over the rows that have a non-zero sum.

    Raises:
        ValueError: If no row with a non-zero sum is left. Averaging nothing
            would otherwise yield an all-NaN feature vector that is silently
            passed on to the model.
    """
    sample_dir = os.path.normpath(test_path)
    csv_file = os.path.join(sample_dir, csv_name)

    recording_raw = pd.read_csv(csv_file, delimiter=";", decimal=",", header=None, skiprows=4)

    # The first column is not used as a feature
    # (presumably a frame index or timestamp -- TODO confirm).
    recording = recording_raw.iloc[:, 1:]

    # Keep only the rows that carry any signal.
    recording_active = recording[recording.sum(axis=1) != 0]
    if recording_active.empty:
        raise ValueError(f"No rows with a non-zero sum found in {csv_file!r}")

    pressure_data = recording_active.mean().tolist()
    return np.asarray(pressure_data).reshape(1, len(pressure_data))

In [None]:
# Build the single-row model input from the recording found in pressure_folder_path.
x_test = load_sample_xtest(pressure_folder_path)
# Shown as the cell output: quick check of the (1, n_features) shape.
x_test.shape

In [None]:
# Load the trained model used for the coordinate prediction.
# NOTE(security): pickle.load can execute arbitrary code contained in the file;
# only load model files that come from a trusted source.
with open(model_file_full_path, 'rb') as f:
    automl = pickle.load(f)

# Bare last expression so the notebook renders the leaderboard as a rich table
# instead of the plain-text dump produced by print().
automl.leaderboard()

In [None]:
# Pretty-print what show_models() reports for the loaded model.
pprint(automl.show_models(), indent=4)

In [None]:
# Run the loaded model on the single-row feature matrix.
predictions = automl.predict(x_test)
# No error metric is computed here: y_test is not defined in the cells shown.

In [None]:
def save_sample(df, dtest_path):
    """Write the predicted points of the first sample to ``predictedPoints.json``.

    Each column named ``pointType_<n>`` contributes one entry of the form
    ``{"points": <value in the first row>, "pointType": <n>}``. Columns with any
    other name are skipped, so extra columns in ``df`` (for example
    ``mfk_1_entlasten``, whose name also contains a digit) are never exported
    as points.

    Args:
        df: DataFrame with ``pointType_<n>`` columns. Only the first row is
            exported.
        dtest_path: Folder that receives ``predictedPoints.json``. It is not
            created here and must already exist.
    """
    sample_dir = os.path.normpath(dtest_path)

    # One dictionary per point column.
    points_data = []
    for column_name, column in df.items():
        point_match = re.fullmatch(r'pointType_(\d+)', str(column_name))
        if point_match is None:
            # Not a point column; leave it out of the points file.
            continue
        points_data.append({
            "points": column.to_list()[0],
            "pointType": int(point_match.group(1)),
        })

    points_file = os.path.join(sample_dir, "predictedPoints.json")
    with open(points_file, 'w') as file:
        json.dump(points_data, file, indent=2)  # Indent for readability

In [None]:
# Layout of the model output: num_columns point types with two values each
# (presumably the x/y coordinates of a point -- confirm against the training code).
num_columns = 22  # Number of columns (pointType_<num>)
column_names = [f"pointType_{i}" for i in range(num_columns)]

# Flat predictions -> (n_samples, num_columns, 2).
y_prediction = np.asarray(predictions.tolist())
arr_3d = np.reshape(y_prediction, (len(y_prediction), num_columns, 2))

# Move the point-type axis to the front so that each column name pairs with the
# list of two-value entries (one per sample) of its point type.
data_dict = dict(zip(column_names, arr_3d.transpose(1, 0, 2).tolist()))

# One column per point type, one row per sample.
df_reversed = pd.DataFrame(data_dict)

In [None]:
# Pass only the point columns so that re-running this cell after further columns
# have been added to df_reversed still writes the same points file.
save_sample(df_reversed[column_names], json_output_path)

### This part predicts the insole parameters, prints them, saves them as `insole.json` (using the same JSON output path specified at the beginning), and saves all predictions to the CSV file `all_predictions.csv` for reference

In [None]:

# Insole parameters to predict; one separately trained model is loaded per name.
parameters_list = ['mfk_1_entlasten', 'mfk_2_entlasten',
       'mfk_3_entlasten', 'mfk_4_entlasten', 'mfk_5_entlasten',
       'zehe_1_entlasten', 'zehe_2_entlasten', 'zehe_3_entlasten',
       'zehe_4_entlasten', 'zehe_5_entlasten', 'pelotten_hoehe',
       'pelotten_form', 'laengsgewoelbe_hoehe', 'basis_5_entlasten',
       'aussenrand_anheben', 'innenrand_anheben']

for feature_name in parameters_list:
    print("Current Feature Classifier:", feature_name)

    # The model file is looked up relative to the current working directory.
    # NOTE(review): there is no separator between the feature name and
    # "model.pkl"; kept as is, presumably matching the names the files were
    # saved under -- confirm.
    feature_model_path = 'data_averaged_time5hours_perRun300_secondary_task_' + feature_name + 'model.pkl'

    # NOTE(security): pickle.load can execute arbitrary code contained in the
    # file; only load model files that come from a trusted source.
    # Distinct names are used here so that the notebook-level `automl` and
    # `predictions` of the coordinate model are not overwritten, which would
    # break a re-run of the earlier cells.
    with open(feature_model_path, 'rb') as f:
        feature_model = pickle.load(f)

    feature_predictions = feature_model.predict(x_test)
    print(feature_predictions)

    # Store the prediction next to the point columns.
    df_reversed[feature_name] = feature_predictions

# Parameter name -> predicted value for the first sample (row label 0).
parameters_dict = df_reversed[parameters_list].T.to_dict()[0]

# Write the parameter dictionary to insole.json in the JSON output folder.
parameters_file = os.path.join(json_output_path, "insole.json")
with open(parameters_file, 'w') as file:
    json.dump(parameters_dict, file, indent=2)  # Indent for readability

# Points and parameters together, for reference (written to the current working directory).
df_reversed.to_csv("all_predictions.csv", index=False)