In [58]:
import json
import pandas as pd
from IPython.display import display
pd.set_option('display.precision', 10)
import matplotlib.pyplot as plt
import numpy as np
from sklearn import linear_model
from sklearn.preprocessing import StandardScaler
scale = StandardScaler()
from tensorflow.keras.preprocessing.sequence import pad_sequences
%matplotlib qt

uuid = "99AAC7DE-4106-43AB-AA80-F5816AB4DB85"

class importData():
    def __init__(self):
        return
        
    def import_data(self, uuid, spacing):
        self.uuid = uuid
        raw_df, timestamps_df = self.import_uuid(uuid)
        trimmed_raw_df, trimmed_timestamps_df = self.trim_dataframes(raw_df, timestamps_df)
        split_data_list, split_timestamps_list = self.get_split_datasets(trimmed_raw_df, trimmed_timestamps_df, spacing)
        self.split_data_final, self.split_timestamps_final = self.preprocess_data(split_data_list, split_timestamps_list)
        self.d = self.split_data_final
        self.t = self.split_timestamps_final
        return self.d, self.t
        
    def import_uuid(self, uuid):
        
        # Define the file path
        raw_data_filepath = "Data/" + self.uuid + ".json"
        timestamps_filepath = "Data/Timestamps_" + self.uuid + ".json"
        
        raw_df = self.import_data_json(raw_data_filepath)
        timestamps_df = self.import_timestamp_json(timestamps_filepath)
        return (raw_df, timestamps_df)

    def import_data_json(self, filepath):
        # Read the JSON data from the file
        with open(filepath, 'r') as file:
            data = json.load(file)

        # Extract workoutDatas
        workout_datas = data["workoutDatas"]

        # Create a DataFrame
        df = pd.json_normalize(workout_datas)
        return df

    def import_timestamp_json(self, timestamps_filepath):
        # return pd.read_json(timestamps_filepath)
        with open(timestamps_filepath, 'r') as file:
            data = json.load(file)

        # Step 2: Structure the data
        # Create a dictionary to hold structured data
        structured_data = {
            "startTime": data["startTime"],
            "endTime": data["endTime"],
            "groundLeaveTimes": data["groundLeaveTimes"],
            "airLeaveTimes": data["airLeaveTimes"]
        }

        # Step 3: Create the DataFrame
        # If you want to have each time in a separate row, you can normalize the lists
        df_ground = pd.DataFrame(structured_data["groundLeaveTimes"], columns=["groundLeaveTimes"])
        df_air = pd.DataFrame(structured_data["airLeaveTimes"], columns=["airLeaveTimes"])

        # Combine into a single DataFrame if needed
        df = pd.concat([df_ground, df_air], axis=1)

        # Add startTime and endTime to every row if needed
        df["startTime"] = structured_data["startTime"]
        df["endTime"] = structured_data["endTime"]
        return df
    
    def trim_dataframes(self, raw_df, timestamps_df):
        mask = (raw_df['time'] >= timestamps_df['startTime'][0]) * (raw_df['time'] <= timestamps_df['endTime'][0])
        trimmed_raw_df = raw_df[mask]
        trimmed_raw_df["time"] = trimmed_raw_df["time"].apply(lambda x: x - timestamps_df["startTime"][0])
        timestamps_df["airLeaveTimes"] = timestamps_df["airLeaveTimes"].apply(lambda x: x - timestamps_df["startTime"][0])
        self.end_time = timestamps_df["endTime"][0] - timestamps_df["startTime"][0]
        trimmed_raw_df.pop("id")
        trimmed_raw_df.pop("motion.magneticField.x")
        trimmed_raw_df.pop("motion.magneticField.y")
        trimmed_raw_df.pop("motion.magneticField.z")
        trimmed_raw_df.pop("heartRate")
        trimmed_raw_df.pop("workoutType")
        trimmed_raw_df.pop("motion.rotationMatrix.m11")
        trimmed_raw_df.pop("motion.rotationMatrix.m12")
        trimmed_raw_df.pop("motion.rotationMatrix.m13")
        trimmed_raw_df.pop("motion.rotationMatrix.m21")
        trimmed_raw_df.pop("motion.rotationMatrix.m22")
        trimmed_raw_df.pop("motion.rotationMatrix.m23")
        trimmed_raw_df.pop("motion.rotationMatrix.m31")
        trimmed_raw_df.pop("motion.rotationMatrix.m32")
        trimmed_raw_df.pop("motion.rotationMatrix.m33")
        return (trimmed_raw_df, timestamps_df)

    def get_split_datasets(self, trimmed_raw_df, timestamps_df, spacing):
        start_10s_times = np.arange(0, np.ceil(self.end_time)-10, spacing)
        start_7s_times = np.arange(0, np.ceil(self.end_time)-7, spacing)
        print(start_7s_times)

        datasets_10s = []
        timestamps_10s = []

        sample_frequency = 100
        for start_time in start_7s_times:
            mask = (trimmed_raw_df["time"] > start_time).tolist()
            first_survive = mask.index(1)
            mask = np.array(mask)
            mask[first_survive + 7*sample_frequency:] = 0
            start_time_dataset = trimmed_raw_df[mask]
            start_time_dataset["time"] = start_time_dataset["time"] - start_time
            if len(start_time_dataset) == 7*sample_frequency:
                datasets_10s.append(start_time_dataset)
                mask = (timestamps_df["airLeaveTimes"] > start_time) * (timestamps_df["airLeaveTimes"] < start_time +7)
                start_time_timestamps = np.array(timestamps_df["airLeaveTimes"].tolist())[mask]
                start_time_timestamps = start_time_timestamps - start_time
                timestamps_10s.append(start_time_timestamps)
        return (datasets_10s, timestamps_10s)

    def preprocess_data(self, datasets, timestamp_datasets):
        evaluated_datasets = []
        for dataset in datasets:
            x = dataset.values.reshape(-1,10)
            evaluated_datasets.append(x)

        max_seq_length = max(len(seq) for seq in timestamp_datasets)
        Y_padded = pad_sequences(timestamp_datasets, maxlen=max_seq_length, padding='post', dtype='float32')
        num_samples = len(evaluated_datasets)
        Y_padded = Y_padded.reshape(num_samples, max_seq_length, 1)
        X = np.array(evaluated_datasets).reshape(num_samples, 10, 700, 1)
        return X, Y_padded

In [59]:
# import_data_1 = importData(uuid, 0.1)
# import_data_1.t
import_tool = importData()
d, t = import_tool.import_data(uuid, 0.1)
print(d)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  trimmed_raw_df["time"] = trimmed_raw_df["time"].apply(lambda x: x - timestamps_df["startTime"][0])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  start_time_dataset["time"] = start_time_dataset["time"] - start_time


[0.000e+00 1.000e-01 2.000e-01 ... 1.047e+02 1.048e+02 1.049e+02]
[[[[ 3.58486176e-03]
   [ 9.88531113e-03]
   [ 2.53720880e-02]
   ...
   [ 5.92723191e-02]
   [ 9.97607350e-01]
   [ 3.55814546e-02]]

  [[ 8.03950787e-01]
   [-1.37292325e-01]
   [ 3.22643667e-03]
   ...
   [-4.34377454e-02]
   [ 7.30344832e-01]
   [-6.81696236e-01]]

  [[ 1.60455775e+00]
   [ 4.70404327e-01]
   [ 2.20400915e-01]
   ...
   [-8.78408611e-01]
   [-1.46480143e-01]
   [-4.54908490e-01]]

  ...

  [[ 5.27552295e+00]
   [ 1.76197112e-01]
   [ 2.73867399e-02]
   ...
   [-7.85975277e-01]
   [-2.62954563e-01]
   [-5.59551537e-01]]

  [[ 6.07728982e+00]
   [ 8.46442580e-03]
   [ 7.95156956e-02]
   ...
   [-8.18318367e-01]
   [-5.71033180e-01]
   [ 6.53926954e-02]]

  [[ 6.82721496e+00]
   [-1.17825091e-01]
   [ 1.02417469e-02]
   ...
   [-8.72837484e-01]
   [-3.80014330e-01]
   [-3.06176275e-01]]]


 [[[ 7.97748566e-05]
   [ 4.35042381e-02]
   [-4.42981720e-03]
   ...
   [ 3.31909247e-02]
   [ 9.97830570e-01]
   

In [66]:
import tensorflow as tf

index_to_check = 1000
# Load the TFLite model and allocate tensors
interpreter = tf.lite.Interpreter(model_path="my_model.tflite")
interpreter.allocate_tensors()
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_data = d.astype(np.float32)[index_to_check]
input_data = input_data.reshape(1,10,700,1)
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()
output_data = interpreter.get_tensor(output_details[0]['index'])

print(output_data)
print(t[index_to_check])


[[[ 2.160213  ]
  [ 5.3006144 ]
  [-0.03113973]]]
[[2.5909085]
 [6.0209255]
 [0.       ]]
