In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler


In [None]:
# --- Configuration: where the raw log is read from and where the processed copy goes.
ORIGINAL_DATA_PATH = "../../sim_mujoco/data/"  # directory holding the input CSV
EXPORT_DATA_PATH = "./updated_data/"  # directory receiving the exported CSV
ORIGINAL_NAME = "in_place_long"  # base file name, without extension
EXPORT_VER = "v1"  # version tag appended to the exported file name

# Full file paths assembled from the pieces above (the directory strings already end in "/").
INPUT_FILE = f"{ORIGINAL_DATA_PATH}{ORIGINAL_NAME}.csv"
OUTPUT_FILE = f"{EXPORT_DATA_PATH}{ORIGINAL_NAME}_{EXPORT_VER}.csv"

# CSV files to load, one entry per file. Alternatives used previously:
#   "../../sim_mujoco/data/dataset_backwards.csv",
#   "../../sim_mujoco/data/dataset_forward_sideways.csv",
#   "../../sim_mujoco/data/dataset_misc.csv"
#   "../../sim_mujoco/data/in_place_long.csv"
data_rel_paths = [INPUT_FILE]

In [None]:
# Per-joint column names follow the pattern "<side>_<joint>_<suffix>".
_LEG_SIDES = ("L", "R")
_LEG_JOINTS = ("YAW", "HAA", "HFE", "KFE", "ANKLE")


def _per_joint_columns(suffix):
    """Return the ten "<side>_<joint>_<suffix>" names: all left joints, then all right joints."""
    return [f"{side}_{joint}_{suffix}" for side in _LEG_SIDES for joint in _LEG_JOINTS]


def _foot_columns(foot):
    """Return the contact-timer column followed by the x/y/z position columns of one foot."""
    return [f"{foot}_t_since_contact"] + [f"{foot}_pos_{axis}_BF" for axis in "xyz"]


# Columns read from the CSV as the state: joint positions, joint velocities,
# then the remaining body/command/foot columns in the order listed below.
state_columns = (
    _per_joint_columns("pos")
    + _per_joint_columns("vel")
    + ["vel_x_BF", "vel_y_BF", "vel_z_BF"]
    + ["normal_vec_x_BF", "normal_vec_y_BF", "normal_vec_z_BF"]
    + ["omega_x", "omega_y", "omega_z"]
    + ["vx_des_BF", "vy_des_BF"]
    # The "<foot>_t_since_no_contact" columns are currently excluded for both feet.
    + _foot_columns("right_foot")
    + _foot_columns("left_foot")
)

# Columns read from the CSV as the action: tau_ff, q_des and q_vel_des for every joint.
action_columns = (
    _per_joint_columns("tau_ff")
    + _per_joint_columns("q_des")
    + _per_joint_columns("q_vel_des")
)

# Load every CSV listed in `data_rel_paths` and stack their rows into one frame.
# All files are read first and concatenated once; concatenating inside the loop
# re-copies the rows accumulated so far on every iteration.
dataset = pd.concat([pd.read_csv(dp) for dp in data_rel_paths])
num_steps = dataset.shape[0]  # row count of the full frame, before any rows are dropped below
states = dataset[state_columns].to_numpy(dtype=np.float64)
actions = dataset[action_columns].to_numpy(dtype=np.float64)

# Remove the first data points, which seem like noise (when the robot is floating down).
# NOTE(review): the cut is applied once to the concatenated rows, so when several files
# are listed only the start of the first file is removed -- confirm that is intended.
NUM_WARMUP_STEPS = 1000
states = states[NUM_WARMUP_STEPS:, :]
actions = actions[NUM_WARMUP_STEPS:, :]

In [None]:
# --- VIEW DATA COLUMNS AS LINE PLOTS
def view_data(factor, data, data_columns):
    """Draw every column of ``data`` as a line plot, a few columns per figure.

    The columns are split into consecutive groups of ``len(data_columns) // factor``
    (at least one column per group). Each group gets its own figure containing one row
    of side-by-side subplots, sized to the group width rather than a fixed count.
    When the column count is not a multiple of ``factor``, the remaining columns are
    drawn in additional figures instead of being dropped.

    Args:
        factor: number of figures the columns are divided across (leftover columns
            add further figures). Nothing is drawn when it is smaller than 1.
        data: array indexed as ``data[:, column]``.
        data_columns: column names in the same order as the columns of ``data``;
            used as subplot titles.
    """
    num_columns = len(data_columns)
    if factor < 1 or num_columns == 0:
        return  # nothing to draw

    per_figure = max(1, num_columns // factor)
    for start in range(0, num_columns, per_figure):
        # A fresh figure per group: the previous one has already been handed to show().
        plt.figure()
        stop = min(start + per_figure, num_columns)
        for slot, index in enumerate(range(start, stop)):
            plt.subplot(1, per_figure, slot + 1)
            plt.plot(data[:, index])
            plt.title(data_columns[index])
        plt.tight_layout()
        plt.show()


# Example usage (first 1000 rows of every state column):
# view_data(8, states[:1000, :], state_columns)

In [None]:
# --- create multiple subplots to compare the actions
# view_data(6, actions, action_columns)

In [None]:
# -- Inspect the value distribution of each column
def plot_hist(data, data_columns):
    """Show one histogram per column of ``data``, each titled with its column name.

    Columns are taken as ``data[:, i]`` for ``i`` in ``range(data.shape[1])`` and the
    title comes from ``data_columns[i]``. Every histogram is shown on its own.
    """
    column_count = data.shape[1]
    for column_index in range(column_count):
        values = data[:, column_index]
        plt.hist(values)
        label = data_columns[column_index]
        plt.title(label)
        plt.show()

# plot_hist(states, state_columns)
# plot_hist(actions, action_columns)

In [None]:
# --- Min-max scale the states, then compute the covariance matrix of the scaled values
# Fit the scaler on the states first, then transform that same array with it.
scaler_s = MinMaxScaler()
scaler_s.fit(states)
scaled_states = scaler_s.transform(states)

# Optional line-plot check of the scaled columns:
# view_data(8, scaled_states, state_columns)

# Label the scaled array with the state column names and take the pairwise covariance.
df = pd.DataFrame(data=scaled_states, columns=state_columns)
cov_matrix = df.cov()
# sns.heatmap(cov_matrix)
 
# --- view the covariance matrix of the actions, removing two columns
# action_columns2 = [
#     "L_YAW_tau_ff", "L_HAA_tau_ff", "L_HFE_tau_ff", "L_KFE_tau_ff", "L_ANKLE_tau_ff",
#     "R_YAW_tau_ff", "R_HAA_tau_ff", "R_HFE_tau_ff", "R_KFE_tau_ff", "R_ANKLE_tau_ff",
#     "L_YAW_q_des", "L_HAA_q_des", "L_HFE_q_des", "L_KFE_q_des", "L_ANKLE_q_des",
#     "R_YAW_q_des", "R_HAA_q_des", "R_HFE_q_des", "R_KFE_q_des", "R_ANKLE_q_des",
#     "L_YAW_q_vel_des", "L_HAA_q_vel_des", "L_HFE_q_vel_des", "L_KFE_q_vel_des", "L_ANKLE_q_vel_des",
#     "R_YAW_q_vel_des", "R_HAA_q_vel_des",
# ]
# actions2 = dataset[action_columns2].to_numpy(dtype=np.float64)
# df2 = pd.DataFrame(actions2, columns=action_columns2)
# sns.heatmap(df2.cov())

In [None]:
# SAVE THE NORMALIZED DATA
# Place the scaled states and the actions (left unscaled) side by side, states first.
scaled_data = np.concatenate((scaled_states, actions), axis=1)

# Wrap the combined array in a dataframe labelled with the original column names.
df = pd.DataFrame(scaled_data, columns=state_columns + action_columns)

# Make sure the export directory exists: to_csv does not create missing parent
# directories and fails when the target folder is absent.
Path(OUTPUT_FILE).parent.mkdir(parents=True, exist_ok=True)

# Write the CSV without the row index so the file holds only the named columns.
df.to_csv(OUTPUT_FILE, index=False)