In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torch.nn.functional as F
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import math
import copy
import sys
from torch.utils.data import DataLoader, TensorDataset, Subset
from sklearn.model_selection import train_test_split
import pickle


In [15]:
def load_dataset(filename):
    with open(filename, 'rb') as f:
        data = pickle.load(f)
        return data

In [16]:
import os

raw_data_path  = '/home/augustine/lfd_ws/src/skill_transfer/hmpar_former/data_process/raw_data/'
fileroot='24_09_09/'

# Create an empty numpy array to store the dataset
datasets = []
for file in os.listdir(raw_data_path+fileroot):
    if file.endswith(".pkl"):
        data_pkl = load_dataset(raw_data_path + fileroot + file)
        print("Loaded file: ", raw_data_path + fileroot + file)
        # trimp the data
        n = 0 # number of frame from the start
        m = 10 # number of frame from the end
        data_pkl = {key: value[n:-m] if m > 0 else value[n:] for key, value in data_pkl.items()}
        print("Data shape:", data_pkl['timestamp'].shape)
        datasets.append(data_pkl)



Loaded file:  /home/augustine/lfd_ws/src/skill_transfer/hmpar_former/data_process/raw_data/24_09_14/Khoa_06.pkl
Data shape: (47, 1)
Loaded file:  /home/augustine/lfd_ws/src/skill_transfer/hmpar_former/data_process/raw_data/24_09_14/Khoa_04.pkl
Data shape: (39, 1)
Loaded file:  /home/augustine/lfd_ws/src/skill_transfer/hmpar_former/data_process/raw_data/24_09_14/Khoa_01.pkl
Data shape: (49, 1)
Loaded file:  /home/augustine/lfd_ws/src/skill_transfer/hmpar_former/data_process/raw_data/24_09_14/Khoa_10.pkl
Data shape: (44, 1)
Loaded file:  /home/augustine/lfd_ws/src/skill_transfer/hmpar_former/data_process/raw_data/24_09_14/Khoa_11.pkl
Data shape: (41, 1)
Loaded file:  /home/augustine/lfd_ws/src/skill_transfer/hmpar_former/data_process/raw_data/24_09_14/Khoa_09.pkl
Data shape: (37, 1)
Loaded file:  /home/augustine/lfd_ws/src/skill_transfer/hmpar_former/data_process/raw_data/24_09_14/Khoa_05.pkl
Data shape: (45, 1)
Loaded file:  /home/augustine/lfd_ws/src/skill_transfer/hmpar_former/data_pr

In [17]:
def robust_normalize_data_with_clipping(data, medians_per_joint_axis, iqrs_per_joint_axis, normalized_data, clipping_percentiles=(1, 99)):
    for joint in range(data.shape[1]):  # For each joint
        for axis in range(data.shape[2]):  # For each axis (x, y, z)
            joint_axis_data = data[:, joint, axis]
            # Determine clipping thresholds based on percentiles
            lower_threshold, upper_threshold = np.percentile(joint_axis_data, clipping_percentiles)
            # Clip the data based on thresholds
            clipped_values = np.clip(joint_axis_data, lower_threshold, upper_threshold)
            # Normalize the clipped data, avoiding division by zero
            if iqrs_per_joint_axis[joint, axis] > 0:
                normalized_values = (clipped_values - medians_per_joint_axis[joint, axis]) / iqrs_per_joint_axis[joint, axis]
            else:
                normalized_values = clipped_values  # Keep original values if IQR is 0
            # Store the normalized values
            normalized_data[:, joint, axis] = normalized_values
    return normalized_data


def calculate_combined_statistics(data_list):
    combined_data = np.concatenate(data_list, axis=0)
    medians = np.median(combined_data, axis=0)
    q75, q25 = np.percentile(combined_data, [75, 25], axis=0)
    iqrs = q75 - q25
    return medians, iqrs

def process_datasets_with_combined_normalization(datasets):
    results = {}
    pos_list, vel_list, acc_list = [], [], []
    for dataset in datasets:
        pos_list.append(dataset['points'])
        vel_list.append(dataset['velocity'])
        acc_list.append(dataset['acceleration'])

    # Calculate combined statistics
    medians_pos, iqrs_pos = calculate_combined_statistics(pos_list)
    medians_vel, iqrs_vel = calculate_combined_statistics(vel_list)
    medians_acc, iqrs_acc = calculate_combined_statistics(acc_list)

    print("Combined statistics:")
    print(f"  Position medians shape: {medians_pos.shape}")
    print(f"  Position IQRs shape: {iqrs_pos.shape}")
    print(f"  Velocity medians shape: {medians_vel.shape}")
    print(f"  Velocity IQRs shape: {iqrs_vel.shape}")

        
    for i, (pos, vel, acc) in enumerate(zip(pos_list, vel_list, acc_list), 1):
        print(i)
        print(pos.shape)
        norm_pos = np.empty_like(pos)
        norm_vel = np.empty_like(vel)
        norm_acc = np.empty_like(acc)
        # print(pos.shape[0])
        
        norm_pos = robust_normalize_data_with_clipping(pos, medians_pos, iqrs_pos, norm_pos)
        norm_vel = robust_normalize_data_with_clipping(vel, medians_vel, iqrs_vel, norm_vel)
        norm_acc = robust_normalize_data_with_clipping(acc, medians_acc, iqrs_acc, norm_acc)

        results[f"dataset{i}_normpos"] = norm_pos
        results[f"dataset{i}_normvel"] = norm_vel
        results[f"dataset{i}_normacc"] = norm_acc

        print(f"Calculated and normalized for dataset{i}:")
        print(f"  Position shape: {norm_pos.shape}")
        print(f"  Velocity shape: {norm_vel.shape}")
        print(f"  Acceleration shape: {norm_acc.shape}")
        print()

    # Store the combined statistics
    results["combined_medians_pos"] = medians_pos
    results["combined_iqrs_pos"] = iqrs_pos
    results["combined_medians_vel"] = medians_vel
    results["combined_iqrs_vel"] = iqrs_vel
    results["combined_medians_acc"] = medians_acc
    results["combined_iqrs_acc"] = iqrs_acc

    return results

# Usage example

results = process_datasets_with_combined_normalization(datasets)

Combined statistics:
  Position medians shape: (6, 3)
  Position IQRs shape: (6, 3)
  Velocity medians shape: (6, 3)
  Velocity IQRs shape: (6, 3)
1
(47, 6, 3)
Calculated and normalized for dataset1:
  Position shape: (47, 6, 3)
  Velocity shape: (47, 6, 3)
  Acceleration shape: (47, 6, 3)

2
(39, 6, 3)
Calculated and normalized for dataset2:
  Position shape: (39, 6, 3)
  Velocity shape: (39, 6, 3)
  Acceleration shape: (39, 6, 3)

3
(49, 6, 3)
Calculated and normalized for dataset3:
  Position shape: (49, 6, 3)
  Velocity shape: (49, 6, 3)
  Acceleration shape: (49, 6, 3)

4
(44, 6, 3)
Calculated and normalized for dataset4:
  Position shape: (44, 6, 3)
  Velocity shape: (44, 6, 3)
  Acceleration shape: (44, 6, 3)

5
(41, 6, 3)
Calculated and normalized for dataset5:
  Position shape: (41, 6, 3)
  Velocity shape: (41, 6, 3)
  Acceleration shape: (41, 6, 3)

6
(37, 6, 3)
Calculated and normalized for dataset6:
  Position shape: (37, 6, 3)
  Velocity shape: (37, 6, 3)
  Acceleration sh

In [18]:
# data=dataset2_pos
# norm_data=results["dataset2_normpos"]
# # Plot the original and normalized data for a specific joint and axis
# joint, axis = 0, 0  # Change as needed
# plt.figure(figsize=(12, 6))
# plt.subplot(1, 2, 1)
# plt.hist(data[:, joint, axis], bins=20, alpha=0.7, label='Original')
# plt.title("Original Data Distribution")
# plt.xlabel("Value")
# plt.ylabel("Frequency")

# plt.subplot(1, 2, 2)
# plt.hist(norm_data[:, joint, axis], bins=20, alpha=0.7, label='Normalized')
# plt.title("Normalized Data Distribution")
# plt.xlabel("Value")
# plt.ylabel("Frequency")

# plt.tight_layout()
# plt.show()

# # Check the median and range of the normalized data
# normalized_median = np.nanmedian(norm_data[:, joint, axis])
# print("Median of normalized data:", normalized_median)

# within_iqr = ((norm_data[:, joint, axis] > -2) & (norm_data[:, joint, axis] < 2)).sum()
# print(f"Data points within [-1, 1] (IQR): {within_iqr} out of {norm_data.shape[0]}")

# within_iqr2 = ((data[:, joint, axis] > -1) & (data[:, joint, axis] < 1)).sum()
# print(f"Original Data points within [-1, 1] (IQR): {within_iqr2} out of {norm_data.shape[0]}")

In [19]:

# def save_results_to_pickle(results, filename):
#     with open(filename, 'wb') as f:
#         pickle.dump(results, f)

#Add to the training DIC instead of new dic?
unnormalised = {}
for i in range(1, len(datasets)+1):
    unnormalised[f"dataset{i}_pos"] = datasets[i-1]['points']
    unnormalised[f"dataset{i}_vel"] = datasets[i-1]['velocity']
    unnormalised[f"dataset{i}_acc"] = datasets[i-1]['acceleration'] 

process_data_path = '/home/augustine/lfd_ws/src/skill_transfer/hmpar_former/data_process/process_data/'
file_root = '24_09_09/'
filename1 = 'training_raw.pkl'
filename2 = 'training_raw_normalize.pkl'
# Save the results
with open(process_data_path + file_root + filename1, 'wb') as f:
    pickle.dump(unnormalised, f)
print(f"Results saved to {process_data_path + file_root + filename1}")

# Save the results
with open(process_data_path + file_root + filename2, 'wb') as f:
    pickle.dump(results, f)
print(f"Results saved to {process_data_path + file_root + filename2}")


# Print the data
# for key, value in results.items():
#     print(f"Key: {key}")
#     print(f"Shape: {value.shape}")



Results saved to /home/augustine/lfd_ws/src/skill_transfer/hmpar_former/data_process/process_data/24_09_14/training_raw.pkl
Results saved to /home/augustine/lfd_ws/src/skill_transfer/hmpar_former/data_process/process_data/24_09_14/training_raw_normalize.pkl
