# Description

This notebook takes in the sim_cahce file from the benrules_v2 simulator that runs random simulations on a given number of satellites.  We will extract the satellite data from all the the run simulations and combine into a single pandas dataframe. \

First part of the notebook is dedicated to visualizing satellite position over time.

# Imports

In [1]:
# Data Processing Libraries
import numpy as np
import pandas as pd
# Randomization Libraries to Select Random Points in the simulation.
import random
# Libraries for file reading
import h5py
# Bokeh and plotting related imports
# Plotting Imports
import bokeh.io
bokeh.io.output_notebook()  # Set plot output to embed in notebook.
import bokeh.layouts
import bokeh.plotting
# Other imports for multi-plot figures.
from bokeh.io import output_file, show
from bokeh.layouts import column
from bokeh.plotting import figure
# Memory management libraries for Python
import gc
# Progress Bar Libraries
from tqdm import tqdm

In [2]:
# Remove max columns and row limit on pandas
pd.options.display.max_columns = None
pd.options.display.max_rows = 50000

# Setup Plotting Functions

In [3]:
from bokeh.palettes import Turbo256 as palette
import itertools
from random import randint

def plot_2D_body_time_series(pos_x_list, pos_y_list, plot_width, plot_height, title):
    """
    Accepts lists for x and y dimensions whose elements are time series data and whose
    index represents the number of the body in the simulation.
    
    returns Bokeh figure to plot.
    """
    # Create Bokeh figure to add plots to
    f = bokeh.plotting.figure(
        title = title,
        plot_width = plot_width,
        plot_height = plot_height
    )

    # Generate line for each body.
    # Randomly select color from palette using randint
    for i in range(0,len(pos_x_list)):
        f.line(
            pos_x_list[i],
            pos_y_list[i],
            line_width = 1,
            color = palette[randint(0,255)],
            legend_label = str(i)
        )
    f.legend.location = 'top_left'
    return f

def two_dim_numpy_data_to_2D_pos_list(sat_pos_list):
    """
    Take in a list of satellite position data and split into X and Y position data lists for 
    each dimension.
    """
    pos_x_list = []
    pos_y_list = []
    # Take the 2D (X and Y) time series elements of the sat_pos_list and split into the 
    # Pos_x and Pos_y lists
    for satellite_pos in sat_pos_list:
        temp_x_time_series = satellite_pos[:,0]
        temp_y_time_series = satellite_pos[:,1]
        pos_x_list.append(temp_x_time_series)
        pos_y_list.append(temp_y_time_series)
    return pos_x_list, pos_y_list

def plot_data_conv_3D_np_pos_to_2D_pos_list(pos_np_array):
    """
    Accepts 3D position numpy array and extracts the time steps for each body from 
    the numpy array.  Adds those time series for each dimension to a list of 1D numpy arrays.
    """
    # List of bodies for each dimension whose elements are numpy arrays going back in time.  
    # Each element is a time series for that body and that body's dimension.
    pos_x_list = []
    pos_y_list = []
    # First index of the numpy array is the time step and second is the body number.  Will loop
    # over all bodies and slice out the time steps for each body.
    for current_body_index in range(0, pos_np_array.shape[1]):
        # Get slice of data to create time series of position values
        # Use .to_numpy() to convert to numpy array
        temp_x_time_series = pos_np_array[:, current_body_index, 0]
        temp_y_time_series = pos_np_array[:, current_body_index, 1]
        # Add time series to respective dimension.
        pos_x_list.append(temp_x_time_series)
        pos_y_list.append(temp_y_time_series)
    
    # TODO: Continue with this function to get the position data plotted.
    return pos_x_list, pos_y_list

# Read in HDF5 Files to Lists of Numpy Arrays

In [4]:
# Set folder to read hdf5 cache file from.
in_data_folder = "input_data/"
# Body index in the numpy arrays from the simulation that designates the satellite from 
# the planets
sat_index: int = 10
# Lists for storing the satellite data from each simulation in the cache.
acc_list = []
vel_list = []
pos_list = []
dis_list = []
mass_list = []

# Open the sim_cache, get the data groups, and extract datasets from it.
with h5py.File(in_data_folder + 'sim_cache.hdf5', 'r') as f:
    # Get all the groups and data_set keys
    group_keys = []
    for key in f.keys():
        group_keys.append(key)
    # Loop over all groups in the dataset and extract the numpy arrays
    for group_name in group_keys:
        group = f[group_name]
        # Grab the raw data from the file.
        temp_acc = group['acc'][()]
        temp_vel = group['vel'][()]
        temp_pos = group['pos'][()]
        temp_dis = group['dis'][()]
        temp_mass = group['mass'][()]
        # Extract the Satellite data only
        # Only grabbing X and Y dimensions
        temp_acc = temp_acc[:, sat_index, 0:2]
        temp_vel = temp_vel[:, sat_index, 0:2]
        temp_pos = temp_pos[:, sat_index, 0:2]
        temp_dis = temp_dis[:, sat_index, 0:2]
        temp_mass = temp_mass[sat_index, 0]
        
        acc_list.append(temp_acc)
        vel_list.append(temp_vel)
        pos_list.append(temp_pos)
        dis_list.append(temp_dis)
        mass_list.append(temp_mass)

In [5]:
pos_list[0]

array([[-1.29314456e+11,  7.45151283e+10],
       [-1.28946143e+11,  7.43161938e+10],
       [-1.28577823e+11,  7.41172511e+10],
       ...,
       [ 2.47347397e+13, -2.21999478e+13],
       [ 2.47350501e+13, -2.22002267e+13],
       [ 2.47353605e+13, -2.22005056e+13]], dtype=float32)

In [6]:
pos_list[0][:,0].shape

(79999,)

In [7]:
mass_list

[1125.0, 857.0, 315.0, 2448.0, 2139.0, 2989.0]

# Plot the path of the Specified Satellite

In [8]:
# Convert all the satellite data to a list of time series for plotting.
# Will plot all satellites on the same plot.
pos_x_list, pos_y_list = two_dim_numpy_data_to_2D_pos_list(pos_list)

In [9]:
# Plot the converted data.
fig = plot_2D_body_time_series(
    pos_x_list = pos_x_list,
    pos_y_list = pos_y_list,
    plot_width = 800,
    plot_height = 800,
    title = "Paths of All Satellites for All Simulations"
)

bokeh.plotting.show(fig)

# Convert Acceleration, Velocity, Displacement, and Position Numpy Arrays to Pandas Dataframes

In [10]:
# Set the number of "shotgun" future time steps to predict.
num_ts_to_predict = 10

### Converting Acceleration to Pandas DataFrame

In [11]:
acc_list[0]

array([[ 5.6652954e+02, -3.2639435e+02],
       [ 3.2207565e-03, -1.7782038e-03],
       [ 4.7077877e-03, -2.6938766e-03],
       ...,
       [-8.9558881e-08,  8.0381902e-08],
       [-8.9556629e-08,  8.0379884e-08],
       [-8.9554383e-08,  8.0377866e-08]], dtype=float32)

In [12]:
# Stack all the satellites as bodies into a single numpy array.
stacked_raw_acc = np.stack(acc_list, axis=1)
stacked_raw_acc.shape

(79999, 6, 2)

In [13]:
# Construct the acceleration dataframe from the raw simulator velocity output data.
# Get the size of each dimension in numpy array.
# m-> the number of time steps in the simulation.
# n-> the number of bodies in the simulation.
# r-> number of dimensions in the vector holding the acceleration, displacement, etc.
acc_m,acc_n,acc_r = stacked_raw_acc.shape
# Stack the XY or XYZ arrays of each body into columns, removing a dimension.
# np.column_stack() -> takes a sequence of 1D arrays and stacks them as columns in a 2D matrix.
# np.arange() provides evenly spaced values that repeat n times.  The new index.
# Had to add 1 to arange to go from 1 to 79 instead of 0.
acc_arr = np.column_stack(
    (np.repeat(np.arange(1, acc_m+1, 1),acc_n), 
     np.tile(np.arange(0,acc_n,1),acc_m), 
     stacked_raw_acc[:,:,0:2].reshape(acc_m*acc_n,-1))
)

# Create dataframe from stacked column array.
acc_df = pd.DataFrame(acc_arr)
# Delete intermediate numpy arrays and original data from memory.
del acc_arr
gc.collect()
# Use df.groupby() to group by time step or planent and create MultiIndex for easy data referencing.
acc_df = acc_df.groupby([0,1]).mean()
acc_df.index.names = ['time_step', 'body']
acc_df.columns = ['acc_x', 'acc_y']
# Swapping to time_step then body to keep compatibility with existing code.
#acc_df = acc_df.swaplevel('time_step', 'body').sort_index(level=0)
#acc_df.head(15)
idx = pd.IndexSlice
acc_df.loc[idx[1, :], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,acc_x,acc_y
time_step,body,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,0.0,566.529541,-326.394348
1.0,1.0,0.005125,-0.002897
1.0,2.0,0.00506,-0.002811
1.0,3.0,0.004966,-0.002741
1.0,4.0,0.004897,-0.002705
1.0,5.0,0.004874,-0.002688


In [14]:
# Drop the time steps that could not be used for calculating displacecment.
# Create list of time steps to drop.
beg_drop_index = max(acc_df.index.levels[0]) - num_ts_to_predict + 1
end_drop_index = max(acc_df.index.levels[0]) + 1
drop_list = list(range(int(beg_drop_index), int(end_drop_index)))
# Drop the time steps from the displacement dataframe.
acc_df.drop(drop_list, level=0, inplace=True)
# Ouput the new dataframe
acc_df.head(10)

Unnamed: 0_level_0,Unnamed: 1_level_0,acc_x,acc_y
time_step,body,Unnamed: 2_level_1,Unnamed: 3_level_1
1.0,0.0,566.529541,-326.394348
1.0,1.0,0.005125,-0.002897
1.0,2.0,0.00506,-0.002811
1.0,3.0,0.004966,-0.002741
1.0,4.0,0.004897,-0.002705
1.0,5.0,0.004874,-0.002688
2.0,0.0,0.003221,-0.001778
2.0,1.0,0.005125,-0.002897
2.0,2.0,0.00506,-0.002812
2.0,3.0,0.004966,-0.002741


In [15]:
acc_df.shape

(479934, 2)

### Converting Velocity to Pandas DataFrame

Dataframes are really slow.  We will need to calculate the future velocities for num_ts_to_predict so that we can shotgun predict multiple time steps at the same time.  We want to do this with numpy arrays before converting to a pandas dataframe for easy grouping and indexing. \
Will first need to go through and calculate the future time steps to shotgun-predict.

In [16]:
vel_list[0]

array([[ 460401.22, -248657.19],
       [ 460403.78, -248658.61],
       [ 460407.56, -248660.77],
       ...,
       [ 388432.9 , -348194.97],
       [ 388432.9 , -348194.97],
       [ 388432.9 , -348194.97]], dtype=float32)

In [17]:
# Stack all the satellites as bodies into a single numpy array.
stacked_raw_vel = np.stack(vel_list, axis=1)
stacked_raw_vel.shape

(79999, 6, 2)

In [18]:
# Create a numpy array with the same first 2 dimensions of the input data (time steps and num bodies)
# the number of columns will be the same as the number of time steps
# being predicted multiplied by the dimensions (2) (X and Y)
vel_pred_cache = np.full(
    (stacked_raw_vel.shape[0], stacked_raw_vel.shape[1], num_ts_to_predict*2),
    np.nan,
    dtype=np.float32
)
# Loop over all time steps in the numpy array.
# Grab groups of bodies from future time steps
for curr_time_step in tqdm(range(0, stacked_raw_vel.shape[0])):
    # Loop over the number of time steps in the future to be grabbing
    # velocities from.
    # Don't do last time steps for displacement dataframe.  Can only look so many
    # time steps into the future before running out of data.
    if curr_time_step < (stacked_raw_vel.shape[0] - num_ts_to_predict):
        for num_ts_in_future in range(1, num_ts_to_predict + 1):
            vel_pred_cache[int(curr_time_step), :, int(2*num_ts_in_future-2):int(2*num_ts_in_future)] = \
            stacked_raw_vel[int(curr_time_step + num_ts_in_future), :, 0:2]
            
# Concatenate the cached numpy array to the original velocity dataset
stacked_raw_vel = np.concatenate((stacked_raw_vel[:,:,0:2], vel_pred_cache), axis=2)
# Delete the cache.
del vel_pred_cache
gc.collect()
#Display first portion of the dataframe
pd.DataFrame(stacked_raw_vel[0])

100%|██████████| 79999/79999 [00:01<00:00, 59865.60it/s]


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
0,460401.21875,-248657.1875,460403.78125,-248658.609375,460407.5625,-248660.765625,460411.5625,-248663.0625,460415.65625,-248665.421875,460419.8125,-248667.8125,460424.03125,-248670.234375,460428.28125,-248672.671875,460432.5625,-248675.140625,460436.875,-248677.625,460441.21875,-248680.125
1,5888.745605,10211.786133,5892.845703,10209.46875,5896.945312,10207.151367,5901.044922,10204.833008,5905.144531,10202.514648,5909.244141,10200.196289,5913.343262,10197.876953,5917.442383,10195.557617,5921.541504,10193.238281,5925.640137,10190.918945,5929.73877,10188.598633
2,5772.87793,10010.830078,5776.925781,10008.581055,5780.973633,10006.331055,5785.021484,10004.081055,5789.068848,10001.831055,5793.116211,9999.581055,5797.163574,9997.330078,5801.210938,9995.079102,5805.257812,9992.828125,5809.304688,9990.577148,5813.351562,9988.325195
3,6139.719727,10647.751953,6143.692871,10645.558594,6147.665527,10643.365234,6151.638184,10641.171875,6155.61084,10638.978516,6159.583496,10636.78418,6163.555664,10634.589844,6167.527832,10632.395508,6171.5,10630.201172,6175.47168,10628.005859,6179.443359,10625.810547
4,6677.309082,11580.982422,6681.226074,11578.817383,6685.143066,11576.652344,6689.060059,11574.487305,6692.976562,11572.322266,6696.893066,11570.15625,6700.80957,11567.990234,6704.725586,11565.824219,6708.641602,11563.658203,6712.557617,11561.491211,6716.473633,11559.324219
5,6265.945312,10867.014648,6269.844727,10864.864258,6273.744141,10862.713867,6277.643066,10860.5625,6281.541992,10858.411133,6285.440918,10856.259766,6289.339844,10854.108398,6293.238281,10851.956055,6297.136719,10849.803711,6301.035156,10847.651367,6304.933105,10845.499023


In [19]:
# Construct the velocity dataframe from the raw simulator velocity output data.
# Get the size of each dimension in numpy array.
# m-> the number of time steps in the simulation.
# n-> the number of bodies in the simulation.
# r-> number of dimensions in the vector holding the acceleration, displacement, etc.
vel_m,vel_n,vel_r = stacked_raw_vel.shape
# Stack the XY or XYZ arrays of each body into columns, removing a dimension.
# np.column_stack() -> takes a sequence of 1D arrays and stacks them as columns in a 2D matrix.
# np.arange() provides evenly spaced values that repeat n times.  The new index.
# Had to add 1 to arange to go from 1 to 79 instead of 0.
vel_arr = np.column_stack(
    (np.repeat(np.arange(1, vel_m+1, 1),vel_n), 
     np.tile(np.arange(0,vel_n,1),vel_m), 
    stacked_raw_vel.reshape(vel_m*vel_n,-1))
)
# Create dataframe from stacked column array.
vel_df = pd.DataFrame(vel_arr)
# Delete intermediate numpy arrays and original data from memory.
del vel_arr
gc.collect()
# Use df.groupby() to group by time step or planent and create MultiIndex for easy data referencing.
vel_df = vel_df.groupby([0,1]).mean()
vel_df.index.names = ['time_step', 'body']
# Drop the time steps that could not be used for getting future data.
# Create list of time steps to drop.
beg_drop_index = max(vel_df.index.levels[0]) - num_ts_to_predict + 1
end_drop_index = max(vel_df.index.levels[0]) + 1
drop_list = list(range(int(beg_drop_index), int(end_drop_index)))
vel_df.drop(drop_list, level=0, inplace=True)
# Create list of columns 
cols = ['vel_x', 'vel_y']
for i in range(1, num_ts_to_predict+1):
    cols.append('vel_x_' + str(i))
    cols.append('vel_y_' + str(i))
vel_df.columns = cols
# View final velocity dataframe with future time steps.
idx = pd.IndexSlice
vel_df.loc[idx[0:2,:], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,vel_x,vel_y,vel_x_1,vel_y_1,vel_x_2,vel_y_2,vel_x_3,vel_y_3,vel_x_4,vel_y_4,vel_x_5,vel_y_5,vel_x_6,vel_y_6,vel_x_7,vel_y_7,vel_x_8,vel_y_8,vel_x_9,vel_y_9,vel_x_10,vel_y_10
time_step,body,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
1.0,0.0,460401.21875,-248657.1875,460403.78125,-248658.609375,460407.5625,-248660.765625,460411.5625,-248663.0625,460415.65625,-248665.421875,460419.8125,-248667.8125,460424.03125,-248670.234375,460428.28125,-248672.671875,460432.5625,-248675.140625,460436.875,-248677.625,460441.21875,-248680.125
1.0,1.0,5888.745605,10211.786133,5892.845703,10209.46875,5896.945312,10207.151367,5901.044922,10204.833008,5905.144531,10202.514648,5909.244141,10200.196289,5913.343262,10197.876953,5917.442383,10195.557617,5921.541504,10193.238281,5925.640137,10190.918945,5929.73877,10188.598633
1.0,2.0,5772.87793,10010.830078,5776.925781,10008.581055,5780.973633,10006.331055,5785.021484,10004.081055,5789.068848,10001.831055,5793.116211,9999.581055,5797.163574,9997.330078,5801.210938,9995.079102,5805.257812,9992.828125,5809.304688,9990.577148,5813.351562,9988.325195
1.0,3.0,6139.719727,10647.751953,6143.692871,10645.558594,6147.665527,10643.365234,6151.638184,10641.171875,6155.61084,10638.978516,6159.583496,10636.78418,6163.555664,10634.589844,6167.527832,10632.395508,6171.5,10630.201172,6175.47168,10628.005859,6179.443359,10625.810547
1.0,4.0,6677.309082,11580.982422,6681.226074,11578.817383,6685.143066,11576.652344,6689.060059,11574.487305,6692.976562,11572.322266,6696.893066,11570.15625,6700.80957,11567.990234,6704.725586,11565.824219,6708.641602,11563.658203,6712.557617,11561.491211,6716.473633,11559.324219
1.0,5.0,6265.945312,10867.014648,6269.844727,10864.864258,6273.744141,10862.713867,6277.643066,10860.5625,6281.541992,10858.411133,6285.440918,10856.259766,6289.339844,10854.108398,6293.238281,10851.956055,6297.136719,10849.803711,6301.035156,10847.651367,6304.933105,10845.499023
2.0,0.0,460403.78125,-248658.609375,460407.5625,-248660.765625,460411.5625,-248663.0625,460415.65625,-248665.421875,460419.8125,-248667.8125,460424.03125,-248670.234375,460428.28125,-248672.671875,460432.5625,-248675.140625,460436.875,-248677.625,460441.21875,-248680.125,460445.5625,-248682.640625
2.0,1.0,5892.845703,10209.46875,5896.945312,10207.151367,5901.044922,10204.833008,5905.144531,10202.514648,5909.244141,10200.196289,5913.343262,10197.876953,5917.442383,10195.557617,5921.541504,10193.238281,5925.640137,10190.918945,5929.73877,10188.598633,5933.837402,10186.27832
2.0,2.0,5776.925781,10008.581055,5780.973633,10006.331055,5785.021484,10004.081055,5789.068848,10001.831055,5793.116211,9999.581055,5797.163574,9997.330078,5801.210938,9995.079102,5805.257812,9992.828125,5809.304688,9990.577148,5813.351562,9988.325195,5817.398438,9986.073242
2.0,3.0,6143.692871,10645.558594,6147.665527,10643.365234,6151.638184,10641.171875,6155.61084,10638.978516,6159.583496,10636.78418,6163.555664,10634.589844,6167.527832,10632.395508,6171.5,10630.201172,6175.47168,10628.005859,6179.443359,10625.810547,6183.415039,10623.615234


In [20]:
vel_df.shape

(479934, 22)

### Converting Displacement to Pandas DataFrame

In [21]:
# Stack all the satellites as bodies into a single numpy array.
stacked_raw_dis = np.stack(dis_list, axis=1)
stacked_raw_dis.shape

(79999, 6, 2)

In [22]:
# Create a numpy array with the same first 2 dimensions of the input data (time steps and num bodies)
# the number of columns will be the same as the number of time steps
# being predicted multiplied by the dimensions (2) (X and Y)
dis_pred_cache = np.full(
    (stacked_raw_dis.shape[0], stacked_raw_dis.shape[1], num_ts_to_predict*2),
    np.nan,
    dtype=np.float32
)
# Loop over all time steps in the numpy array.
# Grab groups of bodies from future time steps
for curr_time_step in tqdm(range(0, stacked_raw_dis.shape[0])):
    # Loop over the number of time steps in the future to be grabbing
    # velocities from.
    # Don't do last time steps for displacement dataframe.  Can only look so many
    # time steps into the future before running out of data.
    if curr_time_step < (stacked_raw_dis.shape[0] - num_ts_to_predict):
        for num_ts_in_future in range(1, num_ts_to_predict + 1):
            dis_pred_cache[int(curr_time_step), :, int(2*num_ts_in_future-2):int(2*num_ts_in_future)] = \
            stacked_raw_dis[int(curr_time_step + num_ts_in_future), :, 0:2]
            
# Concatenate the cached numpy array to the original displacement dataset
stacked_raw_dis = np.concatenate((stacked_raw_dis[:,:,0:2], dis_pred_cache), axis=2)
# Delete the cache.
del dis_pred_cache
gc.collect()
#Display first portion of the dataframe
pd.DataFrame(stacked_raw_dis[0])

100%|██████████| 79999/79999 [00:01<00:00, 59213.25it/s]


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21
0,368320960.0,-198925744.0,368323040.0,-198926880.0,368326048.0,-198928608.0,368329248.0,-198930448.0,368332512.0,-198932336.0,368335840.0,-198934256.0,368339232.0,-198936192.0,368342624.0,-198938144.0,368346048.0,-198940112.0,368349504.0,-198942096.0,368352960.0,-198944096.0
1,4710996.5,8169429.0,4714276.5,8167575.0,4717556.0,8165721.0,4720836.0,8163866.5,4724115.5,8162011.5,4727395.5,8160157.0,4730674.5,8158301.5,4733954.0,8156446.0,4737233.0,8154590.5,4740512.0,8152735.0,4743791.0,8150879.0
2,4618302.5,8008664.0,4621540.5,8006865.0,4624779.0,8005065.0,4628017.0,8003265.0,4631255.0,8001465.0,4634493.0,7999665.0,4637731.0,7997864.0,4640969.0,7996063.5,4644206.0,7994262.5,4647444.0,7992461.5,4650681.0,7990660.0
3,4911776.0,8518202.0,4914954.5,8516447.0,4918132.5,8514692.0,4921310.5,8512938.0,4924488.5,8511183.0,4927667.0,8509427.0,4930844.5,8507672.0,4934022.5,8505916.0,4937200.0,8504161.0,4940377.5,8502405.0,4943554.5,8500648.0
4,5341847.5,9264786.0,5344981.0,9263054.0,5348114.5,9261322.0,5351248.0,9259590.0,5354381.0,9257858.0,5357514.5,9256125.0,5360647.5,9254392.0,5363780.5,9252659.0,5366913.5,9250927.0,5370046.0,9249193.0,5373179.0,9247459.0
5,5012756.0,8693612.0,5015876.0,8691891.0,5018995.5,8690171.0,5022114.5,8688450.0,5025233.5,8686729.0,5028352.5,8685008.0,5031472.0,8683287.0,5034590.5,8681565.0,5037709.5,8679843.0,5040828.0,8678121.0,5043946.5,8676399.0


In [23]:
# Construct the displacement dataframe from the raw simulator displacement output data.
# Get the size of each dimension in numpy array.
# m-> the number of time steps in the simulation.
# n-> the number of bodies in the simulation.
# r-> number of dimensions in the vector holding the acceleration, displacement, etc.
dis_m,dis_n,dis_r = stacked_raw_dis.shape
# Stack the XY or XYZ arrays of each body into columns, removing a dimension.
# np.column_stack() -> takes a sequence of 1D arrays and stacks them as columns in a 2D matrix.
# np.arange() provides evenly spaced values that repeat n times.  The new index.
# Had to add 1 to arange to go from 1 to 79 instead of 0.
dis_arr = np.column_stack(
    (np.repeat(np.arange(1, dis_m+1, 1),dis_n), 
     np.tile(np.arange(0,dis_n,1),dis_m), 
     stacked_raw_dis.reshape(dis_m*dis_n,-1))
)
# Create dataframe from stacked column array.
dis_df = pd.DataFrame(dis_arr)
# Delete intermediate numpy arrays and original data from memory.
del dis_arr
gc.collect()
# Use df.groupby() to group by time step or planent and create MultiIndex for easy data referencing.
dis_df = dis_df.groupby([0,1]).mean()
dis_df.index.names = ['time_step', 'body']
# Drop the time steps that could not be used for getting future data.
# Create list of time steps to drop.
beg_drop_index = max(dis_df.index.levels[0]) - num_ts_to_predict + 1
end_drop_index = max(dis_df.index.levels[0]) + 1
drop_list = list(range(int(beg_drop_index), int(end_drop_index)))
dis_df.drop(drop_list, level=0, inplace=True)
# Create list of columns 
cols = ['dis_x', 'dis_y']
for i in range(1, num_ts_to_predict+1):
    cols.append('dis_x_' + str(i))
    cols.append('dis_y_' + str(i))
dis_df.columns = cols
# Drop dis_x and dis_y columns since we won't need then for ML training input.
dis_df.drop(['dis_x', 'dis_y'], axis=1,inplace=True)
# View final velocity dataframe with future time steps.
idx = pd.IndexSlice
dis_df.loc[idx[0:2,:], :]

Unnamed: 0_level_0,Unnamed: 1_level_0,dis_x_1,dis_y_1,dis_x_2,dis_y_2,dis_x_3,dis_y_3,dis_x_4,dis_y_4,dis_x_5,dis_y_5,dis_x_6,dis_y_6,dis_x_7,dis_y_7,dis_x_8,dis_y_8,dis_x_9,dis_y_9,dis_x_10,dis_y_10
time_step,body,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1.0,0.0,368323040.0,-198926880.0,368326048.0,-198928608.0,368329248.0,-198930448.0,368332512.0,-198932336.0,368335840.0,-198934256.0,368339232.0,-198936192.0,368342624.0,-198938144.0,368346048.0,-198940112.0,368349504.0,-198942096.0,368352960.0,-198944096.0
1.0,1.0,4714276.5,8167575.0,4717556.0,8165721.0,4720836.0,8163866.5,4724115.5,8162011.5,4727395.5,8160157.0,4730674.5,8158301.5,4733954.0,8156446.0,4737233.0,8154590.5,4740512.0,8152735.0,4743791.0,8150879.0
1.0,2.0,4621540.5,8006865.0,4624779.0,8005065.0,4628017.0,8003265.0,4631255.0,8001465.0,4634493.0,7999665.0,4637731.0,7997864.0,4640969.0,7996063.5,4644206.0,7994262.5,4647444.0,7992461.5,4650681.0,7990660.0
1.0,3.0,4914954.5,8516447.0,4918132.5,8514692.0,4921310.5,8512938.0,4924488.5,8511183.0,4927667.0,8509427.0,4930844.5,8507672.0,4934022.5,8505916.0,4937200.0,8504161.0,4940377.5,8502405.0,4943554.5,8500648.0
1.0,4.0,5344981.0,9263054.0,5348114.5,9261322.0,5351248.0,9259590.0,5354381.0,9257858.0,5357514.5,9256125.0,5360647.5,9254392.0,5363780.5,9252659.0,5366913.5,9250927.0,5370046.0,9249193.0,5373179.0,9247459.0
1.0,5.0,5015876.0,8691891.0,5018995.5,8690171.0,5022114.5,8688450.0,5025233.5,8686729.0,5028352.5,8685008.0,5031472.0,8683287.0,5034590.5,8681565.0,5037709.5,8679843.0,5040828.0,8678121.0,5043946.5,8676399.0
2.0,0.0,368326048.0,-198928608.0,368329248.0,-198930448.0,368332512.0,-198932336.0,368335840.0,-198934256.0,368339232.0,-198936192.0,368342624.0,-198938144.0,368346048.0,-198940112.0,368349504.0,-198942096.0,368352960.0,-198944096.0,368356448.0,-198946112.0
2.0,1.0,4717556.0,8165721.0,4720836.0,8163866.5,4724115.5,8162011.5,4727395.5,8160157.0,4730674.5,8158301.5,4733954.0,8156446.0,4737233.0,8154590.5,4740512.0,8152735.0,4743791.0,8150879.0,4747070.0,8149022.5
2.0,2.0,4624779.0,8005065.0,4628017.0,8003265.0,4631255.0,8001465.0,4634493.0,7999665.0,4637731.0,7997864.0,4640969.0,7996063.5,4644206.0,7994262.5,4647444.0,7992461.5,4650681.0,7990660.0,4653919.0,7988858.5
2.0,3.0,4918132.5,8514692.0,4921310.5,8512938.0,4924488.5,8511183.0,4927667.0,8509427.0,4930844.5,8507672.0,4934022.5,8505916.0,4937200.0,8504161.0,4940377.5,8502405.0,4943554.5,8500648.0,4946732.0,8498892.0


In [24]:
dis_df.shape

(479934, 20)

### Converting Mass to Pandas DataFrame

In [25]:
stacked_raw_mass = np.array(mass_list)
stacked_raw_mass

array([1125.,  857.,  315., 2448., 2139., 2989.], dtype=float32)

In [26]:
# Construct the mass dataframe from the raw simulator mass output data.
# Get the size of each dimension in numpy array.
# m-> the number of time steps in the simulation.
# n-> the number of bodies in the simulation.
# r-> number of dimensions in the vector holding the acceleration, displacement, etc.
mass_n = stacked_raw_mass.shape[0]
# Stack the Mass column with the indexing columns.  Mass column will be repeated by the number
# of time steps.
# np.column_stack() -> takes a sequence of 1D arrays and stacks them as columns in a 2D matrix.
# np.arange() provides evenly spaced values that repeat n times.  The new index.
# Using displacement dataframe dimensions to copy the masses enough.
mass_arr = np.column_stack(
     (np.repeat(np.arange(1, dis_m+1, 1),mass_n), 
     np.tile(np.arange(0,mass_n,1), dis_m),
     np.tile(stacked_raw_mass.reshape(mass_n,), dis_m))
)
# Create dataframe from stacked column array.
mass_df = pd.DataFrame(mass_arr)
# Delete intermediate numpy arrays and original data from memory.
del mass_arr
gc.collect()
# Use df.groupby() to group by time step or planent and create MultiIndex for easy data referencing.
mass_df = mass_df.groupby([0,1]).mean()
mass_df.index.names = ['time_step', 'body']
mass_df.columns = ['mass']
# Drop the time steps that could not be used for getting future data.
# Create list of time steps to drop.
beg_drop_index = max(mass_df.index.levels[0]) - num_ts_to_predict + 1
end_drop_index = max(mass_df.index.levels[0]) + 1
drop_list = list(range(int(beg_drop_index), int(end_drop_index)))
mass_df.drop(drop_list, axis=0, inplace=True)
mass_df.head(15)

Unnamed: 0_level_0,Unnamed: 1_level_0,mass
time_step,body,Unnamed: 2_level_1
1.0,0.0,1125.0
1.0,1.0,857.0
1.0,2.0,315.0
1.0,3.0,2448.0
1.0,4.0,2139.0
1.0,5.0,2989.0
2.0,0.0,1125.0
2.0,1.0,857.0
2.0,2.0,315.0
2.0,3.0,2448.0


In [27]:
mass_df.shape

(479934, 1)

# Merge the Mass, Acceleration, Velocity, and Displacement Data

In [28]:
# Copy mass dataframe to start creating merged version.  Delete mass df afterwards.
merged_data = mass_df.copy(deep=True)
merged_data = pd.merge(merged_data, acc_df, left_index=True, right_index=True, how='outer')
# Delete mass_df and acc_df
del mass_df
del acc_df
gc.collect()
merged_data.head(15)

Unnamed: 0_level_0,Unnamed: 1_level_0,mass,acc_x,acc_y
time_step,body,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1.0,0.0,1125.0,566.529541,-326.394348
1.0,1.0,857.0,0.005125,-0.002897
1.0,2.0,315.0,0.00506,-0.002811
1.0,3.0,2448.0,0.004966,-0.002741
1.0,4.0,2139.0,0.004897,-0.002705
1.0,5.0,2989.0,0.004874,-0.002688
2.0,0.0,1125.0,0.003221,-0.001778
2.0,1.0,857.0,0.005125,-0.002897
2.0,2.0,315.0,0.00506,-0.002812
2.0,3.0,2448.0,0.004966,-0.002741


In [29]:
# Merge in the velocity data.
merged_data = pd.merge(merged_data, vel_df, left_index=True, right_index=True, how='outer')
del vel_df
gc.collect()
merged_data.head(15)

Unnamed: 0_level_0,Unnamed: 1_level_0,mass,acc_x,acc_y,vel_x,vel_y,vel_x_1,vel_y_1,vel_x_2,vel_y_2,vel_x_3,vel_y_3,vel_x_4,vel_y_4,vel_x_5,vel_y_5,vel_x_6,vel_y_6,vel_x_7,vel_y_7,vel_x_8,vel_y_8,vel_x_9,vel_y_9,vel_x_10,vel_y_10
time_step,body,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
1.0,0.0,1125.0,566.529541,-326.394348,460401.21875,-248657.1875,460403.78125,-248658.609375,460407.5625,-248660.765625,460411.5625,-248663.0625,460415.65625,-248665.421875,460419.8125,-248667.8125,460424.03125,-248670.234375,460428.28125,-248672.671875,460432.5625,-248675.140625,460436.875,-248677.625,460441.21875,-248680.125
1.0,1.0,857.0,0.005125,-0.002897,5888.745605,10211.786133,5892.845703,10209.46875,5896.945312,10207.151367,5901.044922,10204.833008,5905.144531,10202.514648,5909.244141,10200.196289,5913.343262,10197.876953,5917.442383,10195.557617,5921.541504,10193.238281,5925.640137,10190.918945,5929.73877,10188.598633
1.0,2.0,315.0,0.00506,-0.002811,5772.87793,10010.830078,5776.925781,10008.581055,5780.973633,10006.331055,5785.021484,10004.081055,5789.068848,10001.831055,5793.116211,9999.581055,5797.163574,9997.330078,5801.210938,9995.079102,5805.257812,9992.828125,5809.304688,9990.577148,5813.351562,9988.325195
1.0,3.0,2448.0,0.004966,-0.002741,6139.719727,10647.751953,6143.692871,10645.558594,6147.665527,10643.365234,6151.638184,10641.171875,6155.61084,10638.978516,6159.583496,10636.78418,6163.555664,10634.589844,6167.527832,10632.395508,6171.5,10630.201172,6175.47168,10628.005859,6179.443359,10625.810547
1.0,4.0,2139.0,0.004897,-0.002705,6677.309082,11580.982422,6681.226074,11578.817383,6685.143066,11576.652344,6689.060059,11574.487305,6692.976562,11572.322266,6696.893066,11570.15625,6700.80957,11567.990234,6704.725586,11565.824219,6708.641602,11563.658203,6712.557617,11561.491211,6716.473633,11559.324219
1.0,5.0,2989.0,0.004874,-0.002688,6265.945312,10867.014648,6269.844727,10864.864258,6273.744141,10862.713867,6277.643066,10860.5625,6281.541992,10858.411133,6285.440918,10856.259766,6289.339844,10854.108398,6293.238281,10851.956055,6297.136719,10849.803711,6301.035156,10847.651367,6304.933105,10845.499023
2.0,0.0,1125.0,0.003221,-0.001778,460403.78125,-248658.609375,460407.5625,-248660.765625,460411.5625,-248663.0625,460415.65625,-248665.421875,460419.8125,-248667.8125,460424.03125,-248670.234375,460428.28125,-248672.671875,460432.5625,-248675.140625,460436.875,-248677.625,460441.21875,-248680.125,460445.5625,-248682.640625
2.0,1.0,857.0,0.005125,-0.002897,5892.845703,10209.46875,5896.945312,10207.151367,5901.044922,10204.833008,5905.144531,10202.514648,5909.244141,10200.196289,5913.343262,10197.876953,5917.442383,10195.557617,5921.541504,10193.238281,5925.640137,10190.918945,5929.73877,10188.598633,5933.837402,10186.27832
2.0,2.0,315.0,0.00506,-0.002812,5776.925781,10008.581055,5780.973633,10006.331055,5785.021484,10004.081055,5789.068848,10001.831055,5793.116211,9999.581055,5797.163574,9997.330078,5801.210938,9995.079102,5805.257812,9992.828125,5809.304688,9990.577148,5813.351562,9988.325195,5817.398438,9986.073242
2.0,3.0,2448.0,0.004966,-0.002741,6143.692871,10645.558594,6147.665527,10643.365234,6151.638184,10641.171875,6155.61084,10638.978516,6159.583496,10636.78418,6163.555664,10634.589844,6167.527832,10632.395508,6171.5,10630.201172,6175.47168,10628.005859,6179.443359,10625.810547,6183.415039,10623.615234


In [30]:
# Merge in the displacement data
merged_data = pd.merge(merged_data, dis_df, left_index=True, right_index=True, how='outer')
del dis_df
gc.collect()
merged_data.head(15)

Unnamed: 0_level_0,Unnamed: 1_level_0,mass,acc_x,acc_y,vel_x,vel_y,vel_x_1,vel_y_1,vel_x_2,vel_y_2,vel_x_3,vel_y_3,vel_x_4,vel_y_4,vel_x_5,vel_y_5,vel_x_6,vel_y_6,vel_x_7,vel_y_7,vel_x_8,vel_y_8,vel_x_9,vel_y_9,vel_x_10,vel_y_10,dis_x_1,dis_y_1,dis_x_2,dis_y_2,dis_x_3,dis_y_3,dis_x_4,dis_y_4,dis_x_5,dis_y_5,dis_x_6,dis_y_6,dis_x_7,dis_y_7,dis_x_8,dis_y_8,dis_x_9,dis_y_9,dis_x_10,dis_y_10
time_step,body,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
1.0,0.0,1125.0,566.529541,-326.394348,460401.21875,-248657.1875,460403.78125,-248658.609375,460407.5625,-248660.765625,460411.5625,-248663.0625,460415.65625,-248665.421875,460419.8125,-248667.8125,460424.03125,-248670.234375,460428.28125,-248672.671875,460432.5625,-248675.140625,460436.875,-248677.625,460441.21875,-248680.125,368323040.0,-198926880.0,368326048.0,-198928608.0,368329248.0,-198930448.0,368332512.0,-198932336.0,368335840.0,-198934256.0,368339232.0,-198936192.0,368342624.0,-198938144.0,368346048.0,-198940112.0,368349504.0,-198942096.0,368352960.0,-198944096.0
1.0,1.0,857.0,0.005125,-0.002897,5888.745605,10211.786133,5892.845703,10209.46875,5896.945312,10207.151367,5901.044922,10204.833008,5905.144531,10202.514648,5909.244141,10200.196289,5913.343262,10197.876953,5917.442383,10195.557617,5921.541504,10193.238281,5925.640137,10190.918945,5929.73877,10188.598633,4714276.5,8167575.0,4717556.0,8165721.0,4720836.0,8163866.5,4724115.5,8162011.5,4727395.5,8160157.0,4730674.5,8158301.5,4733954.0,8156446.0,4737233.0,8154590.5,4740512.0,8152735.0,4743791.0,8150879.0
1.0,2.0,315.0,0.00506,-0.002811,5772.87793,10010.830078,5776.925781,10008.581055,5780.973633,10006.331055,5785.021484,10004.081055,5789.068848,10001.831055,5793.116211,9999.581055,5797.163574,9997.330078,5801.210938,9995.079102,5805.257812,9992.828125,5809.304688,9990.577148,5813.351562,9988.325195,4621540.5,8006865.0,4624779.0,8005065.0,4628017.0,8003265.0,4631255.0,8001465.0,4634493.0,7999665.0,4637731.0,7997864.0,4640969.0,7996063.5,4644206.0,7994262.5,4647444.0,7992461.5,4650681.0,7990660.0
1.0,3.0,2448.0,0.004966,-0.002741,6139.719727,10647.751953,6143.692871,10645.558594,6147.665527,10643.365234,6151.638184,10641.171875,6155.61084,10638.978516,6159.583496,10636.78418,6163.555664,10634.589844,6167.527832,10632.395508,6171.5,10630.201172,6175.47168,10628.005859,6179.443359,10625.810547,4914954.5,8516447.0,4918132.5,8514692.0,4921310.5,8512938.0,4924488.5,8511183.0,4927667.0,8509427.0,4930844.5,8507672.0,4934022.5,8505916.0,4937200.0,8504161.0,4940377.5,8502405.0,4943554.5,8500648.0
1.0,4.0,2139.0,0.004897,-0.002705,6677.309082,11580.982422,6681.226074,11578.817383,6685.143066,11576.652344,6689.060059,11574.487305,6692.976562,11572.322266,6696.893066,11570.15625,6700.80957,11567.990234,6704.725586,11565.824219,6708.641602,11563.658203,6712.557617,11561.491211,6716.473633,11559.324219,5344981.0,9263054.0,5348114.5,9261322.0,5351248.0,9259590.0,5354381.0,9257858.0,5357514.5,9256125.0,5360647.5,9254392.0,5363780.5,9252659.0,5366913.5,9250927.0,5370046.0,9249193.0,5373179.0,9247459.0
1.0,5.0,2989.0,0.004874,-0.002688,6265.945312,10867.014648,6269.844727,10864.864258,6273.744141,10862.713867,6277.643066,10860.5625,6281.541992,10858.411133,6285.440918,10856.259766,6289.339844,10854.108398,6293.238281,10851.956055,6297.136719,10849.803711,6301.035156,10847.651367,6304.933105,10845.499023,5015876.0,8691891.0,5018995.5,8690171.0,5022114.5,8688450.0,5025233.5,8686729.0,5028352.5,8685008.0,5031472.0,8683287.0,5034590.5,8681565.0,5037709.5,8679843.0,5040828.0,8678121.0,5043946.5,8676399.0
2.0,0.0,1125.0,0.003221,-0.001778,460403.78125,-248658.609375,460407.5625,-248660.765625,460411.5625,-248663.0625,460415.65625,-248665.421875,460419.8125,-248667.8125,460424.03125,-248670.234375,460428.28125,-248672.671875,460432.5625,-248675.140625,460436.875,-248677.625,460441.21875,-248680.125,460445.5625,-248682.640625,368326048.0,-198928608.0,368329248.0,-198930448.0,368332512.0,-198932336.0,368335840.0,-198934256.0,368339232.0,-198936192.0,368342624.0,-198938144.0,368346048.0,-198940112.0,368349504.0,-198942096.0,368352960.0,-198944096.0,368356448.0,-198946112.0
2.0,1.0,857.0,0.005125,-0.002897,5892.845703,10209.46875,5896.945312,10207.151367,5901.044922,10204.833008,5905.144531,10202.514648,5909.244141,10200.196289,5913.343262,10197.876953,5917.442383,10195.557617,5921.541504,10193.238281,5925.640137,10190.918945,5929.73877,10188.598633,5933.837402,10186.27832,4717556.0,8165721.0,4720836.0,8163866.5,4724115.5,8162011.5,4727395.5,8160157.0,4730674.5,8158301.5,4733954.0,8156446.0,4737233.0,8154590.5,4740512.0,8152735.0,4743791.0,8150879.0,4747070.0,8149022.5
2.0,2.0,315.0,0.00506,-0.002812,5776.925781,10008.581055,5780.973633,10006.331055,5785.021484,10004.081055,5789.068848,10001.831055,5793.116211,9999.581055,5797.163574,9997.330078,5801.210938,9995.079102,5805.257812,9992.828125,5809.304688,9990.577148,5813.351562,9988.325195,5817.398438,9986.073242,4624779.0,8005065.0,4628017.0,8003265.0,4631255.0,8001465.0,4634493.0,7999665.0,4637731.0,7997864.0,4640969.0,7996063.5,4644206.0,7994262.5,4647444.0,7992461.5,4650681.0,7990660.0,4653919.0,7988858.5
2.0,3.0,2448.0,0.004966,-0.002741,6143.692871,10645.558594,6147.665527,10643.365234,6151.638184,10641.171875,6155.61084,10638.978516,6159.583496,10636.78418,6163.555664,10634.589844,6167.527832,10632.395508,6171.5,10630.201172,6175.47168,10628.005859,6179.443359,10625.810547,6183.415039,10623.615234,4918132.5,8514692.0,4921310.5,8512938.0,4924488.5,8511183.0,4927667.0,8509427.0,4930844.5,8507672.0,4934022.5,8505916.0,4937200.0,8504161.0,4940377.5,8502405.0,4943554.5,8500648.0,4946732.0,8498892.0


In [31]:
# Rearrange columns so they are in the needed order.
# Create list of what the column order should be.
cols = []
cols.extend(['mass', 'acc_x', 'acc_y', 'vel_x', 'vel_y'])
# Loop over all the time steps we wanted to predict and rearrange the columns
# accordingly
for i in range(1,num_ts_to_predict+1):
    cols.append('dis_x_' + str(i))
    cols.append('dis_y_' + str(i))
    cols.append('vel_x_' + str(i))
    cols.append('vel_y_' + str(i))
# Rearrange columns using the create columns list.
merged_data = merged_data[cols]
merged_data.head(15)

Unnamed: 0_level_0,Unnamed: 1_level_0,mass,acc_x,acc_y,vel_x,vel_y,dis_x_1,dis_y_1,vel_x_1,vel_y_1,dis_x_2,dis_y_2,vel_x_2,vel_y_2,dis_x_3,dis_y_3,vel_x_3,vel_y_3,dis_x_4,dis_y_4,vel_x_4,vel_y_4,dis_x_5,dis_y_5,vel_x_5,vel_y_5,dis_x_6,dis_y_6,vel_x_6,vel_y_6,dis_x_7,dis_y_7,vel_x_7,vel_y_7,dis_x_8,dis_y_8,vel_x_8,vel_y_8,dis_x_9,dis_y_9,vel_x_9,vel_y_9,dis_x_10,dis_y_10,vel_x_10,vel_y_10
time_step,body,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
1.0,0.0,1125.0,566.529541,-326.394348,460401.21875,-248657.1875,368323040.0,-198926880.0,460403.78125,-248658.609375,368326048.0,-198928608.0,460407.5625,-248660.765625,368329248.0,-198930448.0,460411.5625,-248663.0625,368332512.0,-198932336.0,460415.65625,-248665.421875,368335840.0,-198934256.0,460419.8125,-248667.8125,368339232.0,-198936192.0,460424.03125,-248670.234375,368342624.0,-198938144.0,460428.28125,-248672.671875,368346048.0,-198940112.0,460432.5625,-248675.140625,368349504.0,-198942096.0,460436.875,-248677.625,368352960.0,-198944096.0,460441.21875,-248680.125
1.0,1.0,857.0,0.005125,-0.002897,5888.745605,10211.786133,4714276.5,8167575.0,5892.845703,10209.46875,4717556.0,8165721.0,5896.945312,10207.151367,4720836.0,8163866.5,5901.044922,10204.833008,4724115.5,8162011.5,5905.144531,10202.514648,4727395.5,8160157.0,5909.244141,10200.196289,4730674.5,8158301.5,5913.343262,10197.876953,4733954.0,8156446.0,5917.442383,10195.557617,4737233.0,8154590.5,5921.541504,10193.238281,4740512.0,8152735.0,5925.640137,10190.918945,4743791.0,8150879.0,5929.73877,10188.598633
1.0,2.0,315.0,0.00506,-0.002811,5772.87793,10010.830078,4621540.5,8006865.0,5776.925781,10008.581055,4624779.0,8005065.0,5780.973633,10006.331055,4628017.0,8003265.0,5785.021484,10004.081055,4631255.0,8001465.0,5789.068848,10001.831055,4634493.0,7999665.0,5793.116211,9999.581055,4637731.0,7997864.0,5797.163574,9997.330078,4640969.0,7996063.5,5801.210938,9995.079102,4644206.0,7994262.5,5805.257812,9992.828125,4647444.0,7992461.5,5809.304688,9990.577148,4650681.0,7990660.0,5813.351562,9988.325195
1.0,3.0,2448.0,0.004966,-0.002741,6139.719727,10647.751953,4914954.5,8516447.0,6143.692871,10645.558594,4918132.5,8514692.0,6147.665527,10643.365234,4921310.5,8512938.0,6151.638184,10641.171875,4924488.5,8511183.0,6155.61084,10638.978516,4927667.0,8509427.0,6159.583496,10636.78418,4930844.5,8507672.0,6163.555664,10634.589844,4934022.5,8505916.0,6167.527832,10632.395508,4937200.0,8504161.0,6171.5,10630.201172,4940377.5,8502405.0,6175.47168,10628.005859,4943554.5,8500648.0,6179.443359,10625.810547
1.0,4.0,2139.0,0.004897,-0.002705,6677.309082,11580.982422,5344981.0,9263054.0,6681.226074,11578.817383,5348114.5,9261322.0,6685.143066,11576.652344,5351248.0,9259590.0,6689.060059,11574.487305,5354381.0,9257858.0,6692.976562,11572.322266,5357514.5,9256125.0,6696.893066,11570.15625,5360647.5,9254392.0,6700.80957,11567.990234,5363780.5,9252659.0,6704.725586,11565.824219,5366913.5,9250927.0,6708.641602,11563.658203,5370046.0,9249193.0,6712.557617,11561.491211,5373179.0,9247459.0,6716.473633,11559.324219
1.0,5.0,2989.0,0.004874,-0.002688,6265.945312,10867.014648,5015876.0,8691891.0,6269.844727,10864.864258,5018995.5,8690171.0,6273.744141,10862.713867,5022114.5,8688450.0,6277.643066,10860.5625,5025233.5,8686729.0,6281.541992,10858.411133,5028352.5,8685008.0,6285.440918,10856.259766,5031472.0,8683287.0,6289.339844,10854.108398,5034590.5,8681565.0,6293.238281,10851.956055,5037709.5,8679843.0,6297.136719,10849.803711,5040828.0,8678121.0,6301.035156,10847.651367,5043946.5,8676399.0,6304.933105,10845.499023
2.0,0.0,1125.0,0.003221,-0.001778,460403.78125,-248658.609375,368326048.0,-198928608.0,460407.5625,-248660.765625,368329248.0,-198930448.0,460411.5625,-248663.0625,368332512.0,-198932336.0,460415.65625,-248665.421875,368335840.0,-198934256.0,460419.8125,-248667.8125,368339232.0,-198936192.0,460424.03125,-248670.234375,368342624.0,-198938144.0,460428.28125,-248672.671875,368346048.0,-198940112.0,460432.5625,-248675.140625,368349504.0,-198942096.0,460436.875,-248677.625,368352960.0,-198944096.0,460441.21875,-248680.125,368356448.0,-198946112.0,460445.5625,-248682.640625
2.0,1.0,857.0,0.005125,-0.002897,5892.845703,10209.46875,4717556.0,8165721.0,5896.945312,10207.151367,4720836.0,8163866.5,5901.044922,10204.833008,4724115.5,8162011.5,5905.144531,10202.514648,4727395.5,8160157.0,5909.244141,10200.196289,4730674.5,8158301.5,5913.343262,10197.876953,4733954.0,8156446.0,5917.442383,10195.557617,4737233.0,8154590.5,5921.541504,10193.238281,4740512.0,8152735.0,5925.640137,10190.918945,4743791.0,8150879.0,5929.73877,10188.598633,4747070.0,8149022.5,5933.837402,10186.27832
2.0,2.0,315.0,0.00506,-0.002812,5776.925781,10008.581055,4624779.0,8005065.0,5780.973633,10006.331055,4628017.0,8003265.0,5785.021484,10004.081055,4631255.0,8001465.0,5789.068848,10001.831055,4634493.0,7999665.0,5793.116211,9999.581055,4637731.0,7997864.0,5797.163574,9997.330078,4640969.0,7996063.5,5801.210938,9995.079102,4644206.0,7994262.5,5805.257812,9992.828125,4647444.0,7992461.5,5809.304688,9990.577148,4650681.0,7990660.0,5813.351562,9988.325195,4653919.0,7988858.5,5817.398438,9986.073242
2.0,3.0,2448.0,0.004966,-0.002741,6143.692871,10645.558594,4918132.5,8514692.0,6147.665527,10643.365234,4921310.5,8512938.0,6151.638184,10641.171875,4924488.5,8511183.0,6155.61084,10638.978516,4927667.0,8509427.0,6159.583496,10636.78418,4930844.5,8507672.0,6163.555664,10634.589844,4934022.5,8505916.0,6167.527832,10632.395508,4937200.0,8504161.0,6171.5,10630.201172,4940377.5,8502405.0,6175.47168,10628.005859,4943554.5,8500648.0,6179.443359,10625.810547,4946732.0,8498892.0,6183.415039,10623.615234


# Create Body Time Series Data Format

In [32]:
merged_data_time_series = merged_data.copy(deep=True)
merged_data_time_series = merged_data_time_series.swaplevel('time_step', 'body').sort_index(level=0)
merged_data_time_series.head(15)

Unnamed: 0_level_0,Unnamed: 1_level_0,mass,acc_x,acc_y,vel_x,vel_y,dis_x_1,dis_y_1,vel_x_1,vel_y_1,dis_x_2,dis_y_2,vel_x_2,vel_y_2,dis_x_3,dis_y_3,vel_x_3,vel_y_3,dis_x_4,dis_y_4,vel_x_4,vel_y_4,dis_x_5,dis_y_5,vel_x_5,vel_y_5,dis_x_6,dis_y_6,vel_x_6,vel_y_6,dis_x_7,dis_y_7,vel_x_7,vel_y_7,dis_x_8,dis_y_8,vel_x_8,vel_y_8,dis_x_9,dis_y_9,vel_x_9,vel_y_9,dis_x_10,dis_y_10,vel_x_10,vel_y_10
body,time_step,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1
0.0,1.0,1125.0,566.529541,-326.394348,460401.21875,-248657.1875,368323040.0,-198926880.0,460403.78125,-248658.609375,368326048.0,-198928608.0,460407.5625,-248660.765625,368329248.0,-198930448.0,460411.5625,-248663.0625,368332512.0,-198932336.0,460415.65625,-248665.421875,368335840.0,-198934256.0,460419.8125,-248667.8125,368339232.0,-198936192.0,460424.03125,-248670.234375,368342624.0,-198938144.0,460428.28125,-248672.671875,368346048.0,-198940112.0,460432.5625,-248675.140625,368349504.0,-198942096.0,460436.875,-248677.625,368352960.0,-198944096.0,460441.21875,-248680.125
0.0,2.0,1125.0,0.003221,-0.001778,460403.78125,-248658.609375,368326048.0,-198928608.0,460407.5625,-248660.765625,368329248.0,-198930448.0,460411.5625,-248663.0625,368332512.0,-198932336.0,460415.65625,-248665.421875,368335840.0,-198934256.0,460419.8125,-248667.8125,368339232.0,-198936192.0,460424.03125,-248670.234375,368342624.0,-198938144.0,460428.28125,-248672.671875,368346048.0,-198940112.0,460432.5625,-248675.140625,368349504.0,-198942096.0,460436.875,-248677.625,368352960.0,-198944096.0,460441.21875,-248680.125,368356448.0,-198946112.0,460445.5625,-248682.640625
0.0,3.0,1125.0,0.004708,-0.002694,460407.5625,-248660.765625,368329248.0,-198930448.0,460411.5625,-248663.0625,368332512.0,-198932336.0,460415.65625,-248665.421875,368335840.0,-198934256.0,460419.8125,-248667.8125,368339232.0,-198936192.0,460424.03125,-248670.234375,368342624.0,-198938144.0,460428.28125,-248672.671875,368346048.0,-198940112.0,460432.5625,-248675.140625,368349504.0,-198942096.0,460436.875,-248677.625,368352960.0,-198944096.0,460441.21875,-248680.125,368356448.0,-198946112.0,460445.5625,-248682.640625,368359936.0,-198948144.0,460449.9375,-248685.171875
0.0,4.0,1125.0,0.005006,-0.002877,460411.5625,-248663.0625,368332512.0,-198932336.0,460415.65625,-248665.421875,368335840.0,-198934256.0,460419.8125,-248667.8125,368339232.0,-198936192.0,460424.03125,-248670.234375,368342624.0,-198938144.0,460428.28125,-248672.671875,368346048.0,-198940112.0,460432.5625,-248675.140625,368349504.0,-198942096.0,460436.875,-248677.625,368352960.0,-198944096.0,460441.21875,-248680.125,368356448.0,-198946112.0,460445.5625,-248682.640625,368359936.0,-198948144.0,460449.9375,-248685.171875,368363488.0,-198950176.0,460454.34375,-248687.71875
0.0,5.0,1125.0,0.00513,-0.002953,460415.65625,-248665.421875,368335840.0,-198934256.0,460419.8125,-248667.8125,368339232.0,-198936192.0,460424.03125,-248670.234375,368342624.0,-198938144.0,460428.28125,-248672.671875,368346048.0,-198940112.0,460432.5625,-248675.140625,368349504.0,-198942096.0,460436.875,-248677.625,368352960.0,-198944096.0,460441.21875,-248680.125,368356448.0,-198946112.0,460445.5625,-248682.640625,368359936.0,-198948144.0,460449.9375,-248685.171875,368363488.0,-198950176.0,460454.34375,-248687.71875,368367040.0,-198952224.0,460458.78125,-248690.28125
0.0,6.0,1125.0,0.005203,-0.002997,460419.8125,-248667.8125,368339232.0,-198936192.0,460424.03125,-248670.234375,368342624.0,-198938144.0,460428.28125,-248672.671875,368346048.0,-198940112.0,460432.5625,-248675.140625,368349504.0,-198942096.0,460436.875,-248677.625,368352960.0,-198944096.0,460441.21875,-248680.125,368356448.0,-198946112.0,460445.5625,-248682.640625,368359936.0,-198948144.0,460449.9375,-248685.171875,368363488.0,-198950176.0,460454.34375,-248687.71875,368367040.0,-198952224.0,460458.78125,-248690.28125,368370592.0,-198954288.0,460463.25,-248692.859375
0.0,7.0,1125.0,0.005257,-0.00303,460424.03125,-248670.234375,368342624.0,-198938144.0,460428.28125,-248672.671875,368346048.0,-198940112.0,460432.5625,-248675.140625,368349504.0,-198942096.0,460436.875,-248677.625,368352960.0,-198944096.0,460441.21875,-248680.125,368356448.0,-198946112.0,460445.5625,-248682.640625,368359936.0,-198948144.0,460449.9375,-248685.171875,368363488.0,-198950176.0,460454.34375,-248687.71875,368367040.0,-198952224.0,460458.78125,-248690.28125,368370592.0,-198954288.0,460463.25,-248692.859375,368374208.0,-198956368.0,460467.75,-248695.453125
0.0,8.0,1125.0,0.005301,-0.003056,460428.28125,-248672.671875,368346048.0,-198940112.0,460432.5625,-248675.140625,368349504.0,-198942096.0,460436.875,-248677.625,368352960.0,-198944096.0,460441.21875,-248680.125,368356448.0,-198946112.0,460445.5625,-248682.640625,368359936.0,-198948144.0,460449.9375,-248685.171875,368363488.0,-198950176.0,460454.34375,-248687.71875,368367040.0,-198952224.0,460458.78125,-248690.28125,368370592.0,-198954288.0,460463.25,-248692.859375,368374208.0,-198956368.0,460467.75,-248695.453125,368377824.0,-198958448.0,460472.28125,-248698.0625
0.0,9.0,1125.0,0.005341,-0.00308,460432.5625,-248675.140625,368349504.0,-198942096.0,460436.875,-248677.625,368352960.0,-198944096.0,460441.21875,-248680.125,368356448.0,-198946112.0,460445.5625,-248682.640625,368359936.0,-198948144.0,460449.9375,-248685.171875,368363488.0,-198950176.0,460454.34375,-248687.71875,368367040.0,-198952224.0,460458.78125,-248690.28125,368370592.0,-198954288.0,460463.25,-248692.859375,368374208.0,-198956368.0,460467.75,-248695.453125,368377824.0,-198958448.0,460472.28125,-248698.0625,368381440.0,-198960544.0,460476.8125,-248700.6875
0.0,10.0,1125.0,0.005378,-0.003102,460436.875,-248677.625,368352960.0,-198944096.0,460441.21875,-248680.125,368356448.0,-198946112.0,460445.5625,-248682.640625,368359936.0,-198948144.0,460449.9375,-248685.171875,368363488.0,-198950176.0,460454.34375,-248687.71875,368367040.0,-198952224.0,460458.78125,-248690.28125,368370592.0,-198954288.0,460463.25,-248692.859375,368374208.0,-198956368.0,460467.75,-248695.453125,368377824.0,-198958448.0,460472.28125,-248698.0625,368381440.0,-198960544.0,460476.8125,-248700.6875,368385088.0,-198962656.0,460481.375,-248703.328125


# Attempt Converting Merged Datasets to Numpy Arrays and Save as Both Pd dataframes and Np Arrays

In [33]:
merged_data.to_numpy().shape

(479934, 45)

In [34]:
dim0 = len(merged_data.index.get_level_values(0).unique())
dim1 = len(merged_data.index.get_level_values(1).unique())
dim2 = merged_data.shape[1]
merged_data_ndarray = merged_data.to_numpy().reshape((dim0, dim1, dim2))
merged_data_ndarray[0,0]

array([ 1.12500000e+03,  5.66529541e+02, -3.26394348e+02,  4.60401219e+05,
       -2.48657188e+05,  3.68323040e+08, -1.98926880e+08,  4.60403781e+05,
       -2.48658609e+05,  3.68326048e+08, -1.98928608e+08,  4.60407562e+05,
       -2.48660766e+05,  3.68329248e+08, -1.98930448e+08,  4.60411562e+05,
       -2.48663062e+05,  3.68332512e+08, -1.98932336e+08,  4.60415656e+05,
       -2.48665422e+05,  3.68335840e+08, -1.98934256e+08,  4.60419812e+05,
       -2.48667812e+05,  3.68339232e+08, -1.98936192e+08,  4.60424031e+05,
       -2.48670234e+05,  3.68342624e+08, -1.98938144e+08,  4.60428281e+05,
       -2.48672672e+05,  3.68346048e+08, -1.98940112e+08,  4.60432562e+05,
       -2.48675141e+05,  3.68349504e+08, -1.98942096e+08,  4.60436875e+05,
       -2.48677625e+05,  3.68352960e+08, -1.98944096e+08,  4.60441219e+05,
       -2.48680125e+05])

In [35]:
dim0 = len(merged_data_time_series.index.get_level_values(0).unique())
dim1 = len(merged_data_time_series.index.get_level_values(1).unique())
dim2 = merged_data.shape[1]
merged_data_time_series_ndarray = merged_data_time_series.to_numpy().reshape((dim0, dim1, dim2))
merged_data_time_series_ndarray[0,5]

array([ 1.12500000e+03,  5.20316511e-03, -2.99724448e-03,  4.60419812e+05,
       -2.48667812e+05,  3.68339232e+08, -1.98936192e+08,  4.60424031e+05,
       -2.48670234e+05,  3.68342624e+08, -1.98938144e+08,  4.60428281e+05,
       -2.48672672e+05,  3.68346048e+08, -1.98940112e+08,  4.60432562e+05,
       -2.48675141e+05,  3.68349504e+08, -1.98942096e+08,  4.60436875e+05,
       -2.48677625e+05,  3.68352960e+08, -1.98944096e+08,  4.60441219e+05,
       -2.48680125e+05,  3.68356448e+08, -1.98946112e+08,  4.60445562e+05,
       -2.48682641e+05,  3.68359936e+08, -1.98948144e+08,  4.60449938e+05,
       -2.48685172e+05,  3.68363488e+08, -1.98950176e+08,  4.60454344e+05,
       -2.48687719e+05,  3.68367040e+08, -1.98952224e+08,  4.60458781e+05,
       -2.48690281e+05,  3.68370592e+08, -1.98954288e+08,  4.60463250e+05,
       -2.48692859e+05])

## Save the Numpy Arrays and Pandas Dataframes

In [36]:
# Set the output directory.
out_dir = 'output/'

Save the dataframes by pickling them.

In [37]:
# Save using 'default' as the key.
# https://pythontic.com/pandas/serialization/hdf5
merged_data.to_hdf(out_dir + 'sim_data_df-ts-body.hdf5', 'default')
merged_data_time_series.to_hdf(out_dir + 'sim_data_df-body-ts.hdf5', 'default')

# merged_data.to_pickle(out_dir + 'sim_data_df-ts-body.pkl')
# merged_data_time_series.to_pickle(out_dir + 'sim_data_df-body-ts.pkl')

Save the dataframes to XLSX files to view in Excel. \
Some larger datasets can't be saved to Excel.  Takes forever to parse the file.

In [38]:
# merged_data.to_excel(out_dir + 'sim_data_df-ts-body.xlsx')
# merged_data_time_series.to_excel(out_dir + 'sim_data_df-body-ts.xlsx')

Save the numpy arrays by using numpy's saving function.

In [39]:
np.save(out_dir + 'sim_data_np-ts-body.npy', merged_data_ndarray)
np.save(out_dir + 'sim_data_np-body-ts.npy', merged_data_time_series_ndarray)