# Process Data

In [None]:
import numpy as np
import pandas as pd
import struct
import array
import plotly.express as px
import plotly.graph_objects as go
import math
import time

In [None]:
import onnx

In [None]:
import onnxruntime

In [None]:
# Remove the column limit so displayed DataFrames show every column.
pd.options.display.max_columns = None

In [None]:
# Number of phase channels; used below to generate the per-channel column names.
NUM_PHASE_CHANNEL = 5

In [None]:
def ReadBinary(binaryFile, sampleCount, featureCount):
    """
    Read a raw float32 matrix from a binary file.

    The file is interpreted as `sampleCount` consecutive rows of
    `featureCount` native-endian 4-byte floats, starting at offset 0.

    Parameters:
    - binaryFile: str, path to the binary file.
    - sampleCount: int, number of rows to read.
    - featureCount: int, number of float32 values per row.

    Returns:
    - np.ndarray of dtype float32 with shape (sampleCount, featureCount).

    Raises:
    - ValueError: if the file holds fewer than sampleCount * featureCount values.
    """
    expectedValues = sampleCount * featureCount
    print('Reading binary ' + binaryFile + '...', end="\r")
    with open(binaryFile, "rb") as f:
        # One bulk read instead of a seek/read/unpack round trip per row.
        data = np.fromfile(f, dtype=np.float32, count=expectedValues)
    if data.size != expectedValues:
        # Without this check a short file would silently yield a matrix of
        # the wrong width (or an opaque reshape error).
        raise ValueError(
            f"{binaryFile}: expected {expectedValues} float32 values "
            f"({sampleCount} x {featureCount}), found {data.size}"
        )
    print('Reading binary ' + binaryFile + '...', 100, "%", end="\r")
    print("")
    return data.reshape(sampleCount, featureCount)

In [None]:
def read_csv_style_data(file_path, delimiter=' ', dtype='float32'):
    """
    Read CSV-style data from a file into a NumPy array.

    Parameters:
    - file_path: str, path to the file containing the data.
    - delimiter: str, the delimiter used in the data (default is space).
    - dtype: data type of the resulting array (default is 'float32').
      Pass None to let NumPy infer the type of each column.

    Returns:
    - data_array: np.ndarray, the NumPy array containing the data, or None
      if the file could not be read or parsed (the error is printed).
    """
    try:
        # Forward the caller's dtype; it must not be replaced by a fixed value.
        return np.genfromtxt(file_path, delimiter=delimiter, dtype=dtype, encoding=None)
    except (OSError, ValueError) as e:
        # OSError: missing/unreadable file; ValueError: malformed content.
        print(f"Error reading data from {file_path}: {e}")
        return None


In [None]:
def read_csv_data(file_path, delimiter=' '):
    """
    Load a header-less delimited text file as a pandas DataFrame.

    Parameters:
    - file_path: str, location of the file to load.
    - delimiter: str, field separator (a single space by default).

    Returns:
    - pd.DataFrame with default integer column labels, or None when
      reading fails (the error is printed instead of raised).
    """
    try:
        frame = pd.read_csv(file_path, header=None, delimiter=delimiter)
    except Exception as err:
        print(f"Error reading data from {file_path}: {err}")
        return None
    return frame

In [None]:
#calc phase 2d
def calc_2d_phase(phases, amplitudes):
    """
    Convert phase values into amplitude-scaled 2D vectors.

    Each phase is multiplied by 2*pi and mapped to (sin, cos); the pair is
    then scaled by the matching amplitude.

    Parameters:
    - phases: 1-D array-like of phase values.
    - amplitudes: 1-D array-like with one amplitude per phase.

    Returns:
    - np.ndarray of shape (len(phases), 2); column 0 holds the scaled sine,
      column 1 the scaled cosine.
    """
    # Multiply into a new array so the caller's `phases` is left untouched
    # (and integer inputs are promoted instead of failing an in-place cast).
    angles = np.asarray(phases) * (2.0 * np.pi)
    scale = np.asarray(amplitudes)[:, np.newaxis]
    return np.column_stack((np.sin(angles), np.cos(angles))) * scale

def calc_stuff(row):
    """
    Build the 2D phase vectors for a single row of phase data.

    Reads the `PhaseValue1..5` and `PhaseAmp1..5` entries of `row`, converts
    both groups to float arrays and hands them to `calc_2d_phase`.
    """
    channel_ids = range(1, 6)
    value_cols = [f"PhaseValue{c}" for c in channel_ids]
    amp_cols = [f"PhaseAmp{c}" for c in channel_ids]
    phase_values = row[value_cols].to_numpy().astype(float)
    amplitudes = row[amp_cols].to_numpy().astype(float)
    return calc_2d_phase(phase_values, amplitudes)
    
    
#calc update delta phase

In [None]:
# Dimensions of the input feature file; they match the literals passed to
# ReadBinary for data_x.bin in the next cell (features per sample, sample count).
IN_FEATURES = 364
NUM_SAMPLES = 75266

In [None]:
# Load the input feature matrix, sized by the constants defined above
# rather than by repeated literals.
data = ReadBinary(r"C:\DEV\DATASETS\100STYLE_Preprocessed\data_x.bin", NUM_SAMPLES, IN_FEATURES)
print(data.dtype)
data.shape

In [None]:
# Check for NaN anywhere in the input data (generated by animhost).
# argwhere over the whole matrix yields one (row, column) pair per NaN entry.
nan_indices = np.argwhere(np.isnan(data))
if nan_indices.size > 0:
    print("Array contains NaN values at indices:")
    for row_idx, col_idx in nan_indices:
        print(f"  Row: {row_idx}, Column: {col_idx}")
else:
    print("Array does not contain NaN values.")

In [None]:
# Load the phase parameter file into a NumPy array (space-delimited by default).
phaseData = read_csv_style_data(r"C:\DEV\AI4Animation\AI4Animation\SIGGRAPH_2022\PyTorch\PAE\Training\Parameters_10.txt")

In [None]:
# Load the sequence table that accompanies the phase parameters (space-delimited, no header).
sequence = read_csv_data(r"C:\DEV\AI4Animation\AI4Animation\SIGGRAPH_2022\PyTorch\PAE\Dataset\Sequences.txt")

In [None]:
# Name the five columns of the sequence table, then display it.
sequence.columns = ["SeqId","Frame","Type", "File","SeqUUID"]
sequence

In [None]:
# Column names: all phase values first, then frequencies, amplitudes and
# offsets, each group holding one entry per phase channel.
header = [
    f"{prefix}{i+1}"
    for prefix in ("PhaseValue", "PhaseFreq", "PhaseAmp", "PhaseOff")
    for i in range(NUM_PHASE_CHANNEL)
]

# Label the raw phase array and put the sequence columns in front of it.
phaseData = pd.DataFrame(phaseData, columns=header)
phaseData = pd.concat([sequence, phaseData], axis=1)

In [None]:
# Compute the per-row 2D phase vectors and stack them vertically.
per_row_vectors = phaseData.apply(calc_stuff, axis=1).to_numpy()
df_phase_values = np.vstack(per_row_vectors)
print(df_phase_values.shape)
# Fold the stacked vectors back into one row per sample.
target_shape = (phaseData.shape[0], NUM_PHASE_CHANNEL * 2)
df_phase_values = df_phase_values.reshape(target_shape)
print(df_phase_values.shape)

In [None]:
x_names = [f"Phase2D_X_{i+1}" for i in range(NUM_PHASE_CHANNEL)]
y_names = [f"Phase2D_Y_{i+1}" for i in range(NUM_PHASE_CHANNEL)]
columns = x_names + y_names

# Interleave the names so each X label is directly followed by its Y label.
flattened_columns = []
for x_name, y_name in zip(x_names, y_names):
    flattened_columns.extend((x_name, y_name))
flattened_columns

In [None]:
# Wrap the 2D phase matrix in a DataFrame using the interleaved X/Y labels, then display it.
df2DPhaseValues = pd.DataFrame(df_phase_values, columns=flattened_columns)
df2DPhaseValues

In [None]:
# Append the 2D phase columns to the phase table (column-wise), then display the result.
dfPhaseData = pd.concat([phaseData, df2DPhaseValues], axis=1)
dfPhaseData

In [None]:
# Read InputData
# The first metadata row supplies the input feature labels; entry 0 is skipped
# and the following IN_FEATURES entries are used as column names.
labels = read_csv_data(r"C:\DEV\DATASETS\100STYLE_Preprocessed\metadata.txt", ",")
row = labels.iloc[0]
start = 1
end = IN_FEATURES + 1
label_input = list(row.iloc[start:end])
dfInputData = pd.DataFrame(data, columns=label_input)

# Prepend the sequence bookkeeping columns to the feature columns.
seqInputData = read_csv_data(r"C:\DEV\DATASETS\100STYLE_Preprocessed\sequences_mann.txt")
seqInputData.columns = ["SeqId","Frame","Type", "File","SeqUUID"]
dfInputData = pd.concat([seqInputData, dfInputData], axis=1)

In [None]:
# Display the column labels of the assembled input frame.
dfInputData.columns

In [None]:
def get_window_values(row, phaseValues, selected_columns, window_size=1):
    """
    Gather the selected columns of `phaseValues` in a window around `row`.

    `row.name` is unpacked as `(seq_id, frame)`. The window covers the index
    labels from `frame - window_size` (never below 0) up to
    `frame + window_size` within the same `seq_id`.

    Returns:
    - 1-D array with the windowed values flattened row by row.
    """
    seq_id, frame = row.name
    first_label = (seq_id, max(0, frame - window_size))
    last_label = (seq_id, frame + window_size)

    window = phaseValues.loc[first_label:last_label]
    # Progress output on every 100th frame only.
    if frame % 100 == 0:
        print(f"Progress: {seq_id}/{frame}", end='\r')
    picked = window[selected_columns]
    return picked.values.flatten()

In [None]:
# Index both frames by (SeqId, Frame) in place so the window lookup can slice by these labels.
dfPhaseData.set_index(['SeqId', 'Frame'], inplace=True)
dfInputData.set_index(['SeqId', 'Frame'], inplace=True)

In [None]:
# Collect the 2D phase columns and extract, for every input row, a window of
# them with a window size of 6.
selected_columns = list(filter(lambda name: "Phase2D_" in name, dfPhaseData.columns))
window_args = (dfPhaseData, selected_columns, 6)
out = dfInputData.apply(get_window_values, axis=1, args=window_args)


In [None]:
# Inspect the length of the flattened window produced for the second row.
out.values[1].shape

In [None]:
# Attach the per-row window arrays to the input frame.
dfInputDataMrg = pd.merge(dfInputData, out.to_frame(), on=['SeqId','Frame'],how='inner')
dfInputDataMrg.rename(columns={0: 'PhaseSpace'}, inplace=True)
# Spread every window array over its own set of columns.
df_expanded = dfInputDataMrg['PhaseSpace'].apply(pd.Series)
# Name the columns from the expanded frame's real width: the first row's window
# can be shorter than the others (its start is clamped at frame 0), so its
# length is not a reliable column count.
df_expanded.columns = [f"PhaseSpace-{i+1}" for i in range(df_expanded.shape[1])]

In [None]:
# Display the expanded PhaseSpace columns.
df_expanded

In [None]:
# Join the expanded window columns onto the merged frame; the packed
# 'PhaseSpace' array column is no longer needed afterwards.
combined_input = pd.concat([dfInputDataMrg, df_expanded], axis=1)
dfInputDataExp = combined_input.drop(columns='PhaseSpace')

In [None]:
# Display the final input frame including the PhaseSpace columns.
dfInputDataExp

In [None]:
fig = go.Figure()

# Extract columns that start with 'PhaseSpace-'
phasespace_columns = [col for col in dfInputDataExp.columns if col.startswith('PhaseSpace-')]
# Split the columns by the parity of the number at the end of their name
even_indices = [col for col in phasespace_columns if int(col.split('-')[-1]) % 2 == 0]
odd_indices = [col for col in phasespace_columns if int(col.split('-')[-1]) % 2 != 0]

# One initially hidden trace per row, for (at most) the first 30 rows
trace_count = min(30, len(dfInputDataExp))
for step in range(trace_count):
    row = dfInputDataExp.iloc[step]

    # Even-numbered columns go on the y axis, odd-numbered columns on the x axis
    p_y = row[even_indices].values.flatten()
    p_x = row[odd_indices].values.flatten()

    fig.add_trace(
            go.Scatter(
                visible=False,
                mode='markers',
                name="𝜈 = " + str(step),
                x=p_x,
                y=p_y))

# Trace shown on first render; the slider below must start on the same index,
# otherwise its handle points at a different step than the visible trace.
initial_trace = 0
if fig.data:
    fig.data[initial_trace].visible = True

steps = []
for i in range(len(fig.data)):
    visibility = [False] * len(fig.data)
    visibility[i] = True  # Toggle i'th trace to "visible"
    slider_step = dict(
        method="update",
        args=[{"visible": visibility},
              {"title": "Step: " + str(i)}],  # layout attribute
    )
    steps.append(slider_step)

sliders = [dict(
    active=initial_trace,
    steps=steps
)]

fig.update_layout(
    sliders=sliders
)
# NOTE(review): update_scenes targets 3D scene objects; this figure only holds
# 2D scatter traces, so this call likely has no effect — confirm it is needed.
fig.update_scenes(aspectmode='cube',aspectratio=dict(x=1, y=1))

fig.update_layout(
    title = "2D Phase Vector (12 Frame Window)",
    xaxis=dict(range=[-5, 5]),
    yaxis=dict(range=[-5, 5]),
    autosize=False,
    width=500,
    height=600,
    margin=dict(
        l=50,
        r=50,
        b=100,
        t=100,
        pad=4
    ),
)

fig.show()

In [None]:
# Export the figure to HTML.
# NOTE(review): include_plotlyjs=False leaves the plotly.js script out of the file,
# so whatever page hosts it must load plotly.js itself — confirm this is intended.
fig.write_html("file_phase.html", include_plotlyjs=False)

### Build Output Features

In [None]:
# Dimensions of the output feature file; they match the literals passed to
# ReadBinary for data_y.bin in the next cell. NUM_SAMPLES is re-assigned to the
# same value used for the input data.
OUT_FEATURES = 319
NUM_SAMPLES = 75266

In [None]:
# Load the output feature matrix, sized by the constants defined above
# rather than by repeated literals.
data_out = ReadBinary(r"C:\DEV\DATASETS\100STYLE_Preprocessed\data_y.bin", NUM_SAMPLES, OUT_FEATURES)

In [None]:
# Load the metadata file and pick its second row.
# NOTE(review): the following cell repeats both statements, so this cell looks redundant.
labels =read_csv_data(r"C:\DEV\DATASETS\100STYLE_Preprocessed\metadata.txt",",")
row = labels.iloc[1]

In [None]:
# The second metadata row supplies the output feature labels; entry 0 is
# skipped and the following OUT_FEATURES entries are used as column names.
labels = read_csv_data(r"C:\DEV\DATASETS\100STYLE_Preprocessed\metadata.txt", ",")
row = labels.iloc[1]
start = 1
label_output = list(row.iloc[start:OUT_FEATURES + 1])
dfOutputData = pd.DataFrame(data_out, columns=label_output)

# Prepend the sequence bookkeeping columns to the feature columns.
seqOutData = read_csv_data(r"C:\DEV\DATASETS\100STYLE_Preprocessed\sequences_mann.txt")
seqOutData.columns = ["SeqId","Frame","Type", "File","SeqUUID"]
dfOutputData = pd.concat([seqOutData, dfOutputData], axis=1)

In [None]:
# Display the assembled output frame.
dfOutputData

In [None]:
# Remove the column limit for displayed DataFrames (the same option is also set near the top).
pd.options.display.max_columns = None

In [None]:
# Index the phase and output frames by (SeqId, Frame) in place.
# NOTE(review): dfPhaseData is already indexed by these columns in an earlier cell
# (before the input window extraction). Calling set_index on it again raises
# KeyError when the notebook runs top to bottom, so the second line would not
# execute. The export cell drops 'SeqId' and 'Frame' from dfOutDataExp as ordinary
# columns, which suggests downstream code expects dfOutputData to stay un-indexed —
# confirm the intended state before changing this cell.
dfPhaseData.set_index(['SeqId', 'Frame'], inplace=True)
dfOutputData.set_index(['SeqId', 'Frame'], inplace=True)

In [None]:
# Group the phase table's columns by name fragment, then chain the groups:
# 2D phase columns first, amplitudes second, frequencies last.
phase_column_names = list(dfPhaseData.columns)
select_phase2d = [name for name in phase_column_names if "Phase2D_" in name]
select_amplitude = [name for name in phase_column_names if "PhaseAmp" in name]
select_freq = [name for name in phase_column_names if "PhaseFreq" in name]
select_combined = [*select_phase2d, *select_amplitude, *select_freq]

In [None]:
def get_window_values_future(row, phaseValues, selected_columns, window_size=6):
    """
    Gather the selected columns of `phaseValues` for the frames after `row`.

    `row.name` is unpacked as `(seq_id, frame)`. The window covers the index
    labels from `frame + 1` up to `frame + window_size` within the same
    `seq_id`; the row's own frame is not part of it.

    Returns:
    - 1-D array with the windowed values flattened row by row.
    """
    seq_id, frame = row.name
    first_label = (seq_id, frame + 1)
    last_label = (seq_id, frame + window_size)

    window = phaseValues.loc[first_label:last_label]
    # Progress output on every 100th frame only.
    if frame % 100 == 0:
        print(f"Progress: {seq_id}/{frame}", end='\r')
    picked = window[selected_columns]
    return picked.values.flatten()

In [None]:
# For every row of the input frame, gather the combined phase columns of the
# following frames (window size 6), then inspect the second row's length.
future_args = (dfPhaseData, select_combined, 6)
out = dfInputData.apply(get_window_values_future, axis=1, args=future_args)
out.values[1].shape

In [None]:
# Attach the per-row future-window arrays to the output frame.
dfOutDataMrg = pd.merge(dfOutputData, out.to_frame(), on=['SeqId','Frame'],how='inner')
dfOutDataMrg.rename(columns={0: 'PhaseUpdate'}, inplace=True)
# Spread every window array over its own set of columns.
dfOutPhaseUpdate = dfOutDataMrg['PhaseUpdate'].apply(pd.Series)
# Name the columns from the expanded frame's real width instead of the length
# of the first row's window, which need not match when window lengths differ.
dfOutPhaseUpdate.columns = [f"PhaseUpdate-{i+1}" for i in range(dfOutPhaseUpdate.shape[1])]

In [None]:
# Display the expanded PhaseUpdate columns.
dfOutPhaseUpdate

In [None]:
# Join the expanded window columns onto the merged frame; the packed
# 'PhaseUpdate' array column is no longer needed afterwards.
combined_output = pd.concat([dfOutDataMrg, dfOutPhaseUpdate], axis=1)
dfOutDataExp = combined_output.drop(columns='PhaseUpdate')

In [None]:

# Display the final output frame including the PhaseUpdate columns.
dfOutDataExp

## Export Data

In [None]:
# Keep only the feature columns for export.
IN = dfInputDataExp.drop(["Type", "File","SeqUUID"], axis=1)
folder = "../data/"

# All feature values as one flat float32 array (row-major order)
flat_in = IN.to_numpy(dtype=np.float32).flatten()

# Per-column statistics; a standard deviation of 0 is replaced by 1
IN_mn =  IN.mean().to_numpy(dtype=np.float32).flatten()
IN_std =  IN.std().replace(0, 1).to_numpy(dtype=np.float32).flatten()

# Save the array to a binary file. tofile writes the raw native float32 bytes
# directly, avoiding a Python float object per value and a struct.pack call
# with one argument per element.
with open(folder +'Input.bin', 'wb') as file:
    flat_in.tofile(file)

# One "[index] name" line per exported column
with open(folder +'InputLabels.txt', 'w') as file:
    file.writelines(f"[{idx}] {col}\n" for idx, col in enumerate(IN.columns))

# Row count on the first line, column count on the second
with open(folder +'InputShape.txt', 'w') as file:
    file.write(f"{len(IN)}\n{len(IN.columns)}")

# Means on the first line, standard deviations on the second
with open(folder +'InputNormalization.txt', 'w') as file:
    file.write(" ".join(map(str, IN_mn)) + "\n")
    file.write(" ".join(map(str, IN_std)) + "\n")


In [None]:
# Keep only the feature columns for export.
OUT = dfOutDataExp.drop(["SeqId","Frame","Type", "File","SeqUUID"], axis=1)

# All feature values as one flat float32 array (row-major order)
flat_out = OUT.to_numpy(dtype=np.float32).flatten()

# Per-column statistics; a standard deviation of 0 is replaced by 1
OUT_mn =  OUT.mean().to_numpy(dtype=np.float32).flatten()
OUT_std =  OUT.std().replace(0, 1).to_numpy(dtype=np.float32).flatten()

# Save the array to a binary file. tofile writes the raw native float32 bytes
# directly, avoiding a Python float object per value and a struct.pack call
# with one argument per element.
with open(folder +'Output.bin', 'wb') as file:
    flat_out.tofile(file)

# One "[index] name" line per exported column
with open(folder +'OutputLabels.txt', 'w') as file:
    file.writelines(f"[{idx}] {col}\n" for idx, col in enumerate(OUT.columns))

# Row count on the first line, column count on the second
with open(folder +'OutputShape.txt', 'w') as file:
    file.write(f"{len(OUT)}\n{len(OUT.columns)}")

# Means on the first line, standard deviations on the second
with open(folder +'OutputNormalization.txt', 'w') as file:
    file.write(" ".join(map(str, OUT_mn)) + "\n")
    file.write(" ".join(map(str, OUT_std)) + "\n")


In [None]:
# Per-column mean and standard deviation of the export frame (zero deviations
# become 1), written as two space-separated lines.
mn = IN.mean()
std = IN.std().replace(0, 1)
print(mn)
stat_lines = [" ".join(map(str, stats)) + "\n" for stats in (mn, std)]
with open('fabulous_stats.txt', 'w') as file:
    file.writelines(stat_lines)

In [None]:
# Display the exported input frame.
IN

In [None]:
# NOTE(review): the Styler returned by set_table_styles is neither assigned nor
# displayed, so only the IN.iloc[:3] expression on the last line produces output.
IN.style.set_table_styles([
    {'selector': 'div',
     'props': [('max-width', '800px'), ('overflow-x', 'auto')]}
])
IN.iloc[:3]

In [None]:
# Display the first three rows of the exported input frame.
IN.iloc[:3]

## Inference

In [None]:
# Path of the ONNX model file loaded by the inference session below.
model_path = r"C:\DEV\AI4Animation\AI4Animation\SIGGRAPH_2022\PyTorch\GNN\Training\144.onnx"

In [None]:
#onnx setup
session = onnxruntime.InferenceSession(model_path)
inputs = session.get_inputs()
outputs = session.get_outputs()

In [None]:
# Print name and shape of every model input, then the name of every output.
# Plain loops are used because the prints are side effects, not list elements.
for node in inputs:
    print(f"{node.name},{node.shape}")
for node in outputs:
    print(node.name)
"ok"

In [None]:
# Take row 17000 of the export frame as a single float32 sample of shape (1, n_features),
# then display that shape.
xin = IN.iloc[17000].to_numpy(dtype=np.float32).reshape(1,-1)
xin.shape

In [None]:
# Time before code execution
start_time = time.time()

# Your fabulous code goes here
result = session.run(["Y","W"], {"X" : xin})

# Time after code execution
end_time = time.time()
delta_time = end_time - start_time
print(f"Code execution took {delta_time:.4f} seconds. Werk it, queen!")

In [None]:
# First requested output ("Y") of the inference call; display its shape.
yout = result[0]
yout.shape

In [None]:
# Display the labels of dfOutDataExp after its first five columns.
dfOutDataExp.columns[5:]

In [None]:
# Label the inference output with the dfOutDataExp column names that follow the first five.
dfInfRes = pd.DataFrame(yout, columns=dfOutDataExp.columns[5:])

In [None]:
# Display the labelled inference result.
dfInfRes

In [None]:
# Scatter the delta_x / delta_y columns of the inference result, then overlay
# the out_root_pos_x_/out_root_pos_y_ columns numbered 6 to 11 as "x" markers.
fig = px.scatter(dfInfRes, x="delta_x", y="delta_y")

for idx in range(6, 12):
    fig.add_scatter(
        x=dfInfRes[f"out_root_pos_x_{idx}"],
        y=dfInfRes[f"out_root_pos_y_{idx}"],
        mode="markers",
        marker_symbol="x",
    )

# Shared marker styling for every trace drawn in 'markers' mode
marker_style = dict(
    size=12,
    opacity=0.5,
    line=dict(width=2, color='DarkSlateGrey'),
)
fig.update_traces(marker=marker_style, selector=dict(mode='markers'))
fig.update_layout(width=800, height=800, title="fixed-ratio axes")
# Tie the y axis scale to the x axis at a 1:1 ratio
fig.update_yaxes(scaleanchor="x", scaleratio=1)

fig.show()

In [None]:
# Pull the out_jpos_x_/y_/z_ column groups from the inference result ...
x_columns, y_columns, z_columns = (
    dfInfRes.filter(like=f"out_jpos_{axis}_").values for axis in ("x", "y", "z")
)

# ... and flatten each group to a 1-D array
x_array, y_array, z_array = (
    group.flatten() for group in (x_columns, y_columns, z_columns)
)

In [None]:
# Entries of row 17000 whose label contains jpos_x_/y_/z_, one flat array per axis
sample_row = IN.iloc[17000]
inx, iny, inz = (
    sample_row.filter(like=f"jpos_{axis}_").values.flatten() for axis in ("x", "y", "z")
)


In [None]:
scatter_data = pd.DataFrame({'X': x_array, 'Y': y_array, 'Z': z_array})

# Overall extremes across the three coordinate columns; used as a common axis range
coords = scatter_data[['X', 'Y', 'Z']]
max_range = max(coords.max())
min_range = min(coords.min())

# The Y and Z columns are swapped between the plot's y and z axes
axis_labels = {'X': 'X Values', 'Y': 'Y Values', 'Z': 'Z Values'}
fig = px.scatter_3d(scatter_data, x='X', y='Z', z='Y', title='Out Joint Positions', labels=axis_labels)
fig.update_traces(name='Out Joint Positions', showlegend = True)
fig.add_scatter3d(x=inx, y=inz, z=iny, mode="markers", name='In Joint Positions')

# Same range on all three axes, drawn as a cube
shared_range = [min_range, max_range]
fig.update_layout(
    scene=dict(
        aspectmode="cube",
        xaxis=dict(range=shared_range),
        yaxis=dict(range=shared_range),
        zaxis=dict(range=shared_range),
    )
)

fig.update_layout(width=800, height=800, title="Joint Position In & Out (Next Frame)")
fig.show()

In [None]:
# Export the joint position figure to HTML.
# NOTE(review): include_plotlyjs=False leaves the plotly.js script out of the file,
# so whatever page hosts it must load plotly.js itself — confirm this is intended.
fig.write_html("file.html", include_plotlyjs=False)