# utils 

> Utility module for data I/O, numerics, etc.
> For example, it generates sample pandas objects (Series and DataFrames) for testing purposes.

In [None]:
#| default_exp utils 

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import numpy as np
import pandas as pd
from datetime import datetime
from functools import reduce


## Generate state

In [None]:
#| export
def generate_state() -> pd.Series:
    """
    Generate a sample ``state`` Series for testing purposes.

    Four samples spaced 20 ms apart, starting at the current time, are built
    with ``velocity`` 0..3, ``thrust`` 4..7 and ``brake`` 8..11, then stacked
    into one long Series.

    Returns:
        pd.Series: named ``"state"``, with a sorted two-level MultiIndex
        ``("rows", "idx")``. ``rows`` is one of ``brake``, ``thrust``,
        ``timestep``, ``velocity``; ``idx`` is the sample position 0..3.
        Values are a mix of timestamps and integers.
    """
    ts = pd.to_datetime(datetime.now())
    ts_ind1 = ts + pd.to_timedelta(np.arange(0, 4 * 20, 20), "ms")
    s = np.arange(12)
    df_ss = pd.DataFrame(
        {"timestep": ts_ind1, "velocity": s[:4], "thrust": s[4:8], "brake": s[8:]}
    )
    # stack() produces (sample position, column label); swap the levels so the
    # column label becomes the outer level.
    state = df_ss.stack().swaplevel(0, 1)
    state.name = "state"
    state.index.names = ["rows", "idx"]
    state.sort_index(inplace=True)
    return state

In [None]:
#| hide
# Run the generator so the notebook displays an example of its output.
generate_state()

rows      idx
brake     0                               8
          1                               9
          2                              10
          3                              11
thrust    0                               4
          1                               5
          2                               6
          3                               7
timestep  0      2023-12-08 17:21:15.079054
          1      2023-12-08 17:21:15.099054
          2      2023-12-08 17:21:15.119054
          3      2023-12-08 17:21:15.139054
velocity  0                               0
          1                               1
          2                               2
          3                               3
Name: state, dtype: object

## Generate action

In [None]:
#| export
def generate_action() -> pd.Series:
    """
    Generate a sample ``action`` Series for testing purposes.

    The action is assembled from columns of different lengths:

    * ``r0``, ``r1``, ``r2``: five integers each (12..16, 17..21, 22..26),
    * ``timestep``: three timestamps 20 ms apart, starting 100 ms after now,
    * ``speed``: three values evenly spaced from 40 to 60,
    * ``throttle``: five values evenly spaced from 0 to 1.

    The columns are aligned on their integer position, stacked into one long
    Series, and the padding created by the shorter columns is dropped.

    Returns:
        pd.Series: named ``"action"``, with a sorted two-level MultiIndex
        ``("rows", "idx")``.
    """
    n_state_values = 12  # action values are numbered right after these
    ts = pd.to_datetime(datetime.now())

    rows_df = pd.DataFrame(
        (n_state_values + np.arange(15)).reshape(3, 5).transpose(),
        columns=[f"r{i}" for i in range(3)],
    )
    ts_ser = pd.Series(
        ts + pd.to_timedelta(np.arange(5 * 20, 8 * 20, 20), "ms"), name="timestep"
    )
    speed_ser = pd.Series(np.linspace(40, 60, 3), name="speed")
    throttle_ser = pd.Series(np.linspace(0, 1.0, 5), name="throttle")

    action = (
        # Outer-join all pieces on their index; shorter columns get NaN/NaT.
        pd.concat([rows_df, ts_ser, speed_ser, throttle_ser], axis=1)
        .stack()
        # Drop the padding explicitly instead of relying on the legacy
        # stack() default, which removes missing values implicitly.
        .dropna()
        .swaplevel(0, 1)
        .sort_index()
    )

    action.name = "action"
    action.index.names = ["rows", "idx"]
    return action

In [None]:
#| hide
# Run the generator so the notebook displays an example of its output.
generate_action()

rows      idx
r0        0                              12
          1                              13
          2                              14
          3                              15
          4                              16
r1        0                              17
          1                              18
          2                              19
          3                              20
          4                              21
r2        0                              22
          1                              23
          2                              24
          3                              25
          4                              26
speed     0                            40.0
          1                            50.0
          2                            60.0
throttle  0                             0.0
          1                            0.25
          2                             0.5
          3                            0.75
          4       

## Generate reward

In [None]:
#| export

def generate_reward() -> pd.Series:
    """
    Generate a sample ``reward`` Series for testing purposes.

    The reward holds a single ``work`` value (27, the number of sample state
    values plus the number of sample action values) and a single ``timestep``
    set to the current time.

    Returns:
        pd.Series: named ``"reward"``, with a sorted two-level MultiIndex
        ``("rows", "idx")`` containing ``("timestep", 0)`` and ``("work", 0)``.
    """
    n_state_values = 12
    n_action_values = 15
    ts = pd.to_datetime(datetime.now())
    ts_ind1 = ts + pd.to_timedelta(np.arange(0, 4 * 20, 20), "ms")
    reward = (
        pd.DataFrame(
            {"work": n_state_values + n_action_values, "timestep": ts_ind1[0]},
            index=[0],
        )
        .stack()
        .swaplevel(0, 1)
        .sort_index()
    )
    reward.index.names = ["rows", "idx"]
    reward.name = "reward"
    return reward


In [None]:
#| hide
# Run the generator so the notebook displays an example of its output.
generate_reward()

rows      idx
timestep  0      2023-12-08 17:21:15.110153
work      0                              27
Name: reward, dtype: object

## Generate nstate

In [None]:
#| export
def generate_nstate() -> pd.Series:
    """
    Generate a sample ``nstate`` (next state) Series for testing purposes.

    Same layout as the sample state, but the timestamps start 5 s after the
    current time and the integer values continue the numbering after the
    sample state (12 values), action (15 values) and reward (1 non-timestamp
    value), i.e. ``velocity`` 28..31, ``thrust`` 32..35, ``brake`` 36..39.

    Returns:
        pd.Series: named ``"nstate"``, with a sorted two-level MultiIndex
        ``("rows", "idx")``.
    """
    n_state_values = 12
    n_action_values = 15
    ts = pd.to_datetime(datetime.now())
    reward = generate_reward()

    ts_ind = (
        ts
        + pd.to_timedelta(5, "s")
        + pd.to_timedelta(np.arange(0, 4 * 20, 20), "ms")
    )
    # "- 1" excludes the timestamp entry in reward from the running count.
    s = np.arange(12) + n_state_values + n_action_values + len(reward) - 1
    nstate = (
        pd.DataFrame(
            {"timestep": ts_ind, "velocity": s[:4], "thrust": s[4:8], "brake": s[8:]}
        )
        .stack()
        .swaplevel(0, 1)
        .sort_index()
    )
    nstate.name = "nstate"
    nstate.index.names = ["rows", "idx"]
    return nstate

In [None]:
#| hide
# Run the generator so the notebook displays an example of its output.
generate_nstate()

rows      idx
brake     0                              36
          1                              37
          2                              38
          3                              39
thrust    0                              32
          1                              33
          2                              34
          3                              35
timestep  0      2023-12-08 17:21:20.123105
          1      2023-12-08 17:21:20.143105
          2      2023-12-08 17:21:20.163105
          3      2023-12-08 17:21:20.183105
velocity  0                              28
          1                              29
          2                              30
          3                              31
Name: nstate, dtype: object

## Generate observation

In [None]:
#| export
def generate_observation()-> list[pd.Series]:
    """
    Build five sample observations for testing purposes.

    One observation is a single Series that concatenates a timestamp with the
    sample state, action, reward and nstate. Its index has three levels: the
    name of the originating Series, then that Series' own ("rows", "idx")
    labels. The timestamp entry is labelled ``("timestamp", "", 0)``.

    Returns:
        list[pd.Series]: five copies of the observation whose timestamp entry
        is set to the current time plus 1, 2, 3, 4 and 5 hours respectively.
    """
    now = pd.to_datetime(datetime.now())
    parts = [
        generate_state(),
        generate_action(),
        generate_reward(),
        generate_nstate(),
    ]

    # Give the timestamp the same two-level index shape as the other parts so
    # that they can all be concatenated.
    stamp = pd.Series([now], name="timestamp")
    stamp.index = pd.MultiIndex.from_product(
        [stamp.index, [0]], names=["rows", "idx"]
    )

    # Prefix every label with the name of the Series it comes from.
    labels = [(stamp.name, "", 0)]
    for part in parts:
        labels.extend((part.name, *key) for key in part.index)

    template = pd.concat([stamp, *parts])
    template.index = pd.MultiIndex.from_tuples(labels)

    observations = []
    for hours in range(1, 6):
        shifted = template.copy()
        shifted.loc["timestamp", "", 0] = now + pd.Timedelta(hours, "h")
        observations.append(shifted)
    return observations


In [None]:
#| hide
# Run the generator so the notebook displays an example of its output.
generate_observation()

[timestamp            0    2023-12-08 18:21:15.140414
 state      brake     0                             8
                      1                             9
                      2                            10
                      3                            11
                                      ...            
 nstate     timestep  3    2023-12-08 17:21:20.205199
            velocity  0                            28
                      1                            29
                      2                            30
                      3                            31
 Length: 61, dtype: object,
 timestamp            0    2023-12-08 19:21:15.140414
 state      brake     0                             8
                      1                             9
                      2                            10
                      3                            11
                                      ...            
 nstate     timestep  3    2023-12-08 17:21:20.205199


## Generate MultiIndex DataFrame

In [None]:
#| export
def generate_df_multiindex()->pd.DataFrame:
    """
    Generate a sample episode DataFrame with MultiIndex columns for testing.

    Each sample observation becomes one row. The observation's timestamp entry
    is moved into the row index (named ``"timestamp"``); the remaining entries
    form sorted three-level columns ``("qtuple", "rows", "idx")``. All
    non-timestep columns are cast to ``float``.

    Returns:
        pd.DataFrame: one row per observation, indexed by ``timestamp``.
    """
    # One observation per column, then transpose so each becomes a row.
    episode = pd.concat(generate_observation(), axis=1).transpose()
    episode.columns.names = ["qtuple", "rows", "idx"]

    episode = episode.set_index(("timestamp", "", 0))
    episode.sort_index(axis=1, inplace=True)
    episode.index.name = "timestamp"

    # Numeric quantities per qtuple; the "timestep" rows keep their timestamps.
    float_rows = {
        "action": ["r0", "r1", "r2", "speed", "throttle"],
        "state": ["brake", "thrust", "velocity"],
        "reward": ["work"],
        "nstate": ["brake", "thrust", "velocity"],
    }
    for qtuple, rows in float_rows.items():
        for row in rows:
            # float16 not allowed in parquet
            episode[qtuple, row] = episode[qtuple, row].astype("float")
    return episode


In [None]:
#| hide
# Run the generator so the notebook displays an example of its output.
generate_df_multiindex()

qtuple,action,action,action,action,action,action,action,action,action,action,...,state,state,state,state,state,state,state,state,state,state
rows,r0,r0,r0,r0,r0,r1,r1,r1,r1,r1,...,thrust,thrust,timestep,timestep,timestep,timestep,velocity,velocity,velocity,velocity
idx,0,1,2,3,4,0,1,2,3,4,...,2,3,0,1,2,3,0,1,2,3
timestamp,Unnamed: 1_level_3,Unnamed: 2_level_3,Unnamed: 3_level_3,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3
2023-12-08 18:21:15.165397,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,...,6.0,7.0,2023-12-08 17:21:15.165555,2023-12-08 17:21:15.185555,2023-12-08 17:21:15.205555,2023-12-08 17:21:15.225555,0.0,1.0,2.0,3.0
2023-12-08 19:21:15.165397,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,...,6.0,7.0,2023-12-08 17:21:15.165555,2023-12-08 17:21:15.185555,2023-12-08 17:21:15.205555,2023-12-08 17:21:15.225555,0.0,1.0,2.0,3.0
2023-12-08 20:21:15.165397,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,...,6.0,7.0,2023-12-08 17:21:15.165555,2023-12-08 17:21:15.185555,2023-12-08 17:21:15.205555,2023-12-08 17:21:15.225555,0.0,1.0,2.0,3.0
2023-12-08 21:21:15.165397,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,...,6.0,7.0,2023-12-08 17:21:15.165555,2023-12-08 17:21:15.185555,2023-12-08 17:21:15.205555,2023-12-08 17:21:15.225555,0.0,1.0,2.0,3.0
2023-12-08 22:21:15.165397,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,...,6.0,7.0,2023-12-08 17:21:15.165555,2023-12-08 17:21:15.185555,2023-12-08 17:21:15.205555,2023-12-08 17:21:15.225555,0.0,1.0,2.0,3.0


## Prepend the index levels "vehicle", "driver" and "episodestart" to the DataFrame object

In [None]:
#| export
from eos.data_io.config import drivers_by_id, trucks_by_id

2023-12-08 17:21:15.369829: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-08 17:21:15.369869: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-08 17:21:15.369876: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-08 17:21:15.375329: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-12-08 17:21:17.599627: I te

In [None]:
#|export
def generate_eos_df()->pd.DataFrame:
    """
    Generate a sample EOS episode DataFrame for testing purposes.

    Takes the sample MultiIndex-column episode frame and prepends three row
    index levels in front of ``timestamp``: ``vehicle`` (the ``vid`` of truck
    "VB7"), ``driver`` (the ``pid`` of driver "wang-cheng") and
    ``episodestart`` (the current time).

    Returns:
        pd.DataFrame: rows indexed by the sorted MultiIndex
        ``("vehicle", "driver", "episodestart", "timestamp")``.
    """
    dfs_episode = generate_df_multiindex()

    # NOTE(review): the original also computed `ts - 1h` into an unused
    # `episodestart` local; the level has always been keyed by `ts` itself.
    # Confirm which value is intended before changing it.
    ts = pd.to_datetime(datetime.now())

    # Concatenating a single frame under one key prepends one outer level.
    outer_levels = (
        ("driver", drivers_by_id["wang-cheng"].pid),
        ("vehicle", trucks_by_id["VB7"].vid),
        ("episodestart", ts),
    )
    for level_name, key in outer_levels:
        dfs_episode = pd.concat([dfs_episode], keys=[key], names=[level_name])

    dfs_episode = dfs_episode.reorder_levels(
        ["vehicle", "driver", "episodestart", "timestamp"], axis=0
    )
    dfs_episode.sort_index(inplace=True)
    return dfs_episode


In [None]:
#|hide
# Run the generator so the notebook displays an example of its output.
generate_eos_df()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,qtuple,action,action,action,action,action,action,action,action,action,action,...,state,state,state,state,state,state,state,state,state,state
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,rows,r0,r0,r0,r0,r0,r1,r1,r1,r1,r1,...,thrust,thrust,timestep,timestep,timestep,timestep,velocity,velocity,velocity,velocity
Unnamed: 0_level_2,Unnamed: 1_level_2,Unnamed: 2_level_2,idx,0,1,2,3,4,0,1,2,3,4,...,2,3,0,1,2,3,0,1,2,3
vehicle,driver,episodestart,timestamp,Unnamed: 4_level_3,Unnamed: 5_level_3,Unnamed: 6_level_3,Unnamed: 7_level_3,Unnamed: 8_level_3,Unnamed: 9_level_3,Unnamed: 10_level_3,Unnamed: 11_level_3,Unnamed: 12_level_3,Unnamed: 13_level_3,Unnamed: 14_level_3,Unnamed: 15_level_3,Unnamed: 16_level_3,Unnamed: 17_level_3,Unnamed: 18_level_3,Unnamed: 19_level_3,Unnamed: 20_level_3,Unnamed: 21_level_3,Unnamed: 22_level_3,Unnamed: 23_level_3,Unnamed: 24_level_3
VB7,wang-cheng,2023-12-08 17:21:17.817434,2023-12-08 18:21:17.783837,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,...,6.0,7.0,2023-12-08 17:21:17.784184,2023-12-08 17:21:17.804184,2023-12-08 17:21:17.824184,2023-12-08 17:21:17.844184,0.0,1.0,2.0,3.0
VB7,wang-cheng,2023-12-08 17:21:17.817434,2023-12-08 19:21:17.783837,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,...,6.0,7.0,2023-12-08 17:21:17.784184,2023-12-08 17:21:17.804184,2023-12-08 17:21:17.824184,2023-12-08 17:21:17.844184,0.0,1.0,2.0,3.0
VB7,wang-cheng,2023-12-08 17:21:17.817434,2023-12-08 20:21:17.783837,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,...,6.0,7.0,2023-12-08 17:21:17.784184,2023-12-08 17:21:17.804184,2023-12-08 17:21:17.824184,2023-12-08 17:21:17.844184,0.0,1.0,2.0,3.0
VB7,wang-cheng,2023-12-08 17:21:17.817434,2023-12-08 21:21:17.783837,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,...,6.0,7.0,2023-12-08 17:21:17.784184,2023-12-08 17:21:17.804184,2023-12-08 17:21:17.824184,2023-12-08 17:21:17.844184,0.0,1.0,2.0,3.0
VB7,wang-cheng,2023-12-08 17:21:17.817434,2023-12-08 22:21:17.783837,12.0,13.0,14.0,15.0,16.0,17.0,18.0,19.0,20.0,21.0,...,6.0,7.0,2023-12-08 17:21:17.784184,2023-12-08 17:21:17.804184,2023-12-08 17:21:17.824184,2023-12-08 17:21:17.844184,0.0,1.0,2.0,3.0


In [None]:
# Sanity check: the generated EOS frame must have a MultiIndex on its rows.
assert isinstance(generate_eos_df().index, pd.MultiIndex), "dfs_episode.index is not a MultiIndex"

In [None]:
from fastcore.test import *

In [None]:
# Same row-index check as the plain assert, expressed with fastcore's test_eq.
test_eq(isinstance(generate_eos_df().index, pd.MultiIndex), True)

In [None]:
#| hide
# Run nbdev's export step; presumably this writes the `#| export` cells to the
# module named by `default_exp` — verify against the nbdev docs.
import nbdev; nbdev.nbdev_export()