# numerics 

> Numerics module for NumPy- and pandas-related data processing

In [None]:
#| default_exp numerics

In [None]:
#| hide
from nbdev.showdoc import *

In [None]:
#| export
import math
from datetime import datetime
from functools import reduce
from typing import Dict, List, Optional, Tuple, Union, cast
from zoneinfo import ZoneInfo
import numpy as np
import pandas as pd
import tensorflow as tf

2023-12-08 20:06:40.792276: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-08 20:06:40.792311: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-08 20:06:40.792319: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-08 20:06:40.797752: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
#| export
def assemble_state_ser(
    state_columns: pd.DataFrame, tz: ZoneInfo
) -> Tuple[pd.Series, int]:
    """
    Flatten a frame of state signals into one Series and pick a table start row.

    The frame is stacked and its two index levels are swapped, so the result is
    indexed by (rows, idx): `rows` is the original column label and `idx` the
    original row label. Sorting that index stores the measurements of each
    signal contiguously. The model-side order is
    "timestep, velocity, thrust, brake", but because of the sort the output is
    laid out as brake, thrust, timestep, velocity.

    Args:
        state_columns: frame with one column per signal; a "velocity" column
            whose values can be cast to float is required.
        tz: time zone argument; the current implementation does not use it.

    Returns:
        state: the stacked Series, named "state", index names ["rows", "idx"].
        table_row_start: table row chosen from the maximum velocity.
    """
    stacked = state_columns.stack()
    state: pd.Series = cast(pd.Series, stacked.swaplevel(0, 1))
    # Name the series and its index levels, then order by (rows, idx):
    # brake, thrust, timestep, velocity.
    state = state.rename("state").rename_axis(["rows", "idx"]).sort_index()

    peak_velocity = state["velocity"].astype("float").max()

    # Author's notes on the velocity bands of the table (km/h):
    #   0~20; 7~30; 10~40; 20~50; ...
    #   band averages: 10; 18; 25; 35; 45; 55; 65; 75; 85; 95; 105
    # The row is picked by the upper-bound rule, i.e. from the peak velocity.
    # The first test is written as "not <" so that a NaN peak also lands in
    # the last row, exactly like falling through a chain of "<" comparisons.
    if not peak_velocity < 120:
        table_row_start = 16  # cycle reaches 120 km/h or more
    elif peak_velocity < 20:
        table_row_start = 0
    elif peak_velocity < 30:
        table_row_start = 1
    else:
        # one row per 10 km/h from 30 km/h upwards, starting at row 2
        table_row_start = math.floor((peak_velocity - 30) / 10) + 2

    return state, table_row_start

In [None]:
#| hide
from data_io_nbdev.utils import generate_eos_df, generate_state

2023-12-08 20:06:42.233027: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-08 20:06:42.260295: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-12-08 20:06:42.260537: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:894] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [None]:
#| hide
# Build a sample frame with the project helper and display its "state" column group.
df = generate_eos_df()
df["state"]

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,rows,brake,brake,brake,brake,thrust,thrust,thrust,thrust,timestep,timestep,timestep,timestep,velocity,velocity,velocity,velocity
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,idx,0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3
vehicle,driver,episodestart,timestamp,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2
VB7,wang-cheng,2023-12-08 20:06:42.415823,2023-12-08 21:06:42.387738,8.0,9.0,10.0,11.0,4.0,5.0,6.0,7.0,2023-12-08 20:06:42.388113,2023-12-08 20:06:42.408113,2023-12-08 20:06:42.428113,2023-12-08 20:06:42.448113,0.0,1.0,2.0,3.0
VB7,wang-cheng,2023-12-08 20:06:42.415823,2023-12-08 22:06:42.387738,8.0,9.0,10.0,11.0,4.0,5.0,6.0,7.0,2023-12-08 20:06:42.388113,2023-12-08 20:06:42.408113,2023-12-08 20:06:42.428113,2023-12-08 20:06:42.448113,0.0,1.0,2.0,3.0
VB7,wang-cheng,2023-12-08 20:06:42.415823,2023-12-08 23:06:42.387738,8.0,9.0,10.0,11.0,4.0,5.0,6.0,7.0,2023-12-08 20:06:42.388113,2023-12-08 20:06:42.408113,2023-12-08 20:06:42.428113,2023-12-08 20:06:42.448113,0.0,1.0,2.0,3.0
VB7,wang-cheng,2023-12-08 20:06:42.415823,2023-12-09 00:06:42.387738,8.0,9.0,10.0,11.0,4.0,5.0,6.0,7.0,2023-12-08 20:06:42.388113,2023-12-08 20:06:42.408113,2023-12-08 20:06:42.428113,2023-12-08 20:06:42.448113,0.0,1.0,2.0,3.0
VB7,wang-cheng,2023-12-08 20:06:42.415823,2023-12-09 01:06:42.387738,8.0,9.0,10.0,11.0,4.0,5.0,6.0,7.0,2023-12-08 20:06:42.388113,2023-12-08 20:06:42.408113,2023-12-08 20:06:42.428113,2023-12-08 20:06:42.448113,0.0,1.0,2.0,3.0


In [None]:
#| hide
# Take the first record of the sample frame and lay its state signals out as
# columns (one row per idx), in timestep, velocity, thrust, brake order.
state_cols = ["timestep", "velocity", "thrust", "brake"]
first_record = df["state"][state_cols].iloc[0]
# One array per signal, stacked as rows and transposed so signals become columns.
state = pd.DataFrame([first_record[col].values for col in state_cols]).T
state.columns = state_cols
state

Unnamed: 0,timestep,velocity,thrust,brake
0,2023-12-08T20:06:42.388113000,0.0,4.0,8.0
1,2023-12-08T20:06:42.408113000,1.0,5.0,9.0
2,2023-12-08T20:06:42.428113000,2.0,6.0,10.0
3,2023-12-08T20:06:42.448113000,3.0,7.0,11.0


In [None]:
#| hide
# Manual walk-through of the stacking steps: stack the frame, put the column
# label first in the index, name the series and index levels, sort the index.
state_ser = (
    state.stack()
    .swaplevel(0, 1)
    .rename("state")
    .rename_axis(["rows", "idx"])
    .sort_index()
)
state_ser

rows      idx
brake     0                                8.0
          1                                9.0
          2                               10.0
          3                               11.0
thrust    0                                4.0
          1                                5.0
          2                                6.0
          3                                7.0
timestep  0      2023-12-08T20:06:42.388113000
          1      2023-12-08T20:06:42.408113000
          2      2023-12-08T20:06:42.428113000
          3      2023-12-08T20:06:42.448113000
velocity  0                                0.0
          1                                1.0
          2                                2.0
          3                                3.0
Name: state, dtype: object

In [None]:
#| hide
# Time zone handed to assemble_state_ser in the checks that follow.
tz = ZoneInfo("Asia/Shanghai")
# Run the function on the sample `state` frame and keep both returned values.
ser_state, row_start = assemble_state_ser(state, tz)

In [None]:
#| hide
# Display the assembled Series (first element of the returned tuple).
assemble_state_ser(state, tz)[0]

rows      idx
brake     0                                8.0
          1                                9.0
          2                               10.0
          3                               11.0
thrust    0                                4.0
          1                                5.0
          2                                6.0
          3                                7.0
timestep  0      2023-12-08T20:06:42.388113000
          1      2023-12-08T20:06:42.408113000
          2      2023-12-08T20:06:42.428113000
          3      2023-12-08T20:06:42.448113000
velocity  0                                0.0
          1                                1.0
          2                                2.0
          3                                3.0
Name: state, dtype: object

In [None]:
# The sample velocities shown above peak at 3, below the 20 threshold, so the start row is 0.
assert assemble_state_ser(state, tz)[1] == 0

In [None]:
# The first returned element must be a pandas Series; isinstance already
# yields a bool, so it is asserted directly rather than compared to True.
assert isinstance(assemble_state_ser(state, tz)[0], pd.Series)

In [None]:
from fastcore.test import *

In [None]:
# Check with test_eq (presumably from the fastcore.test star import) that the
# first returned element is a pandas Series.
test_eq(isinstance(assemble_state_ser(state, tz)[0], pd.Series), True)

In [None]:
#| hide
# Export the cells marked for export from this notebook via nbdev.
import nbdev

nbdev.nbdev_export()