In [15]:
import pandas as pd
import numpy as np
from pandas.api.types import is_numeric_dtype as is_numeric
from pandas.api.types import is_datetime64_any_dtype as is_datetime

In [2]:
size = 32

df = pd.DataFrame({
    'x': np.arange(size),
    'y': np.random.randn(size),
    'z': np.random.randint(0, 10, size),
    'dt': pd.date_range('2021-01-01', '2021-02-02', periods=size),
    's': list('abcdefgh')*4, 
})
df

Unnamed: 0,x,y,z,dt,s
0,0,-1.109149,3,2021-01-01 00:00:00.000000000,a
1,1,0.02456,7,2021-01-02 00:46:27.096774193,b
2,2,1.796598,9,2021-01-03 01:32:54.193548387,c
3,3,0.680598,0,2021-01-04 02:19:21.290322580,d
4,4,1.413756,3,2021-01-05 03:05:48.387096774,e
5,5,-0.701078,5,2021-01-06 03:52:15.483870967,f
6,6,-0.854902,2,2021-01-07 04:38:42.580645161,g
7,7,-2.365971,4,2021-01-08 05:25:09.677419354,h
8,8,0.118245,5,2021-01-09 06:11:36.774193548,a
9,9,-2.029436,5,2021-01-10 06:58:03.870967741,b


In [5]:
coords = np.array([
    ['2021-01-04_11:28:41.7391', '0.4320105703440861'],
    ['2021-01-03_17:59:15.2795', '0.31664139353333337'],
    ['2021-01-03_04:28:19.3789', '0.07541493292903229'],
    ['2021-01-03_07:39:07.8261', '0.01248629103225808'],
    ['2021-01-04_03:31:40.6211', '-0.03995424388172042'],
    ['2021-01-04_17:50:18.6335', '-0.01897802991612902'],
    ['2021-01-05_04:10:26.087' ,'0.09639114689462368'],
    ['2021-01-05_05:45:50.3106', '0.2012722167225807'],
    ['2021-01-04_22:36:31.3043', '0.3271295005161291'],
    ['2021-01-04_15:27:12.2981', '0.3795700354301076'],
    ['2021-01-03_15:36:08.9441', '0.3795700354301076']])

path = "M2021-01-02_05:24:58.1366,1.134713738191398L2021-01-01_09:32:25.3416,1.061296989311828L2021-01-01_06:21:36.8944,0.9773921334494625L2021-01-01_11:07:49.5652,0.8829991706043012L2021-01-02_04:37:16.0248,0.7990943147419356L2021-01-02_16:32:47.7019,0.8200705287075271L2021-01-02_18:08:11.9255,0.924951598535484L2021-01-02_13:21:59.2547,1.0822732032774196L2021-01-02_07:00:22.3602,1.134713738191398L2021-01-01_21:27:57.0186,1.134713738191398Z"

In [13]:
def dt_series_to_unix(s: pd.Series):
    """Converts datetime series to unix int"""

    try:
        s = pd.to_datetime(s)
        print(s)
        return (s - pd.Timestamp("1970-01-01")) // pd.Timedelta("1s")
    
    except:
        raise TypeError 

def path_to_coords(svg_path: str, xtype: str=None) -> np.array:
    """From SVG path to numpy array of coordinates, each row being a (row, col) point"""
    indices_str = [
        pt.replace("M", "").replace("Z", "").replace("_", " ").split(",") for pt in svg_path.split("L")
    ]
    if xtype == "datetime":
        indices_str = [[dt_series_to_unix(pd.to_datetime(e[0])), float(e[1])] for e in indices_str]

    return indices_str# np.array(indices_str, dtype=float)

In [14]:
path_to_coords(path, xtype="datetime")

2021-01-02 05:24:58.136600
2021-01-01 09:32:25.341600
2021-01-01 06:21:36.894400
2021-01-01 11:07:49.565200
2021-01-02 04:37:16.024800
2021-01-02 16:32:47.701900
2021-01-02 18:08:11.925500
2021-01-02 13:21:59.254700
2021-01-02 07:00:22.360200
2021-01-01 21:27:57.018600


[[1609565098, 1.134713738191398],
 [1609493545, 1.061296989311828],
 [1609482096, 0.9773921334494625],
 [1609499269, 0.8829991706043012],
 [1609562236, 0.7990943147419356],
 [1609605167, 0.8200705287075271],
 [1609610891, 0.924951598535484],
 [1609593719, 1.0822732032774196],
 [1609570822, 1.134713738191398],
 [1609536477, 1.134713738191398]]

In [16]:
def check_col_type(s: pd.Series) -> str:
    """Checks pd.Series dtype"""

    if is_numeric(s):
        return "numeric"
    else:
        try:
            pd.to_datetime(s)
            return "datetime"

        except Exception as e:
            return "categorical"