In [1]:
import numba
import numpy as np
import pandas as pd
from dateutil.parser import parse as dtparse

In [2]:
def pp(start, end, n):
    start_u = start.value//10**9
    end_u = end.value//10**9

    return pd.Series((10**9*np.random.randint(start_u, end_u, n)).view('M8[ns]'))

In [3]:
n = 1_000_000
start = pd.Timestamp(dtparse('01/01/2015'))
end = pd.Timestamp(dtparse('31/12/2019'))
data = pp(start, end, n)

In [4]:
data

0        2015-03-27 12:05:24
1        2019-03-16 17:43:45
2        2018-11-19 23:13:09
3        2015-10-10 04:52:34
4        2016-04-25 16:29:25
                 ...        
999995   2015-07-31 06:42:15
999996   2015-05-14 22:40:48
999997   2019-02-10 07:51:43
999998   2016-08-28 02:09:44
999999   2019-09-15 22:34:19
Length: 1000000, dtype: datetime64[ns]

In [5]:
dom, dow, dim = data.dt.dayofweek, data.dt.days_in_month, data.dt.day
dom.name = 'DayOfMonth'
dow.name = 'DayOfWeek'
dim.name = 'DaysInMonth'
dom = dom.to_frame()
dow = dow.to_frame()
dim = dim.to_frame() 

In [6]:
df = dom.join(dow).join(dim)

In [7]:
df

Unnamed: 0,DayOfMonth,DayOfWeek,DaysInMonth
0,4,31,27
1,5,31,16
2,0,30,19
3,5,31,10
4,0,30,25
...,...,...,...
999995,4,31,31
999996,3,31,14
999997,6,28,10
999998,6,31,28


In [8]:
def sine_dates_np(t, period, comp):
    a = (t * (2*np.pi/period))[:, None]
    b = np.arange(1, comp + 1) * 2*np.pi / comp  # np.linspace(1, np.pi*2, comp+1)[:-1][None, :]
    return np.sin(a * b).squeeze() 


In [9]:
sine_dates_np(np.arange(1, 8), 7, 4)

array([[ 0.98709105,  0.3161852 , -0.88581055, -0.5999282 ],
       [ 0.3161852 , -0.5999282 ,  0.82211556, -0.95994973],
       [-0.88581055,  0.82211556,  0.12280994, -0.93609474],
       [-0.5999282 , -0.95994973, -0.93609474, -0.53790266],
       [ 0.69364143,  0.99928823,  0.74597408,  0.07539256],
       [ 0.82211556, -0.93609474,  0.24376058,  0.65853887],
       [-0.43030122,  0.77685322, -0.97220684,  0.97834055]])

In [10]:
ds = df.head(10)

In [11]:
sine_dates_np(ds.DayOfWeek, 7, 4)

array([[-0.27063163, -0.52106493, -0.73260905, -0.8894756 ],
       [-0.27063163, -0.52106493, -0.73260905, -0.8894756 ],
       [-0.99360014,  0.22446354,  0.94289174, -0.43747158],
       [-0.27063163, -0.52106493, -0.73260905, -0.8894756 ],
       [-0.99360014,  0.22446354,  0.94289174, -0.43747158],
       [-0.27063163, -0.52106493, -0.73260905, -0.8894756 ],
       [-0.27063163, -0.52106493, -0.73260905, -0.8894756 ],
       [-0.27063163, -0.52106493, -0.73260905, -0.8894756 ],
       [-0.27063163, -0.52106493, -0.73260905, -0.8894756 ],
       [-0.27063163, -0.52106493, -0.73260905, -0.8894756 ]])

In [12]:
sine_dates_np(ds.DayOfMonth, ds.DaysInMonth, 4) 

array([[ 0.99410527,  0.21556014, -0.94736356, -0.42098489],
       [ 0.05730986, -0.11443134,  0.17117666, -0.22735931],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [-0.97536797, -0.43030122,  0.78553281,  0.77685322],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [ 0.4348596 , -0.78318064,  0.97564601, -0.97395443],
       [ 0.88450181, -0.82530523, -0.11443134,  0.93207808],
       [ 0.81638478,  0.94293805,  0.27272439, -0.62793683],
       [ 0.91983974, -0.72169585, -0.35360535,  0.9991306 ]])

In [58]:
@numba.njit()
def sine_dates_numba(time, period, comp):
    pi = 3.141592653589793
    out = np.zeros((len(time), comp)) 
    for i in range(len(time)):
        for j in range(comp):
            b = (2 + j) * 2 * pi / comp
            out[i, j] = np.sin(time[i] * (2*pi/period[i]) * b * (j + 1))
    return out

sine_dates_numba(ds.DayOfMonth.to_numpy(), ds.DaysInMonth.to_numpy(), 4)

array([[ 0.81245301,  0.29222346,  0.55893585,  0.05866066],
       [-0.96532141,  0.70215715,  0.99990268,  0.47972993],
       [ 0.63158376,  0.88700116, -0.81917606,  0.52503598],
       [-0.28721037, -0.76686341,  0.98436173,  0.2264782 ],
       [ 0.85558374,  0.0615215 ,  0.12280994,  0.74597408],
       [ 0.7800269 ,  0.44167631, -0.79252142,  0.45972893],
       [-0.97395443,  0.77365968,  0.98038367,  0.75408604],
       [ 0.        ,  0.        ,  0.        ,  0.        ],
       [-0.99992091,  0.99928823,  0.07539256,  0.12544227],
       [ 0.        ,  0.        ,  0.        ,  0.        ]])

In [67]:
%timeit a = sine_dates_numba(df.DayOfMonth.to_numpy(), df.DaysInMonth.to_numpy(), 4) 

%timeit b = sine_dates_np(df.DayOfMonth, df.DaysInMonth, 4) 

98.9 ms ± 1.48 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)
29.8 ms ± 92.3 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


AttributeError: module 'numba' has no attribute 'datetime'