In [179]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
import numpy as np
import pandas as pd

from sktime.transformations.series.date import DateTimeFeatures

In [4]:
class FixedEmbedding(nn.Module):
    def __init__(self, c_in, d_model):
        super(FixedEmbedding, self).__init__()

        w = torch.zeros(c_in, d_model).float()
        w.require_grad = False

        position = torch.arange(0, c_in).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        w[:, 0::2] = torch.sin(position * div_term)
        w[:, 1::2] = torch.cos(position * div_term)

        self.emb = nn.Embedding(c_in, d_model)
        self.emb.weight = nn.Parameter(w, requires_grad=False)

    def forward(self, x):
        return self.emb(x).detach()

In [70]:
w = torch.zeros(C_IN, D_MODEL).float()
w.require_grad = False

position = torch.arange(0, C_IN).float().unsqueeze(1)
div_term = (torch.arange(0, D_MODEL, 2).float() * -(math.log(10000.0) / D_MODEL)).exp()

w[:, 0::2] = torch.sin(position * div_term)[:w[:, 0::2].shape[0],:w[:, 0::2].shape[1]]
w[:, 1::2] = torch.cos(position * div_term)[:w[:, 1::2].shape[0],:w[:, 1::2].shape[1]]

# emb = nn.Embedding(C_IN, D_MODEL)
# emb.weight = nn.Parameter(w, requires_grad=False)

In [73]:
conv_layer = nn.Conv1d(in_channels=1, out_channels=1, kernel_size=1)

In [78]:
tst_input = torch.tensor([1,2,3,4]).float().reshape((1,-1))

conv_layer(tst_input)

tensor([[0.3960, 0.3152, 0.2343, 0.1534]], grad_fn=<SqueezeBackward1>)

In [79]:
conv_layer.kernel_size

(1,)

In [22]:

m = nn.Conv1d(in_channels=1, out_channels=2, kernel_size=3, stride=1, padding=0)
input = torch.tensor([[1, 2, 3, 4, 5]], dtype=torch.float32)
output = m(input)
output

tensor([[ 0.4709,  0.6300,  0.7892],
        [-2.1107, -3.2507, -4.3907]], grad_fn=<SqueezeBackward1>)

In [19]:
input = np.array([[1, 2, 3, 4, 5],[1, 2, 3, 4, 5]])#.reshape(1,-1)
kernel_filt = m.weight.detach().numpy()[0][0]
kernel_bias =m.bias.detach().numpy()
k_size=3
out_lst = []
for n_i,arry in enumerate(input):
    temp_out = []
    for i in range(len(arry)):
        slc_lst = arry[i:i+k_size]
        if len(slc_lst)==k_size:
            temp_out.append(np.sum(slc_lst*kernel_filt)+kernel_bias[n_i])

    out_lst.append(temp_out)
    # break
out_lst


[[1.6338245868682861, 2.2841799557209015, 2.934535324573517],
 [1.7435266077518463, 2.3938819766044617, 3.044237345457077]]

In [None]:
class PositionalEmbedding(nn.Module):
    def __init__(self, d_emb, max_len=5000):
        super(PositionalEmbedding, self).__init__()
        # Compute the positional encodings once in log space.
        pe = torch.zeros(max_len, d_emb).float()
        pe.require_grad = False

        position = torch.arange(0, max_len).float().unsqueeze(1)
        div_term = (torch.arange(0, d_emb, 2).float() * -(math.log(10000.0) / d_emb)).exp()

        pe[:, 0::2] = torch.sin(position * div_term)[:pe[:, 0::2].shape[0],:pe[:, 0::2].shape[1]]
        pe[:, 1::2] = torch.cos(position * div_term)[:pe[:, 1::2].shape[0],:pe[:, 1::2].shape[1]]

        pe = pe.unsqueeze(0)
        self.register_buffer('pe', pe)

    def forward(self, x):
        return self.pe[:, :x.size(2)]

In [64]:
max_len = 2
d_emb = 4

pe = torch.zeros(max_len, d_emb).float()
pe.require_grad = False

position = torch.arange(0, max_len).float().unsqueeze(1)
div_term = (torch.arange(0, d_emb, 2).float() * -(math.log(10000.0) / d_emb)).exp()

pe[:, 0::2] = torch.sin(position * div_term)#[:pe[:, 0::2].shape[0],:pe[:, 0::2].shape[1]]
pe[:, 1::2] = torch.cos(position * div_term)#[:pe[:, 1::2].shape[0],:pe[:, 1::2].shape[1]]

# pe = pe.unsqueeze(0)

In [66]:
pe.T

tensor([[0.0000, 0.8415],
        [1.0000, 0.5403],
        [0.0000, 0.0100],
        [1.0000, 0.9999]])

In [68]:
print(torch.sin(torch.tensor(1)))
print(torch.cos(torch.tensor(1/(10**2))))
print(torch.sin(torch.tensor(1/10**4)))
print(torch.cos(torch.tensor(1/10**6)))

tensor(0.8415)
tensor(0.9999)
tensor(1.0000e-04)
tensor(1.)


In [2]:


seq_len = 2
d_emb = 4
POS_EMB = torch.zeros(d_emb, seq_len).float()
POS = torch.arange(0, seq_len).repeat(d_emb,1)
DIV = 10**(4*torch.arange(0, d_emb).reshape(-1,1).repeat(1,seq_len)*2/d_emb)

POS_EMB[0::2, :] = torch.sin(POS/DIV)[0::2, :]
POS_EMB[1::2, :] = torch.cos(POS/DIV)[1::2, :]

In [5]:
POS_EMB

tensor([[0.0000e+00, 8.4147e-01],
        [1.0000e+00, 9.9995e-01],
        [0.0000e+00, 1.0000e-04],
        [1.0000e+00, 1.0000e+00]])

In [4]:
class FixedEmbedding(nn.Module):
    def __init__(self, c_in, d_model):
        super(FixedEmbedding, self).__init__()

        w = torch.zeros(c_in, d_model).float()
        w.require_grad = False

        position = torch.arange(0, c_in).float().unsqueeze(1)
        div_term = (torch.arange(0, d_model, 2).float() * -(math.log(10000.0) / d_model)).exp()

        w[:, 0::2] = torch.sin(position * div_term)
        w[:, 1::2] = torch.cos(position * div_term)

        self.emb = nn.Embedding(c_in, d_model)
        self.emb.weight = nn.Parameter(w, requires_grad=False)

    def forward(self, x):
        return self.emb(x).detach()

class TemporalEmbedding(nn.Module):
    def __init__(self, d_model, embed_type='fixed', freq='h'):
        super(TemporalEmbedding, self).__init__()

        minute_size = 4; hour_size = 24
        weekday_size = 7; day_size = 32; month_size = 13

        Embed = FixedEmbedding if embed_type=='fixed' else nn.Embedding
        if freq=='t':
            self.minute_embed = Embed(minute_size, d_model)
        self.hour_embed = Embed(hour_size, d_model)
        self.weekday_embed = Embed(weekday_size, d_model)
        self.day_embed = Embed(day_size, d_model)
        self.month_embed = Embed(month_size, d_model)
    
    def forward(self, x):
        x = x.long()
        
        minute_x = self.minute_embed(x[:,:,4]) if hasattr(self, 'minute_embed') else 0.
        hour_x = self.hour_embed(x[:,:,3])
        weekday_x = self.weekday_embed(x[:,:,2])
        day_x = self.day_embed(x[:,:,1])
        month_x = self.month_embed(x[:,:,0])
        
        return hour_x + weekday_x + day_x + month_x + minute_x

In [None]:
D_MODEL=4

In [8]:
pd.tseries.offsets.YearEnd

pandas._libs.tslibs.offsets.YearEnd

In [9]:
pd.tseries.frequencies.to_offset('h')

<Hour>

In [11]:
10//15

0

In [14]:
np.vstack([1,2,])

array([[1],
       [2]])

In [15]:
class TimeFeature:
    def __init__(self):
        pass

    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        pass

    def __repr__(self):
        return self.__class__.__name__ + "()"

class SecondOfMinute(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.second / 59.0 - 0.5

class MinuteOfHour(TimeFeature):
    """Minute of hour encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.minute / 59.0 - 0.5

class HourOfDay(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.hour / 23.0 - 0.5

class DayOfWeek(TimeFeature):
    """Hour of day encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return index.dayofweek / 6.0 - 0.5

class DayOfMonth(TimeFeature):
    """Day of month encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.day - 1) / 30.0 - 0.5

class DayOfYear(TimeFeature):
    """Day of year encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.dayofyear - 1) / 365.0 - 0.5

class MonthOfYear(TimeFeature):
    """Month of year encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.month - 1) / 11.0 - 0.5

class WeekOfYear(TimeFeature):
    """Week of year encoded as value between [-0.5, 0.5]"""
    def __call__(self, index: pd.DatetimeIndex) -> np.ndarray:
        return (index.week - 1) / 52.0 - 0.5

In [20]:
test_dt = pd.to_datetime('2024-05-01')
chk = WeekOfYear()
chk(test_dt)

-0.17307692307692307

In [21]:
test_dt.week

18

In [35]:
test_embed = nn.Embedding(10,3, padding_idx=0)

test_input = torch.tensor([[[1,2,0,0],[2,3,4,4]]])


embed_out = test_embed(test_input)


In [37]:
test_embed.weight.shape

torch.Size([10, 3])

In [199]:
dt_arry = np.array([pd.to_datetime('2024-01-01'), pd.to_datetime('2024-01-02')])

seq_out = 1
seq_in =2

stay_date = pd.to_datetime('2024-10-30')
leads= np.arange(start=seq_out, stop=seq_out+seq_in)

dt_category = {'month':['month_of_year','month_of_quarter','quarter_of_year'],
               'day': ['day_of_year','day_of_week','day_of_quarter','week_of_year','week_of_month','week_of_quarter']}
booking_dates = stay_date-pd.to_timedelta(leads, unit='days')
dt_feats = DateTimeFeatures(feature_scope='comprehensive', ts_freq='D').fit_transform(pd.DataFrame(index=booking_dates.sort_values()))

dt_feats[dt_category['month']].to_numpy().shape

  DUMMIES = DUMMIES.replace(
  DUMMIES = DUMMIES.replace(


(2, 3)

In [292]:
booking_dates.min()

Timestamp('2024-10-28 00:00:00')

In [250]:
dt_feats[dt_category['month']]

Unnamed: 0,month_of_year,month_of_quarter,quarter_of_year
2024-10-28,10,1,4
2024-10-29,10,1,4


In [258]:
dt_feats[dt_category['month']].to_numpy().reshape(2,-1)

array([[10,  1,  4],
       [10,  1,  4]], dtype=int64)

In [213]:
def get_dates_vector(dates_arry: pd.DatetimeIndex, seq_in:int, seq_out:int, sub_dates_asc_order:bool=False):
    dt_category = {'month':['month_of_year','month_of_quarter','quarter_of_year'],
               'day': ['day_of_year','day_of_week','day_of_quarter','week_of_year','week_of_month','week_of_quarter']}
    leads= np.arange(start=seq_out, stop=seq_out+seq_in)
    month_out_arry = []
    day_out_arry = []
    for date in dates_arry:
        booking_dates = date-pd.to_timedelta(leads, unit='days')
        dt_feats = DateTimeFeatures(feature_scope='comprehensive', ts_freq='D').fit_transform(pd.DataFrame(index=booking_dates.sort_values()))
        dt_feats = dt_feats.sort_index(ascending=sub_dates_asc_order)
        month_out_arry.append(dt_feats[dt_category['month']].to_numpy())
        day_out_arry.append(dt_feats[dt_category['day']].to_numpy())
    
    return np.array(month_out_arry), np.array(day_out_arry)
        
month_arry , day_arry = get_dates_vector(dates_arry=pd.to_datetime(['2024-01-01','2024-01-02','2024-01-03']), seq_in=5,seq_out=2)


  DUMMIES = DUMMIES.replace(
  DUMMIES = DUMMIES.replace(
  DUMMIES = DUMMIES.replace(
  DUMMIES = DUMMIES.replace(
  DUMMIES = DUMMIES.replace(
  DUMMIES = DUMMIES.replace(


In [321]:
month_arry.shape

(2, 2, 3)

In [322]:
W_tst = torch.randn(3,2, requires_grad=True)

torch.tensor(month_arry).float().matmul(W_tst).shape

torch.Size([2, 2, 2])

In [304]:
DT_FEATS = DateTimeFeatures(feature_scope='comprehensive', ts_freq='D').fit_transform(pd.DataFrame(index=pd.date_range(start='2015-01-01', end='2024-04-12')))
DT_FEATS= DT_FEATS.reset_index().rename(columns={'index':'date'})
DT_FEATS['date'] = pd.to_datetime(DT_FEATS['date'])

def get_dates_vector(dates_arry: pd.DatetimeIndex, seq_in:int, seq_out:int, sub_dates_asc_order:bool=False):
    dt_category = {'month':['month_of_year','month_of_quarter','quarter_of_year'],
               'day': ['day_of_year','day_of_week','day_of_quarter','week_of_year','week_of_month','week_of_quarter']}
    leads= np.arange(start=seq_out, stop=seq_out+seq_in)
    month_out_arry = []
    day_out_arry = []
    for date in dates_arry:
        booking_dates = date-pd.to_timedelta(leads, unit='days')
        filt_dt_feats = DT_FEATS[(DT_FEATS.date>=booking_dates.min())&(DT_FEATS.date<=booking_dates.max())].sort_values('date', ascending=sub_dates_asc_order)
        month_out_arry.append(filt_dt_feats[dt_category['month']].to_numpy())
        day_out_arry.append(filt_dt_feats[dt_category['day']].to_numpy())
    
    return np.array(month_out_arry), np.array(day_out_arry)

  DUMMIES = DUMMIES.replace(
  DUMMIES = DUMMIES.replace(


In [323]:
# get_dates_vector(dates_arry=pd.date_range(start='2024-01-01',end='2024-01-02', freq='1D'), seq_in=2, seq_out=2)

dates_arry = pd.date_range(start='2024-01-01',end='2024-01-02', freq='1D')
month_arry, day_arry = get_dates_vector(dates_arry=dates_arry, seq_in=2, seq_out=2)
day_arry.shape

(2, 2, 6)

In [302]:
np.array(month_out_arry).shape

(2, 2, 3)