In [1]:
import pandas as pd
import lightgbm as lgb
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [2]:
from s4_reg.core import s4regressor as regressor
from s4_reg.src_dataloaders_original import StandardScaler
from s4_reg.src_utils_visualize import prediction_result as post

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import pickle

# Load the pre-built per-ticker input data from disk.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file,
# so this path must only ever point at a trusted, locally produced artefact.
with open("tests/prepared_data.pkl", "rb") as prepared_file:
    dict_prepared_data = pickle.load(prepared_file)

def make_s4_features(
    data,
    target,
    seq_len_target=180,
    pred_len_target=1,
    d_model_target=10, # 2048
    seq_len_others=10,
    pred_len_others=1,
    d_model_others=10
    ):
    """Build a combined feature frame for one target column from two models.

    Two ``regressor`` instances are created on the same prepared frame: the
    first with ``features='S'`` and the ``*_target`` window settings, the
    second with ``features='MS'`` and the ``*_others`` window settings. The
    ``get_features`` output of each is taken, the ``target`` column is removed
    from both, and the two results are concatenated column-wise.

    Args:
        data: DataFrame that exposes a ``Date`` column once its index has
            been reset (presumably ``Date`` is the incoming index -- confirm
            against the code that builds the prepared data). The column is
            parsed with ``pd.to_datetime`` and stored as ``date``.
        target: Column label handed to ``regressor`` as ``target`` and used to
            pick the target column out of the ``get_features`` output.
        seq_len_target: First element of ``size`` for the ``'S'`` model.
        pred_len_target: Second element of ``size`` for the ``'S'`` model.
        d_model_target: ``d_model`` for the ``'S'`` model.
        seq_len_others: First element of ``size`` for the ``'MS'`` model.
        pred_len_others: Second element of ``size`` for the ``'MS'`` model.
        d_model_others: ``d_model`` for the ``'MS'`` model.

    Returns:
        A ``(features, stock)`` tuple. ``features`` is the concatenated
        feature frame and ``stock`` is the ``target`` column of the ``'S'``
        model output wrapped in a DataFrame; the last row of each is dropped.

    Raises:
        AssertionError: If ``seq_len_target`` is not strictly greater than
            ``seq_len_others``.
    """
    # Work on a fresh frame: 'Date' becomes a parsed 'date' column.
    frame = (
        data.reset_index()
        .assign(date=lambda df: pd.to_datetime(df['Date']))
        .drop(columns=['Date'])
    )

    assert seq_len_target > seq_len_others

    def _extract(window, mode, width):
        # Instantiate one regressor on the prepared frame and return its features.
        model = regressor(
            dataset=frame,
            target=target,
            size=window,
            features=mode,
            d_model=width,
            device='cpu',
        )
        return model.get_features(frame)

    # 'S' model: keep the target column aside, the rest are features.
    target_out = _extract([seq_len_target, pred_len_target], 'S', d_model_target)
    target_series = target_out[target]
    target_feats = target_out.drop(columns=[target])

    # 'MS' model: skip the leading rows so it starts where the longer-window
    # output does (assumes get_features yields one row per window position --
    # TODO confirm against s4_reg).
    row_offset = seq_len_target - seq_len_others
    others_out = _extract([seq_len_others, pred_len_others], 'MS', d_model_others)
    others_feats = others_out.iloc[row_offset:, :].drop(columns=[target])
    # Rename so these columns cannot collide with the 'S' model's column names.
    others_feats.columns = [f'exog_feat_{i+1}' for i in range(len(others_feats.columns))]

    combined = pd.concat([target_feats, others_feats], axis=1)

    # The final row is dropped from both outputs.
    return combined.iloc[:-1, :], pd.DataFrame(target_series.iloc[:-1])

# Ticker symbols to build S4 features for; each must be a key of
# dict_prepared_data.
targets = [
    '4584.T',
    '1557.T',
    '8789.T',
    '1893.T',
    'MSFT'
]

# features: ticker -> feature DataFrame returned by make_s4_features.
features = {}
# Per-ticker target frames, collected first and joined once after the loop
# instead of re-concatenating the growing frame on every iteration.
stock_frames = []
for target in targets:
    feat, stock = make_s4_features(dict_prepared_data[target], target)
    features[target] = feat
    stock_frames.append(stock)

# One column per ticker, aligned on the shared row index.
stock_data = pd.concat(stock_frames, axis=1)
    

CUDA extension for Cauchy multiplication not found. Install by going to extensions/cauchy/ and running `python setup.py install`. This should speed up end-to-end training by 10-50%
Falling back on slow Cauchy kernel. Install at least one of pykeops or the CUDA extension for memory efficiency.
Falling back on slow Vandermonde kernel. Install pykeops for improved memory efficiency.


In [4]:
# Persist the computed artefacts so the feature extraction does not have to
# be re-run: the dict of per-ticker feature frames goes to a pickle, the
# target values go to a CSV.
pd.to_pickle(features, './data/features_test.pkl')
# No index_label is given, so the index is written under an empty header.
pd.DataFrame(stock_data).to_csv('./data/stock_data_test.csv')

In [5]:
# Reload the cached artefacts from disk, replacing the in-memory
# `features` and `stock_data` variables.
features = pd.read_pickle('./data/features_test.pkl')
# The CSV's index column has an empty header, which pandas names
# 'Unnamed: 0' when parsing; it is restored as the row index here.
stock_data = pd.read_csv('./data/stock_data_test.csv', index_col='Unnamed: 0')

In [6]:
# Show the reloaded feature dict followed by the target-value frame.
display(features, stock_data)

{'4584.T':           feat_1      feat_2     feat_3      feat_4      feat_5      feat_6  \
 180   734.689087  123.499207  30.843113 -345.803986  433.608459 -449.363556   
 181   634.747009  103.663925  28.473894 -294.388885  373.312225 -384.783264   
 182   676.864746  112.292336  32.621391 -316.591156  401.190735 -412.538086   
 183   694.850098  118.005768  34.235714 -324.470490  412.803833 -425.951752   
 184   684.634338  116.040680  32.078613 -317.571198  406.474396 -417.737030   
 ...          ...         ...        ...         ...         ...         ...   
 1294  223.451508   39.885864  11.101674 -102.470436  138.206482 -143.450790   
 1295  230.035980   42.742805   9.579368 -105.303726  140.051514 -141.415268   
 1296  226.375488   37.585320  12.369206 -103.315849  140.573196 -144.255249   
 1297  230.569122   39.036400  13.432136 -105.590363  142.691833 -142.796387   
 1298  226.544662   41.507702  13.038601 -102.488571  138.436844 -143.210434   
 
           feat_7      feat_

Unnamed: 0,4584.T,1557.T,8789.T,1893.T,MSFT
180,661.0,28860.0,137.0,533.62700,11123.248
181,708.0,29190.0,140.0,543.87270,10875.842
182,730.0,29370.0,145.0,529.35803,10690.548
183,718.0,29630.0,144.0,547.28790,11058.452
184,732.0,29800.0,141.0,552.41077,11266.821
...,...,...,...,...,...
1294,243.0,55640.0,80.0,680.00000,41467.410
1295,241.0,55680.0,79.0,681.00000,42251.793
1296,244.0,55600.0,69.0,675.00000,41581.720
1297,241.0,55650.0,68.0,678.00000,41555.848
