In [7]:
import pandas as pd
import numpy as np

In [8]:
def create_time_windows(df, window_size, stride):
    """Slice every column of ``df`` into fixed-length sliding windows.

    Windows cover ``window_size`` consecutive rows, start at row 0 and advance
    ``stride`` rows at a time. Only complete windows are produced, so trailing
    rows that cannot fill a whole window are left out.

    Parameters
    ----------
    df : pandas.DataFrame
        Source data; each column is windowed independently.
    window_size : int
        Number of consecutive rows in each window (must be > 0).
    stride : int
        Number of rows between the starts of successive windows (must be > 0).

    Returns
    -------
    dict
        Maps each column label of ``df`` to a DataFrame with one row per
        window. Its columns are named ``"<column>_t-<k>"`` with ``k`` running
        from ``window_size - 1`` (first row of the window) down to ``0`` (last
        row of the window), and each row is labelled with the index label of
        the window's last source row (the index name of ``df`` is kept).
        If ``df`` has fewer rows than ``window_size`` the frames are empty but
        still carry the expected columns. Repeated index labels in ``df`` are
        kept as separate rows.

    Raises
    ------
    ValueError
        If ``df`` is not a DataFrame, or ``window_size`` / ``stride`` is not a
        positive integer.
    """
    # Input validation
    if not isinstance(df, pd.DataFrame):
        raise ValueError("Input must be a pandas DataFrame")
    if not isinstance(window_size, int) or window_size <= 0:
        raise ValueError("window_size must be a positive integer")
    if not isinstance(stride, int) or stride <= 0:
        raise ValueError("stride must be a positive integer")

    # Window positions are the same for every column, so compute them once.
    # The range is empty when df has fewer rows than window_size.
    starts = range(0, len(df) - window_size + 1, stride)

    # Each window is labelled by its last row. Those rows sit at positions
    # window_size - 1, window_size - 1 + stride, ..., which is exactly this
    # slice. Positional slicing keeps repeated labels (a dict keyed by label
    # would collapse them) and yields an empty index when there are no windows.
    labels = df.index[window_size - 1::stride]

    result_dict = {}
    for column in df.columns:
        values = df[column].values
        rows = [values[start:start + window_size] for start in starts]
        names = [f"{column}_t-{window_size - offset - 1}" for offset in range(window_size)]
        # Passing the column names to the constructor (rather than assigning
        # them afterwards) keeps the zero-window case from raising a
        # length-mismatch error.
        result_dict[column] = pd.DataFrame(rows, index=labels, columns=names)

    return result_dict

In [9]:
    # Load the NAVER price data from a local parquet file into `df`.
    # NOTE(review): the path is absolute and machine-specific, so this cell only
    # runs where that exact file exists; the leading indentation is not needed.
    df = pd.read_parquet(r"C:/Users/yjahn/Desktop/DnS/data/NAVER_20190806_20240804.parquet")

In [10]:
# Show the (rows, columns) dimensions of the loaded frame.
df.shape

(1232, 6)

In [11]:
# Windowing parameters shared by the cells below.
window_size = 5  # number of consecutive rows per window
stride = 2  # number of rows between successive window starts
# Despite its name, df_list is a dict: create_time_windows returns one windowed
# DataFrame per column of df, keyed by the column label.
df_list = create_time_windows(df, window_size, stride)

In [12]:
# Inspect the source frame: dimensions, column labels, and the first five rows.
print(df.shape)
print(df.columns)
print(df.head(5))

(1232, 6)
Index(['시가', '고가', '저가', '종가', '거래량', '등락률'], dtype='object')
                시가      고가      저가      종가     거래량       등락률
날짜                                                          
2019-08-06  134000  140000  132000  138500  610095  0.362319
2019-08-07  139000  139500  136000  137500  336416 -0.722022
2019-08-08  139500  142000  136500  140000  496443  1.818182
2019-08-09  142000  145000  140500  142500  368133  1.785714
2019-08-12  142500  144500  140500  140500  217292 -1.403509


In [13]:
# Windowed view of the '종가' (closing price) column: one row per window,
# labelled by the index label of the window's last row.
df_list['종가']

Unnamed: 0,종가_t-4,종가_t-3,종가_t-2,종가_t-1,종가_t-0
2019-08-12,138500,137500,140000,142500,140500
2019-08-14,140000,142500,140500,142500,139000
2019-08-19,140500,142500,139000,139500,139000
2019-08-21,139000,139500,139000,145500,147500
2019-08-23,139000,145500,147500,148000,148500
...,...,...,...,...,...
2024-07-22,171600,173400,174900,172200,172700
2024-07-24,174900,172200,172700,170600,171700
2024-07-26,172700,170600,171700,178000,174800
2024-07-30,171700,178000,174800,175000,173300


In [14]:
# calculate_acf comes from the project-local `acf` module (source not shown here).
# It is applied to the windowed closing prices; the displayed result has one row
# per window and columns lag_0 .. lag_4.
# NOTE(review): presumably a per-window autocorrelation — confirm in acf.py.
from acf import calculate_acf
calculate_acf(df_list['종가'], window_size=window_size)

Unnamed: 0,lag_0,lag_1,lag_2,lag_3,lag_4
2019-08-12,1.0,0.335135,-0.427703,-0.345946,-0.061486
2019-08-14,1.0,-0.593814,0.379381,-0.461856,0.176289
2019-08-19,1.0,-0.041379,-0.077011,-0.331034,-0.050575
2019-08-21,1.0,0.358921,-0.239430,-0.368516,-0.250975
2019-08-23,1.0,0.188581,-0.122023,-0.260522,-0.306036
...,...,...,...,...,...
2024-07-22,1.0,-0.158338,-0.538934,0.142467,0.054805
2024-07-24,1.0,0.019153,0.088366,-0.430867,-0.176652
2024-07-26,1.0,0.154870,-0.404805,-0.218894,-0.031170
2024-07-30,1.0,-0.432986,0.024016,-0.255940,0.164909


In [15]:
# calculate_buffett_index comes from the project-local `buffett` module (source
# not shown here). NOTE(review): 'KOR' looks like a country code — confirm there.
from buffett import calculate_buffett_index
buffett = calculate_buffett_index(df['종가'], 'KOR')
# Turn the result into a one-column frame, window it, and show the '종가' windows.
create_time_windows(buffett.to_frame(), window_size, stride)['종가']

Unnamed: 0,종가_t-4,종가_t-3,종가_t-2,종가_t-1,종가_t-0
2019-08-12,0.000008,0.000008,0.000008,0.000008,0.000008
2019-08-14,0.000008,0.000008,0.000008,0.000008,0.000008
2019-08-19,0.000008,0.000008,0.000008,0.000008,0.000008
2019-08-21,0.000008,0.000008,0.000008,0.000008,0.000008
2019-08-23,0.000008,0.000008,0.000008,0.000008,0.000008
...,...,...,...,...,...
2024-07-22,0.000010,0.000010,0.000010,0.000010,0.000010
2024-07-24,0.000010,0.000010,0.000010,0.000009,0.000010
2024-07-26,0.000010,0.000009,0.000010,0.000010,0.000010
2024-07-30,0.000010,0.000010,0.000010,0.000010,0.000010


In [16]:
# demartini_index comes from the project-local `deMartini` module (source not
# shown here); it is computed from the closing prices.
from deMartini import demartini_index
de = demartini_index(df['종가'])
# The one-column frame built from the result is labelled 'rsi', hence the lookup key.
# The earliest windows displayed below contain only NaN values.
create_time_windows(de.to_frame(), window_size, stride)['rsi']

Unnamed: 0,rsi_t-4,rsi_t-3,rsi_t-2,rsi_t-1,rsi_t-0
2019-08-12,,,,,
2019-08-14,,,,,
2019-08-19,,,,,
2019-08-21,,,,,
2019-08-23,,,,,
...,...,...,...,...,...
2024-07-22,50.985180,53.657237,55.818759,51.190435,51.984428
2024-07-24,55.818759,51.190435,51.984428,48.421677,50.341511
2024-07-26,51.984428,48.421677,50.341511,59.613368,54.089073
2024-07-30,50.341511,59.613368,54.089073,54.373659,51.454076


In [17]:
# div_each_before comes from the project-local `div_each_before` module (source
# not shown here).
# NOTE(review): the displayed values look like each close divided by the previous
# close (the first value is 1.0) — confirm against the module.
from div_each_before import div_each_before

deb = div_each_before(df['종가'])
# Window the result and show the '종가' windows.
create_time_windows(deb.to_frame(), window_size, stride)['종가']


Unnamed: 0,종가_t-4,종가_t-3,종가_t-2,종가_t-1,종가_t-0
2019-08-12,1.000000,0.992780,1.018182,1.017857,0.985965
2019-08-14,1.018182,1.017857,0.985965,1.014235,0.975439
2019-08-19,0.985965,1.014235,0.975439,1.003597,0.996416
2019-08-21,0.975439,1.003597,0.996416,1.046763,1.013746
2019-08-23,0.996416,1.046763,1.013746,1.003390,1.003378
...,...,...,...,...,...
2024-07-22,0.973341,1.010490,1.008651,0.984563,1.002904
2024-07-24,1.008651,0.984563,1.002904,0.987840,1.006448
2024-07-26,1.002904,0.987840,1.006448,1.036692,0.982022
2024-07-30,1.006448,1.036692,0.982022,1.001144,0.990286


In [18]:
# fractional_difference comes from the project-local `fractional_difference`
# module (source not shown here).
# NOTE(review): 0.3 is presumably the differencing order — confirm there.
from fractional_difference import fractional_difference
fracdiff = fractional_difference(df['종가'], 0.3)
# The one-column frame built from the result is labelled 0 (see the 0_t-k column
# names in the output), hence the [0] lookup.
create_time_windows(fracdiff.to_frame(), window_size, stride)[0]

Unnamed: 0,0_t-4,0_t-3,0_t-2,0_t-1,0_t-0
2019-08-12,138500.000000,95950.000000,84207.500000,77821.750000,69306.243750
2019-08-14,84207.500000,77821.750000,69306.243750,67418.901625,60085.066169
2019-08-19,69306.243750,67418.901625,60085.066169,58767.057742,56133.411962
2019-08-21,60085.066169,58767.057742,56133.411962,60947.256106,59437.461140
2019-08-23,56133.411962,60947.256106,59437.461140,57254.869581,55748.637486
...,...,...,...,...,...
2024-07-22,6715.588628,9655.879641,10929.406979,7738.272201,8871.848221
2024-07-24,10929.406979,7738.272201,8871.848221,6804.924331,8576.530272
2024-07-26,8871.848221,6804.924331,8576.530272,14798.648392,9744.012717
2024-07-30,8576.530272,14798.648392,9744.012717,10284.301644,8524.998855


In [19]:
# Show the raw '종가' (closing price) column for comparison with the windowed outputs.
df['종가']

날짜
2019-08-06    138500
2019-08-07    137500
2019-08-08    140000
2019-08-09    142500
2019-08-12    140500
               ...  
2024-07-29    175000
2024-07-30    173300
2024-07-31    174000
2024-08-01    175000
2024-08-02    171400
Name: 종가, Length: 1232, dtype: int64

In [20]:
# calculate_pivot_points comes from the project-local `pivot` module (source not
# shown here); it receives the '고가' (high), '저가' (low) and '종가' (close) columns.
from pivot import calculate_pivot_points
pivot_points = calculate_pivot_points(df['고가'], df['저가'], df['종가'])
# The result is passed to create_time_windows directly (no .to_frame()), so it
# must already be a DataFrame; only the 'Pivot' column's windows are shown.
create_time_windows(pivot_points, window_size, stride)['Pivot']

Unnamed: 0,Pivot_t-4,Pivot_t-3,Pivot_t-2,Pivot_t-1,Pivot_t-0
2019-08-12,136833.333333,137666.666667,139500.000000,142666.666667,141833.333333
2019-08-14,139500.000000,142666.666667,141833.333333,141833.333333,140166.666667
2019-08-19,141833.333333,141833.333333,140166.666667,139166.666667,139333.333333
2019-08-21,140166.666667,139166.666667,139333.333333,143333.333333,146166.666667
2019-08-23,139333.333333,143333.333333,146166.666667,148666.666667,147666.666667
...,...,...,...,...,...
2024-07-22,172966.666667,173166.666667,173900.000000,172233.333333,172433.333333
2024-07-24,173900.000000,172233.333333,172433.333333,172733.333333,170400.000000
2024-07-26,172433.333333,172733.333333,170400.000000,175566.666667,176100.000000
2024-07-30,170400.000000,175566.666667,176100.000000,174933.333333,174166.666667


In [21]:

# sonar_indicator comes from the project-local `sonar` module (source not shown here).
# Its window_size=14 keyword is the indicator's own parameter and is independent
# of the notebook-level window_size variable used for windowing below.
from sonar import sonar_indicator
sn = sonar_indicator(df, window_size=14)
# The one-column frame built from the result is labelled 0, hence the [0] lookup.
create_time_windows(sn.to_frame(), window_size, stride)[0]

Unnamed: 0,0_t-4,0_t-3,0_t-2,0_t-1,0_t-0
2019-08-12,,-0.004671,0.000876,0.0023,-0.001322
2019-08-14,0.000876,0.0023,-0.001322,-0.000579,-0.000881
2019-08-19,-0.001322,-0.000579,-0.000881,-0.000688,-0.000557
2019-08-21,-0.000881,-0.000688,-0.000557,-0.001435,-0.001351
2019-08-23,-0.000557,-0.001435,-0.001351,-0.001208,-0.001097


In [22]:
# stochastic_fast / stochastic_slow come from the project-local `stocastic`
# module (source not shown here); stochastic_slow is imported but not called in
# the cells visible here.
# NOTE(review): this cell ends in an assignment, so the Series shown beneath it
# is presumably printed inside stochastic_fast — confirm.
from stocastic import stochastic_fast, stochastic_slow
stfa = stochastic_fast(df)

날짜
2019-08-06    81.250000
2019-08-07    68.750000
2019-08-08    80.000000
2019-08-09    80.769231
2019-08-12    65.384615
                ...    
2024-07-29    54.794521
2024-07-30    43.150685
2024-07-31    47.945205
2024-08-01    59.259259
2024-08-02    36.065574
Length: 1232, dtype: float64


In [23]:
# Display the result: a dict holding the 'fastk' and 'fastd' Series.
stfa

{'fastk': 날짜
 2019-08-06    81.250000
 2019-08-07    68.750000
 2019-08-08    80.000000
 2019-08-09    80.769231
 2019-08-12    65.384615
                 ...    
 2024-07-29    54.794521
 2024-07-30    43.150685
 2024-07-31    47.945205
 2024-08-01    59.259259
 2024-08-02    36.065574
 Length: 1232, dtype: float64,
 'fastd': 날짜
 2019-08-06    81.250000
 2019-08-07    75.000000
 2019-08-08    76.666667
 2019-08-09    76.506410
 2019-08-12    75.384615
                 ...    
 2024-07-29    61.664457
 2024-07-30    50.456621
 2024-07-31    48.630137
 2024-08-01    50.118383
 2024-08-02    47.756680
 Length: 1232, dtype: float64}

In [27]:
# Window the 'fastk' Series; the one-column frame built from it is labelled 0,
# hence the [0] lookup.
create_time_windows(stfa['fastk'].to_frame(), window_size, stride)[0]

Unnamed: 0,0_t-4,0_t-3,0_t-2,0_t-1,0_t-0
2019-08-12,81.250000,68.750000,80.000000,80.769231,65.384615
2019-08-14,80.000000,80.769231,65.384615,80.769231,53.846154
2019-08-19,65.384615,80.769231,53.846154,57.692308,53.846154
2019-08-21,53.846154,57.692308,53.846154,100.000000,93.939394
2019-08-23,53.846154,100.000000,93.939394,86.486486,89.189189
...,...,...,...,...,...
2024-07-22,54.545455,62.727273,69.545455,57.272727,59.545455
2024-07-24,69.545455,57.272727,59.545455,50.000000,53.738318
2024-07-26,59.545455,50.000000,53.738318,76.774194,53.424658
2024-07-30,53.738318,76.774194,53.424658,54.794521,43.150685


In [25]:
# time_delay_embedding comes from the project-local `time_delay` module (source
# not shown here). The displayed result has 7 columns, t-0 .. t-618, spaced 103 apart.
# NOTE(review): so 103 is presumably the delay and 7 the embedding dimension — confirm.
from time_delay import time_delay_embedding
time_delay_embedding(df['종가'], 103, 7)

Unnamed: 0,t-0,t-103,t-206,t-309,t-412,t-515,t-618
0,138500,183000,237500,301000,381500,452500,322000
1,137500,186000,244000,298000,383500,454000,326000
2,140000,188000,240500,283000,385500,444500,331500
3,142500,188000,242000,280000,388500,409500,327500
4,140500,190500,231500,279000,391500,399000,322000
...,...,...,...,...,...,...,...
609,333000,249000,188000,190000,210000,200000,175000
610,328500,252500,185000,186100,208000,204000,173300
611,322000,246500,180500,188100,206500,195000,174000
612,313000,240000,182500,188200,201000,192000,175000


In [26]:
# calculate_vix comes from the project-local `vix` module (source not shown
# here); it is called with the closing prices and the notebook's window_size.
# NOTE(review): the Series shown first in the output is presumably printed inside
# calculate_vix (only the last expression of a cell is displayed) — confirm.
from vix import calculate_vix
calVix = calculate_vix(df['종가'], window_size)
# Window the result and show the '종가' windows.
create_time_windows(calVix.to_frame(), window_size, stride)['종가']

날짜
2019-08-07          NaN
2019-08-08    28.359796
2019-08-09    23.010917
2019-08-12    26.544909
2019-08-13    24.177943
                ...    
2024-07-29    33.533306
2024-07-30    32.881682
2024-07-31    32.774627
2024-08-01    16.208445
2024-08-02    17.758558
Name: 종가, Length: 1231, dtype: float64


Unnamed: 0,종가_t-4,종가_t-3,종가_t-2,종가_t-1,종가_t-0
2019-08-13,,28.359796,23.010917,26.544909,24.177943
2019-08-16,23.010917,26.544909,24.177943,32.068731,29.072432
2019-08-20,24.177943,32.068731,29.072432,24.076076,41.152325
2019-08-22,29.072432,24.076076,41.152325,41.100980,30.996825
2019-08-26,41.152325,41.100980,30.996825,31.036869,45.373920
...,...,...,...,...,...
2024-07-23,24.080181,25.495351,27.303984,26.066695,19.055606
2024-07-25,27.303984,26.066695,19.055606,17.680923,32.515715
2024-07-29,19.055606,17.680923,32.515715,33.506901,33.533306
2024-07-31,32.515715,33.506901,33.533306,32.881682,32.774627


In [21]:
# williams_r comes from the project-local `williams` module (source not shown here).
# NOTE(review): the literal 5 is presumably the look-back period; it is passed
# separately from the notebook-level window_size variable — confirm the intent.
from williams import williams_r
will = williams_r(df, 5) 
# The one-column frame built from the result is labelled 0, hence the [0] lookup.
create_time_windows(will.to_frame(), window_size, stride)[0]

날짜
2019-08-06   -18.750000
2019-08-07   -31.250000
2019-08-08   -20.000000
2019-08-09   -19.230769
2019-08-12   -34.615385
                ...    
2024-07-29   -34.426230
2024-07-30   -48.360656
2024-07-31   -53.608247
2024-08-01   -53.333333
2024-08-02   -86.153846
Length: 1232, dtype: float64


Unnamed: 0,0_t-4,0_t-3,0_t-2,0_t-1,0_t-0
2019-08-12,-18.750000,-31.250000,-20.000000,-19.230769,-34.615385
2019-08-14,-20.000000,-19.230769,-34.615385,-27.777778,-70.588235
2019-08-19,-34.615385,-27.777778,-70.588235,-73.333333,-78.571429
2019-08-21,-70.588235,-73.333333,-78.571429,-0.000000,-9.090909
2019-08-23,-78.571429,-0.000000,-9.090909,-19.230769,-16.000000
...,...,...,...,...,...
2024-07-22,-84.033613,-78.095238,-63.809524,-77.570093,-54.687500
2024-07-24,-63.809524,-77.570093,-54.687500,-88.888889,-53.000000
2024-07-26,-54.687500,-88.888889,-53.000000,-9.836066,-36.065574
2024-07-30,-53.000000,-9.836066,-36.065574,-34.426230,-48.360656
