In [7]:
from sklearn.preprocessing import RobustScaler, StandardScaler, MinMaxScaler
import pandas as pd
import numpy as np

class DataProcessor:
    def __init__(self, df):
        self.df = df
    
    def PartialScaler(self, col, seq_len=8, scaler_type="Robust", path="./", save=False):
        scaler_dict = {
            "Robust": RobustScaler(),
            "Standard": StandardScaler(),
            "MinMax": MinMaxScaler()
        }
        
        scaler = scaler_dict[scaler_type]
        col_scaled = f'{col}_partially_scaled'
        
        self.df[col_scaled] = 0
        
        data = self.df[col].values.reshape(-1, 1)
        scaled_data = scaler.fit_transform(data)
        scaled_data = scaled_data.reshape(-1)
        
        window_data = np.lib.stride_tricks.sliding_window_view(scaled_data, window_shap3e=(seq_len,))
        print(window_data)
        window_data_sum = np.sum(window_data, axis=1)
        print(window_data_sum)
        self.df.loc[:seq_len-1, col_scaled] = window_data_sum[:seq_len]
        print(self.df)
        print(self.df.loc[:seq_len-1, col_scaled])
        self.df.loc[seq_len-1:, col_scaled] = np.cumsum(window_data_sum)[seq_len-1:]
        
        if save:
            self.df.to_csv(path, index=False)

            
# Example usage
data = {
    'A': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    'B': [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24]
}

df = pd.DataFrame(data)
processor = DataProcessor(df)
processor.PartialScaler('A', seq_len=4)
print(processor.df)


[[-1.         -0.81818182 -0.63636364 -0.45454545]
 [-0.81818182 -0.63636364 -0.45454545 -0.27272727]
 [-0.63636364 -0.45454545 -0.27272727 -0.09090909]
 [-0.45454545 -0.27272727 -0.09090909  0.09090909]
 [-0.27272727 -0.09090909  0.09090909  0.27272727]
 [-0.09090909  0.09090909  0.27272727  0.45454545]
 [ 0.09090909  0.27272727  0.45454545  0.63636364]
 [ 0.27272727  0.45454545  0.63636364  0.81818182]
 [ 0.45454545  0.63636364  0.81818182  1.        ]]
[-2.90909091 -2.18181818 -1.45454545 -0.72727273  0.          0.72727273
  1.45454545  2.18181818  2.90909091]
     A   B  A_partially_scaled
0    1   2           -2.909091
1    2   4           -2.181818
2    3   6           -1.454545
3    4   8           -0.727273
4    5  10            0.000000
5    6  12            0.000000
6    7  14            0.000000
7    8  16            0.000000
8    9  18            0.000000
9   10  20            0.000000
10  11  22            0.000000
11  12  24            0.000000
0   -2.909091
1   -2.18181

ValueError: Must have equal len keys and value when setting with an iterable

In [62]:
def get_sub_seqs(x_arr, seq_len=100, stride=1,partial_scaling = False):
    """

    Parameters
    ----------
    x_arr: np.array, required
        input original data with shape [time_length, channels]

    seq_len: int, optional (default=100)
        Size of window used to create subsequences from the data

    stride: int, optional (default=1)
        number of time points the window will move between two subsequences

    Returns
    -------
    x_seqs: np.array
        Split sub-sequences of input time-series data
    """

    seq_starts = np.arange(0, x_arr.shape[0] - seq_len + 1, stride)
    x_seqs = np.array([x_arr[i:i + seq_len] for i in seq_starts])
    if partial_scaling == True:
        
        num_cols = 0 # 배열의 열(column) 개수
        x_ori = x_seqs[:,:,-1] # 0 feature 내 0번째 축

        x_ori_2 = x_seqs[:,:,-1][:,:seq_len-1] # 0 feature 내 0번째 축
        # 추가할 열(column)의 개수
        num_additional_cols = 1

        mean = np.mean(x_ori_2,axis=1) # 평균 계산
        std = np.std(x_ori_2,axis=1) # 표준 편차 계산
        x_scaled = (x_ori - mean[:, np.newaxis]) / std[:, np.newaxis]
        x_scaled = np.reshape(x_scaled,(-1,seq_len,1))
        x_seqs = np.concatenate((x_seqs, x_scaled), axis=-1)


    return x_seqs
# Example usage
data = {
    'A': [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12],
    'B': [2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24]
}
df = pd.DataFrame(data)
arrat=get_sub_seqs(df, seq_len=4,partial_scaling = True)
print(arrat.shape)

0      2
1      4
2      6
3      8
4     10
5     12
6     14
7     16
8     18
9     20
10    22
11    24
Name: B, dtype: int64
[[ 2  4  6  8]
 [ 4  6  8 10]
 [ 6  8 10 12]
 [ 8 10 12 14]
 [10 12 14 16]
 [12 14 16 18]
 [14 16 18 20]
 [16 18 20 22]
 [18 20 22 24]]
(9, 4, 3)


In [59]:
arrat

array([[[ 1.        ,  2.        , -1.22474487],
        [ 2.        ,  4.        ,  0.        ],
        [ 3.        ,  6.        ,  1.22474487],
        [ 4.        ,  8.        ,  2.44948974]],

       [[ 2.        ,  4.        , -1.22474487],
        [ 3.        ,  6.        ,  0.        ],
        [ 4.        ,  8.        ,  1.22474487],
        [ 5.        , 10.        ,  2.44948974]],

       [[ 3.        ,  6.        , -1.22474487],
        [ 4.        ,  8.        ,  0.        ],
        [ 5.        , 10.        ,  1.22474487],
        [ 6.        , 12.        ,  2.44948974]],

       [[ 4.        ,  8.        , -1.22474487],
        [ 5.        , 10.        ,  0.        ],
        [ 6.        , 12.        ,  1.22474487],
        [ 7.        , 14.        ,  2.44948974]],

       [[ 5.        , 10.        , -1.22474487],
        [ 6.        , 12.        ,  0.        ],
        [ 7.        , 14.        ,  1.22474487],
        [ 8.        , 16.        ,  2.44948974]],

       [[ 

In [15]:
arrat[:,:,0][0,:]

array([1, 2, 3, 4])