In [39]:
import pandas as pd
import re

In [40]:
# Read train/TRAIN_A.csv into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
df = pd.read_csv('train/TRAIN_A.csv')

In [41]:
# Keep every column except the exact names 'timestamp' / 'anomaly' and any
# column whose name ends with '_flag'.
filtered_columns = [
    col
    for col in df.columns
    if col not in ('timestamp', 'anomaly') and not col.endswith('_flag')
]
print(filtered_columns)

['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'M1', 'M2', 'M3', 'M4', 'M5', 'M6', 'M7', 'M8', 'M9', 'M10', 'M11', 'M12', 'M13', 'M14', 'P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8', 'P9', 'P10', 'P11', 'P12', 'P13', 'P14', 'P15', 'P16', 'P17', 'P18', 'P19', 'P20', 'P21', 'P22', 'P23', 'P24', 'P25', 'P26']


In [42]:
# Regex describing the columns to drop:
#   - the exact names 'timestamp' and 'anomaly'
#   - the exact names M<digits> (M1, M2, ...)
#   - any name ending in '_flag'
# The first three alternatives are anchored with ^...$ so that they only match
# whole column names; an unanchored search would also drop any column that
# merely contains one of them as a substring (e.g. 'PM10').
exclude_pattern = re.compile(r'^(?:timestamp|anomaly|M\d+)$|_flag$')

# Keep only the columns the pattern does not match
filtered_columns = [col for col in df.columns if not exclude_pattern.search(col)]
print(filtered_columns)

['Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'P1', 'P2', 'P3', 'P4', 'P5', 'P6', 'P7', 'P8', 'P9', 'P10', 'P11', 'P12', 'P13', 'P14', 'P15', 'P16', 'P17', 'P18', 'P19', 'P20', 'P21', 'P22', 'P23', 'P24', 'P25', 'P26']


In [43]:
# from sklearn.preprocessing import StandardScaler, MinMaxScaler

# # 정규화
# def Standardization(df, col_name):
#     standard_scaler = StandardScaler()
#     for name in col_name:
#         if name == 'anomaly' or 'timestamp':
#             pass
#         df[f'std_{col_name}'] = standard_scaler.fit_transform(df[[col_name]])
    
#     return df.copy()

from sklearn.preprocessing import StandardScaler

# 정규화 함수
def Standardization(df, col_names):
    """Return a copy of ``df`` extended with standardized ``std_<name>`` columns.

    Args:
        df: pandas DataFrame holding the columns to scale. It is not modified.
        col_names: iterable of column labels to standardize. The labels
            'anomaly' and 'timestamp' are skipped, and repeated labels are
            processed only once.

    Returns:
        A new DataFrame containing every original column plus one
        ``std_<name>`` column per processed label. If a column with that name
        already exists it is overwritten in the returned frame.

    Raises:
        KeyError: if a requested label is not a column of ``df`` (raised by
            the pandas column lookup).
    """
    # 'anomaly' and 'timestamp' are never scaled. dict.fromkeys drops repeated
    # labels while keeping first-seen order.
    targets = [
        name for name in dict.fromkeys(col_names)
        if name not in ('anomaly', 'timestamp')
    ]
    if not targets:
        # Nothing to scale: still return an independent copy, and avoid
        # handing the scaler a frame with zero feature columns.
        return df.copy()

    # A single fit over all target columns is enough because StandardScaler
    # centers and scales each feature (column) independently.
    scaled = pd.DataFrame(
        StandardScaler().fit_transform(df[targets]),
        index=df.index,
    )
    # Columns are picked by position so the result does not depend on how the
    # scaler labels its output.
    new_columns = {
        f'std_{name}': scaled.iloc[:, position]
        for position, name in enumerate(targets)
    }

    # assign() builds the result on a fresh copy of df, so no extra .copy()
    # is needed and the caller's frame stays untouched.
    return df.assign(**new_columns)

In [44]:
# Add a std_<name> column for every entry in filtered_columns. df itself is
# left unchanged because Standardization works on a copy.
df_std = Standardization(df, filtered_columns)
df_std.head()

Unnamed: 0,timestamp,Q1,Q2,Q3,Q4,Q5,M1,M2,M3,M4,...,std_P17,std_P18,std_P19,std_P20,std_P21,std_P22,std_P23,std_P24,std_P25,std_P26
0,24/05/27 00:00,17880,37151.001,24834.001,6321,85828,0,0,0,1,...,1.596575,1.375805,1.153395,1.297997,0.850367,0.652636,0.64652,1.243416,1.259918,1.263207
1,24/05/27 00:01,17970,37069.001,25016.001,6367,85212,0,0,0,1,...,1.596575,1.375805,1.153395,1.297997,0.850367,0.652636,0.64652,1.251012,1.252925,1.256254
2,24/05/27 00:02,17280,37345.0,24462.0,6431,85655,0,0,0,1,...,1.596575,1.375805,1.153395,1.297997,0.850367,0.652636,0.64652,1.203101,1.241853,1.241767
3,24/05/27 00:03,17280,37345.0,24462.0,6431,85619,0,0,0,1,...,1.596575,1.375805,1.153395,1.297997,0.850367,0.652636,0.64652,1.228809,1.230781,1.27074
4,24/05/27 00:04,17920,37075.0,24896.001,6206,85619,0,0,0,1,...,1.596575,1.375805,1.153395,1.297997,0.850367,0.652636,0.64652,1.228809,1.230781,1.27074


In [46]:
# Raw values of one Q column and one P column, shown for comparison with
# their std_ counterparts.
df_std[['Q1','P1']].head()

Unnamed: 0,Q1,P1
0,17880,3.206
1,17970,3.206
2,17280,3.2115
3,17280,3.2115
4,17920,3.2005


In [47]:
# Standardized versions of the same two columns.
df_std[['std_Q1','std_P1']].head()

Unnamed: 0,std_Q1,std_P1
0,-0.505804,-1.896483
1,-0.493704,-1.896483
2,-0.586473,-1.784703
3,-0.586473,-1.784703
4,-0.500426,-2.008262


In [None]:
# Sanity check: the std_ columns should have mean ~0 and std ~1.
# The std reads slightly above 1 because describe() reports the sample
# standard deviation (ddof=1), whereas StandardScaler divides by the
# population standard deviation (ddof=0).
df_std.describe()

Unnamed: 0,Q1,Q2,Q3,Q4,Q5,M1,M2,M3,M4,M5,...,std_P17,std_P18,std_P19,std_P20,std_P21,std_P22,std_P23,std_P24,std_P25,std_P26
count,44101.0,44101.0,44101.0,44101.0,44101.0,44101.0,44101.0,44101.0,44101.0,44101.0,...,44101.0,44101.0,44101.0,44101.0,44101.0,44101.0,44101.0,44101.0,44101.0,44101.0
mean,21642.090882,38889.877793,26553.82275,4713.11036,91646.376001,0.0,0.0,0.0,0.999977,0.242988,...,3.495598e-15,-1.010527e-15,1.917939e-15,1.376585e-15,-7.630508e-16,-2.794416e-15,-2.990334e-15,-1.30956e-15,1.536413e-15,-1.13942e-15
std,7437.92174,8867.575713,8215.647448,2580.822258,4582.532974,0.0,0.0,0.0,0.004762,0.428893,...,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011
min,15080.0,15641.0,0.0,-3228.0,73424.0,0.0,0.0,0.0,0.0,0.0,...,-2.645364,-2.608061,-2.593201,-2.606376,-2.549878,-2.544475,-2.532264,-2.244196,-2.24413,-2.278488
25%,17250.0,32873.0,17435.0,3273.0,89417.0,0.0,0.0,0.0,1.0,0.0,...,-0.7122774,-0.7380831,-0.7500833,-0.7344261,-0.725788,-0.7177515,-0.7327582,-0.7174528,-0.7505535,-0.7498573
50%,17620.0,36766.001,24848.0,4849.0,92719.0,0.0,0.0,0.0,1.0,0.0,...,-0.01338599,-0.1304853,-0.1099755,-0.1620236,-0.1498715,-0.1400932,-0.1336084,-0.1512781,-0.1567358,-0.1379413
75%,18340.0,48387.002,33600.0,6367.0,94841.0,0.0,0.0,0.0,1.0,0.0,...,0.7259974,0.6498821,0.6445376,0.5722849,0.6201424,0.6009362,0.6376084,0.5714857,0.5897611,0.5678482
max,37690.0,54093.999,42921.001,12955.0,101960.0,0.0,0.0,0.0,1.0,1.0,...,3.610645,5.23191,4.899991,5.148639,5.012126,4.719681,4.673986,4.329039,4.297188,4.30946


In [38]:
# Inspect the column labels to confirm the std_ columns were appended.
df_std.columns

Index(['timestamp', 'Q1', 'Q2', 'Q3', 'Q4', 'Q5', 'M1', 'M2', 'M3', 'M4',
       ...
       'std_P17', 'std_P18', 'std_P19', 'std_P20', 'std_P21', 'std_P22',
       'std_P23', 'std_P24', 'std_P25', 'std_P26'],
      dtype='object', length=104)

In [None]:
class STACK:
    """Bounded LIFO stack backed by a Python list.

    Instance attributes (set in ``__init__``):
        stack: list holding the items, bottom first.
        top: index of the most recently pushed item, or -1 when empty.
        max: largest value ``top`` may reach, i.e. ``max_size - 1``.
    """

    def __init__(self, max_size=5):
        """Create an empty stack that accepts at most ``max_size`` items."""
        self.stack = []
        self.top = -1
        self.max = max_size - 1

    def isEmpty(self):
        """Return True when no items are stored."""
        return self.top == -1

    def isFull(self):
        """Return True when ``top`` has reached the capacity limit."""
        return self.top == self.max

    def push(self, item):
        """Add ``item`` on top; when full, print a notice and change nothing."""
        if self.isFull():
            print("Stack is full.")
            return
        self.stack.append(item)
        self.top += 1

    def pop(self):
        """Remove and return the top item.

        When the stack is empty, print a notice and return None.
        """
        if self.isEmpty():
            print("Stack is empty.")
            return None
        # ``top`` always indexes the last list element, so list.pop() removes
        # exactly the item the index refers to.
        self.top -= 1
        return self.stack.pop()