In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch

from ts_transformers.data import anomaly_dataset

class SPCRules(object):
    """Statistical-process-control (SPC) run rules evaluated point by point.

    The control zones are derived from ``mean`` and ``std``:

    * zone C: within ``mean +/- 1 * std``
    * zone B: between 1 and 2 ``std`` from the mean
    * zone A: between 2 and 3 ``std`` from the mean

    Parameters
    ----------
    data : pandas Series or DataFrame
        Observations; the rules use ``.rolling`` / ``.diff`` on it, so it must
        be a pandas object.
    mean, std : scalar or pandas object comparable element-wise with ``data``
        Centerline and standard deviation used to build the control limits.

    Every rule returns a boolean object laid out like ``data``. Window based
    rules use pandas' default right-aligned rolling window, so a flag sits on
    the LAST point of the window that satisfied the rule; the first
    ``window - 1`` rows can never be flagged.
    """

    def __init__(self, data, mean, std):
        super().__init__()
        self.data = data
        self.n = len(data)
        self.index = np.arange(self.n)
        self.mean = mean
        self.std = std
        # Upper / lower borders of zones C, B and A (1, 2 and 3 sigma).
        self.upper_c = mean + std
        self.upper_b = mean + 2 * std
        self.upper_a = mean + 3 * std
        self.lower_c = mean - std
        self.lower_b = mean - 2 * std
        self.lower_a = mean - 3 * std

    def _one_sided_run(self, upper, lower, window, required):
        """Flag windows with at least ``required`` points above ``upper`` or below ``lower``."""
        above = (self.data > upper).rolling(window).sum()
        below = (self.data < lower).rolling(window).sum()
        return (above >= required) | (below >= required)

    def _full_window(self, mask, window):
        """Flag windows in which every one of the ``window`` points satisfies ``mask``."""
        return mask.rolling(window).sum() == window

    def detect(self, idx: int):
        """Evaluate SPC rule ``idx`` (1 to 6) and return the boolean flags.

        Raises
        ------
        ValueError
            If ``idx`` is not one of 1..6 (rules 7 and 8 are exposed through
            ``rule7()`` and ``rule8()``).
        """
        if idx == 1:
            # One point beyond the 3 sigma control limit
            out = (self.data < self.lower_a) | (self.data > self.upper_a)
        elif idx == 2:
            # Nine or more points on one side of the centerline without crossing
            out = self._one_sided_run(self.mean, self.mean, window=9, required=9)
        elif idx == 3:
            # Two out of three points in zone A or beyond (same side)
            out = self._one_sided_run(self.upper_b, self.lower_b, window=3, required=2)
        elif idx == 4:
            # Four out of five points in zone B or beyond (same side)
            out = self._one_sided_run(self.upper_c, self.lower_c, window=5, required=4)
        elif idx == 5:
            # Fifteen points are all in zone C
            inside_c = (self.data < self.upper_c) & (self.data > self.lower_c)
            out = self._full_window(inside_c, 15)
        elif idx == 6:
            # Eight continual points with none in zone C
            outside_c = (self.data > self.upper_c) | (self.data < self.lower_c)
            out = self._full_window(outside_c, 8)
        else:
            raise ValueError("Only implement rule 1~6")
        return out

    # Six or more points are continually increasing or decreasing
    def rule7(self):
        """Flag the last point of every run of six strictly monotonic points."""
        step = self.data.diff()
        # Six points in a row give five consecutive steps of the same sign.
        window = 5
        rising = (step > 0).rolling(window).sum()
        falling = (step < 0).rolling(window).sum()
        return (rising == window) | (falling == window)

    # Fourteen or more points alternate in direction
    def rule8(self):
        """Flag the last point of every run of fourteen points that zig-zag."""
        direction = np.sign(self.data.diff())
        # A reversal is +1 followed by -1 (or the opposite): the signs differ
        # by exactly 2. Flat steps (sign 0) and the leading NaN never qualify.
        reversal = direction.diff().abs() == 2
        # Fourteen points -> thirteen steps -> twelve consecutive reversals.
        return self._full_window(reversal, 12)

In [6]:
# Label every recording with SPC rules 1-6 and write "<name>_spc_label.csv"
# into the same folder as the input file.
features_num = 15

# remove dummy rule columns
remove_cols = ['LC51_03CV_rule2', 'LC51_03X_rule2', 'P51_06_rule2', 'T51_01_rule2', 'F51_01_rule2', 'P57_03_rule2', 'FC57_03PV_rule2', 'FC57_03CV_rule2', 'FC57_03X_rule2', 'T51_01_rule4', 'P57_03_rule4', 'FC57_03PV_rule4', 'FC57_03CV_rule4', 'FC57_03X_rule4', 'T51_01_rule5', 'T51_01_rule6', 'FC57_03PV_rule6']

csv_lst = ["09112001", "17112001", "20112001", "30102001"]
for filename in csv_lst:
    df = pd.read_csv(f"data/DMDS/csv_fe/{filename}.csv")
    features_name = df.columns[:features_num]

    # Three consecutive groups of `features_num` columns are sliced out and
    # handed to SPCRules as data, mean and std respectively.
    datas = df.iloc[:, :features_num]
    means = df.iloc[:, features_num: 2 * features_num]
    stds = df.iloc[:, 2 * features_num: 3 * features_num]
    # Blank out the headers of all three frames; presumably so the element-wise
    # comparisons in SPCRules see identically labelled frames -- TODO confirm.
    datas.columns = [""] * datas.shape[1]
    means.columns = [""] * means.shape[1]
    stds.columns = [""] * stds.shape[1]

    rule_gen = SPCRules(datas, means, stds)
    rule_lst = []
    for i in range(1, 7):
        suffix = f"_rule{i}"
        rule_df = rule_gen.detect(idx=i)
        rule_df.columns = [f"{name}{suffix}" for name in features_name]
        rule_lst.append(rule_df)

    # Raw feature columns first, then one group of flag columns per rule.
    df_fe = pd.concat([df.iloc[:, :features_num], *rule_lst], axis=1)
    df_fe = df_fe.drop(columns=remove_cols)
    df_fe.to_csv(f"data/DMDS/csv_fe/{filename}_spc_label.csv", index=False)

In [None]:
# Only the last expression of a cell is displayed: the tuple on the next line
# is evaluated and discarded, so this cell shows just the columns from
# position 15 onwards.
df_fe.columns, len(df_fe.columns)
df_fe.columns[15:]

In [None]:
# Percentage of rows flagged per rule column; columns flagged on more than
# 40% of the rows are reported and collected in `remove_cols`.
# NOTE(review): this rebinds the `remove_cols` name that the labelling cell
# also assigns, and `df_fe` here is whatever that cell left behind -- confirm
# the intended execution order.
rule_df = df_fe.iloc[:, 15:]
flag_rates = rule_df.sum(axis=0) / rule_df.shape[0] * 100
too_frequent = flag_rates[flag_rates > 40]

for col_name, rate in too_frequent.items():
    print(f"{col_name}: {rate:.2f}%")
remove_cols = list(too_frequent.index)

print(len(remove_cols))
print(remove_cols)

In [None]:
def print_rule(rule):
    """Print the shape of ``rule`` and the total over all its cells, then a blank line.

    ``rule`` must convert to a 2-D array via ``to_numpy()``, because the
    sum is taken over both axes explicitly.
    """
    values = rule.to_numpy()
    total = np.sum(values, axis=(0, 1))
    print(rule.shape)
    print(total)
    print()
    
# NOTE(review): rule1_out ... rule8_out are not assigned in any cell visible
# in this notebook, so these calls presumably rely on kernel state from cells
# that no longer exist -- confirm, or rebuild the inputs (e.g. from
# SPCRules.detect) before relying on this cell after a kernel restart.
print_rule(rule1_out)
print_rule(rule2_out)
print_rule(rule5_out)
print_rule(rule6_out)
print_rule(rule7_out)
print_rule(rule8_out)

In [7]:
# Reload one of the "<name>_spc_label.csv" files written by the labelling cell.
df = pd.read_csv("data/DMDS/csv_fe/17112001_spc_label.csv")

In [8]:
# The labelling cell writes the 15 raw feature columns first, so everything
# from position 15 onwards is a "<feature>_rule<N>" flag column.
df.columns[15:]

Index(['LC51_03CV_rule1', 'LC51_03X_rule1', 'LC51_03PV_rule1', 'P51_06_rule1',
       'T51_01_rule1', 'F51_01_rule1', 'P57_03_rule1', 'P57_04_rule1',
       'FC57_03PV_rule1', 'FC57_03CV_rule1', 'FC57_03X_rule1', 'F74_00_rule1',
       'LC74_20CV_rule1', 'LC74_20X_rule1', 'LC74_20PV_rule1',
       'LC51_03PV_rule2', 'P57_04_rule2', 'F74_00_rule2', 'LC74_20CV_rule2',
       'LC74_20X_rule2', 'LC74_20PV_rule2', 'LC51_03CV_rule3',
       'LC51_03X_rule3', 'LC51_03PV_rule3', 'P51_06_rule3', 'T51_01_rule3',
       'F51_01_rule3', 'P57_03_rule3', 'P57_04_rule3', 'FC57_03PV_rule3',
       'FC57_03CV_rule3', 'FC57_03X_rule3', 'F74_00_rule3', 'LC74_20CV_rule3',
       'LC74_20X_rule3', 'LC74_20PV_rule3', 'LC51_03CV_rule4',
       'LC51_03X_rule4', 'LC51_03PV_rule4', 'P51_06_rule4', 'F51_01_rule4',
       'P57_04_rule4', 'F74_00_rule4', 'LC74_20CV_rule4', 'LC74_20X_rule4',
       'LC74_20PV_rule4', 'LC51_03CV_rule5', 'LC51_03X_rule5',
       'LC51_03PV_rule5', 'P51_06_rule5', 'F51_01_rule5', 'P5