In [None]:
import glob
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [None]:
def txt2csv(filename, durations: list, labels: list, save_csv: bool = True, save_dir: str = "./csv"):
    """Load a raw tab-separated ``*.txt`` log, attach label columns and optionally save it as CSV.

    The file is read without a header row, its first column is discarded and the
    remaining 32 columns are given the fixed signal names listed below.

    Args:
        filename: Path (``str`` or ``Path``) of the raw ``*.txt`` file.
        durations: Sequence of ``(start, end)`` row ranges. Each range is applied
            as the slice ``[start:end]``, so ``end`` is exclusive and ``None``
            means "through the last row".
        labels: One integer per entry of ``durations``; it is written into
            ``fault_label`` for the rows of that range. Rows outside every range
            keep the value 0. Later ranges overwrite earlier ones where they overlap.
        save_csv: When true, write the labelled frame to
            ``<save_dir>/<stem of filename>.csv`` (without the index).
        save_dir: Output directory; it is created if it does not exist yet.

    Returns:
        The labelled ``DataFrame`` with two extra ``int8`` columns:
        ``fault_label`` (the per-row label) and ``anomaly_label``
        (1 where ``fault_label`` is non-zero, else 0).

    Raises:
        ValueError: If ``durations`` and ``labels`` differ in length, or if the
            file does not have exactly 32 columns after dropping the first one.
    """
    # zip() would silently ignore the unmatched tail, leaving rows unlabelled.
    if len(durations) != len(labels):
        raise ValueError(
            f"durations and labels must have the same length, "
            f"got {len(durations)} and {len(labels)}"
        )

    columns = [
        "P51_05", "P51_06", "T51_01", "F51_01", "LC51_03CV", # Actuator01
        "LC51_03X", "LC51_03PV", "TC51_05", "T51_08", "D51_01",
        "D51_02", "F51_02", "PC51_01", "T51_06", "P51_03", "T51_07",
        "P57_03", "P57_04", "T57_03", "FC57_03PV", "FC57_03CV", "FC57_03X", # Actuator02
        "P74_00", "P74_01", "T74_00", "F74_00", "LC74_20CV", "LC74_20X", # Actuator03
        "LC74_20PV", "F74_30", "P74_30", "T74_30"
    ]

    # read txt as dataframe; with header=None the columns are numbered 0..n-1
    raw = pd.read_csv(filename, sep="\t", header=None)
    df = raw.drop(columns=[0])
    df.columns = columns

    # set class label
    class_label = np.zeros((df.shape[0],), dtype="int8")
    for duration, label in zip(durations, labels):
        class_label[duration[0]: duration[1]] = label

    df['fault_label'] = class_label
    df['anomaly_label'] = np.asarray((class_label != 0), dtype="int8")

    if save_csv:
        out_dir = Path(save_dir)
        # to_csv does not create missing directories.
        out_dir.mkdir(parents=True, exist_ok=True)
        # Path.stem strips only the final suffix and honours the platform's separators.
        df.to_csv(out_dir / (Path(filename).stem + ".csv"), index=False)

    return df

def my_plot(x, Ys, label_lst, title = "", figsize = (16, 9)):
    """Plot several series against one shared x-axis on a single figure.

    Args:
        x: Values for the horizontal axis, shared by every series.
        Ys: Iterable of series; each one is drawn as its own line.
        label_lst: Legend labels, paired with ``Ys`` in order. Pairing uses
            ``zip``, so surplus entries on either side are ignored.
        title: Text placed above the axes.
        figsize: Figure size passed to matplotlib; the figure is created at 100 dpi.
    """
    _, axes = plt.subplots(figsize=figsize, dpi=100)
    axes.set_title(title)
    for series, name in zip(Ys, label_lst):
        axes.plot(x, series, label=name)
    axes.legend()
    plt.show()
    
def analysis(file: str, start: int, end: int, columns: list):
    """Plot the rows ``[start:end]`` of the selected columns of a CSV file.

    The whole file is read on every call. The x-axis values are taken from the
    sliced frame's own index, so they always line up with the plotted data:
    an ``end`` beyond the last row is clipped, and ``end=None`` plots through
    the last row (matching the open-ended ranges ``txt2csv`` accepts).

    Args:
        file: Path of the CSV to read; also used as the plot title.
        start: First row position to plot.
        end: Row position to stop before, or ``None`` for "to the end".
        columns: Column names to plot, one line per column.

    Raises:
        KeyError: If any name in ``columns`` is missing from the file.
    """
    df = pd.read_csv(file)
    window = df[columns].iloc[start:end]
    # read_csv without index_col yields a 0..n-1 index, so these are the row numbers.
    x = window.index.to_numpy()
    Ys = window.to_numpy().transpose()
    my_plot(x, Ys, columns, file)

In [None]:
# txt to csv
# One entry per raw log: (path, row ranges to label, label for each range).
# A range end of None runs through the last row of the file.
RAW_LOG_ANNOTATIONS = [
    (
        "./raw/30102001.txt",
        [(58800, 59800), (57340, 57890)],
        [18, 18],
    ),
    (
        "./raw/09112001.txt",
        [
            (57275, 57550),
            (58520, 58625),
            (58830, 59300),
            (60650, 60700),
            (60870, 60960),
        ],
        [16, 18, 18, 16, 16],
    ),
    (
        "./raw/17112001.txt",
        [
            (54600, 54700),
            (56670, 56770),
            (53780, 53794),
            (54193, 54215),
            (55482, 55517),
            (55977, 56015),
            (57030, 57072),
            (57475, 57530),
            (57675, 57800),
            (58150, 58325),
        ],
        [18, 16, 17, 17, 19, 19, 19, 16, 16, 19],
    ),
    (
        "./raw/20112001.txt",
        [(37780, 38400), (44400, None)],
        [17, 17],
    ),
]

# Convert the logs in order; `file`, `durations`, `labels` and `df` are left
# holding the values of the last entry.
for file, durations, labels in RAW_LOG_ANNOTATIONS:
    df = txt2csv(file, durations, labels)

In [None]:
# Inspect `df`; when the cells run top to bottom this is the frame returned by
# the last txt2csv call in the conversion cell (./raw/20112001.txt).
df

## EDA

In [None]:
# Load one converted CSV into `df` (this rebinds the `df` from the conversion cell).
# NOTE(review): no cell visible in this notebook writes ./csv/01112001.csv — the
# conversion cell only handles 30102001, 09112001, 17112001 and 20112001. Confirm
# the file exists or point this at one of those.
filename = "./csv/01112001.csv"
df = pd.read_csv(filename)

In [None]:
# fault in 30102001.csv
file = "./csv/30102001.csv"

# Each entry: ((start row, end row - exclusive), columns plotted for that window).
windows_30102001 = [
    ((58800, 59800), ["LC51_03CV", "LC51_03X", "LC51_03PV"]),
    ((57340, 57890), ["F74_00", "LC74_20CV", "LC74_20X", "LC74_20PV"]),
]

for (start, end), col_lst in windows_30102001:
    analysis(file, start, end, col_lst)

In [None]:
# fault in 09112001.csv
file = "./csv/09112001.csv"

lc51_cols = ["LC51_03CV", "LC51_03X", "LC51_03PV"]
lc74_cols = ["LC74_20CV", "LC74_20X", "LC74_20PV"]

# Each entry: ((start row, end row - exclusive), columns plotted for that window).
windows_09112001 = [
    ((57275, 57550), lc51_cols),
    ((58830, 59300), lc51_cols),
    ((58520, 58625), lc51_cols),
    ((60650, 60700), lc74_cols),
    ((60870, 60960), lc74_cols),
]

for (start, end), col_lst in windows_09112001:
    analysis(file, start, end, col_lst)

In [None]:
# fault in 17112001.csv
file = "./csv/17112001.csv"
col_lst = ["LC51_03CV", "LC51_03X", "LC51_03PV"]

start, end = 54600, 54700
analysis(file, start, end, col_lst)

start, end = 56670, 56770
analysis(file, start, end, col_lst)

# "F51" is not a column that txt2csv writes, so selecting it raised KeyError.
# "F51_01" is used because it follows P51_06 and T51_01 in txt2csv's column list;
# "F51_02" is the other F51_* column — TODO confirm which signal was intended.
# NOTE(review): 37780-58400 is far wider than any range labelled for this file in
# the conversion cell, and (37780, 38400) is a range labelled for 20112001 —
# confirm the file and window.
start, end = 37780, 58400
col_lst = ["P51_06", "T51_01", "F51_01"]
analysis(file, start, end, col_lst)

## Test TSAnomalyDataset

In [1]:
import pandas as pd
import sys
sys.path.append("../../")
from ts_transformers.data import TSAnomalyDataset, TSAnomalyConfig
from ts_transformers.data import SPCAnomalyDataset, SPCAnomalyConfig

In [2]:
# test TSAnomalyDataset (base dataset)
# Configure the dataset with anomaly_label as the target column and a window size of 256.
config = TSAnomalyConfig(
    target_col="anomaly_label",
    window_size=256,
)

# Rebinds `df` to the converted 17112001 log; both datasets below are built from it.
filepath = "./csv/17112001.csv"
df = pd.read_csv(filepath)
ts_dataset = TSAnomalyDataset(df, config)

# The first item unpacks into two values; the recorded output shows
# torch.Size([256, 33]) for x and torch.Size([]) for y.
x, y = ts_dataset[0]
print(x.shape, y.shape)

# test SPCAnomalyDataset (derived class)
# `config` is rebound here to an SPCAnomalyConfig, which additionally names
# fault_label as the spc column.
config = SPCAnomalyConfig(
    spc_col="fault_label",
    target_col="anomaly_label",
    window_size=256,
)
spc_dataset = SPCAnomalyDataset(df, config)

# The first item unpacks into three values here; the recorded output shows
# torch.Size([]) for both spc and y.
x, spc, y = spc_dataset[0]
print(x.shape, spc.shape, y.shape)

torch.Size([256, 33]) torch.Size([])
torch.Size([256, 33]) torch.Size([]) torch.Size([])
