# Anomaly detection for time series

### 1. Method (IForest/LOF/SAND) TEST with compression

In [1]:
from Package import *
import random
random.seed(20250412)
# One integer seed, drawn from the fixed stdlib seed above, is handed to every model call.
random_state = random.randint(1, 4294967295)

# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
path ="/home/guoyou/OutlierDetection/TSB-UAD/data/SED/"
# NOTE(review): only the first file is evaluated here, whereas the original-data
# cell iterates over every file — confirm that this is the intended comparison.
filenames = Filenames2List(path)[:1]

# Experiment settings (loop-invariant).
model_name = 'IForest'   # must be one of the keys of `model_functions` below
contamination = 0.1

# Running sums of the per-file metrics; divided by `n` after the loop.
ROC_mean, PR_mean, F_mean, Time_mean, n = 0, 0, 0, 0, 0

for filename in filenames:
    # 0. Load the file once and compress it.
    #    Column 0 is read as the value and column 1 as the label (see the slicing below).
    df = pd.read_csv(path+filename, header=None).dropna()
    snr = estimate_snr_from_file(path+filename,snr_min=25)
    df_comp = compress(path, filename, snr=snr)

    # 1. Prepare the compressed data for the unsupervised method:
    #    keep the rows referenced by df_comp['index'] and overwrite their values
    #    with df_comp['value'].
    #    The explicit .copy() makes the column assignment act on an independent
    #    frame, so `df` is never touched and pandas has no chained-assignment ambiguity.
    df_selected = df.loc[df_comp['index']].copy()
    df_selected[0] = df_comp['value'].values
    df_selected = df_selected.to_numpy()
    data_selected = df_selected[:,0].astype(float)
    label_selected = df_selected[:,1].astype(int)

    # IForest is run point-wise (window of 1); the other models use the estimated
    # window length. The estimate is only computed when it is actually needed.
    if model_name == 'IForest':
        slidingWindow_selected = 1
    else:
        slidingWindow_selected = find_length(data_selected)
    X_data_selected = Window(window = slidingWindow_selected).convert(data_selected).to_numpy()

    # 2. Model selection for testing.
    #    Each entry is a thunk so that only the selected model is executed.
    model_functions = {
        'IForest': lambda: IF(df, df_comp, X_data_selected, label_selected,
                              slidingWindow_selected, contamination=contamination, random_state=random_state),
        'SAND': lambda: sand(df, df_comp, data_selected, label_selected,
                             slidingWindow_selected, contamination=contamination, random_state=random_state),
        'LOF': lambda: lof(df, df_comp, X_data_selected, label_selected,
                           slidingWindow_selected, contamination=contamination, random_state=random_state),
    }
    ROC, PR, F, execution_time = model_functions[model_name]()
    print(f"{filename[:30]:<30}:  AUC_ROC = {ROC:<10.5f} AUC_PR = {PR:<10.5f} "
          f"F = {F:<10.5f} Time = {execution_time:<3.2f}S")
    ROC_mean += ROC
    PR_mean += PR
    F_mean += F
    Time_mean += execution_time
    n += 1

# Guard against an empty file list, which would otherwise raise ZeroDivisionError.
if n == 0:
    print(f"No files were processed from {path}; nothing to average.")
else:
    print(f"Average AUC_ROC :  {ROC_mean/n} \nAverage AUC_PR :   {PR_mean/n} \nAverage F :   {F_mean/n}  \nAverage Time :   {Time_mean/n}S\n")

sed.out                       :  AUC_ROC = 0.99866    AUC_PR = 0.11573    F = 0.05556    Time = 0.12S
Average AUC_ROC :  0.9986622592308959 
Average AUC_PR :   0.11572913856294584 
Average F :   0.05555555555555555  
Average Time :   0.12S



### 2. Method (IForest/LOF/SAND) TEST with original data

In [2]:
import random
random.seed(20250412)
# One integer seed, drawn from the fixed stdlib seed above; re-seeding here yields
# the same value every time this cell is run.
random_state = random.randint(1, 4294967295)

# NOTE(review): absolute, machine-specific path — adjust before running elsewhere.
path ="/home/guoyou/OutlierDetection/TSB-UAD/data/SED/"
filenames = Filenames2List(path)[:]

# Experiment settings (loop-invariant).
model_name = 'IForest'   # must be one of the keys of `model_functions` below
contamination = 0.1

# Running sums of the per-file metrics; divided by `n` after the loop.
ROC_mean, PR_mean, F_mean, Time_mean, n = 0, 0, 0, 0, 0

for filename in filenames:
    # 0. Get data: column 0 is read as the value, column 1 as the label.
    df = pd.read_csv(path+filename, header=None).dropna().to_numpy()
    max_length = df.shape[0]
    data = df[:max_length,0].astype(float)
    label = df[:max_length,1].astype(int)

    # 1. Build the model input.
    #    IForest is run point-wise (window of 1); the other models use the
    #    estimated window length. The windowed matrix is built exactly once.
    if model_name == 'IForest':
        slidingWindow = 1
    else:
        slidingWindow = find_length(data)
    X_data = Window(window = slidingWindow).convert(data).to_numpy()

    # 2. Model selection for testing.
    #    Each entry is a thunk so that only the selected model is executed.
    model_functions = {
        'IForest': lambda: IF_o(df, X_data, label, slidingWindow, contamination, random_state = random_state),
        'SAND': lambda: sand_o(df,  data, label, slidingWindow, contamination),
        'LOF': lambda: lof_o(df,  X_data, label,
                             slidingWindow, contamination=contamination),
    }
    ROC, PR, F, execution_time = model_functions[model_name]()
    print(f"{filename[:30]:<30}:  AUC_ROC = {ROC:<10.5f} AUC_PR = {PR:<10.5f} "
          f"F = {F:<10.5f} Time = {execution_time:<3.2f}S")
    ROC_mean += ROC
    PR_mean += PR
    F_mean += F
    Time_mean += execution_time
    n += 1

# Guard against an empty file list, which would otherwise raise ZeroDivisionError.
if n == 0:
    print(f"No files were processed from {path}; nothing to average.")
else:
    print(f"Average AUC_ROC :  {ROC_mean/n} \nAverage AUC_PR :   {PR_mean/n} \nAverage F :   {F_mean/n}  \nAverage Time :   {Time_mean/n}S\n")

sed.out                       :  AUC_ROC = 0.80754    AUC_PR = 0.00713    F = 0.02360    Time = 1.74S
Average AUC_ROC :  0.8075407627921852 
Average AUC_PR :   0.0071291005278464326 
Average F :   0.02359882005899705  
Average Time :   1.74S

