In [1]:
!pip install ruptures



In [2]:
import multiprocessing
import os
import sys
import pandas as pd
import matplotlib.pyplot as plt
import json

if __name__ == '__main__':
    # force=True keeps this cell re-runnable: without it, a second call in the
    # same kernel raises RuntimeError("context has already been set").
    multiprocessing.set_start_method('spawn', force=True)

# This file has to be put directly in folder "main/"
#   all other dependent packages need to be in sub folders of "main/"
if os.name != 'nt' and os.getcwd() not in sys.path:
    # Guarded so that re-running the cell does not append duplicate entries.
    sys.path.append(os.getcwd())

    

In [3]:

import gpbasics.global_parameters as global_param

# Initialise the gpbasics global parameters, passing the CPU count as `tf_parallel`.
# NOTE(review): this call is placed before the remaining gpbasics/gpmretrieval
# imports — presumably those modules rely on the initialised globals; confirm
# before moving it or merging the import cells.
global_param.init(tf_parallel=os.cpu_count())

import gpbasics.Statistics.CovarianceMatrix as cov
import gpbasics.DataHandling.DatasetHandler as dsh
import gpbasics.DataHandling.DataInput as di
import gpmretrieval.Experiments.Experiment as exp
import gpmretrieval.AutomaticGpmRetrieval as agr
import gpbasics.KernelBasics.BaseKernels as bk
import gpbasics.MeanFunctionBasics.BaseMeanFunctions as bmf
import gpbasics.Metrics.Metrics as met
import gpmretrieval.KernelExpansionStrategies.KernelExpansionStrategy as kexp
import gpbasics.Optimizer.Fitter as f
import gpbasics.Metrics.MatrixHandlingTypes as mht
import gpmretrieval.autogpmr_parameters as auto_gpm_param
import tensorflow as tf
import numpy as np
import logging
from PIC import pic
from sklearn.cluster import KMeans, DBSCAN, AgglomerativeClustering
import random

INFO: Process-982:Initialization of global parameters finished.


In [4]:
# ---------------------------------------------------------------------------
# Config section
# ---------------------------------------------------------------------------

# --- library-wide settings (gpbasics / gpmretrieval) ---
global_param.p_max_threads = os.cpu_count()

# Base kernels made available to the search.
global_param.p_used_base_kernel = [
    bk.PeriodicKernel,
    bk.SquaredExponentialKernel,
    bk.LinearKernel,
]

# Base mean functions made available to the search.
global_param.p_used_base_mean_functions = [
    bmf.ConstantMeanFunction,
    bmf.LinearMeanFunction,
]

global_param.p_default_hierarchical_kernel_expansion = (
    kexp.KernelExpansionStrategyType.BasicHierarchical
)
global_param.p_gradient_fitter = f.VariationalSgdFitter
auto_gpm_param.p_model_selection_with_test_data = True

# The dtype must be assigned first: the jitter constant below is created with it.
global_param.p_dtype = tf.float64
global_param.p_cov_matrix_jitter = tf.constant(1e-8, dtype=global_param.p_dtype)

# --- experiment settings used by the cells below ---
dataset_name = "data/dd_test_basic_anomaly0.csv"
segment_length = 5          # number of rows per segment
number_of_clusters = 2
method = "KLD"              # one of: cov, likelihood, MSE, KLD
normalization = False


In [5]:
# Load the CSV once, only to find out how many rows the dataset has.
dataset_pandas = pd.read_csv(dataset_name)
dataset_length = dataset_pandas.shape[0]

# Search algorithms to run on each segment. Only SKC is enabled; the other
# members that can be listed here are:
#   agr.AlgorithmType.CKS
#   agr.AlgorithmType.ABCD
#   agr.AlgorithmType.SKS          (3CS)
#   agr.AlgorithmType.IKS          (LARGe)
#   agr.AlgorithmType.TopDown_HKS  (LGI)
algorithms = [agr.AlgorithmType.SKC]

# Depth limits handed to the retrieval experiments.
options = dict(global_max_depth=1, local_max_depth=3)

In [6]:

# Prepare data: load the dataset and cut it into consecutive, equally sized segments.
dataset = dsh.GeneralDatasetHandler(dataset_name, y_col_name='Y', x_col_name="X")
# Alternative data source: dataset = dsh.KDDHandler(2)

# NOTE(review): a, b, c, d are presumably x_train, y_train, x_test, y_test —
# confirm against get_splitted_data() and the DataInput signature.
a, b, c, d = dataset.get_splitted_data()

# One DataInput per full segment; a trailing partial segment is not included
# because the segment count is truncated to an integer.
datasets = []
for i in range(int(dataset_length / segment_length)):
    # The same row window is applied to all four arrays.
    segment_slice = slice(i * segment_length, (i + 1) * segment_length)
    data_input_format = di.DataInput(
        a[segment_slice],
        b[segment_slice],
        c[segment_slice],
        d[segment_slice],
    )
    datasets.append(data_input_format)

In [7]:
# Perform the kernel search on every segment and collect, per segment, the
# kernel of the best model together with that kernel's noise value.
list_of_kernels, list_of_noises = [], []
for i in range(int(dataset_length / segment_length)):
    exps = exp.execute_retrieval_experiments_set(
        datasets[i],
        algorithms=algorithms,
        mean_function_search=False,
        options=options,
        illustrate=False,
        model_selection_metric=met.MetricType.LL,
        local_approx=mht.MatrixApproximations.NONE,
        numerical_matrix_handling=mht.NumericalMatrixHandlingType.CHOLESKY_BASED,
        optimize_metric=met.MetricType.LL,
        random_restart=10,  # to optimize
    )

    # NOTE(review): index 1 of the returned structure is assumed to hold the
    # result entry containing "best_gp" — confirm against the Experiment API.
    best_kernel = exps[1]["best_gp"].covariance_matrix.kernel
    list_of_kernels.append(best_kernel)
    list_of_noises.append(best_kernel.get_noise())

INFO: Using predefined data input.
INFO: # Starting Experiment Set #1.1. 
Having meta: {'date': '06/11/2021, 09:03:32', 'max available_threads': 4, 'options': {'global_max_depth': 1, 'local_max_depth': 3, 'default_window_size': 500, 'npo': 1, 'partitions_split_per_layer': 10}, 'local_approx': 'MatrixApproximations.NONE', 'numerical_matrix_handling': 'NumericalMatrixHandlingType.CHOLESKY_BASED', 'approx_subset_size': 'None', 'used_base_kernel': "[<class 'gpbasics.KernelBasics.BaseKernels.PeriodicKernel'>, <class 'gpbasics.KernelBasics.BaseKernels.SquaredExponentialKernel'>, <class 'gpbasics.KernelBasics.BaseKernels.LinearKernel'>]", 'nystroem_ratio: ': 0.1, 'jitter': 1e-08, 'dataset': {'name': None, 'size': 5, 'Comment': 'Predefined Data Input'}, 'metrics': {'optimize_for': 'LL', 'select_by': 'LL'}, 'tensorflow_config': {'inter_op_parallel': 4, 'intra_op_parallel': 4}, 'optimizer_config': {'gradient_fitter': "<class 'gpbasics.Optimizer.Fitter.VariationalSgdFitter'>", 'non-gradient_fitte

KeyboardInterrupt: 