# One class SVM

In [12]:
%load_ext autoreload
%autoreload 2
import warnings
warnings.filterwarnings('ignore')

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [19]:
import datetime
from datetime import timezone
import pickle
from sklearn.svm import OneClassSVM
from scipy.sparse import vstack

try:
    import sys
    sys.path.append("../dataset/")
    from dataset_producer import DatasetProducer, DatasetEvaluation, DATASETS, get_log_pattern_key, get_log_timestamp
    iwiget = True
except:
    from dataset.dataset_producer import DatasetProducer, DatasetEvaluation, DATASETS, get_log_pattern_key, get_log_timestamp
    iwiget = False

In [27]:
SUBMIT_LIMIT = datetime.timedelta(minutes=1)

def basic_oc_svm_test(dataset_name=DATASETS.DATASET_TEST, window_range=12):
    models = {}
    print("_______________________________________________________________________________________")
    dataset = DatasetProducer(dataset_name, window=window_range)

    print("Learning")
    learning_matrix = None
    learning_matrix_norm = None
    for _, features in dataset.emulate_log_count_matrix(iwiget=iwiget, training=True):
        if learning_matrix is not None:
            learning_matrix = vstack((learning_matrix, features))
            learning_matrix_norm = vstack((learning_matrix, features/features.max()))
        else:
            learning_matrix = features
            learning_matrix_norm = features/features.max()
            
    for fsize in range(window_range, 2, -1):
        print("Fitting models for fsize {}".format(fsize))   
        trainig_features = learning_matrix[:,-fsize:]
        trainig_features_norm = learning_matrix_norm[:,-fsize:]     
        oc_svm_rbf = (OneClassSVM(), dataset.get_evaluation(), datetime.datetime(1970, 1, 1, tzinfo=timezone.utc))
        oc_svm_rbf_norm = (OneClassSVM(), dataset.get_evaluation(), datetime.datetime(1970, 1, 1, tzinfo=timezone.utc))
        oc_svm_poly = (OneClassSVM(kernel="poly"), dataset.get_evaluation(), datetime.datetime(1970, 1, 1, tzinfo=timezone.utc))
        oc_svm_poly_norm = (OneClassSVM(kernel="poly"), dataset.get_evaluation(), datetime.datetime(1970, 1, 1, tzinfo=timezone.utc))
        oc_svm_rbf[0].fit(trainig_features)
        oc_svm_poly[0].fit(trainig_features)
        oc_svm_rbf_norm[0].fit(trainig_features_norm)
        oc_svm_poly_norm[0].fit(trainig_features_norm)
        models[fsize] = [oc_svm_rbf, oc_svm_poly, oc_svm_rbf_norm, oc_svm_poly_norm]

    print("Execution")
    for ts, features in dataset.emulate_log_count_matrix(iwiget=iwiget):
        feature_normalized = features/features.max()
        for fsize in range(window_range, 2, -1):
            f = [features[:,-fsize:], features[:,-fsize:], feature_normalized[:,-fsize:], feature_normalized[:,-fsize:]]
            for oc_svm, f in zip(models[fsize], f):
                if oc_svm[0].predict(f) and ts - oc_svm[2] > SUBMIT_LIMIT:
                    dataset.submit(ts, oc_svm[1])
                    
    print("_______Results_______")
    for fsize in range(window_range, 2, -1):
        labels = ["OC SVM RBF", "OC SVM Poly", "OC SVM RBF Norm", "OC SVM Poly Norm"]
        for oc_svm, label in zip(models[fsize], labels):
            print(label)
            oc_svm[1].evaluate()
    return models

In [28]:
oc_svm_test_dataset = basic_oc_svm_test(dataset_name=DATASETS.DATASET_TEST)
pickle.dump( oc_svm_test_dataset, open( "dumps/oc_svm_test_dataset.p", "wb" ) )

_______________________________________________________________________________________
Learning


IntProgress(value=0, description='Files:', max=2)

IntProgress(value=0, description='Segments:', max=10)

Done.
Fitting models for fsize 12
Fitting models for fsize 11
Fitting models for fsize 10
Fitting models for fsize 9
Fitting models for fsize 8
Fitting models for fsize 7
Fitting models for fsize 6
Fitting models for fsize 5
Fitting models for fsize 4
Fitting models for fsize 3
Execution


IntProgress(value=0, description='Files:', max=2)

IntProgress(value=0, description='Segments:', max=10)

Done.
_______Results_______
OC SVM RBF
_____________________________________________________
Problems detected: 1 of 1
FP count: 0
Maintenance hits: 0
Total score: 0
_____________________________________________________
OC SVM Poly
_____________________________________________________
Problems detected: 1 of 1
FP count: 0
Maintenance hits: 0
Total score: 0
_____________________________________________________
OC SVM RBF Norm
_____________________________________________________
Problems detected: 1 of 1
FP count: 0
Maintenance hits: 0
Total score: 0
_____________________________________________________
OC SVM Poly Norm
_____________________________________________________
Problems detected: 1 of 1
FP count: 0
Maintenance hits: 0
Total score: 0
_____________________________________________________
OC SVM RBF
_____________________________________________________
Problems detected: 1 of 1
FP count: 0
Maintenance hits: 0
Total score: 0
_____________________________________________________


In [None]:
basic_oc_svm_dataset_1 = basic_oc_svm_test(dataset_name=DATASETS.DATASET_1)
pickle.dump( basic_oc_svm_dataset_1, open( "dumps/basic_oc_svm_dataset_1.p", "wb" ) )

_______________________________________________________________________________________
Learning


IntProgress(value=0, description='Files:', max=97)

IntProgress(value=0, description='Segments:', max=10)

Done.
Fitting models for fsize 12
Fitting models for fsize 11


In [None]:
basic_oc_svm_dataset_2 = basic_oc_svm_test(dataset_name=DATASETS.DATASET_2)
pickle.dump( basic_oc_svm_dataset_2, open( "dumps/basic_oc_svm_dataset_2.p", "wb" ) )