**1. Read dataset**

In [1]:
from sklearn.datasets import fetch_20newsgroups
from nltk import word_tokenize
from nltk.stem.snowball import SnowballStemmer
import re
from nltk.corpus import stopwords
from collections import Counter

import gensim
import warnings
from sklearn import manifold
from sklearn import metrics

from gensim.models import Word2Vec
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
import numpy as np
import pandas as pd
import random
import string

import faiss

from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from torch.utils.data import DataLoader

import sys 
sys.path.append("..") 
sys.path.append("../..") 

import pcl.loader
import pcl.builder2

from copy import deepcopy
from sklearn.model_selection import train_test_split

In [2]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

In [3]:
seed = 1

def setup_seed(seed=seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [4]:
f = open('output.txt', 'a')
f.write('seed: ' + str(seed) + '\n')
f.close()

In [5]:
cats = [ 'talk.politics.mideast',
 'talk.politics.misc',
 'talk.religion.misc',
 'misc.forsale',
 'rec.sport.baseball',
 'sci.med',
 'soc.religion.christian',
 'rec.autos']

newsgroups_train = fetch_20newsgroups(subset='train', categories=cats)
newsgroups_test = fetch_20newsgroups(subset='test', categories=cats)

In [6]:
cachedStopWords = stopwords.words("english")
 
def tokenize(text):
    min_length = 3
    words = map(lambda word: word.lower(), word_tokenize(text));
    words = [word for word in words if word not in cachedStopWords]
    tokens =(list(map(lambda token: SnowballStemmer("english", ignore_stopwords=True).stem(token), words)));
    p = re.compile('[a-zA-Z]+');
    filtered_tokens = list(filter(lambda token: p.match(token) and len(token)>=min_length, tokens));
    return filtered_tokens

In [7]:
setup_seed()

def sequence_list(docs):
    corups = []
    for doc in docs:
        corups.append(tokenize(doc))
    return corups

train_corups = sequence_list(newsgroups_train['data'])
train_labels = np.where(newsgroups_train['target']<3, 0, 1)

test_corups = sequence_list(newsgroups_test['data'])
test_labels = np.where(newsgroups_test['target']<3, 0, 1)

train_dataset = pd.DataFrame({'EventId': train_corups,
                         'Label': train_labels.tolist(),
                         'Type': newsgroups_train['target'].tolist()})
test_dataset = pd.DataFrame({'EventId': test_corups,
                        'Label': test_labels.tolist(),
                        'Type': newsgroups_test['target'].tolist()})

train_normal_ds = train_dataset[train_dataset['Label']==0].sample(n=1100, replace=False)
train_abnormal_ds = train_dataset[train_dataset['Label']==1].sample(n=220, replace=False)

val_normal_ds = train_normal_ds.sample(n=100, replace=False)
val_abnormal_ds = train_abnormal_ds.sample(n=20, replace=False)
val_ds = pd.concat([val_normal_ds, val_abnormal_ds])

train_normal_ds = train_normal_ds.drop(val_normal_ds.index)
train_abnormal_ds = train_abnormal_ds.drop(val_abnormal_ds.index)
train_ds = pd.concat([train_normal_ds, train_abnormal_ds])

test_normal_ds = test_dataset[test_dataset['Label']==0].sample(n=1000, replace=False)
test_abnormal_ds = test_dataset[test_dataset['Label']==1].sample(n=200, replace=False)
test_ds =pd.concat([test_normal_ds, test_abnormal_ds])

**2. Preprocessing**

In [8]:
word2vec = Word2Vec(sentences=train_corups, vector_size=100, window=5, min_count=1, workers=4)

word2vec.wv.vectors = np.append(word2vec.wv.vectors, np.zeros((1, 100), dtype=np.float32), axis=0)
word2vec.wv.vectors = np.append(word2vec.wv.vectors, np.random.randn(1, 100).astype(np.float32), axis=0)

In [9]:
def encode_sequence(sequence, logkey2index):
    return [logkey2index.get(logkey, len(word2vec.wv.key_to_index)+1) for logkey in sequence]

train_ds.loc[:, 'Encoded'] = train_ds.loc[:, 'EventId'].apply(lambda x: encode_sequence(x, word2vec.wv.key_to_index))
val_ds.loc[:, 'Encoded'] = val_ds.loc[:, 'EventId'].apply(lambda x: encode_sequence(x, word2vec.wv.key_to_index))
test_ds.loc[:, 'Encoded'] = test_ds.loc[:, 'EventId'].apply(lambda x: encode_sequence(x, word2vec.wv.key_to_index))

In [10]:
setup_seed()

with warnings.catch_warnings():
    warnings.simplefilter('ignore')

    def compose_pairs(dataset):
        number_seq = dataset.shape[0]
        
        data_aug_list = []
        
        for i in range(number_seq):
            sequence = dataset.iloc[i, 3]
            a, b = np.random.choice(len(sequence), 2)
            
            seq_aug1 = deepcopy(sequence)
            seq_aug1.pop(a)
            seq_aug2 = deepcopy(sequence)
            seq_aug2.pop(b)
            data_aug_list.append([seq_aug1, seq_aug2])

        dataset['Pair0'] = pd.Series([x[0] for x in data_aug_list]).values
        dataset['Pair1'] = pd.Series([x[1] for x in data_aug_list]).values
        
        return dataset

train_ds2 = compose_pairs(train_ds)
val_ds2 = compose_pairs(val_ds)
test_ds2 = compose_pairs(test_ds)

train_ds2['Index'] = range(train_ds2.shape[0])
val_ds2['Index'] = range(val_ds2.shape[0])
test_ds2['Index'] = range(test_ds2.shape[0])

In [11]:
batch_size_train = 50
batch_size_val = 20
batch_size_test = 1200

In [12]:
def collate_fn(data_list):
    data_list.sort(key=lambda x: len(x[0]), reverse=True)
    sequence = [torch.tensor(x[0]) for x in data_list]
    label = [x[1] for x in data_list]
    sequence_length = [len(x[0]) for x in data_list]
    index = [x[2] for x in data_list]
    
    seq_aug1 = [torch.tensor(x[3]) for x in data_list]
    seq_aug2 = [torch.tensor(x[4]) for x in data_list]
    
    type = [x[5] for x in data_list]

    padded_sequence = pad_sequence(sequence, batch_first=True, padding_value=len(word2vec.wv.key_to_index))
    padded_seq_aug1 = pad_sequence(seq_aug1, batch_first=True, padding_value=len(word2vec.wv.key_to_index))
    padded_seq_aug2 = pad_sequence(seq_aug2, batch_first=True, padding_value=len(word2vec.wv.key_to_index))
    return index, padded_sequence, label, sequence_length, padded_seq_aug1, padded_seq_aug2, type

In [13]:
train_data_list = train_ds[['Encoded', 'Label', 'Index', 'Pair0', 'Pair1', 'Type']].values.tolist()
val_data_list = val_ds[['Encoded', 'Label', 'Index', 'Pair0', 'Pair1', 'Type']].values.tolist()
test_data_list = test_ds[['Encoded', 'Label', 'Index', 'Pair0', 'Pair1', 'Type']].values.tolist()

train_loader = DataLoader(train_data_list, batch_size=batch_size_train, collate_fn=collate_fn, drop_last=True)
val_loader = DataLoader(val_data_list, batch_size=batch_size_train, collate_fn=collate_fn, drop_last=True)
test_loader = DataLoader(test_data_list, batch_size=batch_size_test, collate_fn=collate_fn, drop_last=True)

**3. Model**

In [14]:
vocab_size = len(word2vec.wv.key_to_index)
embedding_dim = 100
hidden_dim = 256
num_layers = 2
epochs = 200
num_cluster_n = ['3']
num_cluster_a = ['7']

f = open('output.txt', 'a')
f.write('Abnormal clusters: ' + num_cluster_a[0] + '\n')
f.close()

num_cluster = [str(int(num_cluster_n[i]) + int(num_cluster_a[i])) for i in range(len(num_cluster_n))]

In [15]:
class SequenceEncoder(nn.Module):
    def __init__(self, vocab_size=vocab_size, embedding_dim=embedding_dim, 
                 hidden_dim=hidden_dim, num_layers=num_layers, num_classes=2):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.embeddings = nn.Embedding.from_pretrained(torch.from_numpy(word2vec.wv.vectors))
        self.embeddings.requires_grad = True
        self.lstm = nn.LSTM(input_size=embedding_dim,
                            hidden_size=hidden_dim,
                            num_layers=num_layers,
                            batch_first=True,
                            bias=True)

    def forward(self, seq):
        seq_length = [len(x)-x.count(len(word2vec.wv.key_to_index)) for x in seq.tolist()]
        embedded = self.embeddings(seq)
        packed_embedded = pack_padded_sequence(embedded, seq_length, batch_first=True,enforce_sorted=False)
        packed_out, (hidden, cell) = self.lstm(packed_embedded)
        out, len_list = pad_packed_sequence(packed_out, batch_first=True) 
        mean_hidden = torch.sum(out, dim=1) / len_list.view(-1, 1).to(device)
        return nn.functional.normalize(mean_hidden, dim=1)

In [16]:
model = pcl.builder2.MoCo(SequenceEncoder,hidden_dim, batch_size_train, 0.999, 0.05).to(device)
criterion = nn.CrossEntropyLoss().to(device)
optimizer = torch.optim.Adagrad(model.parameters(), 0.005, weight_decay=1e-6)

**4. Training**

In [17]:
def compute_features(train_ds, model):
    model.eval()
    seq_label = [len(x)-x.count(len(word2vec.wv.key_to_index)) for x in train_ds.Encoded.tolist()]
    encoded_sequences = [torch.tensor(x) for x in train_ds.Encoded.tolist()]
    padded_sequence = pad_sequence(encoded_sequences, batch_first=True, 
                                   padding_value=len(word2vec.wv.key_to_index)).to(device)
    features = model(padded_sequence, is_eval=True, sequence_label=seq_label)   
    return features.cpu().numpy()

In [18]:
def run_kmeans(x, num_cluster, temperature):
    """
    Args:
        x: data to be clustered
    """

    results = {'im2cluster': [], 'centroids': [], 'density': []}

    # intialize faiss clustering parameters
    d = x.shape[1]
    k = int(num_cluster[0])
    clus = faiss.Clustering(d, k)
    clus.verbose = True
    clus.niter = 10
    clus.nredo = 5
    clus.seed = seed
    clus.max_points_per_centroid = 1000
    clus.min_points_per_centroid = 10

    res = faiss.StandardGpuResources()
    cfg = faiss.GpuIndexFlatConfig()
    cfg.useFloat16 = False
    cfg.device = 0
    index = faiss.GpuIndexFlatL2(res, d, cfg)

    clus.train(x, index)

    D, I = index.search(x, 3)  # for each sample, find cluster distance and assignments
    im2cluster = [int(n[0]) for n in I]
    # get cluster centroids
    centroids = faiss.vector_to_array(clus.centroids).reshape(k, d)

    # sample-to-centroid distances for each cluster 
    Dcluster = [[] for c in range(k)]
    for im, i in enumerate(im2cluster):
        Dcluster[i].append(D[im][0])

    # concentration estimation (phi)        
    density = np.zeros(k)
    for i, dist in enumerate(Dcluster):
        if len(dist) > 1:
            d = (np.asarray(dist) ** 0.5).mean() / np.log(len(dist) + 10)
            density[i] = d

            # if cluster only has one point, use the max to estimate its concentration
    dmax = density.max()
    for i, dist in enumerate(Dcluster):
        if len(dist) <= 1:
            density[i] = dmax

    density = density.clip(np.percentile(density, 10),
                           np.percentile(density, 90))  # clamp extreme values for stability
    density = temperature * density / density.mean()  # scale the mean to temperature

    # convert to cuda Tensors for broadcast
    centroids = torch.Tensor(centroids).cuda()
    centroids = nn.functional.normalize(centroids, p=2, dim=1)

    im2cluster = torch.LongTensor(im2cluster).cuda()
    density = torch.Tensor(density).cuda()

    results['centroids'] = centroids
    results['density'] = density
    results['im2cluster'] = im2cluster

    return results

In [19]:
def train_model(train_loader, model, criterion, optimizer, epoch, cluster_result=None):
    model.train()
    
    acc_epoch = []
    loss_epoch = []
    
    for index, sequence, label, _, sequence0, sequence1, _ in train_loader:
        pair0 = sequence0.to(device)
        pair1 = sequence1.to(device)
        label = torch.tensor(label).to(device)

        output, target, output_proto, target_proto = model(im_q=pair0, im_k=pair1, cluster_result=cluster_result, 
                                                           index = index, sequence_label=label)
        info_loss = criterion(output, target)
        
        # ProtoNCE loss
        if output_proto is not None:
            loss_proto = 0
            for proto_out,proto_target in zip(output_proto, target_proto):
                loss_proto += criterion(proto_out, proto_target)

            # average loss across all sets of prototypes
            loss_proto /= len(num_cluster)

        loss = loss_proto + loss_proto
#         print(loss_proto.item(), info_loss.item())

        loss_epoch.append(loss.item())

        # compute gradient and do SGD step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
    return np.mean(loss_epoch)

In [20]:
warnings.filterwarnings('ignore')

setup_seed()

best_val_acc_sequence = -1000
best_val_model = None

for i in range(epochs): 
    train_normal = train_ds[train_ds['Label']==0]
    train_abnormal = train_ds[train_ds['Label']==1]

    features_n = compute_features(train_normal, model)
    cluster_result_n = run_kmeans(features_n, num_cluster_n, 0.05)

    features_a = compute_features(train_abnormal, model)
    cluster_result_a = run_kmeans(features_a, num_cluster_a, 0.05)


    cluster_result_a['im2cluster'] = cluster_result_a['im2cluster'] + int(num_cluster_n[0])

    keys = ['im2cluster', 'centroids', 'density']
    cluster_result = {'im2cluster': [[], [], []], 'centroids': [[], [], []], 'density': [[], [], []]}

    for key in keys:
        cluster_result[key] = torch.cat((cluster_result_n[key], cluster_result_a[key]), 0)
    
    epoch_loss = train_model(train_loader, model, criterion, optimizer, i, cluster_result)  
    
    model.eval()
    
    correct_sequence = 0

    faiss_search = faiss.IndexFlatL2(cluster_result['centroids'].size(1))
    faiss_search.add(cluster_result['centroids'].detach().cpu().numpy())
    
#     with torch.no_grad():
#         for index, sequence, sequence_label, length, _, _ in val_loader:
#             true_label = []
#             pred_label = []
#             sequence = sequence.to(device)

#             true_label += sequence_label
#             sq = model.encoder_k(sequence).detach().cpu().numpy()
#             D, I = faiss_search.search(sq, 1)
#             pred_label += [1 if pred>2 else 0 for pred in I]

#             acc_sequence = (torch.tensor(true_label)==torch.tensor(pred_label)).sum().item()
#             correct_sequence += acc_sequence
            
#     if correct_sequence > best_val_acc_sequence:
#         best_val_acc_sequence = correct_sequence
#         best_val_model = deepcopy(model.state_dict())
       
    print(f'Epoch {i}: {epoch_loss}')

Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 0 (0.00 s, search 0.00 s): objective=26.7476 imbalance=1.250 nsplit=0         Iteration 1 (0.00 s, search 0.00 s): objective=16.3053 imbalance=1.099 nsplit=0         Iteration 2 (0.00 s, search 0.00 s): objective=15.167 imbalance=1.065 nsplit=0         Iteration 3 (0.00 s, search 0.00 s): objective=14.7547 imbalance=1.048 nsplit=0         Iteration 4 (0.00 s, search 0.00 s): objective=14.5505 imbalance=1.045 nsplit=0         Iteration 5 (0.00 s, search 0.00 s): objective=14.3821 imbalance=1.037 nsplit=0         Iteration 6 (0.00 s, search 0.00 s): objective=14.3104 imbalance=1.036 nsplit=0         Iteration 7 (0.00 s, search 0.00 s): objective=14.2714 imbalance=1.034 nsplit=0         Iteration 8 (0.00 s, search 0.00 s): objective=14.256 imbalance=1.028 nsplit=0         Iteration 9 (0.01 s, search 0.00 s): objective=14.2496 imbalance=1.025 nsplit

Epoch 0: 7.901399354139964
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=14.3165 imbalance=1.027 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=14.3113 imbalance=1.025 nsplit=0       
Objective improved: keep new clusters
Outer iteration 2 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=14.3784 imbalance=1.039 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=14.3118 imbalance=1.025 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=14.3325 imbalance=1.031 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.00 s, search 0.00 s): objective=2.40972 imbalance=1.221 nsplit=0       
Objective improved: keep new clu

Epoch 6: 6.501336634159088
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=15.439 imbalance=1.031 nsplit=0        
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=15.4376 imbalance=1.030 nsplit=0       
Objective improved: keep new clusters
Outer iteration 2 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=15.515 imbalance=1.038 nsplit=0        
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=15.4188 imbalance=1.029 nsplit=0       
Objective improved: keep new clusters
Outer iteration 4 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=15.4452 imbalance=1.023 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.00 s, search 0.00 s): objective=2.9831 imbalance=1.234 nsplit=0   

  Iteration 9 (0.01 s, search 0.01 s): objective=3.93998 imbalance=1.191 nsplit=0       
Epoch 12: 6.300112545490265
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=17.8734 imbalance=1.039 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=17.8581 imbalance=1.027 nsplit=0       
Objective improved: keep new clusters
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=17.9723 imbalance=1.044 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=17.8483 imbalance=1.029 nsplit=0       
Objective improved: keep new clusters
Outer iteration 4 / 5
  Iteration 9 (0.03 s, search 0.02 s): objective=17.8637 imbalance=1.026 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration

  Iteration 9 (0.01 s, search 0.01 s): objective=5.34842 imbalance=1.140 nsplit=0       
Epoch 18: 6.237847526868184
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=21.5064 imbalance=1.037 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=21.4987 imbalance=1.025 nsplit=0       
Objective improved: keep new clusters
Outer iteration 2 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=21.595 imbalance=1.041 nsplit=0        
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=21.4973 imbalance=1.034 nsplit=0       
Objective improved: keep new clusters
Outer iteration 4 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=21.5118 imbalance=1.023 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration

  Iteration 9 (0.01 s, search 0.00 s): objective=6.64129 imbalance=1.057 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=6.70313 imbalance=1.132 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=7.35914 imbalance=1.132 nsplit=0       
Epoch 24: 6.118396798769633
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=25.6419 imbalance=1.033 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=25.646 imbalance=1.026 nsplit=0        
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=25.744 imbalance=1.039 nsplit=0        
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=25.6408 imbalance=1.032 nsplit=0       
Objective improved: keep new clusters
Outer iteration 4 / 5
  Iteration 9 (0.03

Epoch 30: 6.107977469762166
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=30.3502 imbalance=1.030 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=30.3391 imbalance=1.029 nsplit=0       
Objective improved: keep new clusters
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=30.4686 imbalance=1.034 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=30.3077 imbalance=1.024 nsplit=0       
Objective improved: keep new clusters
Outer iteration 4 / 5
  Iteration 9 (0.03 s, search 0.02 s): objective=30.3163 imbalance=1.022 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.00 s, search 0.00 s): objective=8.86828 imbalance=1.087 nsplit=0 

Epoch 36: 6.021466692288716
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=35.4757 imbalance=1.022 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=35.4454 imbalance=1.023 nsplit=0       
Objective improved: keep new clusters
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=35.6858 imbalance=1.031 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=35.4006 imbalance=1.022 nsplit=0       
Objective improved: keep new clusters
Outer iteration 4 / 5
  Iteration 9 (0.03 s, search 0.02 s): objective=35.4229 imbalance=1.023 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.00 s, search 0.00 s): objective=11.0683 imbalance=1.096 nsplit=0 

  Iteration 9 (0.01 s, search 0.01 s): objective=13.4725 imbalance=1.126 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=15.4873 imbalance=1.185 nsplit=0       
Epoch 42: 5.935602784156799
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=40.3066 imbalance=1.020 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=40.2946 imbalance=1.022 nsplit=0       
Objective improved: keep new clusters
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=40.5468 imbalance=1.028 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=40.2625 imbalance=1.023 nsplit=0       
Objective improved: keep new clusters
Outer iteration 4 / 5
  Iteration 9 (0.03 s, search 0.02 s): objective=40.2922 imbalance=1.021 nsplit=0       
Clu

Epoch 48: 5.863247474034627
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=45.1216 imbalance=1.019 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=45.1121 imbalance=1.021 nsplit=0       
Objective improved: keep new clusters
Outer iteration 2 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=45.3057 imbalance=1.026 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=45.0955 imbalance=1.021 nsplit=0       
Objective improved: keep new clusters
Outer iteration 4 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=45.1176 imbalance=1.023 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.00 s, search 0.00 s): objective=15.3418 imbalance=1.088 nsplit=0 

  Iteration 9 (0.00 s, search 0.00 s): objective=17.322 imbalance=1.155 nsplit=0        
Outer iteration 2 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=17.3255 imbalance=1.049 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=17.0905 imbalance=1.079 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=19.1343 imbalance=1.159 nsplit=0       
Epoch 54: 5.803442815939586
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=50.2176 imbalance=1.020 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=50.203 imbalance=1.019 nsplit=0        
Objective improved: keep new clusters
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=50.3519 imbalance=1.030 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02

  Iteration 9 (0.01 s, search 0.00 s): objective=18.762 imbalance=1.024 nsplit=0        
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=18.9935 imbalance=1.099 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=19.5692 imbalance=1.119 nsplit=0       
Epoch 60: 5.76235302289327
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=55.9524 imbalance=1.028 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=55.9452 imbalance=1.025 nsplit=0       
Objective improved: keep new clusters
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=56.0535 imbalance=1.030 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=55.9436 imbalance=1.025 nsplit=0       
Objective improved: keep new clusters
Oute

  Iteration 9 (0.00 s, search 0.00 s): objective=20.2832 imbalance=1.105 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.00 s, search 0.00 s): objective=21.1933 imbalance=1.137 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=20.7408 imbalance=1.041 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=20.2804 imbalance=1.084 nsplit=0       
Objective improved: keep new clusters
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=21.1179 imbalance=1.119 nsplit=0       
Epoch 66: 5.804095983505249
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=62.0655 imbalance=1.033 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=62.0655 imbalance=1.033 nsplit=0       
Out

  Iteration 9 (0.01 s, search 0.01 s): objective=21.781 imbalance=1.081 nsplit=0        
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=24.8397 imbalance=1.098 nsplit=0       
Epoch 72: 5.672403117020925
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=69.0283 imbalance=1.040 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=69.0283 imbalance=1.040 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=69.2683 imbalance=1.042 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=69.0283 imbalance=1.040 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.03 s, search 0.02 s): objective=69.0299 imbalance=1.038 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Prep

  Iteration 9 (0.01 s, search 0.01 s): objective=25.1094 imbalance=1.139 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=25.0369 imbalance=1.138 nsplit=0       
Epoch 78: 5.635642786820729
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=75.8833 imbalance=1.046 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=75.8833 imbalance=1.046 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=76.1545 imbalance=1.049 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=75.8833 imbalance=1.046 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=75.8888 imbalance=1.044 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Prep

  Iteration 9 (0.01 s, search 0.00 s): objective=25.3441 imbalance=1.063 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=25.1884 imbalance=1.079 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=25.29 imbalance=1.080 nsplit=0         
Epoch 84: 5.645992080370585
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=82.5752 imbalance=1.050 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=82.5752 imbalance=1.050 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=82.9177 imbalance=1.055 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=82.5752 imbalance=1.050 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.03 s, search 0.02 s): objective=82.5781 

  Iteration 9 (0.01 s, search 0.00 s): objective=26.8092 imbalance=1.062 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=29.0268 imbalance=1.169 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=27.9086 imbalance=1.122 nsplit=0       
Epoch 90: 5.595576981703441
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=88.7546 imbalance=1.050 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=88.7546 imbalance=1.050 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=89.126 imbalance=1.060 nsplit=0        
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=88.7546 imbalance=1.050 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.03 s, search 0.02 s): objective=88.7563 

  Iteration 9 (0.01 s, search 0.00 s): objective=28.134 imbalance=1.033 nsplit=0        
Objective improved: keep new clusters
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=31.7345 imbalance=1.270 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=28.9392 imbalance=1.103 nsplit=0       
Epoch 96: 5.607881247997284
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=94.5397 imbalance=1.058 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=94.527 imbalance=1.055 nsplit=0        
Objective improved: keep new clusters
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=95.139 imbalance=1.061 nsplit=0        
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=94.5397 imbalance=1.058 nsplit=0       
Out

  Iteration 9 (0.01 s, search 0.01 s): objective=33.5429 imbalance=1.280 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=30.041 imbalance=1.052 nsplit=0        
Epoch 102: 5.58198340733846
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=99.2826 imbalance=1.058 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=99.2826 imbalance=1.058 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=99.9205 imbalance=1.061 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=99.2826 imbalance=1.058 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.03 s, search 0.02 s): objective=99.2826 imbalance=1.058 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Prep

Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=31.8489 imbalance=1.060 nsplit=0       
Epoch 108: 5.5578887065251665
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=103.669 imbalance=1.059 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=103.669 imbalance=1.059 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=104.1 imbalance=1.062 nsplit=0         
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=103.669 imbalance=1.059 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.03 s, search 0.02 s): objective=103.669 imbalance=1.059 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.00 s, search 0.00 s): object

  Iteration 9 (0.01 s, search 0.00 s): objective=32.0842 imbalance=1.051 nsplit=0       
Objective improved: keep new clusters
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=35.3157 imbalance=1.124 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=33.0686 imbalance=1.054 nsplit=0       
Epoch 114: 5.534933984279633
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=107.335 imbalance=1.060 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=107.337 imbalance=1.059 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=107.552 imbalance=1.059 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=107.335 imbalance=1.060 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.0

  Iteration 9 (0.01 s, search 0.01 s): objective=34.172 imbalance=1.075 nsplit=0        
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=34.0377 imbalance=1.059 nsplit=0       
Epoch 120: 5.5191953380902605
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=110.656 imbalance=1.059 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=110.657 imbalance=1.058 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=110.82 imbalance=1.060 nsplit=0        
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=110.656 imbalance=1.059 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.03 s, search 0.02 s): objective=110.657 imbalance=1.058 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Pr

  Iteration 9 (0.00 s, search 0.00 s): objective=35.6703 imbalance=1.077 nsplit=0       
Objective improved: keep new clusters
Outer iteration 2 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=34.7103 imbalance=1.052 nsplit=0       
Objective improved: keep new clusters
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=35.4244 imbalance=1.075 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=35.2574 imbalance=1.064 nsplit=0       
Epoch 126: 5.627827485402425
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=113.041 imbalance=1.059 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=113.041 imbalance=1.059 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=113.199 imbalance=1.058 nsplit=0       
Ou

  Iteration 9 (0.01 s, search 0.00 s): objective=35.7995 imbalance=1.052 nsplit=0       
Objective improved: keep new clusters
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=38.066 imbalance=1.101 nsplit=0        
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=41.4427 imbalance=1.186 nsplit=0       
Epoch 132: 5.515156070391337
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=115.307 imbalance=1.060 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=115.307 imbalance=1.060 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=115.476 imbalance=1.058 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=115.307 imbalance=1.060 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.0

  Iteration 9 (0.01 s, search 0.01 s): objective=39.3787 imbalance=1.102 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=42.2853 imbalance=1.178 nsplit=0       
Epoch 138: 5.4875683188438416
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=117.927 imbalance=1.060 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=117.927 imbalance=1.060 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=118.054 imbalance=1.060 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=117.927 imbalance=1.060 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=117.927 imbalance=1.060 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Pr

  Iteration 9 (0.01 s, search 0.01 s): objective=39.6378 imbalance=1.102 nsplit=0       
Epoch 144: 5.481233060359955
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=120.61 imbalance=1.060 nsplit=0        
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=120.61 imbalance=1.060 nsplit=0        
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=120.681 imbalance=1.060 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=120.61 imbalance=1.060 nsplit=0        
Outer iteration 4 / 5
  Iteration 9 (0.03 s, search 0.02 s): objective=120.61 imbalance=1.060 nsplit=0        
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.00 s, search 0.00 s): objective=41.7721 imbalance=1

  Iteration 9 (0.01 s, search 0.00 s): objective=38.7656 imbalance=1.052 nsplit=0       
Objective improved: keep new clusters
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=40.1686 imbalance=1.080 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=40.6793 imbalance=1.091 nsplit=0       
Epoch 150: 6.427207946777344
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=122.071 imbalance=1.061 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=122.071 imbalance=1.061 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=122.095 imbalance=1.061 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=122.071 imbalance=1.061 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.0

  Iteration 9 (0.01 s, search 0.00 s): objective=38.7939 imbalance=1.052 nsplit=0       
Objective improved: keep new clusters
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=40.2891 imbalance=1.080 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=40.7808 imbalance=1.087 nsplit=0       
Epoch 156: 5.488386233647664
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=121.53 imbalance=1.061 nsplit=0        
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=121.53 imbalance=1.061 nsplit=0        
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=121.554 imbalance=1.061 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=121.53 imbalance=1.061 nsplit=0        
Outer iteration 4 / 5
  Iteration 9 (0.0

  Iteration 9 (0.01 s, search 0.01 s): objective=40.7816 imbalance=1.080 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=41.3097 imbalance=1.087 nsplit=0       
Epoch 162: 5.480446418126424
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=122.228 imbalance=1.060 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=122.228 imbalance=1.060 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=122.235 imbalance=1.060 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=122.228 imbalance=1.060 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.03 s, search 0.02 s): objective=122.228 imbalance=1.060 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Pre

  Iteration 9 (0.00 s, search 0.00 s): objective=41.4679 imbalance=1.102 nsplit=0       
Objective improved: keep new clusters
Outer iteration 2 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=39.5725 imbalance=1.052 nsplit=0       
Objective improved: keep new clusters
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=41.3773 imbalance=1.085 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=41.9244 imbalance=1.093 nsplit=0       
Epoch 168: 5.488581717014313
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=123.333 imbalance=1.061 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=123.333 imbalance=1.061 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=123.34 imbalance=1.061 nsplit=0        
Ou

Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=42.001 imbalance=1.085 nsplit=0        
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=42.6028 imbalance=1.098 nsplit=0       
Epoch 174: 5.473793864250183
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=124.656 imbalance=1.061 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=124.656 imbalance=1.061 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=124.658 imbalance=1.061 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=124.656 imbalance=1.061 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=124.656 imbalance=1.061 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 time

  Iteration 9 (0.01 s, search 0.00 s): objective=40.5604 imbalance=1.052 nsplit=0       
Objective improved: keep new clusters
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=42.7071 imbalance=1.086 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=43.8654 imbalance=1.122 nsplit=0       
Epoch 180: 5.471028486887614
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=126.304 imbalance=1.061 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=126.304 imbalance=1.061 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=126.306 imbalance=1.061 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=126.304 imbalance=1.061 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.0

Outer iteration 2 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=41.0778 imbalance=1.052 nsplit=0       
Objective improved: keep new clusters
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=43.7637 imbalance=1.079 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=44.5493 imbalance=1.137 nsplit=0       
Epoch 186: 5.468895634015401
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=128.083 imbalance=1.061 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=128.083 imbalance=1.061 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=128.085 imbalance=1.061 nsplit=0       
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=128.083 imbalance=1.061 nsplit=0       
Outer iteration 4 

Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.00 s, search 0.00 s): objective=43.3025 imbalance=1.031 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.00 s, search 0.00 s): objective=44.1038 imbalance=1.103 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=41.5933 imbalance=1.052 nsplit=0       
Objective improved: keep new clusters
Outer iteration 3 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=44.4753 imbalance=1.079 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=45.2123 imbalance=1.138 nsplit=0       
Epoch 192: 5.467070281505585
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=129.916 imbalance=1.061 nsplit=0       
Objective improved: keep new c

  Iteration 9 (0.01 s, search 0.01 s): objective=45.8813 imbalance=1.133 nsplit=0       
Epoch 198: 5.46546745300293
Clustering 1000 points in 256D to 3 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.01 s, search 0.00 s): objective=131.786 imbalance=1.061 nsplit=0       
Objective improved: keep new clusters
Outer iteration 1 / 5
  Iteration 9 (0.01 s, search 0.01 s): objective=131.786 imbalance=1.061 nsplit=0       
Outer iteration 2 / 5
  Iteration 9 (0.02 s, search 0.01 s): objective=132.29 imbalance=1.061 nsplit=0        
Outer iteration 3 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=131.786 imbalance=1.061 nsplit=0       
Outer iteration 4 / 5
  Iteration 9 (0.02 s, search 0.02 s): objective=131.786 imbalance=1.061 nsplit=0       
Clustering 200 points in 256D to 7 clusters, redo 5 times, 10 iterations
  Preprocessing in 0.00 s
Outer iteration 0 / 5
  Iteration 9 (0.00 s, search 0.00 s): objective=44.0565 imbalance=1.

In [21]:
# model.load_state_dict(best_val_model)
model.eval()

faiss_search = faiss.IndexFlatL2(cluster_result['centroids'].size(1))
faiss_search.add(cluster_result['centroids'].detach().cpu().numpy())

true_label = []
pred_label = []

true_type_list = []
pred_type_list = []

for index, sequence, sequence_label, length, _, _, type in test_loader:
    sequence = sequence.to(device)
    true_type_list += type
    
    true_label += sequence_label
    sq = model.encoder_k(sequence).detach().cpu().numpy()
    D, I = faiss_search.search(sq, 1)
    pred_type_list += [i[0] for i in I]
    pred_label += [1 if pred>int(num_cluster_n[0])-1 else 0 for pred in I]

In [22]:
print(metrics.classification_report(true_label, pred_label, digits=4))
print(metrics.confusion_matrix(true_label, pred_label))

fpr, tpr, thresholds = metrics.roc_curve(true_label, pred_label, pos_label=1)
print(metrics.auc(fpr, tpr))

f = open('output.txt', 'a')
f.write('Anomaly detection on testing sequences:'+'\n')
f.write(str(metrics.classification_report(true_label, pred_label, digits=4))+'\n')
f.write(str(metrics.confusion_matrix(true_label, pred_label))+'\n')
f.write(str(metrics.auc(fpr, tpr))+'\n')
f.close()

              precision    recall  f1-score   support

           0     0.9657    0.9290    0.9470      1000
           1     0.7017    0.8350    0.7626       200

    accuracy                         0.9133      1200
   macro avg     0.8337    0.8820    0.8548      1200
weighted avg     0.9217    0.9133    0.9163      1200

[[929  71]
 [ 33 167]]
0.882


In [23]:
print(metrics.rand_score(true_type_list, pred_type_list))
print(metrics.mutual_info_score(true_type_list, pred_type_list))

f = open('output.txt', 'a')
f.write('Clustering results:'+'\n')
f.write('rand_score: ' + str(metrics.rand_score(true_type_list, pred_type_list))+'\n')
f.write('mutual_info_score: ' + str(metrics.mutual_info_score(true_type_list, pred_type_list))+'\n')
f.write('-'*50 + '\n')
f.close()

0.7121809841534612
0.4826271390790803
