In [None]:
import os
import random
import numpy as np

import torch
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score,davies_bouldin_score
from sklearn.preprocessing import OneHotEncoder

# Switch the working directory to the parent folder before importing the
# project-local modules below (see the original note on the chdir line).
# NOTE(review): "../" is resolved against the *current* working directory, so
# running this cell a second time in the same kernel moves up one more level.
# Relative paths used later in this notebook (e.g. './results/...') depend on
# this directory too -- restart the kernel before re-running this cell.
os.chdir("../") #Load from parent directory
from data_utils import load_datasets
from models import select_encoder

In [None]:
# Number of classes per supported dataset; used as the K-means cluster count.
_N_CLASSES_BY_DATASET = {'afdb': 4, 'ims': 5, 'urban': 10}


def clusters(device,window_size,n_cross_val,data_type,model_type,
             encoding_size,encoder_type,suffix,addon=0):
    """Score how well the encodings of a trained encoder cluster.

    For every cross-validation fold the test series are cut into
    non-overlapping windows, encoded with the checkpointed encoder of that
    fold, and clustered with K-means (one cluster per dataset class). The
    silhouette and Davies-Bouldin scores of each fold are printed, followed by
    their mean and standard deviation over all folds.

    Args:
        device: Torch device (e.g. 'cuda' or 'cpu') used for the encoder; the
            checkpoint is mapped onto it when loaded.
        window_size: Number of time steps per window; must be positive and not
            longer than the test series.
        n_cross_val: Number of folds to evaluate (fold indices 0..n_cross_val-1).
        data_type: Dataset name, one of 'afdb', 'ims', 'urban'.
        model_type: Name of the training method; only used to build the
            checkpoint path.
        encoding_size: Encoding dimension passed to ``select_encoder`` and used
            in the checkpoint file name.
        encoder_type: Integer encoder id passed to ``select_encoder`` and used
            in the checkpoint file name.
        suffix: String appended to the checkpoint file name before '.pth.tar'.
        addon: Offset added to the fold index in the checkpoint file name.

    Returns:
        None. Results are reported through ``print``.

    Raises:
        ValueError: If ``data_type`` is not supported, ``window_size`` is not
            positive, or the test series is shorter than one window.
    """
    # Validate up front so a typo fails immediately with a clear message
    # instead of leaving `n_classes` unbound deep inside the fold loop.
    if data_type not in _N_CLASSES_BY_DATASET:
        raise ValueError("Unknown data_type %r; expected one of %s"
                         % (data_type, sorted(_N_CLASSES_BY_DATASET)))
    if window_size <= 0:
        raise ValueError("window_size must be positive, got %r" % (window_size,))
    n_classes = _N_CLASSES_BY_DATASET[data_type]

    s_score = []
    db_score = []

    for cv in range(n_cross_val):
        # Only the test inputs are needed: both metrics are unsupervised, so
        # the training split and the labels are ignored.
        _, _, x_test, _ = load_datasets(data_type, data_type, cv)
        # x_test is indexed as a 3-D array below; axis 1 is passed to the
        # encoder as its input size and the last axis is the one windowed.
        input_size = x_test.shape[1]
        T = x_test.shape[-1]
        n_windows = T // window_size
        if n_windows == 0:
            raise ValueError("window_size (%d) exceeds the series length (%d)"
                             % (window_size, T))

        # Drop the trailing remainder, split the last axis into equal windows
        # and stack the windows along the sample axis.
        x_windows = np.split(x_test[:, :, :window_size * n_windows], n_windows, -1)
        x_windows = torch.Tensor(np.concatenate(x_windows, 0))
        loader = torch.utils.data.DataLoader(
            torch.utils.data.TensorDataset(x_windows), batch_size=100)

        checkpoint_path = ('./results/baselines/%s_%s/%s/encoding_%d_encoder_%d_checkpoint_%d%s.pth.tar'
                           % (data_type, model_type, data_type, encoding_size, encoder_type, cv+addon, suffix))
        # map_location lets a checkpoint saved on GPU be loaded on a CPU-only
        # machine. NOTE: torch.load unpickles the file -- only load trusted
        # checkpoints.
        checkpoint = torch.load(checkpoint_path, map_location=device)

        encoder, _ = select_encoder(device, encoder_type, input_size, encoding_size)
        encoder = encoder.to(device)
        encoder.load_state_dict(checkpoint['encoder_state_dict'])
        encoder.eval()

        encodings = []
        # Inference only: skip autograd bookkeeping to save memory and time.
        with torch.no_grad():
            for (windows,) in loader:
                encoding = encoder(windows.to(device)).detach().cpu().numpy()
                encodings.append(encoding)
        encodings = np.concatenate(encodings, 0)

        kmeans = KMeans(n_clusters=n_classes, random_state=1).fit(encodings)
        cluster_labels = kmeans.labels_
        # Compute each metric once per fold (silhouette is quadratic in the
        # number of samples) and reuse the value for printing and aggregation.
        fold_silhouette = silhouette_score(encodings, cluster_labels)
        fold_davies_bouldin = davies_bouldin_score(encodings, cluster_labels)
        print(fold_silhouette, fold_davies_bouldin)
        s_score.append(fold_silhouette)
        db_score.append(fold_davies_bouldin)
        # Release the fold's encodings before the next fold's data is loaded.
        del encodings

    print('Silhouette score: ', np.mean(s_score),'+-', np.std(s_score))
    print('Davies Bouldin score: ', np.mean(db_score),'+-', np.std(db_score))

    return

In [None]:
# Evaluation settings; every key is forwarded to `clusters` as the keyword
# argument of the same name.
args = dict(
    n_cross_val=10,
    data_type='urban',    # options: afdb, ims, urban
    model_type='tloss',   # options: sup, cpc, tloss, tnc, simclr
    encoder_type=1,
    window_size=2500,
    encoding_size=128,
    suffix='',
    device='cuda',
)

clusters(**args)