# Imports

In [None]:
import torch, glob, json
import numpy as np

from sklearn.cluster import KMeans
from sklearn.neighbors import NearestNeighbors

# Functions

In [None]:
def unroll(list_):
    ''' Flatten an iterable of iterables by exactly one level.

    Inputs:
        list_: an iterable whose items are themselves iterable

    Output:
        [list]: the elements of every item, concatenated in order
    '''
    flat = []
    for item in list_:
        # extend() walks the item, so only the outermost nesting is removed
        flat.extend(item)
    return flat
    
def splitSpace(x: np.ndarray, center = None, n_partitions: int = 4, min_samples: int = 4,
               max_sq_dist: float = 2.25e-4):

    ''' Function that recursively splits the space with k-means

    A partition is kept as a leaf (and its center returned) once every sample
    lies within a squared Euclidean distance of `max_sq_dist` from the center.
    Otherwise it is split again into `n_partitions` k-means clusters.
    Partitions holding fewer than `min_samples` samples are dropped.

    Inputs:
        x: the input features [len x dim]
        center: the center from the previous iteration; when None, the origin
            of the feature space is used
        n_partitions: the space will be divided into n partitions at each iteration
        min_samples: the minimum number of samples in a partition; smaller
            partitions contribute no center
        max_sq_dist: largest squared distance allowed between a sample and the
            center for the partition to be accepted as a leaf

    Output:
        [list]: a list of k cluster centers, each a 1-D array of length dim
            (empty when no partition satisfies `min_samples`)
    '''
    if center is None:
        center = np.zeros((x.shape[1]))

    # An empty partition can never produce a center; this guard also keeps
    # `.max()` below from failing on an empty array when min_samples is 0.
    if len(x) == 0 or len(x) < min_samples:
        return []

    sq_dist = np.sum((x - center)**2, 1)
    if sq_dist.max() > max_sq_dist:
        # NOTE: KMeans is not seeded here, so the resulting centers may vary
        # from run to run.
        kmeans = KMeans(n_clusters=n_partitions).fit(x)
        # Forward min_samples and max_sq_dist so that every recursion level
        # applies the caller's settings rather than the defaults.
        return unroll([
            splitSpace(x[kmeans.labels_ == i], kmeans.cluster_centers_[i],
                       n_partitions, min_samples, max_sq_dist)
            for i in range(n_partitions)
        ])

    # All samples are close enough to the center: this partition is a leaf.
    return [center]

# Code
### Normalize features

In [None]:
# Feature groups making up one descriptor vector. 'norm' holds one divisor per
# dimension of the group. The 'active' flag is not read anywhere in this cell.
feat_structure = {
    'ORIENTATION': {'norm': [179.], 'active': True},
    'GL_AFTER': {'norm': np.array([12.]), 'active': True},
    'LBP_4': {'norm': np.array(12*[1.]), 'active': True},
    'HOG_25': {'norm': 12*[484/4], 'active': True},
    'SKEL_RATIO': {'norm': [1.], 'active': True},
    'NCOMP': {'norm': [21.], 'active': True},
}

# One weight per feature group, matched positionally with the insertion order
# of the keys of feat_structure.
weights = np.array([0.05, 0.15, 0.1, 1.0, 0.25, 0.25])

# Each group's divisor is norm / weight, so a larger weight gives a smaller
# divisor for that group. The per-group divisors are then flattened into a
# single vector with one entry per feature dimension.
normalization = np.array(unroll([
    spec['norm'] / weight
    for spec, weight in zip(feat_structure.values(), weights)
]))
ndim = len(normalization)

### Load mapels images

In [None]:
# glob returns matches in arbitrary, filesystem-dependent order. Sorting makes
# the row order of everything derived from `paths` (features, saved cluster
# attribution) reproducible across runs and machines.
paths = sorted(glob.glob('mapels/*.npy'))
len(paths)

### Load features

In [None]:
# Read one feature array per file listed in `paths`.
# NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects;
# only use it on files from a trusted source.
feature_arrays = [np.load(path, allow_pickle=True) for path in paths]

# Stack the arrays into one tensor and rescale every feature dimension by its
# divisor (broadcast over the last axis).
# NOTE(review): assumes all files hold arrays of identical shape whose last
# axis has length ndim - confirm.
image_features = torch.Tensor(feature_arrays) / normalization

# Number of loaded files
n = image_features.shape[0]

### Iteratively split the space and compute the mapotype centers

In [None]:
# Collapse all leading axes so that each row is a single ndim-long feature vector
flat_features = image_features.view(-1, ndim).numpy()

# Drop duplicate rows before clustering
unique_features = np.unique(flat_features, axis=0)

# Recursive binary split (n_partitions=2), starting from the default center
centers = torch.Tensor(splitSpace(unique_features, None, 2))
centers.shape

### Attribute mapels to mapotypes

In [None]:
# Nearest-neighbour index over the computed centers (fit returns the estimator)
neigh = NearestNeighbors(n_neighbors=1).fit(centers)

# For every entry of image_features, look up the index of the closest center
# for each of its rows.
# NOTE(review): kneighbors expects a 2-D query, so each entry is presumably a
# (rows x ndim) block - confirm.
nearest_center_ids = []
for feat in image_features:
    nearest_center_ids.append(
        neigh.kneighbors(feat, n_neighbors=1, return_distance=False)
    )
cluster_n = np.array(nearest_center_ids)

### Save output

In [None]:
# Destination files for the nearest-center indices and the center coordinates
attribution_path = '/Volumes/Data_archives/Cartes/PhD/visual_analytical/cluster_attribution_2.npy'
centers_path = '/Volumes/Data_archives/Cartes/PhD/visual_analytical/cluster_centers_2.npy'

# Index of the nearest center for every feature row
with open(attribution_path, 'wb') as attribution_file:
    np.save(attribution_file, cluster_n)

# Center coordinates, converted from a torch tensor to a NumPy array
with open(centers_path, 'wb') as centers_file:
    np.save(centers_file, centers.numpy())