In [20]:
import numpy as np
import pandas as pd
from sklearn.metrics import pairwise_distances
import scipy.stats as sts
from scipy.spatial.distance import pdist
from scipy.spatial.distance import squareform

In [5]:
def generation(size):
    N = int(size / 3)

    norm1 = sts.norm(100, 40)
    norm2 = sts.norm(200, 40)
    norm3 = sts.norm(300, 40)

    x = np.append(norm1.rvs(N).round(1), np.append(norm2.rvs(N).round(1), norm3.rvs(N).round(1), axis=0), axis=0).astype(int)
    y = np.append(norm1.rvs(N).round(1), np.append(norm2.rvs(N).round(1), norm3.rvs(N).round(1), axis=0), axis=0).astype(int)

    points = list(zip(x, y))
    points = np.unique(points, axis=0)

    return np.array(points)

points = generation(100)

In [6]:
indices = np.random.choice(points.shape[0], size=5, replace=False)
sample = points[indices]

In [8]:
sample

array([[296, 298],
       [ 98, 140],
       [ 81,  74],
       [138, 224],
       [125, 135]])

In [22]:
pairwise_distances(sample, metric='manhattan')

array([[  0., 356., 439., 232., 334.],
       [356.,   0.,  83., 124.,  32.],
       [439.,  83.,   0., 207., 105.],
       [232., 124., 207.,   0., 102.],
       [334.,  32., 105., 102.,   0.]])

In [23]:
squareform(pdist(sample, 'cityblock'))

array([[  0., 356., 439., 232., 334.],
       [356.,   0.,  83., 124.,  32.],
       [439.,  83.,   0., 207., 105.],
       [232., 124., 207.,   0., 102.],
       [334.,  32., 105., 102.,   0.]])

In [24]:
points = np.array([
    [ 57, 111],
    [303, 310],
    [205, 196],
    [332, 257],
    [180, 172]
])

In [26]:
dist = squareform(pdist(points, 'euclidean'))
dist

array([[  0.        , 316.41270518, 170.67220043, 311.35349685,
        137.29530218],
       [316.41270518,   0.        , 150.33296378,  60.41522987,
        184.85940604],
       [170.67220043, 150.33296378,   0.        , 140.89002804,
         34.6554469 ],
       [311.35349685,  60.41522987, 140.89002804,   0.        ,
        174.15223226],
       [137.29530218, 184.85940604,  34.6554469 , 174.15223226,
          0.        ]])

In [29]:
ultra_list = [34.655446902326915, 60.41523, 153.983751, 228.651367]

In [34]:
clusters = (((2, 4), 0), (1, 3))

In [35]:
from Hierarchy.ToCulcMethods.UltrametricMatrix import ultramatrix

ultra = ultramatrix(clusters, ultra_list)

In [36]:
ultra

array([[  0.       , 228.651367 , 153.983751 , 228.651367 , 153.983751 ],
       [228.651367 ,   0.       , 228.651367 ,  34.6554469, 228.651367 ],
       [153.983751 , 228.651367 ,   0.       , 228.651367 ,  60.41523  ],
       [228.651367 ,  34.6554469, 228.651367 ,   0.       , 228.651367 ],
       [153.983751 , 228.651367 ,  60.41523  , 228.651367 ,   0.       ]])

In [75]:
# np.max(np.abs())
mtx = (np.abs(dist - ultra) / dist)
mtx[np.isnan(mtx)] = 0
np.max(mtx)

  mtx = (np.abs(dist - ultra) / dist)


0.7433112367667509

In [59]:
start_matrix = squareform(pdist(points, 'euclidean'))
ultra_dists = ultra_list
finish_matrix = ultramatrix(clusters, ultra_dists)

max_abs = np.max(np.abs(start_matrix - finish_matrix))

In [60]:
max_abs

87.76133896394629

In [57]:
from Hierarchy.PipelineFunctions.MainWorkers import pipe
from Hierarchy.MinMaxMethod import MinMaxHierarchy

In [58]:
pipe(points, MinMaxHierarchy, 'euclidean')

(87.76133896394629,
 103.94615404686581,
 [34.655446902326915, 60.41523, 153.983751, 228.651367])

In [102]:
import datetime

# NewRunExperiment(size, func, sample_size, n_iter, FUNCOFMETHODS, dist_metric, comment)
sample_size = 5
n_iter = 1000
n_iter_format = str(n_iter) if n_iter < 1000 else f"{n_iter / 1000}k"
dist_metric = 'euclidean'
comment = '1_dim'

f"{comment}-{dist_metric}-{sample_size}-{n_iter_format} {str(datetime.datetime.today().replace(microsecond=0))}"

'1_dim-euclidean-5-1.0k 2023-04-19 11:25:24'