In [1]:
from scripts.ClusterTS_MDS import ClusterMDS
from numpy import loadtxt, float64

In [2]:
# Load the precomputed dissimilarity matrix from a comma-delimited text file as float64.
# NOTE(review): the file name suggests these are pairwise DTW distances between the PTEX series;
# confirm against the script that produced the file.
PTEX_dissim = loadtxt("./data/PTEX_DTW_matrix.dat", dtype= float64, delimiter = ",")

In [3]:
# Experiment grid: MDS variants, clustering algorithms, embedding dimensions
# and candidate numbers of clusters that the evaluation loop iterates over.
MDS_methods = ("SMACOF-euclidean-classic", "SMACOF-dissim-classic")
cluster_methods = ("K-Means", "GaussMix")
number_dims = (2, 3)
number_clusters = tuple(range(2, 11))  # 2..10 inclusive

# Analysis object built from the dissimilarity matrix loaded above.
TEC_ClusterSeries = ClusterMDS(dissimilarity = PTEX_dissim)

# One list of per-run metric records for each MDS variant.
metrics_SMACOF_euclidean_classic_results = []
metrics_SMACOF_dtw_classic_results = []

In [4]:
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.metrics.cluster import silhouette_score, calinski_harabasz_score, davies_bouldin_score

def GetClusteringMetrics(Xc, num_clusters, cluster_method = "K-Means", random_state = None):
    """Cluster ``Xc`` and score the resulting partition with three internal validity indices.

    Parameters
    ----------
    Xc : array-like
        Data handed to the estimator's ``fit_predict`` and to the scoring functions.
    num_clusters : int
        Number of clusters (K-Means) or mixture components (Gaussian mixture).
    cluster_method : str, default "K-Means"
        Either "K-Means" or "GaussMix".
    random_state : int, RandomState instance or None, default None
        Seed forwarded to the estimator. ``None`` keeps the previous, unseeded behaviour;
        pass an integer to make the scores reproducible between runs.

    Returns
    -------
    dict
        Silhouette, Calinski-Harabasz and Davies-Bouldin scores of the labelling.

    Raises
    ------
    ValueError
        If ``cluster_method`` is not one of the supported names.
    """
    if cluster_method == "K-Means":
        cluster_model = KMeans(n_clusters = num_clusters, init = "k-means++", random_state = random_state)
    elif cluster_method == "GaussMix":
        cluster_model = GaussianMixture(n_components = num_clusters, covariance_type = "full", init_params = "k-means++", random_state = random_state)
    else:
        # Previously an unknown name fell through both branches and failed later
        # with an unbound ``Labels`` variable; fail early with a clear message instead.
        raise ValueError(f"Unknown cluster_method {cluster_method!r}; expected 'K-Means' or 'GaussMix'")

    Labels = cluster_model.fit_predict(Xc)

    SH_score_Xc = silhouette_score(Xc, Labels)
    CH_score_Xc = calinski_harabasz_score(Xc, Labels)
    DB_score_Xc = davies_bouldin_score(Xc, Labels)

    # NOTE(review): the keys (including the trailing space in the second one and the
    # "Boulding" spelling) are kept exactly as they were, because they become column
    # names of the results tables built from these records.
    return {"Silhouette": SH_score_Xc, "Calinski–Harabasz ": CH_score_Xc, "Davies-Boulding":DB_score_Xc}

## SMACOF utilizando la matriz DTW y la matriz euclidiana por factor de corrección, con base en la solución MDS clásica

In [5]:
# Start from empty result lists every time this cell runs. Without this, re-running the
# cell appends duplicate rows, and once these names have been rebound to DataFrames
# further down the notebook, ``.append`` is no longer the list method this loop relies on.
metrics_SMACOF_euclidean_classic_results = list()
metrics_SMACOF_dtw_classic_results = list()

# Grid search: for every MDS variant and embedding dimension compute the embedding once,
# then evaluate every clustering method for every candidate number of clusters.
for MDS_method in MDS_methods:
    for num_dims in number_dims:
        TEC_ClusterSeries.ComputeMDS(num_comps_mds = num_dims, method = MDS_method, max_iter = 1000, eps = 1e-6, verbose = 0, visualize_shepard = False)

        for cluster_method in cluster_methods:
            for num_cluster in number_clusters:
                TEC_ClusterSeries.ClusterTSVectors(num_clusters = num_cluster, cluster_method = cluster_method)
                # GetClusteringMetrics fits its own model on the embedded vectors.
                # NOTE(review): if ClusterTSVectors also fits a model, the scores stored here come
                # from a separate, unseeded fit and may differ slightly from any it reports — confirm.
                cluster_metrics = {"Method": cluster_method, "Dims": num_dims, "Clusters": num_cluster} | GetClusteringMetrics(TEC_ClusterSeries.Xc_TS, num_cluster, cluster_method)

                # The first MDS variant feeds the "euclidean" table, any other one the "dtw" table.
                if MDS_method == MDS_methods[0]:
                    metrics_SMACOF_euclidean_classic_results.append(cluster_metrics)
                else:
                    metrics_SMACOF_dtw_classic_results.append(cluster_metrics)

SMACOF-euclidean-classic with 2 components has a stress-1 value of 0.102424
--Scores with K-Means clustering--
SH coefficient = 0.6549278159086653
CH index = 7831.821887021864
DB index = 0.5825279723781472
--Total series for every cluster--
0 -> 2462
1 -> 1851
--Scores with K-Means clustering--
SH coefficient = 0.7141903227962646
CH index = 12206.369421621512
DB index = 0.4497307166377164
--Total series for every cluster--
0 -> 2448
1 -> 1244
2 -> 621
--Scores with K-Means clustering--
SH coefficient = 0.5916480385906937
CH index = 10360.672016757642
DB index = 0.6057831363167541
--Total series for every cluster--
0 -> 1225
1 -> 1832
2 -> 619
3 -> 637
--Scores with K-Means clustering--
SH coefficient = 0.5829818250711419
CH index = 11331.054726760527
DB index = 0.667464265434252
--Total series for every cluster--
0 -> 646
1 -> 165
2 -> 610
3 -> 1816
4 -> 1076
--Scores with K-Means clustering--
SH coefficient = 0.5361696179591428
CH index = 10804.558248831401
DB index = 0.71132697082671

Exception ignored in: <function ResourceTracker.__del__ at 0x7f79bc55d080>
Traceback (most recent call last):
  File "/usr/lib64/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/usr/lib64/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/usr/lib64/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x7f97c9a5d080>
Traceback (most recent call last):
  File "/usr/lib64/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/usr/lib64/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/usr/lib64/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x7fb43b759080>
Traceback (most recent call last):
  File "/usr/lib64/python3.13/multip

SMACOF-euclidean-classic with 3 components has a stress-1 value of 0.059988
--Scores with K-Means clustering--
SH coefficient = 0.6280015253888284
CH index = 6992.534116429221
DB index = 0.6447938354947518
--Total series for every cluster--
0 -> 2460
1 -> 1853
--Scores with K-Means clustering--
SH coefficient = 0.6762545571524613
CH index = 8515.66921760671
DB index = 0.5767372372162053
--Total series for every cluster--
0 -> 1252
1 -> 2448
2 -> 613
--Scores with K-Means clustering--
SH coefficient = 0.6228947698754433
CH index = 7568.124243728435
DB index = 0.7509263135679547
--Total series for every cluster--
0 -> 767
1 -> 2413
2 -> 600
3 -> 533
--Scores with K-Means clustering--
SH coefficient = 0.6369700958013944
CH index = 7218.460440390613
DB index = 0.699801733778782
--Total series for every cluster--
0 -> 2421
1 -> 50
2 -> 601
3 -> 400
4 -> 841
--Scores with K-Means clustering--
SH coefficient = 0.5358324605378606
CH index = 7620.99561674386
DB index = 0.7487658611624103
--Tota

Exception ignored in: <function ResourceTracker.__del__ at 0x7f61dd15d080>
Traceback (most recent call last):
  File "/usr/lib64/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/usr/lib64/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/usr/lib64/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x7f568a459080>
Traceback (most recent call last):
  File "/usr/lib64/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/usr/lib64/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/usr/lib64/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x7fc92f64d080>
Traceback (most recent call last):
  File "/usr/lib64/python3.13/multip

SMACOF-dissim-classic with 3 components has a stress-1 value of 0.059988
--Scores with K-Means clustering--
SH coefficient = 0.6280015253888254
CH index = 6992.534116429182
DB index = 0.6447938354947557
--Total series for every cluster--
0 -> 2460
1 -> 1853
--Scores with K-Means clustering--
SH coefficient = 0.676254557152458
CH index = 8515.669217606632
DB index = 0.576737237216208
--Total series for every cluster--
0 -> 2448
1 -> 1252
2 -> 613
--Scores with K-Means clustering--
SH coefficient = 0.6855745487017251
CH index = 7432.969988986192
DB index = 0.5760500355310105
--Total series for every cluster--
0 -> 2442
1 -> 1213
2 -> 605
3 -> 53
--Scores with K-Means clustering--
SH coefficient = 0.5210769522755044
CH index = 7132.027389370991
DB index = 0.7988405610974711
--Total series for every cluster--
0 -> 486
1 -> 1863
2 -> 598
3 -> 770
4 -> 596
--Scores with K-Means clustering--
SH coefficient = 0.536129769230819
CH index = 7620.734767928751
DB index = 0.7485975129096284
--Total 

## Métricas de evaluación

In [6]:
from pandas import DataFrame

# Build the CSS table that marks each column's extremes within one block of rows
def highlight_min_max(df_block):
    """Return a style frame flagging the per-column minimum and maximum of ``df_block``.

    Parameters
    ----------
    df_block : DataFrame
        Block of values to inspect.

    Returns
    -------
    DataFrame
        Same index and columns as ``df_block``. Cells equal to their column minimum hold
        'background-color: blue', cells equal to their column maximum hold
        'background-color: red', and every other cell holds ''. The maximum style is
        applied last, so it wins when a cell is both minimum and maximum.
    """
    # Boolean masks of the cells that match their column's extreme value
    is_col_min = df_block == df_block.min()
    is_col_max = df_block == df_block.max()

    # Begin with no styling anywhere, then overlay minimum and maximum in that order
    css = DataFrame('', index=df_block.index, columns=df_block.columns)
    css = css.mask(is_col_min, 'background-color: blue')
    css = css.mask(is_col_max, 'background-color: red')

    return css

# Function to apply highlighting for blocks of N rows
def apply_highlight_in_blocks(df, N, first_value_col = 3):
    """Highlight per-column minima and maxima separately within consecutive blocks of rows.

    Parameters
    ----------
    df : DataFrame
        Table to style. Columns before ``first_value_col`` are left unstyled.
    N : int
        Number of consecutive rows per block; the last block may be shorter.
    first_value_col : int, default 3
        Position of the first column that takes part in the min/max highlighting.
        The default matches the previously hard-coded offset.

    Returns
    -------
    Styler
        ``df.style`` with the block-wise highlighting applied.

    Raises
    ------
    ValueError
        If ``N`` is not positive (a negative value used to yield an unstyled table silently).
    """
    if N <= 0:
        raise ValueError("N must be a positive number of rows per block")

    # Create an empty DataFrame for styling
    style_df = DataFrame('', index=df.index, columns=df.columns)

    # Split DataFrame into blocks of N rows and apply highlight_min_max to each block
    for start in range(0, len(df), N):
        end = start + N
        block = df.iloc[start:end, first_value_col:]

        # Apply the highlighting function to each block and assign it to the style DataFrame
        style_df.iloc[start:end, first_value_col:] = highlight_min_max(block)

    # axis=None makes the Styler call the function once with the whole frame; the
    # precomputed style table is returned regardless of the argument.
    return df.style.apply(lambda _: style_df, axis=None)

In [None]:
# Rows per highlight block: one row for each candidate number of clusters.
num_row_sep = len(number_clusters)

# Turn the accumulated metric records into DataFrames.
# From here on these names refer to DataFrames rather than to the lists of records.
metrics_SMACOF_euclidean_classic_results = DataFrame(metrics_SMACOF_euclidean_classic_results)
metrics_SMACOF_dtw_classic_results = DataFrame(metrics_SMACOF_dtw_classic_results)

In [13]:
# Style the metrics table of the first MDS variant, highlighting the extremes of each
# block of `num_row_sep` rows; the bare expression on the last line displays the table.
# NOTE(review): despite the "DTW" in the variable name, this is built from the
# euclidean results — consider renaming.
styled_metrics_DTW = apply_highlight_in_blocks(metrics_SMACOF_euclidean_classic_results, num_row_sep)
styled_metrics_DTW

Unnamed: 0,Method,Dims,Clusters,Silhouette,Calinski–Harabasz,Davies-Boulding
0,K-Means,2,2,0.654928,7831.821887,0.582528
1,K-Means,2,3,0.71419,12206.369422,0.449731
2,K-Means,2,4,0.591258,10360.941693,0.606451
3,K-Means,2,5,0.645623,9505.994621,0.608489
4,K-Means,2,6,0.526287,9804.979979,0.77035
5,K-Means,2,7,0.528647,10617.622204,0.735573
6,K-Means,2,8,0.498393,10059.870598,0.79611
7,K-Means,2,9,0.44268,9851.425943,0.848316
8,K-Means,2,10,0.421095,9301.463628,0.874002
9,GaussMix,2,2,0.562859,5244.192397,0.639914


In [14]:
# Same block-wise highlighting for the metrics table of the second MDS variant;
# the bare expression on the last line displays the table.
styled_metrics_DTW_Classic = apply_highlight_in_blocks(metrics_SMACOF_dtw_classic_results, num_row_sep)
styled_metrics_DTW_Classic

Unnamed: 0,Method,Dims,Clusters,Silhouette,Calinski–Harabasz,Davies-Boulding
0,K-Means,2,2,0.654928,7831.821887,0.582528
1,K-Means,2,3,0.71419,12206.369422,0.449731
2,K-Means,2,4,0.589595,10360.794021,0.607804
3,K-Means,2,5,0.582582,11330.971017,0.667753
4,K-Means,2,6,0.58487,9589.833365,0.730345
5,K-Means,2,7,0.512421,8804.101798,0.74061
6,K-Means,2,8,0.463867,8418.582797,0.834526
7,K-Means,2,9,0.508447,9899.430325,0.776218
8,K-Means,2,10,0.428063,9119.777581,0.91121
9,GaussMix,2,2,0.563346,5255.13748,0.639397
