In [1]:
from scripts.ClusterTS_MDS import ClusterMDS
from numpy import loadtxt, float64

In [2]:
# Load the dissimilarity matrix from a comma-delimited text file, parsed as float64.
# NOTE(review): the file name suggests pairwise DTW distances between the PTEX series — confirm.
PTEX_dissim = loadtxt("./data/PTEX_DTW_matrix.dat", dtype= float64, delimiter = ",")

In [None]:
# MDS variants and embedding dimensionalities to sweep.
# The sweep loop routes results of the FIRST entry to the "euclidean" list and every other entry to the "dtw" list.
MDS_methods = ("SMACOF-euclidean-classic", "SMACOF-dissim-classic")
number_dims = (2, 3)

# Clustering algorithms and cluster counts (2 through 10 inclusive) evaluated on every embedding.
cluster_methods = ("K-Means", "GaussMix")
number_clusters = tuple(range(2, 11))

# Project helper built from the dissimilarity matrix; the sweep loop calls ComputeMDS / ClusterTSVectors on it.
TEC_ClusterSeries = ClusterMDS(dissimilarity = PTEX_dissim)

# Accumulators: one dict of metrics per (cluster method, dims, cluster count), filled by the sweep loop.
metrics_SMACOF_euclidean_classic_results = list()
metrics_SMACOF_dtw_classic_results = list()

In [4]:
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.metrics.cluster import silhouette_score, calinski_harabasz_score, davies_bouldin_score

def GetClusteringMetrics(Xc, num_clusters, cluster_method = "K-Means", random_state = None):
    """Cluster ``Xc`` and score the resulting partition with three internal validity indices.

    Parameters
    ----------
    Xc : array-like
        Samples to cluster; handed unchanged to the estimator's ``fit_predict`` and to the scorers.
    num_clusters : int
        Number of clusters (K-Means) or mixture components (Gaussian mixture).
    cluster_method : {"K-Means", "GaussMix"}, default "K-Means"
        Which estimator to fit.
    random_state : int, RandomState instance or None, default None
        Forwarded to the estimator. ``None`` keeps the previous unseeded behaviour;
        pass an integer to make repeated calls return the same scores.

    Returns
    -------
    dict
        Scores under the keys "Silhouette", "Calinski–Harabasz " and "Davies-Boulding".

    Raises
    ------
    ValueError
        If ``cluster_method`` is not one of the supported names.
    """
    if cluster_method == "K-Means":
        estimator = KMeans(n_clusters = num_clusters, init = "k-means++", random_state = random_state)
    elif cluster_method == "GaussMix":
        estimator = GaussianMixture(n_components = num_clusters, covariance_type = "full", init_params = "k-means++", random_state = random_state)
    else:
        # Previously an unknown name fell through and crashed later on the unbound ``Labels``.
        raise ValueError(f"Unsupported cluster_method {cluster_method!r}; expected 'K-Means' or 'GaussMix'")

    Labels = estimator.fit_predict(Xc)

    SH_score_Xc = silhouette_score(Xc, Labels)
    CH_score_Xc = calinski_harabasz_score(Xc, Labels)
    DB_score_Xc = davies_bouldin_score(Xc, Labels)

    # Keys (including the trailing space and the "Boulding" spelling) are kept as-is:
    # they become DataFrame column names that other cells may rely on.
    return {"Silhouette": SH_score_Xc, "Calinski–Harabasz ": CH_score_Xc, "Davies-Boulding":DB_score_Xc}

## SMACOF utilizando matriz DTW y matriz euclidiana por factor de corrección con base en la solución MDS clásica

In [5]:
# Sweep: one MDS embedding per (method, dimensionality); every embedding is then clustered
# with each (algorithm, cluster count) pair and scored.
# NOTE(review): GetClusteringMetrics fits its own estimator on Xc_TS, so the stored scores can
# differ from anything ClusterTSVectors prints for the same configuration.
for MDS_method in MDS_methods:
    # Results of the first configured MDS method go to the euclidean list, all others to the dtw list
    if MDS_method == MDS_methods[0]:
        results_sink = metrics_SMACOF_euclidean_classic_results
    else:
        results_sink = metrics_SMACOF_dtw_classic_results

    for num_dims in number_dims:
        TEC_ClusterSeries.ComputeMDS(
            num_comps_mds = num_dims,
            method = MDS_method,
            max_iter = 1000,
            eps = 1e-6,
            verbose = 0,
            visualize_shepard = False,
        )

        for cluster_method in cluster_methods:
            for num_cluster in number_clusters:
                TEC_ClusterSeries.ClusterTSVectors(num_clusters = num_cluster, cluster_method = cluster_method)
                scores = GetClusteringMetrics(TEC_ClusterSeries.Xc_TS, num_cluster, cluster_method)

                # Identification columns first, metric columns after (later cells index columns by position)
                cluster_metrics = {"Method": cluster_method, "Dims": num_dims, "Clusters": num_cluster, **scores}
                results_sink.append(cluster_metrics)

SMACOF-euclidean-classic with 2 components has a stress-1 value of 0.108391
--Scores with K-Means clustering--
SH coefficient = 0.6457660197743824
CH index = 8193.386759183793
DB index = 0.5680831717047498
--Total series for every cluster--
0 -> 2145
1 -> 2471
--Scores with K-Means clustering--
SH coefficient = 0.7040587756205205
CH index = 12451.177425236076
DB index = 0.4574870258626125
--Total series for every cluster--
0 -> 2458
1 -> 1529
2 -> 629
--Scores with K-Means clustering--
SH coefficient = 0.6642097028642169
CH index = 11656.703989213582
DB index = 0.600951140908562
--Total series for every cluster--
0 -> 2455
1 -> 615
2 -> 292
3 -> 1254
--Scores with K-Means clustering--
SH coefficient = 0.5700554025836966
CH index = 11716.407028973816
DB index = 0.6811406132617647
--Total series for every cluster--
0 -> 269
1 -> 648
2 -> 1823
3 -> 1263
4 -> 613
--Scores with K-Means clustering--
SH coefficient = 0.5176222993636912
CH index = 11239.409151277212
DB index = 0.74931302323932

## Métricas de evaluación

In [6]:
from pandas import DataFrame

# Function to highlight min and max values within a block of N rows
def highlight_min_max(df_block):
    """Return a frame of CSS strings marking each column's extremes within ``df_block``.

    The result has the same index and columns as ``df_block``. Cells equal to their
    column minimum get a blue background, cells equal to their column maximum get a
    red background, and every other cell gets an empty string.
    """
    column_minima = df_block.min()
    column_maxima = df_block.max()

    is_minimum = df_block == column_minima
    is_maximum = df_block == column_maxima

    blank_styles = DataFrame('', index=df_block.index, columns=df_block.columns)

    # The maximum mask is applied last, so it wins where a cell is both min and max
    return (
        blank_styles
        .mask(is_minimum, 'background-color: blue')
        .mask(is_maximum, 'background-color: red')
    )

# Function to apply highlighting for blocks of N rows
def apply_highlight_in_blocks(df, N, first_value_col = 3):
    """Style ``df`` so that min/max cells are highlighted within consecutive blocks of ``N`` rows.

    Parameters
    ----------
    df : DataFrame
        Table to style. Columns before ``first_value_col`` are left unstyled.
    N : int
        Number of consecutive rows per block; the last block may be shorter.
    first_value_col : int, default 3
        Position of the first column to compare. The default matches the previous
        hard-coded offset.

    Returns
    -------
    The Styler produced by ``df.style.apply`` with the per-block highlighting.

    Raises
    ------
    ValueError
        If ``N`` is not positive. A negative ``N`` used to return an unstyled table silently.
    """
    if N <= 0:
        raise ValueError(f"N must be a positive number of rows per block, got {N!r}")

    # Start from an all-empty style frame shaped like df
    style_df = DataFrame('', index=df.index, columns=df.columns)

    for start in range(0, len(df), N):
        rows = slice(start, start + N)
        block = df.iloc[rows, first_value_col:]

        # Extremes are computed per block, so each block gets its own min/max marks
        style_df.iloc[rows, first_value_col:] = highlight_min_max(block)

    # axis=None hands the whole frame to the function, which returns the prebuilt style frame
    return df.style.apply(lambda _: style_df, axis=None)

In [7]:
# Rows per highlight block: the sweep loop appends one row per tested cluster count
# for each (method, dims) combination.
num_row_sep = len(number_clusters)

# Turn the accumulated lists of dicts into tables (one column per dict key).
# NOTE(review): this rebinds the list names to DataFrames, so re-running the sweep loop
# afterwards no longer appends to a list — re-run the initialisation cell first.
metrics_SMACOF_euclidean_classic_results = DataFrame(metrics_SMACOF_euclidean_classic_results)
metrics_SMACOF_dtw_classic_results = DataFrame(metrics_SMACOF_dtw_classic_results)

In [8]:
# Highlighted metrics table for the "SMACOF-euclidean-classic" results.
# NOTE(review): the variable is named "..._DTW" but it is built from the euclidean results.
styled_metrics_DTW = apply_highlight_in_blocks(metrics_SMACOF_euclidean_classic_results, num_row_sep)
styled_metrics_DTW

Unnamed: 0,Method,Dims,Clusters,Silhouette,Calinski–Harabasz,Davies-Boulding
0,K-Means,2,2,0.645766,8193.386759,0.568083
1,K-Means,2,3,0.704059,12451.177425,0.457487
2,K-Means,2,4,0.585954,10365.363784,0.612447
3,K-Means,2,5,0.569998,11716.432796,0.681565
4,K-Means,2,6,0.52966,11303.611795,0.720571
5,K-Means,2,7,0.523995,10941.505212,0.732682
6,K-Means,2,8,0.490957,10590.790338,0.793849
7,K-Means,2,9,0.439494,10293.09806,0.84555
8,K-Means,2,10,0.489066,9836.765255,0.811003
9,GaussMix,2,2,0.572741,5903.649829,0.604852


In [9]:
# Highlighted metrics table for the results stored in the dtw list (the non-first MDS method).
styled_metrics_DTW_Classic = apply_highlight_in_blocks(metrics_SMACOF_dtw_classic_results, num_row_sep)
styled_metrics_DTW_Classic

Unnamed: 0,Method,Dims,Clusters,Silhouette,Calinski–Harabasz,Davies-Boulding
0,K-Means,2,2,0.645765,8193.376787,0.568084
1,K-Means,2,3,0.704058,12451.149417,0.457488
2,K-Means,2,4,0.664209,11656.67246,0.600951
3,K-Means,2,5,0.638486,9716.096035,0.680372
4,K-Means,2,6,0.534854,11320.330306,0.704935
5,K-Means,2,7,0.521165,10937.380424,0.745122
6,K-Means,2,8,0.447038,9952.003253,0.826246
7,K-Means,2,9,0.437076,10294.213268,0.846838
8,K-Means,2,10,0.42777,9856.502722,0.868158
9,GaussMix,2,2,0.571879,5882.353431,0.605648
