In [None]:
from scripts.ClusterTS_MDS import ClusterVTECDataMDS
from numpy import loadtxt, float64

In [2]:
# Load the precomputed pairwise dissimilarity matrix (DTW, per the file name)
# from a comma-separated text file as float64 values.
PTEX_dissim = loadtxt(
    "./data/PTEX_DTW_matrix.dat",
    delimiter=",",
    dtype=float64,
)

In [3]:
# Experiment grid: every MDS variant is combined with every embedding
# dimension, clustering algorithm and cluster count listed below.
MDS_methods = (
    "SMACOF-euclidean-classic",
    "SMACOF-dissim-classic",
)
cluster_methods = ("K-Means", "Gaussian")
number_dims = tuple(range(2, 4))        # embedding dimensions: (2, 3)
number_clusters = tuple(range(2, 11))   # cluster counts: 2 through 10

# Project helper built from the precomputed dissimilarity matrix.
TEC_ClusterSeries = ClusterVTECDataMDS(dissimilarity=PTEX_dissim)

# Result stores, one per MDS variant; filled as
# "<cluster method>-<dims>-<clusters>" -> metrics dict.
metrics_SMACOF_euclidean_classic_results = {}
metrics_SMACOF_dtw_classic_results = {}

In [4]:
from sklearn.cluster import KMeans
from sklearn.mixture import GaussianMixture
from sklearn.metrics.cluster import silhouette_score, calinski_harabasz_score, davies_bouldin_score

def GetClusteringMetrics(Xc, num_clusters, cluster_method = "K-Means", random_state = None):
    """Cluster ``Xc`` and score the resulting partition with three internal indices.

    A fresh estimator is fitted on every call, so without a fixed
    ``random_state`` two calls on the same data may return different scores.

    Parameters
    ----------
    Xc : array-like
        Points to cluster, in the (n_samples, n_features) layout expected by
        the scikit-learn estimators and metrics used below.
    num_clusters : int
        Number of clusters (K-Means) or mixture components (Gaussian).
    cluster_method : {"K-Means", "Gaussian"}, default "K-Means"
        Clustering algorithm to fit.
    random_state : int, RandomState instance or None, default None
        Forwarded to the estimator; pass an int for reproducible results.
        ``None`` keeps the previous unseeded behaviour.

    Returns
    -------
    dict
        Silhouette coefficient (higher is better), Calinski-Harabasz index
        (higher is better) and Davies-Bouldin index (lower is better).

    Raises
    ------
    ValueError
        If ``cluster_method`` is not one of the supported names.
    """
    if cluster_method == "K-Means":
        KMeans_Cluster_TS = KMeans(n_clusters = num_clusters, init = "k-means++", max_iter = 500,
                                   random_state = random_state)
        Labels = KMeans_Cluster_TS.fit_predict(Xc)

    elif cluster_method == "Gaussian":
        GaussianMix_Cluster_TS = GaussianMixture(n_components = num_clusters, covariance_type = "full",
                                                 init_params = "k-means++", max_iter = 500,
                                                 random_state = random_state)
        Labels = GaussianMix_Cluster_TS.fit_predict(Xc)

    else:
        # Previously an unknown name fell through and failed later with an
        # UnboundLocalError on `Labels`; fail early with a clear message.
        raise ValueError(
            f"Unknown cluster_method {cluster_method!r}; expected 'K-Means' or 'Gaussian'"
        )

    SH_score_Xc = silhouette_score(Xc, Labels)
    CH_score_Xc = calinski_harabasz_score(Xc, Labels)
    DB_score_Xc = davies_bouldin_score(Xc, Labels)

    # Keys are kept exactly as before (including the trailing space and the
    # "Boulding" spelling) because they become DataFrame column labels.
    return {"Silhouette": SH_score_Xc, "Calinski–Harabasz ": CH_score_Xc, "Davies-Boulding":DB_score_Xc}

## SMACOF utilizando matriz DTW y matriz euclidiana por factor de corrección con base en la solución MDS clásica

In [5]:
# Grid search: one MDS embedding per (method, dimension) pair, then every
# clustering algorithm / cluster count is evaluated on that embedding.
for MDS_method in MDS_methods:
    for num_dims in number_dims:
        TEC_ClusterSeries.ComputeMDS(
            num_comps_mds = num_dims,
            method = MDS_method,
            max_iter = 1000,
            eps = 1e-6,
            verbose = 0,
            visualize_shepard = False,
        )

        for cluster_method in cluster_methods:
            for num_cluster in number_clusters:
                TEC_ClusterSeries.ClusterTSVectors(
                    num_clusters = num_cluster,
                    cluster_method = cluster_method,
                )
                # NOTE(review): GetClusteringMetrics fits its own estimator on
                # Xc_TS instead of reusing the result of ClusterTSVectors, so
                # the stored scores may differ from anything ClusterTSVectors
                # reports for the same settings - confirm this is intended.
                cluster_metrics = GetClusteringMetrics(
                    TEC_ClusterSeries.Xc_TS, num_cluster, cluster_method
                )

                # The first entry of MDS_methods feeds the euclidean store;
                # any other method feeds the DTW store.
                if MDS_method != MDS_methods[0]:
                    metrics_SMACOF_dtw_classic_results[f"{cluster_method}-{num_dims}-{num_cluster}"] = cluster_metrics
                else:
                    metrics_SMACOF_euclidean_classic_results[f"{cluster_method}-{num_dims}-{num_cluster}"] = cluster_metrics

SMACOF-euclidean-classic with 2 components has a stress-1 value of 0.102251
--Scores with K-Means clustering--
SH coefficient = 0.6549008031439457
CH index = 7829.109936225328
DB index = 0.5823938581009812
--Total series for every cluster--
0 -> 2461
1 -> 1852
--Scores with K-Means clustering--
SH coefficient = 0.7141846234923809
CH index = 12204.859320758995
DB index = 0.44994965321646413
--Total series for every cluster--
0 -> 1245
1 -> 2447
2 -> 621
--Scores with K-Means clustering--
SH coefficient = 0.6895615558778802
CH index = 11066.46914253523
DB index = 0.5747767075010749
--Total series for every cluster--
0 -> 166
1 -> 2446
2 -> 1089
3 -> 612
--Scores with K-Means clustering--
SH coefficient = 0.6447431075754413
CH index = 9514.290875237228
DB index = 0.610922048835319
--Total series for every cluster--
0 -> 379
1 -> 2444
2 -> 848
3 -> 603
4 -> 39
--Scores with K-Means clustering--
SH coefficient = 0.5541056025524498
CH index = 10498.527089723888
DB index = 0.7289578475496407


## Métricas de evaluación

In [9]:
from pandas import DataFrame

# Function to highlight min and max values within a block of N rows
def highlight_min_max(df_block):
    """Build a CSS style frame marking each column's extreme values.

    Parameters
    ----------
    df_block : DataFrame
        Block of rows to inspect; minima and maxima are taken per column.

    Returns
    -------
    DataFrame
        Same index and columns as ``df_block``. Cells equal to their column
        minimum hold ``'background-color: blue'``, cells equal to their column
        maximum hold ``'background-color: red'``, all others hold ``''``.
        The maximum style is applied last, so it wins when a cell is both.
    """
    # Column-wise comparison against the per-column extremes.
    is_col_min = df_block.eq(df_block.min())
    is_col_max = df_block.eq(df_block.max())

    css = DataFrame('', index=df_block.index, columns=df_block.columns)
    css = css.mask(is_col_min, 'background-color: blue')  # Min
    css = css.mask(is_col_max, 'background-color: red')  # Max
    return css

# Function to apply highlighting for blocks of N rows
def apply_highlight_in_blocks(df, N):
    """Highlight per-column minima and maxima within consecutive blocks of ``N`` rows.

    Parameters
    ----------
    df : DataFrame
        Table to style.
    N : int
        Number of rows per block; the last block may be shorter.

    Returns
    -------
    Styler
        ``df.style`` with the styles from ``highlight_min_max`` applied to
        each block independently.
    """
    css_grid = DataFrame('', index=df.index, columns=df.columns)

    total_rows = len(df)
    for block_start in range(0, total_rows, N):
        rows = slice(block_start, block_start + N)
        # Style each block on its own so extremes are relative to the block.
        css_grid.iloc[rows] = highlight_min_max(df.iloc[rows])

    def _precomputed_styles(_):
        # With axis=None the Styler passes the whole frame; the styles were
        # already computed above, so the argument is ignored.
        return css_grid

    return df.style.apply(_precomputed_styles, axis=None)

In [10]:
# Rows per highlight block: one row per tested cluster count, so each
# (cluster method, dimension) group in the results table is styled separately.
num_row_sep = len(number_clusters)

In [11]:
# Convert the {key: metrics} mapping to a DataFrame (one column per key) and
# transpose so every "<cluster method>-<dims>-<clusters>" key becomes a row.
# NOTE(review): this rebinds the results name from a dict to a DataFrame, so
# re-running the population loop afterwards would assign into a DataFrame
# instead of a dict unless the configuration cell is re-run first.
# NOTE(review): `styled_metrics_DTW` holds the euclidean-variant results
# despite its name.
metrics_SMACOF_euclidean_classic_results = DataFrame(metrics_SMACOF_euclidean_classic_results)
styled_metrics_DTW = apply_highlight_in_blocks(metrics_SMACOF_euclidean_classic_results.T, num_row_sep)
styled_metrics_DTW

Unnamed: 0,Silhouette,Calinski–Harabasz,Davies-Boulding
K-Means-2-2,0.654901,7829.109936,0.582394
K-Means-2-3,0.714185,12204.859321,0.44995
K-Means-2-4,0.689562,11066.469143,0.574777
K-Means-2-5,0.583216,11337.615398,0.66693
K-Means-2-6,0.521369,9808.966765,0.770203
K-Means-2-7,0.48587,9885.510128,0.773123
K-Means-2-8,0.44676,9382.16843,0.821186
K-Means-2-9,0.444606,8704.029548,0.845128
K-Means-2-10,0.504626,9330.855178,0.786643
Gaussian-2-2,0.563548,5260.062556,0.639011


In [12]:
# Convert the {key: metrics} mapping to a DataFrame (one column per key) and
# transpose so every "<cluster method>-<dims>-<clusters>" key becomes a row.
# NOTE(review): this rebinds the results name from a dict to a DataFrame, so
# re-running the population loop afterwards would assign into a DataFrame
# instead of a dict unless the configuration cell is re-run first.
metrics_SMACOF_dtw_classic_results = DataFrame(metrics_SMACOF_dtw_classic_results)
styled_metrics_DTW_Classic = apply_highlight_in_blocks(metrics_SMACOF_dtw_classic_results.T, num_row_sep)
styled_metrics_DTW_Classic

Unnamed: 0,Silhouette,Calinski–Harabasz,Davies-Boulding
K-Means-2-2,0.6549,7829.098231,0.582395
K-Means-2-3,0.714184,12204.82435,0.44995
K-Means-2-4,0.689561,11066.428366,0.574777
K-Means-2-5,0.583079,11337.572972,0.667174
K-Means-2-6,0.554757,10498.009617,0.727547
K-Means-2-7,0.523464,10608.412964,0.752156
K-Means-2-8,0.508167,9345.724645,0.845046
K-Means-2-9,0.441002,9846.160071,0.849645
K-Means-2-10,0.389964,9385.738498,0.886161
Gaussian-2-2,0.505267,2628.983412,0.600355
