# PREPARATION

In [None]:
from template_csp import managetemp as mte
import numpy as np
import pandas as pd
import random
from template_csp.distances import dist1, dist2, dist3, levensthein_distance
import json

# Settings handed unchanged to the template_csp set constructors below.
# NOTE(review): how each weight is interpreted is decided inside
# template_csp and is not visible from this notebook.
hyperparameters = dict(
    weight_occurrence=1,
    weight_sg=0.001,
    weight_formation_entalphy=1,
    n_final_templates=20,
    comp=1,
)

# Selects which RunOneTempPerPair/<comp>/<n>/ directory is read below.
ntemp_initial_set = 20

# JSON file for the chosen composition (A<comp>B).
with open('ENTHAPY/A{}B.json'.format(hyperparameters["comp"])) as json_file:
    data = json.load(json_file)

test_elements = [
    'Be', 'B', 'N', 'Mg', 'O',
    'Li', 'C', 'Na', 'Si', 'S',
    'Cl', 'F', 'P', 'H', 'Al',
]

# Both sets are loaded from the same run directory.
run_dir = f'RunOneTempPerPair/{hyperparameters["comp"]}/{ntemp_initial_set}'
initial_set = mte.InitialSet(
    test_elements,
    hyperparameters,
    f'{run_dir}/TemplateSet_0',
)
final_set = mte.FinalSet(
    initial_set,
    test_elements,
    hyperparameters,
    f'{run_dir}/PairSet_0',
)
reduced_set = final_set.reduced_set()
n_temp_final = final_set.num_final_template

# For every unordered pair of test elements build a 2 x n_templates array:
#   row 0 -> the values read from `df` for that pair, sorted ascending
#   row 1 -> the position in `subset_templates` each sorted value came from
# NOTE: `df` and `subset_templates` are not defined in this cell; they must
# already exist in the notebook namespace (the "CLUSTER BY GS" cell creates
# them).
pmatrix = {}
pair_names = []  # collected in a list: np.append in a loop recopies the array each time

for i, first in enumerate(test_elements):
    for second in test_elements[i + 1:]:
        # Pair label = the two element symbols in alphabetical order, joined.
        couple = ''.join(sorted((first, second)))

        energies = np.array(
            [df.loc[couple, template] for template in subset_templates],
            dtype=float,
        )
        order = np.argsort(energies)

        # Stacking promotes the integer ordering to float, giving the same
        # (2, n_templates) float layout as filling a zeros array by hand.
        pmatrix[couple] = np.vstack((energies[order], order))
        pair_names.append(couple)

couples = np.array(pair_names)

# Pairwise distance between the sorted vectors; the matrix is zero-initialised,
# so the diagonal needs no explicit assignment.
lev_dist_matrix = np.zeros((len(couples), len(couples)))

for row, couple1 in enumerate(couples):
    for col, couple2 in enumerate(couples):
        if row != col:
            lev_dist_matrix[row, col] = dist2(pmatrix[couple1], pmatrix[couple2])
print(len(subset_templates))
print(lev_dist_matrix.mean())

#  CLUSTER BY GS

In [None]:
from template_csp import managetemp as mte
import numpy as np
import pandas as pd
import random
# levensthein_distance is passed to mte.PairSet below, so it is imported here
# rather than relying on another cell having imported it first.
from template_csp.distances import dist1, dist2, dist3, levensthein_distance
import re

# Settings forwarded to PairSet.reduced_set().
# NOTE(review): how each entry is interpreted is decided inside template_csp.
hyperparameters = {
    "weight_occurrence": 1,
    "weight_sg": 0.001,
    "weight_formation_entalphy": 1,
    "lev_red": 0.9,
}
comp = 1

# Table looked up later as df.loc[<pair label>, <template label>].
df = pd.read_csv(f'A{comp}B/relaxation/RELAX_DATA', sep=",", index_col=0, na_filter=False)
# Header-less table indexed by its first column; column 1 is read later as
# the per-element reference value.
df_entforma = pd.read_csv(
    f'A{comp}B/relaxation/OneElementEnt.txt',
    sep=",",
    index_col=0,
    na_filter=False,
    header=None,
)
test_elements = ['Be', 'B', 'N', 'Mg', 'O', 'Li', 'C', 'Na', 'Si', 'S', 'Cl', 'F', 'P', 'H', 'Al']

template_set = mte.TemplateSet(test_elements, 'RANDOM/FULL50sets/20/TemplateSet_3')
pairset = mte.PairSet(
    template_set,
    test_elements,
    dist_function=levensthein_distance,
    relaxed_pairs='RANDOM/FULL50sets/20/PairSet_3',
)
reducted_set = pairset.reduced_set(hyperparameters)

# Column labels of `df` for the retained templates, built as
# "<couples[i][0][0]><couples[i][0][1]>_<couples[i][1]>".
subset_templates = [
    f'{template_set.couples[i][0][0]}{template_set.couples[i][0][1]}_{template_set.couples[i][1]}'
    for i in reducted_set
]
n_temp_test = len(subset_templates)
print(len(subset_templates))
# gs_temp maps every unordered pair of test elements to the template in
# `subset_templates` with the lowest value in `df` for that pair.
gs_temp = {}
pair_names = []  # collected in a list: np.append in a loop recopies the array each time

for i, first in enumerate(test_elements):
    for second in test_elements[i + 1:]:
        # Pair label = the two element symbols in alphabetical order, joined.
        couple = ''.join(sorted((first, second)))

        energies = np.array(
            [df.loc[couple, template] for template in subset_templates],
            dtype=float,
        )

        # Only the first entry of the ascending ordering is needed here, so
        # the full sorted 2 x N array is not built.
        best = int(np.argsort(energies)[0])
        gs_temp[couple] = subset_templates[best]
        pair_names.append(couple)

couples = np.array(pair_names)

# For every retained template build a 2 x n_pairs array:
#   row 0 -> df value of the pair under that template minus the mean of the
#            two single-element values from df_entforma (column 1)
#   row 1 -> position of the pair in pairset.couples
# NOTE(review): the array is sized with len(couples) but filled by iterating
# pairset.couples -- this presumes both have the same length; confirm.
pmatrix = {}
for template in subset_templates:
    pvec = np.zeros((2, len(couples)))
    for idx, couple in enumerate(pairset.couples):
        A, B = couple[0], couple[1]
        ent_form = (df_entforma.loc[A, 1] + df_entforma.loc[B, 1]) / 2
        pvec[0, idx] = df.loc[A + B, template] - ent_form
        pvec[1, idx] = idx
    pmatrix[template] = pvec

# Distance between two pairs = dist2 between the vectors of their respective
# lowest-value templates (gs_temp), evaluated for every ordered (row, col).
# The vectors are looked up once per pair, and enumerate supplies the matrix
# indices directly instead of searching `couples` with np.where for each cell.
gs_vectors = [pmatrix[gs_temp[couple]] for couple in couples]

lev_dist_matrix = np.zeros((len(couples), len(couples)))
for row, row_vector in enumerate(gs_vectors):
    for col, col_vector in enumerate(gs_vectors):
        lev_dist_matrix[row, col] = dist2(row_vector, col_vector)

# The target directory must already exist; np.savetxt does not create it.
np.savetxt('CLUSTERS/ClustersDict/DistMatrix', lev_dist_matrix)


In [None]:
# Spot check: show which template gs_temp recorded for the H-Li pair
# (pair labels are the two element symbols sorted and joined, hence 'HLi').
print(gs_temp['HLi'])

# DENDROGRAM

In [None]:
import numpy as np
import matplotlib.pyplot as plt
from scipy.cluster.hierarchy import dendrogram, linkage, fcluster
from scipy.spatial.distance import squareform
import matplotlib.patches as patches

# fig/ax hold the reordered distance heatmap, fig1/ax1 the dendrogram.
fig, ax = plt.subplots(figsize=(10, 10))
fig1, ax1 = plt.subplots(figsize=(11, 8))

# Average-linkage hierarchical clustering on the condensed form of the
# square distance matrix.
condensed_dist_matrix = squareform(lev_dist_matrix)
Z = linkage(condensed_dist_matrix, 'average')

# Flat clusters: at most `num_clusters` groups.
num_clusters = 8
cluster_labels = fcluster(Z, num_clusters, criterion='maxclust')
# Height of the (num_clusters - 1)-th merge counted from the top of the tree;
# passed to dendrogram() as the colouring threshold.
threshold = Z[-(num_clusters - 1), 2]

dendro = dendrogram(
    Z,
    labels=couples,
    color_threshold=threshold,
    leaf_rotation=90,
    leaf_font_size=5,
    ax=ax1,
)

# Leaf order of the dendrogram, used to permute rows and columns alike so
# that leaves adjacent in the tree are adjacent in the heatmap.
sorted_idx = dendro['leaves']

new = lev_dist_matrix[np.ix_(sorted_idx, sorted_idx)]
cluster_labels_sorted = cluster_labels[sorted_idx]

im = ax.imshow(new, cmap='Spectral')

# couples_in_clusters: one array of pair labels per flat cluster, in the order
#                      of np.unique(cluster labels).
# cluster_distances:   cluster label -> mean distance over its distinct pairs.
couples_in_clusters = []
cluster_distances = {}

unique_clusters = np.unique(cluster_labels_sorted)
for cluster in unique_clusters:
    # Positions (in dendrogram leaf order) that belong to this cluster.
    cluster_indices = np.flatnonzero(cluster_labels_sorted == cluster)

    # Mean over the upper triangle of the cluster's sub-matrix; a
    # single-member cluster has no pairs and gets 0.
    distances = [new[i, j] for i in cluster_indices for j in cluster_indices if i < j]
    cluster_distances[cluster] = np.mean(distances) if distances else 0

    if len(cluster_indices) == 0:
        continue

    # First and last position of the cluster give the extent of its block.
    start = cluster_indices[0]
    end = cluster_indices[-1]
    cluster_size = end - start + 1
    couples_in_clusters.append(np.array(couples)[sorted_idx][start:end + 1])

    # Outline the cluster's diagonal block; the -0.5 offset aligns the
    # rectangle with the cell edges rather than the cell centres.
    ax.add_patch(
        patches.Rectangle(
            (start - 0.5, start - 0.5),
            cluster_size,
            cluster_size,
            linewidth=3,
            edgecolor='black',
            facecolor='none',
        )
    )

# Label both heatmap axes with the pair names in dendrogram leaf order.
tick_positions = np.arange(len(couples))
tick_labels = np.array(couples)[sorted_idx]

ax.set_xticks(tick_positions)
ax.set_yticks(tick_positions)
ax.set_xticklabels(tick_labels, fontsize=5, rotation=90)
ax.set_yticklabels(tick_labels, fontsize=5)
ax.set_xlabel('Couples', fontsize=15)
ax.set_ylabel('Couples', fontsize=15)

# Colour bar for the heatmap, with its label pushed to the right of the ticks.
cbar = fig.colorbar(
    im,
    ax=ax,
    orientation="vertical",
    fraction=0.046,
    pad=0.04,
    label='Levenshtein distance',
)
cbar.ax.yaxis.label.set_size(15)
cbar.ax.tick_params(labelsize=10)
cbar.ax.yaxis.set_label_coords(+2.5, 0.5)

fig.savefig('MatrixHeatmap.png', bbox_inches='tight')


# Dashed line marking the colour threshold, drawn slightly below it.
ax1.axhline(y=threshold-0.005, color='r', linestyle='--')
ax1.set_ylabel('Levenshtein distance', fontsize=20)
ax1.set_xlabel('Couples', fontsize=20)
ax1.xaxis.set_label_coords(0.5, -0.18)
ax1.yaxis.set_label_coords(-0.07, 0.5)

# Six points give an exact 0.1 step between 0 and 0.5. With five points the
# 0.125 step rounds to 0, 0.1, 0.2, 0.4, 0.5, i.e. unevenly spaced ticks.
# Rounding is kept so the tick labels print as 0.3 rather than
# 0.30000000000000004.
yticks = np.round(np.linspace(0, 0.5, 6), decimals=1)
ax1.set_yticks(yticks)
ax1.set_yticklabels(yticks, fontsize=10)
ax1.grid(True, ls=':', axis='y')
plt.show()
# The target directory must already exist; savefig does not create it.
fig1.savefig('../LaTeX/Figure Risultati/Dendrogram.png', bbox_inches='tight')

print(cluster_distances)

In [None]:
# One line per cluster: every member followed by ", ", then a blank line.
for cluster in couples_in_clusters:
    print(''.join(f'{name}, ' for name in cluster), end='')
    print('\n')
    


# PIE CHART

In [6]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
# hist:        str(cluster position) -> {template: fraction of the cluster's
#              pairs whose first-ranked template it is}
# total_hist:  template -> number of pairs (over all kept clusters)
# len_cluster: sizes of the kept clusters, in the same order as hist
# Single-member clusters are skipped.
# NOTE(review): pmatrix is indexed by pair label here, i.e. the pair-keyed
# pmatrix from the PREPARATION cell is expected, not the template-keyed one
# built in CLUSTER BY GS -- confirm the cell execution order.
total_hist = {}
hist = {}
len_cluster = []
for idx_cluster, cluster in enumerate(couples_in_clusters):
    if len(cluster) == 1:
        continue
    cluster_hist = {}
    hist[f'{idx_cluster}'] = cluster_hist
    len_cluster.append(len(cluster))
    for couple in cluster:
        # Row 1 of the pair's vector holds template indices; entry 0 is the
        # first-ranked one.
        sg = subset_templates[int(pmatrix[couple][1][0])]
        cluster_hist[sg] = cluster_hist.get(sg, 0) + 1/len(cluster)
        total_hist[sg] = total_hist.get(sg, 0) + 1

# One Set2 colour per template that occurs at least once.
colormap = {
    f'{template}': cm.Set2(idx/len(total_hist.keys()))
    for idx, template in enumerate(total_hist.keys())
}


In [None]:
import matplotlib.pyplot as plt
import numpy as np

# A4 portrait page (8.3 x 11.7 in) with a 4 x 2 grid: one donut chart per
# cluster. zip() below stops after the 8 available axes.
fig, axes = plt.subplots(4, 2, figsize=(8.3, 11.7))  # 4 rows, 2 columns
axes = axes.flatten()  # 1-D view so the axes can be iterated together with hist

for idx_cluster, ((cluster_key, histcluster), ax) in enumerate(zip(hist.items(), axes)):
    # hist is keyed by the cluster's position in couples_in_clusters, while
    # cluster_distances is keyed by the 1-based cluster label. Deriving the
    # label from the key (instead of the running index) keeps the number and
    # the average correct when single-member clusters were skipped in hist.
    # len_cluster only contains the kept clusters, so it stays positional.
    cluster_label = int(cluster_key) + 1
    title_text = (
        f'Cluster {cluster_label}\n'
        f'{len_cluster[idx_cluster]} pairs\n'
        f'Avg: {cluster_distances[cluster_label]:.2f}'
    )

    labels = list(histcluster.keys())
    values = list(histcluster.values())
    colors = [colormap[f'{i}'] for i in labels]
    # "<pair>_<suffix>" template names are shown as "<pair> <suffix>".
    labels = [f'{x.split("_")[0]} {x.split("_")[1]}' for x in labels]

    # Draw the pie without automatic labels or percentages.
    wedges, texts = ax.pie(
        values, labels=None, colors=colors,
        wedgeprops=dict(edgecolor='black', linewidth=1.5), startangle=0
    )

    # White disc with a black border turns the pie into a donut.
    centre_circle = plt.Circle((0, 0), 0.70, fc='white', edgecolor='black', linewidth=1.5)
    ax.add_artist(centre_circle)

    # Cluster summary in the middle of the hole.
    ax.text(
        0, 0, title_text, ha='center', va='center', fontsize=10,
        fontweight='bold', color='black'
    )

    # Place each label and its percentage by hand, just outside the wedge.
    for wedge, value, label in zip(wedges, values, labels):
        # Mid-angle of the wedge, in degrees.
        angle = (wedge.theta2 + wedge.theta1) / 2
        x = np.cos(np.radians(angle)) * 1.3
        y = np.sin(np.radians(angle)) * 1.3

        ax.text(
            x, y, label, ha='center', va='center', fontsize=8,
            fontweight='bold', color='black'
        )

        # Percentage right below the label.
        percentage = f"{value * 100:.1f}%"
        ax.text(
            x, y - 0.13, percentage, ha='center', va='center', fontsize=8,
            color='black'
        )

    ax.axis('equal')  # keep the donut circular

# Hide the axes left over when there are fewer clusters than grid cells.
for ax in axes[len(hist):]:
    ax.axis('off')

# Avoid overlaps between neighbouring charts.
plt.tight_layout()

# Write the figure to disk, then display it.
plt.savefig("PieCharts.png", dpi=300, bbox_inches='tight')
plt.show()


# OTHER PIE CHARTS

In [None]:
# Bar chart of total_hist: one bar per key, bar height = stored value.
fig, ax = plt.subplots(figsize=(15, 10))
fig.suptitle('Total distribution', fontsize=20, fontweight='bold', y=0.95)

ax.bar(
    total_hist.keys(),
    total_hist.values(),
    color='blue',
    edgecolor='black',
    linewidth=1.2,
)
ax.set_xlabel('Template', fontsize=15)
ax.set_ylabel('Occurrence', fontsize=15)

plt.show()

In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
# Same histogram as the PIE CHART cell, but grouped by the part of the
# template name after the underscore instead of the full template name.
# Single-member clusters are skipped.
# NOTE(review): pmatrix is indexed by pair label here -- the pair-keyed
# pmatrix from the PREPARATION cell is expected; confirm execution order.
total_hist = {}
hist = {}
len_cluster = []
for idx_cluster, cluster in enumerate(couples_in_clusters):
    if len(cluster) == 1:
        continue
    cluster_hist = {}
    hist[f'{idx_cluster}'] = cluster_hist
    len_cluster.append(len(cluster))
    for i in cluster:
        sg = subset_templates[int(pmatrix[i][1][0])].split('_')[1]

        cluster_hist[sg] = cluster_hist.get(sg, 0) + 1/len(cluster)
        # Here the totals accumulate per-cluster fractions, not raw counts.
        total_hist[sg] = total_hist.get(sg, 0) + 1/len(cluster)

# One Set2 colour per distinct label.
colormap = {}
for idx, template in enumerate(total_hist.keys()):
    colormap[f'{template}'] = cm.Set2(idx/len(total_hist.keys()))

for idx_cluster, histcluster in enumerate(hist.values()):
    fig, ax = plt.subplots(1, 1, figsize=(15, 10))
    fig.suptitle(f'Cluster {idx_cluster} with {len_cluster[idx_cluster]} pairs', fontsize=20, fontweight='bold')
    # colormap is keyed by the label strings themselves, so look them up
    # directly: a round trip through int() fails for any label that is not a
    # canonical decimal integer (leading zeros, letters, ...).
    colors = [colormap[f'{label}'] for label in histcluster.keys()]
    wedges, texts, autotexts = ax.pie(
        histcluster.values(),
        autopct='%1.1f%%',
        labels=histcluster.keys(),
        colors=colors,
        wedgeprops=dict(edgecolor='black', linewidth=1.5),
        startangle=0,
    )
    ax.axis('equal')
    # Wedge labels: large, bold, black.
    for text in texts:
        text.set_fontsize(20)
        text.set_color('black')
        text.set_fontweight('bold')

    # Percentages: slightly smaller, bold, black.
    for autotext in autotexts:
        autotext.set_fontsize(15)
        autotext.set_color('black')
        autotext.set_weight('bold')
    plt.show()
            


In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
# Histogram, per cluster, of the unordered pair formed by the first- and
# second-ranked templates of every element pair. Keys are "t1,t2" strings;
# "t1,t2" and "t2,t1" are counted together under whichever was seen first.
# Single-member clusters are skipped.
# NOTE(review): pmatrix is indexed by pair label here -- the pair-keyed
# pmatrix from the PREPARATION cell is expected; confirm execution order.
total_hist = {}  # reset only; this cell does not fill it
hist = {}
len_cluster = []
for idx_cluster, cluster in enumerate(couples_in_clusters):
    if len(cluster) == 1:
        continue
    cluster_hist = {}
    hist[f'{idx_cluster}'] = cluster_hist
    len_cluster.append(len(cluster))
    for i in cluster:
        ranking = pmatrix[i][1]
        templist = f'{subset_templates[int(ranking[0])]},{subset_templates[int(ranking[1])]}'
        wanted = set(templist.split(','))

        for key in cluster_hist:
            if wanted == set(key.split(',')):
                cluster_hist[key] += 1/len(cluster)
                break
        else:
            # No existing key holds the same two templates.
            cluster_hist[templist] = 1/len(cluster)


for idx, histcluster in enumerate(hist.values()):
    fig, ax = plt.subplots(1, 1, figsize=(15, 10))
    # Use this loop's own index in the title: `idx_cluster` is a leftover from
    # the loop above and would show the same number on every chart.
    fig.suptitle(f'Cluster {idx} with {len_cluster[idx]} pairs', fontsize=20, fontweight='bold')
    wedges, texts, autotexts = ax.pie(
        histcluster.values(),
        autopct='%1.1f%%',
        labels=histcluster.keys(),
        wedgeprops=dict(edgecolor='black', linewidth=1.5),
        startangle=0,
    )
    ax.axis('equal')
    # Wedge labels: large, bold, black.
    for text in texts:
        text.set_fontsize(20)
        text.set_color('black')
        text.set_fontweight('bold')

    # Percentages: slightly smaller, bold, black.
    for autotext in autotexts:
        autotext.set_fontsize(15)
        autotext.set_color('black')
        autotext.set_weight('bold')
    plt.show()
            


# WEIGHT OF THE FIRST COMPONENT

In [None]:
import matplotlib.pyplot as plt 
# For N = 1..99 ranked components compare two weighting schemes:
#   harmonic: weight of rank i (0-based) is 1/(i+1)
#   linear:   weight of rank i (0-based) is 1 - i/N, whose sum is (N+1)/2
# y1 / y2         -> normalised weight of the first component (harmonic / linear)
# first3 / first32 -> normalised weight of the first three components
x = []
y1 = []
y2 = []
first3 = []
first32 = []
for tot in range(1, 100):
    x.append(tot)

    harmonic_total = 0
    linear_top3 = 0
    for rank in range(tot):
        harmonic_total += 1. / (rank + 1)
        if rank < 3:
            # Running harmonic sum up to (at most) the third component.
            harmonic_top3 = harmonic_total
            linear_top3 += 1 - rank / tot

    linear_total = (tot + 1) / 2
    y1.append(1. / harmonic_total)
    y2.append(1 / linear_total)
    first3.append(harmonic_top3 / harmonic_total)
    first32.append(linear_top3 / linear_total)

# Plot the four weight series against the number of templates.
fig, ax = plt.subplots(figsize=(15, 10))
curves = (
    (y1, 'First component with 1/i'),
    (y2, 'First component with 1-i/N'),
    (first3, 'First 3 component with 1/i'),
    (first32, 'First 3 component with 1-i/N'),
)
for series, legend_text in curves:
    ax.plot(x, series, label=legend_text)

ax.set_xlabel('Number of templates', fontsize=15)
ax.set_ylabel('Weight of first component', fontsize=15)
ax.grid(True, ls=':')
# NOTE(review): the marker is drawn at x=6 but labelled 'N=13' -- one of the
# two is presumably stale; confirm which value is intended.
ax.vlines(6, 0, 1, colors='r', linestyles='--', label='N=13')
ax.legend()
    