# FRAMEWORK

In [None]:
import numpy as np
import os 
import random
import pandas as pd
import time
from tqdm import tqdm
from template_csp.managetemp import generate_one_templateset, generate_one_pairset, graph_difference_std
import json
# LEVENSHTEIN DISTANCE
def levensthein_distance(a1,a2):
    """Return a position-weighted mismatch score between two ranked label rows.

    Each argument is indexed as ``a[0]`` (the values that define the ranking)
    and ``a[1]`` (the labels being ranked). The labels of each argument are
    reordered by ascending ``a[0]``, truncated to integers, and compared
    position by position. Despite the name this is not an edit distance:
    a mismatch at rank ``i`` simply adds ``1 - i/n``, so disagreements near
    the top of the ranking cost more.

    The inputs are not modified.

    Returns:
        The accumulated penalty divided by ``(len(a1[0]) + 1) / 2``, which is
        the sum of all the weights ``1 - i/n`` (i.e. the score obtained when
        every position mismatches).
    """
    ranked_a = a1[1][np.argsort(a1[0])].astype(int)
    ranked_b = a2[1][np.argsort(a2[0])].astype(int)
    n_ranks = len(ranked_a)

    # Index-based on purpose: a shorter second ranking must still raise.
    penalty = sum(
        1 - float(pos) / n_ranks
        for pos in range(n_ranks)
        if ranked_a[pos] != ranked_b[pos]
    )
    return penalty / ((len(a1[0]) + 1) / 2)
# DISTANCE ON ENTHALPIES ONLY
def dist1(a1,a2):
    """Return the RMS difference between two min-max normalised enthalpy rows.

    Only ``a1[0]`` and ``a2[0]`` are read. Each row is rescaled as
    ``(min - x) / (min - max)``, so its smallest entry maps to 0 and its
    largest to 1, and the two rescaled rows are compared element by element.
    Only the first ``len(a1[0])`` entries of ``a2[0]`` take part in the
    comparison (its min/max are still taken over the whole row).

    Returns:
        ``sqrt(mean((scaled1 - scaled2) ** 2))`` as a NumPy float.

    Note:
        A row whose entries are all equal has ``min == max`` and therefore
        divides by zero; no special handling is applied here.
    """
    row1, row2 = a1[0], a2[0]
    n_entries = len(row1)

    # The extremes do not change while scanning the row: compute them once
    # instead of re-evaluating min()/max() for every element.
    low1, high1 = row1.min(), row1.max()
    low2, high2 = row2.min(), row2.max()

    scaled1 = (low1 - row1) / (low1 - high1)
    scaled2 = ((low2 - row2) / (low2 - high2))[:n_entries]

    return np.mean((scaled1 - scaled2) ** 2) ** 0.5
# DISTANCE ON ENTHALPIES AND ORDERING
def dist2(a1,a2):
    """Return a rank-mismatch score weighted by the enthalpy gap of the swap.

    ``a[0]`` holds the values that define the ranking and ``a[1]`` the labels;
    the labels are also used as integer column indices into ``a[0]``. The
    labels of each argument are reordered by ascending ``a[0]`` and compared
    position by position. When rank ``i`` holds different labels in the two
    rankings, the penalty is ``1 - i/n`` times the average, over both
    arguments, of the absolute gap between the two labels' values divided by
    that argument's value range.

    The loop visits every rank. (It previously iterated over ``len(a1)``,
    i.e. the number of rows, so only the first two ranks were ever compared.)

    The inputs are not modified.

    Returns:
        The accumulated penalty divided by ``(len(a1[0]) + 1) / 2``, the sum
        of all rank weights.
    """
    values_a, values_b = a1[0], a2[0]
    order_a = a1[1][np.argsort(values_a)]
    order_b = a2[1][np.argsort(values_b)]
    n_ranks = len(order_a)

    # Value ranges are loop-invariant.
    span_a = values_a.max() - values_a.min()
    span_b = values_b.max() - values_b.min()

    dist = 0
    for i in range(n_ranks):
        if order_a[i] != order_b[i]:
            label_a = int(order_a[i])
            label_b = int(order_b[i])
            gap_a = abs(values_a[label_a] - values_a[label_b]) / span_a
            gap_b = abs(values_b[label_a] - values_b[label_b]) / span_b
            dist += (1 - float(i) / n_ranks) * (gap_a + gap_b) / 2
    return dist / ((len(a1[0]) + 1) / 2)
# DISTANCE ON ORDERING, WEIGHTED BY RANK SHIFT
def dist3(a1, a2):
    """Return a rank-mismatch score weighted by how far each label moved.

    ``a[0]`` holds the values that define the ranking and ``a[1]`` the labels.
    The labels of each argument are reordered by ascending ``a[0]`` and
    compared position by position. When rank ``i`` holds different labels,
    the penalty is ``1 - i/n`` times the average of two relative shifts: how
    far the label found at rank ``i`` in ``a2`` sits from rank ``i`` in
    ``a1``, and vice versa.

    The loop visits every rank. (It previously iterated over ``len(a1)``,
    i.e. the number of rows, so only the first two ranks were ever compared.)

    The inputs are not modified.

    Returns:
        The accumulated penalty divided by ``(len(a1[0]) + 1) / 2``, the sum
        of all rank weights.

    Raises:
        IndexError: if a mismatching label of one ranking does not occur in
            the other ranking.
    """
    order_a = a1[1][np.argsort(a1[0])]
    order_b = a2[1][np.argsort(a2[0])]
    n_a = len(order_a)
    n_b = len(order_b)

    dist = 0
    for i in range(n_a):
        if order_a[i] != order_b[i]:
            # First rank at which the other ranking's label appears here.
            rank_in_a = np.where(order_a == order_b[i])[0][0]
            rank_in_b = np.where(order_b == order_a[i])[0][0]
            shift = (abs(i - rank_in_a) / n_a + abs(i - rank_in_b) / n_b) / 2
            dist += (1 - float(i) / n_a) * shift
    return dist / ((len(a1[0]) + 1) / 2)

def create_directory(directory):
    """Create ``directory`` (and any missing parents) if it does not exist.

    Calling this on an already existing directory is a no-op.

    Raises:
        FileExistsError: if ``directory`` exists but is not a directory.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(directory, exist_ok=True)


# Element symbols passed to the template-set generator.
test_elements=['Be', 'B', 'N', 'Mg', 'O', 'Li', 'C', 'Na', 'Si', 'S', 'Cl', 'F', 'P', 'H', 'Al']

# Settings forwarded to generate_one_templateset and dumped to params.json.
# 'lev_gen', 'lev_gen_initial', 'step', 'n_template' and 'id_set' are
# overwritten by the loops below; 'ntemp_start', 'ntemp_end' and 'n_sets'
# drive those loops. The remaining keys are not read in this script.
hyperparameters = {
    'ntemp_start' : 1,
    'ntemp_end' : 21,

    'comp' : 1,
    'lev_gen' : 0.8,
    'lev_gen_initial' : 0.8,
    'step' : 0.1,
    'n_sets' : 10,
    'n_template' : 1,

    'id_set' : 1,
    'lev_red' : 0.9,
    'weight_formation_entalphy' : 1,
    'weight_occurrence' : 1,
    'weight_sg' : 0.001,

    'n_pairs' : 105,
}
random.seed(time.time())

# One (distance function, initial threshold, threshold step) triple per run.
# NOTE(review): the third entry used to be a second `dist3`, which left
# `dist2` untested and made the last run overwrite the dist3/ output
# directory. Confirm that evaluating `dist2` here is what was intended.
distances = [levensthein_distance, dist1, dist2, dist3]
thresholds = [0.8, 0.25, 0.25, 0.25]
steps = [0.1, 0.02 , 0.02, 0.02]

n_possible_couples = 105

# zip keeps each function paired with its own settings even if the list
# contains repeated entries (list.index would always return the first match).
for dist_function, threshold, step in zip(distances, thresholds, steps):

    hyperparameters['lev_gen_initial'] = threshold
    hyperparameters['lev_gen'] = threshold
    hyperparameters['step'] = step
    print(hyperparameters['step'])

    # Each distance function gets its own output directory.
    dir_temp = f'./DIFFERENT_DISTANCES/{dist_function.__name__}/'
    create_directory(dir_temp)

    with open(dir_temp + 'params.json', 'w') as f:
        json.dump(hyperparameters, f, indent=4)

    # Number of different template counts that are explored
    ntemp_studied = hyperparameters['ntemp_end'] - hyperparameters['ntemp_start']

    # Arrays for the global results (one entry per template count)
    means = np.zeros(ntemp_studied)
    stds = np.zeros(ntemp_studied)

    for i in tqdm(range(hyperparameters['ntemp_start'],hyperparameters['ntemp_end'], 1)):

        hyperparameters['n_template'] = i

        # Error of each generated set for the current template count
        errors = np.zeros(hyperparameters['n_sets'])

        with open('log.txt','a') as fstdout:
            fstdout.write('##################################################\n')
            fstdout.write(f'Generating template set with {i} templates\n')
            fstdout.write('##################################################\n')

        for k in range(hyperparameters['n_sets']):
            # Reset the per-set variables
            hyperparameters['lev_gen'] = hyperparameters['lev_gen_initial']
            hyperparameters['id_set'] = k

            # Generate the initial template set
            template_set = generate_one_templateset(hyperparameters, test_elements, dist_function)

            # Save the template set to file
            create_directory(dir_temp+f'{template_set.num_template}')
            template_set.recap_tempset(dir_temp+f'{template_set.num_template}/TemplateSet_{k}')

            # Store the result of this set
            errors[k] = template_set.err_before()

        # Mean error and its standard deviation over the generated sets
        means[i-hyperparameters['ntemp_start']] = np.mean(errors)
        stds[i-hyperparameters['ntemp_start']] = np.std(errors)

        df_tot = pd.DataFrame({'Means': means, 'Stds': stds})

        # Rewritten after every template count so partial results survive an
        # interrupted run.
        df_tot.to_csv(dir_temp+'TotalStatics.csv', header=None)


In [None]:
import pandas as pd
import numpy as np

# Composition index: data is read from the A{comp}B directory.
comp = 1

# Relaxation table; its row labels are the couples to keep.
df = pd.read_csv(f'A{comp}B/relaxation/RELAX_DATA', index_col=0, sep=",", na_filter=False)
couples = df.index.to_list()

# Ground-state table, restricted to (and ordered like) the rows of RELAX_DATA.
gs_df = pd.read_csv(
    f'A{comp}B/relaxation/GroundStates.txt',
    index_col=0,
    sep=",",
    na_filter=False,
).loc[couples]


In [None]:
import itertools

# `comp` is expected to be defined by an earlier cell.
df = pd.read_csv(f'A{comp}B/relaxation/RELAX_DATA', sep=",", index_col=0, na_filter = False)
# Positional indices of the RELAX_DATA columns.
templates = np.arange(len(df.columns))

# Every 5-element subset of the column indices. The list keeps the name
# `combinations` because the following cell reads it; itertools is imported
# as a module so that the function is not overwritten by its own result.
combinations = list(itertools.combinations(templates, 5))


In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm

comp = 1

df = pd.read_csv(f'A{comp}B/relaxation/RELAX_DATA', sep=",", index_col=0, na_filter = False)
gs_df = pd.read_csv(f'A{comp}B/relaxation/GroundStates.txt', sep=",",  index_col=0, na_filter = False)
# Rows of the two tables are paired by position below, so align the ground
# states to the RELAX_DATA row labels first.
# NOTE(review): assumes every RELAX_DATA row label exists in GroundStates.txt.
gs_df = gs_df.loc[df.index]
numtemp = 5

# `combinations` is the list of column-index tuples built by an earlier cell.
errors_of_temp = np.zeros((len(combinations)))
entalpie = df.to_numpy()
gs = gs_df.to_numpy()

# Excess of every table entry over the first ground-state column of its row,
# clamped at zero. It does not depend on the combination, so compute it once.
ground = gs[:, 0].astype(float)
excess = np.maximum(entalpie.astype(float) - ground[:, None], 0)

# tqdm wraps the list (not the enumerate object) so it can show the total.
for idx_try, try1 in enumerate(tqdm(combinations)):
    # Per row, the smallest excess among the chosen columns; then the mean
    # over all rows.
    errors_of_temp[idx_try] = excess[:, list(try1)].min(axis=1).mean()
        

    