In [2]:
import warnings
warnings.filterwarnings('ignore')
import json, os
import numpy as np
from monty.json import MontyDecoder, MontyEncoder
from copy import deepcopy
from scipy.spatial import distance_matrix
import sys
sys.path.append('..')

from phasemapy.dataio import InstanceData
from phasemapy.parser import ICDDEntry
from phasemapy.solver import Phase, Sample

# --- Run configuration: values read by the cells below ---
# Element list handed to InstanceData.from_file.
chemsys = ['V', 'Mn', 'Nb']
# Forwarded to the Sample(...) constructor.
oxide_system = True
# Forwarded to InstanceData.from_file; presumably the photon energy in eV (13 keV) -- TODO confirm units.
photon_e = 13e3
# Forwarded to the Sample(...) constructor; presumably the largest allowed peak shift in q -- TODO confirm.
max_q_shift = 0.02
# Argument of instance_data.resample_xrd.
resample_density = 1000
# NOTE(review): not referenced anywhere else in the visible part of this notebook.
initial_alphagamma = 0.1
# Passed as `norm` to instance_data.renormalize.
SUM_NORM = 6000
# Passed as `loss_weight` to Sample.optimize / Sample.loss in later cells.
loss_weight = {'xrd_loss': 6.0, 'comp_loss': 2.0, 'entropy_loss': 0.01}

In [3]:
# Read the experimental instance file for the configured chemical system and photon energy.
instance_data = InstanceData.from_file('../data/instance_file_24297_NbMnVO_v02.txt', chemsys, photon_e)
# The result of resample_xrd is re-bound to instance_data and used from here on.
instance_data = instance_data.resample_xrd(resample_density)
# The return values of the next two calls are discarded, so they presumably modify
# instance_data in place -- TODO confirm against phasemapy.dataio.
instance_data.renormalize(norm=SUM_NORM)
instance_data.normalize()

# Load the candidate entries, decoded through MontyDecoder.
# NOTE(review): this file is read from './data' while the instance file above is read
# from '../data' -- confirm both locations are intended.
with open('./data/entries_dft.json') as f:
    entries = json.load(f, cls=MontyDecoder)
# Called for its effect on `entries` (the return value is unused); presumably converts
# peak positions from 2-theta to q -- TODO confirm.
Phase.theta_to_q(entries)    

In [4]:
# Print the position in `entries` of every entry whose ID is '22736'.
target_id = '22736'
for position in (idx for idx, entry in enumerate(entries) if entry.entry_id == target_id):
    print(position)

45


In [5]:
# Show the raw data of entry '22736'. The entry is looked up by its ID rather than by
# the hard-coded position 45, so this cell keeps showing the right entry if the
# order or content of `entries` changes.
next(e for e in entries if e.entry_id == '22736').data

{'xrd': [array([ 5.057634,  7.082838,  7.221637, 10.115267, 11.221089, 11.396655,
         14.165675, 14.443274, 15.172901, 15.794835, 15.900264, 16.086477,
         16.584826, 17.310211, 17.367465, 18.071502, 18.39815 , 18.756211,
         19.374975, 19.47718 , 20.230534, 20.757728, 20.948152, 21.216578,
         21.248513, 21.40292 , 21.664911, 21.901912, 22.411943, 22.442178,
         22.544435, 22.79331 , 24.001167, 24.248066, 25.051522, 25.288168,
         25.52262 , 25.666152, 25.692557, 25.885029, 26.083713, 26.235508,
         26.475954, 26.811288, 27.053333, 27.417829, 27.443181, 27.731061,
         28.331351, 28.886548, 29.237262, 29.615124, 29.742213, 29.815571,
         30.014679, 30.159315, 30.323449, 30.345802, 30.667425, 30.861038,
         31.32666 , 31.58967 , 31.641968, 31.800528, 31.991982, 32.07665 ,
         32.172955, 32.373968, 32.404618, 32.436742, 32.689761, 32.922775,
         33.169653, 33.230924, 33.523426, 33.582599, 33.663267, 33.676046,
         34.189964

In [None]:
#     i=286
#     solution = []
#     for e in [entries[45],entries[64]]:
#         phase = Phase.from_entry_and_instance_data(e, 1 / len(entries), instance_data)
#         solution.append(phase)  
        
#     sample = Sample(i, instance_data.log_q, instance_data.sample_xrd[i], instance_data.chemsys,
#                     instance_data.sample_comp[i], oxide_system, instance_data.wavelength, max_q_shift, solution)

# #     sample.print_solution()
#     sample.refine_all_fractions()
# #     sample.refine_one_by_one()
#     sample.plot(perphase=True)

In [None]:
# First pass: fit every sample against the complete entry list.
samples = []
for i in range(instance_data.sample_num):
    # One candidate Phase per entry, each built with 1 / len(entries) as its second
    # argument (presumably the starting fraction -- TODO confirm).
    solution = [
        Phase.from_entry_and_instance_data(entry, 1 / len(entries), instance_data)
        for entry in entries
    ]

    sample = Sample(
        i,
        instance_data.log_q,
        instance_data.sample_xrd[i],
        instance_data.chemsys,
        instance_data.sample_comp[i],
        oxide_system,
        instance_data.wavelength,
        max_q_shift,
        solution,
    )

    # Discard candidates by composition, then by XRD, before the first refinement.
    sample.prune_candidates_based_on_composition(cutoff=0.005)
    sample.prune_candidate_based_on_xrd(plot=False, cutoff=0.015)
    sample.refine_all_fractions()
    # sample.refine_one_by_one()
    sample.print_solution()

    # Two update rounds (first argument 0.03, then 0.1) with a re-refinement in between;
    # the solution is printed after each round.
    sample.update_solution(0.03, 0.2999, sample.max_q_shift)
    sample.print_solution()
    sample.refine_all_fractions()
    sample.update_solution(0.1, 0.2999, sample.max_q_shift)
    sample.print_solution()
    # sample.plot(perphase=True)

    samples.append(sample)

In [None]:

# Call update_solution on every first-pass sample with 0.015 as the first argument,
# then print what each sample's solution looks like afterwards.
# NOTE(review): the meaning of the two numeric arguments is defined by
# phasemapy.solver.Sample.update_solution -- confirm before tuning them.
for sample in samples:
    sample.update_solution(0.015,0.2999, sample.max_q_shift)
    sample.print_solution()
    

In [None]:
# Neighbour-informed refinement (composition radius 0.15, with an optimize step).
# Each sample is re-fitted using only the entries found in its compositional neighbours.
comp_dist = distance_matrix(instance_data.sample_comp, instance_data.sample_comp)
# Neighbours of sample i: samples closer than 0.15 in composition, excluding zero distance
# (which removes the sample itself and any sample with an identical composition).
nn_list = {i: np.where((comp_dist[i] < 0.15) & (comp_dist[i] > 0))[0] for i in range(instance_data.sample_num)}
new_samples = deepcopy(samples)
samples_neighbor = []
for sample in new_samples:
    if len(sample.phase_fractions) == 0:
        # Samples with no phases borrow the solution of the sample with the previous ID.
        # NOTE(review): for sample_id 0 the index is -1, i.e. the LAST sample -- confirm intended.
        sample.solution = new_samples[sample.sample_id - 1].solution
    candidate_entries = []
    for i in nn_list[sample.sample_id]:
        candidate_entries += new_samples[i].entries
    # De-duplicate while keeping first-seen order. The previous list(set(...)) left the
    # candidates in set-iteration order, which is arbitrary and may differ between kernel
    # sessions, making the order-dependent refinement below irreproducible.
    candidate_entries = list(dict.fromkeys(candidate_entries))
    solution = [
        Phase.from_entry_and_instance_data(e, 1 / len(candidate_entries), instance_data)
        for e in candidate_entries
    ]
    new_sample = deepcopy(sample)
    new_sample.solution = solution
    new_sample.refine_all_fractions()
    new_sample.refine_one_by_one()
    new_sample.update_solution(0.03, 0.2999, new_sample.max_q_shift)
    # optimize returns the sample to continue with, so the name is re-bound.
    new_sample = new_sample.optimize(num_epoch=500, print_prog=True, loss_weight=loss_weight)
    new_sample.update_solution(0.01, 0.2999, sample.max_q_shift)
    new_sample.refine_all_fractions()
    new_sample.refine_one_by_one()
    new_sample.update_solution(0.01, 0.2999, sample.max_q_shift)
    new_sample.print_solution()
    new_sample.plot(perphase=True)
    samples_neighbor.append(new_sample)

In [None]:
# Make the neighbour-refined results the working `samples` list. The deep copy keeps
# later changes to `samples` from touching the objects held in samples_neighbor.
samples = deepcopy(samples_neighbor)

In [None]:

# Call update_solution(0.15, 0.1999, ...) on every working sample; the return value is
# not used, so any effect is on the sample object itself.
for sample in samples:
    sample.update_solution(0.15, 0.1999,sample.max_q_shift)
    

In [None]:
# Neighbour-informed refinement (composition radius 0.5, optimize step disabled).
# NOTE(review): the samples_neighbor list built here is not read by the save cell that
# follows (it saves `samples`) -- confirm that is intended.
comp_dist = distance_matrix(instance_data.sample_comp, instance_data.sample_comp)
# Neighbours of sample i: samples closer than 0.5 in composition, excluding zero distance.
nn_list = {i: np.where((comp_dist[i] < 0.5) & (comp_dist[i] > 0))[0] for i in range(instance_data.sample_num)}
new_samples = deepcopy(samples)
samples_neighbor = []
for sample in new_samples:
    if len(sample.phase_fractions) == 0:
        # Samples with no phases borrow the solution of the sample with the previous ID.
        # NOTE(review): for sample_id 0 the index is -1, i.e. the LAST sample -- confirm intended.
        sample.solution = new_samples[sample.sample_id - 1].solution
    candidate_entries = []
    for i in nn_list[sample.sample_id]:
        candidate_entries += new_samples[i].entries
    # De-duplicate while keeping first-seen order. The previous list(set(...)) left the
    # candidates in set-iteration order, which is arbitrary and may differ between kernel
    # sessions, making the order-dependent refinement below irreproducible.
    candidate_entries = list(dict.fromkeys(candidate_entries))
    solution = [
        Phase.from_entry_and_instance_data(e, 1 / len(candidate_entries), instance_data)
        for e in candidate_entries
    ]
    new_sample = deepcopy(sample)
    new_sample.solution = solution
    new_sample.refine_all_fractions()
    new_sample.refine_one_by_one()
    new_sample.update_solution(0.03, 0.1999, new_sample.max_q_shift)
#     new_sample = new_sample.optimize(num_epoch=500, print_prog=True,loss_weight=loss_weight)
#     new_sample.update_solution(0.01, 0.1999,sample.max_q_shift)
    new_sample.refine_all_fractions()
    new_sample.refine_one_by_one()
    new_sample.update_solution(0.01, 0.2999, new_sample.max_q_shift)
    new_sample.print_solution()
    new_sample.plot(perphase=True)
    samples_neighbor.append(new_sample)

In [None]:
# Persist every working sample as JSON plus a per-phase PDF figure.
# Create the output folders first so a missing directory cannot abort the save after
# the expensive fitting cells have already run.
os.makedirs('solution', exist_ok=True)
os.makedirs('solution_figures', exist_ok=True)
for i in range(instance_data.sample_num):
    sample = samples[i]
    # The JSON file is named by list position, the figure by the sample's own ID.
    solution_file = f'solution/samples{i}.json'
    with open(solution_file, 'w') as f:
        json.dump(sample, f, cls=MontyEncoder)
    # Plot only after the JSON handle is closed; plotting does not need the file open.
    sample.plot(perphase=True, saveplot=f'solution_figures/sample_{sample.sample_id}.pdf')

In [None]:
# Rebuild `samples` from the JSON files written by the save cell, one file per index.
samples = []
for i in range(instance_data.sample_num):
    with open(f'solution/samples{i}.json') as f:
        sample = json.load(f, cls=MontyDecoder)
    samples.append(sample)
    # Report any file whose stored sample_id disagrees with the index in its file name.
    if sample.sample_id != i:
        print (i)

In [None]:
# Neighbour-informed refinement on the reloaded samples (composition radius 0.5,
# optimize step disabled). Results are collected in samples_neighbor.
comp_dist = distance_matrix(instance_data.sample_comp, instance_data.sample_comp)
# Neighbours of sample i: samples closer than 0.5 in composition, excluding zero distance.
nn_list = {i: np.where((comp_dist[i] < 0.5) & (comp_dist[i] > 0))[0] for i in range(instance_data.sample_num)}
new_samples = deepcopy(samples)
samples_neighbor = []
for sample in new_samples:
    if len(sample.phase_fractions) == 0:
        # Samples with no phases borrow the solution of the sample with the previous ID.
        # NOTE(review): for sample_id 0 the index is -1, i.e. the LAST sample -- confirm intended.
        sample.solution = new_samples[sample.sample_id - 1].solution
    candidate_entries = []
    for i in nn_list[sample.sample_id]:
        candidate_entries += new_samples[i].entries
    # De-duplicate while keeping first-seen order. The previous list(set(...)) left the
    # candidates in set-iteration order, which is arbitrary and may differ between kernel
    # sessions, making the order-dependent refinement below irreproducible.
    candidate_entries = list(dict.fromkeys(candidate_entries))
    solution = [
        Phase.from_entry_and_instance_data(e, 1 / len(candidate_entries), instance_data)
        for e in candidate_entries
    ]
    new_sample = deepcopy(sample)
    new_sample.solution = solution
    new_sample.refine_all_fractions()
    new_sample.refine_one_by_one()
    new_sample.update_solution(0.02, 0.1999, new_sample.max_q_shift)
#     new_sample = new_sample.optimize(num_epoch=500, print_prog=True,loss_weight=loss_weight)
#     new_sample.update_solution(0.01, 0.1999,new_sample.max_q_shift)
    new_sample.refine_all_fractions()
    new_sample.refine_one_by_one()
    new_sample.update_solution(0.01, 0.1999, new_sample.max_q_shift)
    new_sample.print_solution()
    new_sample.plot(perphase=True)
    samples_neighbor.append(new_sample)

In [None]:
# Final neighbour-informed refinement: starts from samples_neighbor and writes the
# results back into `samples` (single refine + update round, radius 0.5).
comp_dist = distance_matrix(instance_data.sample_comp, instance_data.sample_comp)
# Neighbours of sample i: samples closer than 0.5 in composition, excluding zero distance.
nn_list = {i: np.where((comp_dist[i] < 0.5) & (comp_dist[i] > 0))[0] for i in range(instance_data.sample_num)}
new_samples = deepcopy(samples_neighbor)
samples = []
for sample in new_samples:
    if len(sample.phase_fractions) == 0:
        # Samples with no phases borrow the solution of the sample with the previous ID.
        # NOTE(review): for sample_id 0 the index is -1, i.e. the LAST sample -- confirm intended.
        sample.solution = new_samples[sample.sample_id - 1].solution
    candidate_entries = []
    for i in nn_list[sample.sample_id]:
        candidate_entries += new_samples[i].entries
    # De-duplicate while keeping first-seen order. The previous list(set(...)) left the
    # candidates in set-iteration order, which is arbitrary and may differ between kernel
    # sessions, making the order-dependent refinement below irreproducible.
    candidate_entries = list(dict.fromkeys(candidate_entries))
    solution = [
        Phase.from_entry_and_instance_data(e, 1 / len(candidate_entries), instance_data)
        for e in candidate_entries
    ]
    new_sample = deepcopy(sample)
    new_sample.solution = solution
#     new_sample.refine_all_fractions()
#     new_sample.refine_one_by_one()    
#     new_sample.update_solution(0.02, 0.2999, new_sample.max_q_shift)
#     new_sample = new_sample.optimize(num_epoch=500, print_prog=True,loss_weight=loss_weight)
#     new_sample.update_solution(0.01, 0.2999,new_sample.max_q_shift)
    new_sample.refine_all_fractions()
    new_sample.refine_one_by_one()
    new_sample.update_solution(0.01, 0.2999, new_sample.max_q_shift)
    new_sample.print_solution()
    new_sample.plot(perphase=True)
    samples.append(new_sample)

In [None]:
# Persist every working sample as JSON plus a per-phase PDF figure.
# Create the output folders first so a missing directory cannot abort the save after
# the expensive fitting cells have already run.
os.makedirs('solution', exist_ok=True)
os.makedirs('solution_figures', exist_ok=True)
for i in range(instance_data.sample_num):
    sample = samples[i]
    # The JSON file is named by list position, the figure by the sample's own ID.
    solution_file = f'solution/samples{i}.json'
    with open(solution_file, 'w') as f:
        json.dump(sample, f, cls=MontyEncoder)
    # Plot only after the JSON handle is closed; plotting does not need the file open.
    sample.plot(perphase=True, saveplot=f'solution_figures/sample_{sample.sample_id}.pdf')

In [None]:
def check(sample,new_sample,loss_weight):
    """Return a deep copy of whichever of the two samples has the lower loss.

    Args:
        sample: the current sample; must provide ``loss(loss_weight)``.
        new_sample: the challenger; must provide ``loss(loss_weight)``.
        loss_weight: forwarded unchanged to both ``loss`` calls.

    Returns:
        A deep copy of ``new_sample`` when its loss is strictly lower than that of
        ``sample`` (a message is printed in that case); otherwise a deep copy of
        ``sample``. Ties keep ``sample``.
    """
    challenger_loss = new_sample.loss(loss_weight)
    current_loss = sample.loss(loss_weight)
    if not challenger_loss < current_loss:
        return deepcopy(sample)
    print ('Better solution found!')
    return deepcopy(new_sample)

In [None]:
# For every sample: run an optimize pass on a copy, keep whichever of (original,
# optimized) has the lower loss via check(), refine it, then save JSON + figure.
# Create the output folders first so a missing directory cannot abort the save after
# the optimization has already run.
os.makedirs('solution', exist_ok=True)
os.makedirs('solution_figures', exist_ok=True)
for i in range(instance_data.sample_num):
    sample = samples[i]
    solution_file = f'solution/samples{i}.json'
    new_sample = deepcopy(sample)
    # optimize returns the sample to continue with, so the name is re-bound.
    new_sample = new_sample.optimize(num_epoch=500, print_prog=True,loss_weight=loss_weight)
    new_sample.update_solution(0.02, 0.2999,sample.max_q_shift)

    sample = check(sample,new_sample,loss_weight)
    sample.refine_one_by_one()
    sample.refine_all_fractions()
    # check() returns a copy, so store the refined winner back into the list; without
    # this, `samples` kept the pre-optimization objects while the files on disk held the
    # new ones, and later cells reading `samples` worked on stale solutions.
    samples[i] = sample
    sample.plot(perphase=True)
    with open(solution_file, 'w') as f:
        json.dump(sample, f, cls=MontyEncoder)
    # Save the figure only after the JSON handle is closed.
    sample.plot(perphase=True, saveplot=f'solution_figures/sample_{sample.sample_id}.pdf')

In [None]:
# Pairwise distances between all sample compositions (distance_matrix with its default metric).
comp_dist = distance_matrix(instance_data.sample_comp, instance_data.sample_comp)
# For each sample index, the indices of the samples closer than 0.5; a distance of exactly
# zero is excluded, which removes the sample itself and any identical composition.
nn_list = {
    idx: np.flatnonzero((comp_dist[idx] > 0) & (comp_dist[idx] < 0.5))
    for idx in range(instance_data.sample_num)
}

In [None]:
# Re-fit only the samples whose R exceeds 0.4, using the entries of their compositional
# neighbours (nn_list must already exist from the preceding cell). A re-fit replaces
# the sample in `samples` and on disk only if its loss is not worse than before.
# Create the output folders first so a missing directory cannot abort the save after
# the long optimization has already run.
os.makedirs('solution', exist_ok=True)
os.makedirs('solution_figures', exist_ok=True)
for sample in samples:
    # `not R > 0.4` rather than `R <= 0.4`: a NaN R is skipped, as in the original test.
    if not sample.R > 0.4:
        continue
    candidate_entries = []
    for i in nn_list[sample.sample_id]:
        # Reads the live `samples` list, so neighbours already replaced earlier in this
        # loop contribute their new entries.
        candidate_entries += samples[i].entries
    # De-duplicate while keeping first-seen order. The previous list(set(...)) left the
    # candidates in set-iteration order, which is arbitrary and may differ between kernel
    # sessions, making the optimization below irreproducible.
    candidate_entries = list(dict.fromkeys(candidate_entries))
    solution = [
        Phase.from_entry_and_instance_data(e, 1 / len(candidate_entries), instance_data)
        for e in candidate_entries
    ]

    new_sample = deepcopy(sample)
    new_sample.solution = solution
    # Short optimization, update, then a long optimization and another update.
    new_sample = new_sample.optimize(num_epoch=500, print_prog=True, loss_weight=loss_weight)
    new_sample.update_solution(0.03, 0.2999, new_sample.max_q_shift)

    new_sample = new_sample.optimize(num_epoch=3000, print_prog=True, loss_weight=loss_weight)
    new_sample.update_solution(0.01, 0.2999, new_sample.max_q_shift)

    new_sample.refine_one_by_one()
    new_sample.refine_all_fractions()

    new_sample.update_solution(0.01, 0.2999, new_sample.max_q_shift)
    print(new_sample.loss(loss_weight))
    if new_sample.loss(loss_weight) <= sample.loss(loss_weight):
        sample.print_solution()
        new_sample.print_solution()
        # Uses sample_id as the list position -- assumes samples[k].sample_id == k.
        samples[sample.sample_id] = new_sample
        solution_file = f'solution/samples{sample.sample_id}.json'
        with open(solution_file, 'w') as f:
            json.dump(new_sample, f, cls=MontyEncoder)
        # Save the figure only after the JSON handle is closed.
        new_sample.plot(perphase=True, saveplot=f'solution_figures/sample_{sample.sample_id}.pdf')

In [None]:
# NOTE(review): solution_statistics is defined in the cell BELOW this one, so on a fresh
# kernel (Restart & Run All) this call raises NameError unless that cell is run first.
solution_statistics(samples)

In [None]:
def solution_statistics(samples_solutions):
    """Print a per-entry summary of how strongly each entry is used across samples.

    Within each sample the phase fractions are divided by the sum of that sample's
    fractions. For every entry the table then reports the summed share (``tot``),
    the largest and smallest share over the samples that contain it (``max``,
    ``min``), the number of phases referring to it (``count``) and its name.
    Rows are sorted by ``tot`` and then ``count``, both descending.

    Args:
        samples_solutions: a re-iterable collection (it is traversed twice) of
            sample objects exposing ``entries`` (hashable objects with
            ``entry_id`` and ``name``) and ``solution`` (phases with ``fraction``
            and ``entry.entry_id``).

    Returns:
        None. The table is printed.
    """
    from collections import defaultdict
    from pandas import DataFrame

    activated_entries = set()
    for sample in samples_solutions:
        activated_entries |= set(sample.entries)

    max_act = defaultdict(float)
    # Plain dict: the first share seen for an entry seeds its minimum. The old code
    # compared against max_act, so 'min' ended up being the last share seen.
    min_act = {}
    tot_act = defaultdict(float)
    count_act = defaultdict(int)
    for sample in samples_solutions:
        norm = sum(p.fraction for p in sample.solution)
        if norm == 0:
            # Shares are undefined when the fractions sum to zero; skip instead of dividing by zero.
            continue
        for phase in sample.solution:
            entry_id = phase.entry.entry_id
            share = phase.fraction / norm
            tot_act[entry_id] += share
            count_act[entry_id] += 1
            max_act[entry_id] = max(share, max_act[entry_id])
            min_act[entry_id] = min(share, min_act.get(entry_id, share))

    # Freeze one iteration order so the ID and name columns are guaranteed to line up.
    entries = list(activated_entries)
    entry_ids = [entry.entry_id for entry in entries]
    df = DataFrame(data={
        'entry_id': entry_ids,
        'tot': [tot_act[i] for i in entry_ids],
        'max': [max_act[i] for i in entry_ids],
        # Entries that never occur in a counted solution report 0.0, as before.
        'min': [min_act.get(i, 0.0) for i in entry_ids],
        'count': [count_act[i] for i in entry_ids],
        'names': [entry.name for entry in entries],
    })
    df = df.sort_values(by=['tot', 'count'], ascending=False).reset_index(drop=True)
    print(df)

In [None]:
# comp_dist = distance_matrix(instance_data.sample_comp, instance_data.sample_comp)
# nn_list = {i: np.where((comp_dist[i] < 0.15) & (comp_dist[i] > 0))[0] for i in range(instance_data.sample_num)}
# new_samples=deepcopy(samples)
# samples_neighbor = []
# for sample in new_samples:    
#     if len(sample.phase_fractions)==0:
#         sample.solution = new_samples[sample.sample_id-1].solution
#     if len(sample.phase_fractions)>0:
#         candidate_entries = []
#         for i in nn_list[sample.sample_id]:
#             candidate_entries += new_samples[i].entries
#         candidate_entries = list(set(candidate_entries))
#         solution = []
#         for e in candidate_entries:
#             phase = Phase.from_entry_and_instance_data(e, 1 / len(candidate_entries), instance_data)
#             solution.append(phase)
#     new_sample = deepcopy(sample)
#     new_sample.solution = solution  
#     new_sample.refine_all_fractions()
#     new_sample.refine_one_by_one()    
#     new_sample.update_solution(0.03, 0.2999, new_sample.max_q_shift)
#     new_sample = new_sample.optimize(num_epoch=500, print_prog=True,loss_weight=loss_weight)
#     new_sample.update_solution(0.01, 0.2999,sample.max_q_shift)
#     new_sample.refine_all_fractions()
#     new_sample.refine_one_by_one()
#     new_sample.print_solution()
#     new_sample.plot(perphase=True)
#     samples_neighbor.append(new_sample)

In [None]:
# new_samples_neighbor=deepcopy(samples_neighbor)
# for sample in new_samples_neighbor:  
#     sample.print_solution()
#     sample.plot(perphase=True)
#     solution_file = f'solution/samples{sample.sample_id}.json'
#     with open(solution_file, 'w') as f:
#         json.dump(sample, f, cls=MontyEncoder)
#     sample.plot(perphase=True, saveplot=f'solution_figures/sample_{sample.sample_id}.pdf')

In [None]:
# Rebuild `samples` from the per-sample JSON files in 'solution/'.
samples = []
for i in range(instance_data.sample_num):
    path = f'solution/samples{i}.json'
    with open(path) as f:
        sample = json.load(f, cls=MontyDecoder)
    samples.append(sample)
    # Print the index of any file whose stored sample_id does not match its file name.
    if sample.sample_id != i:
        print (i)

In [None]:
# comp_dist = distance_matrix(instance_data.sample_comp, instance_data.sample_comp)
# nn_list = {i: np.where((comp_dist[i] < 0.15) & (comp_dist[i] > 0))[0] for i in range(instance_data.sample_num)}
# new_samples=deepcopy(samples)
# samples_neighbor = []
# for sample in new_samples:    
#     if len(sample.phase_fractions)==0:
#         sample.solution = new_samples[sample.sample_id-1].solution
#     if len(sample.phase_fractions)>0:
#         candidate_entries = []
#         for i in nn_list[sample.sample_id]:
#             candidate_entries += new_samples[i].entries
#         candidate_entries = list(set(candidate_entries))
#         solution = []
#         for e in candidate_entries:
#             phase = Phase.from_entry_and_instance_data(e, 1 / len(candidate_entries), instance_data)
#             solution.append(phase)
#     new_sample = deepcopy(sample)
#     new_sample.solution = solution  
#     new_sample.refine_all_fractions()
#     new_sample.refine_one_by_one()    
#     new_sample.update_solution(0.05, 0.2999, new_sample.max_q_shift)
# #     new_sample = new_sample.optimize(num_epoch=500, print_prog=True,loss_weight=loss_weight)
# #     new_sample.update_solution(0.01, 0.2999,sample.max_q_shift)
#     new_sample.refine_all_fractions()
#     new_sample.refine_one_by_one()
#     new_sample.update_solution(0.02, 0.2999, new_sample.max_q_shift)
#     new_sample.print_solution()
#     new_sample.plot(perphase=True)
#     samples_neighbor.append(new_sample)

In [None]:
# new_samples_neighbor=deepcopy(samples_neighbor)
# for sample in new_samples_neighbor:  
#     sample.print_solution()
#     sample.plot(perphase=True)
#     solution_file = f'solution/samples{sample.sample_id}.json'
#     with open(solution_file, 'w') as f:
#         json.dump(sample, f, cls=MontyEncoder)
#     sample.plot(perphase=True, saveplot=f'solution_figures/sample_{sample.sample_id}.pdf')