In [7]:
import numpy as np
import matplotlib.pyplot as plt
import sys
import os
from copy import deepcopy
from scipy.constants import h, c, e
from monty.json import MontyDecoder, MontyEncoder

In [3]:
"""
The Bi-Cu-V dataset is from "Automating crystal-structure phase mapping by combining deep learning with constraint reasoning"
and its GitHub repository: https://github.com/gomes-lab/DRNets-Nature-Machine-Intelligence

In the following, **P** is the number of pure phases provided in the ICDD library,
**M** is the number of elements in the system, 
**N** is the number of data points, 
**Q** is the number of diffraction scattering vector magnitudes (angles) with measurements in each XRD pattern, 
**Q'** is the downsampled length, and **K** is the expected maximum number of phases present in the solution.

ICDD Library-100 entries:
    **bases_comp.npy** : a matrix of size (**P**, **M**), e.g., (100, 3) for Bi-Cu-V.
        The elemental compositions of the possible pure phases in the ICDD library.
    **bases_edge.npy** : a matrix of size (**P**, **P**), e.g., (100, 100) for Bi-Cu-V.  
        The similarity matrix of possible phases. If two phases are linked, then they are very similar to each other and can be considered interchangeable.
    **bases_name.npy** : a vector of size (**P**), e.g., (100) for Bi-Cu-V.  
        The names of the possible pure phases in the ICDD library.
    **sticks_lib.npy** : an object array of size (**P**).  
        The stick pattern or list of peaks (Q, intensity) for each ICDD library phase.

    
Instance_data-307 samples:
    **composition.npy** : a matrix of size (**N**, **M**), e.g., (307, 3) for Bi-Cu-V.  
        The elemental composition of the mixed material in each data point.
    **data.npy** : a matrix of size (**N**, **M**+**Q**), e.g., (307, 3 + 1197) for Bi-Cu-V.  
        For each row, the composition (length **M**) is concatenated with the XRD pattern (length **Q**), i.e. the XRD intensity at each diffraction angle.
    **lib_comp.npy** : a matrix of size (**P**, **M**), e.g., (100, 3) for Bi-Cu-V.  
        The elemental compositions of the possible pure phases in the ICDD library. This is the same as bases_comp.npy.
    **Q.npy** : a vector of length (**Q**).  
        The XRD scattering vector magnitudes (angles) for the XRD patterns.
    **Q_XXX.npy** : a vector of length (**Q'**).  
        The downsampled XRD scattering vector magnitudes (angles) for lower resolution versions of the XRD patterns.
    **XRD.npy** : a matrix of size (**N**, **Q**) 
        The unnormalized XRD patterns for each data point.   


"""
# NOTE(review): allow_pickle=True makes np.load unpickle object arrays, which
# can run arbitrary code from a malicious file -- only load these .npy files
# from a trusted source. sticks_lib.npy is documented above as an object
# array; whether the other files also need pickling has not been verified.

# ICDD library: 100 entries.
bases_comp = np.load('bases_comp.npy', allow_pickle=True)
bases_edge = np.load('bases_edge.npy', allow_pickle=True)
bases_name = np.load('bases_name.npy', allow_pickle=True)
sticks_lib = np.load('sticks_lib.npy', allow_pickle=True)

# Instance data: 307 samples.
composition = np.load('composition.npy', allow_pickle=True)
data_com_xrd = np.load('data.npy', allow_pickle=True)
lib_comp = np.load('lib_comp.npy', allow_pickle=True)
Q = np.load('Q.npy', allow_pickle=True)
Q_300 = np.load('Q_300.npy', allow_pickle=True)
Q_idx_300 = np.load('Q_idx_300.npy', allow_pickle=True)
XRD = np.load('XRD.npy', allow_pickle=True)


In [9]:
import json

def creat_Instance_data_Bi_Cu_V(index, q, sample_xrd, comp):
    """Bundle one sample's data into a nested record.

    Parameters
    ----------
    index
        Stored under the top-level ``'index'`` key.
    q
        Stored under ``'Instance_data_info' -> 'q'``.
    sample_xrd
        Stored under ``'Instance_data_info' -> 'sample_xrd'``.
    comp
        Stored under ``'Instance_data_info' -> 'comp'``.

    Returns
    -------
    dict
        ``{'index': index, 'Instance_data_info': {'q': q,
        'sample_xrd': sample_xrd, 'comp': comp}}``. The arguments are
        stored as passed; nothing is copied or converted.
    """
    return {
        'index': index,
        'Instance_data_info': {
            'q': q,
            'sample_xrd': sample_xrd,
            'comp': comp,
        },
    }


if __name__ == '__main__':
    # Build one record per sample and write them all to
    # 'Instance_data_Bi_Cu_V.json'.

    # The scattering-vector axis is the same for every sample, so convert it
    # to a plain list once instead of on every iteration. All records share
    # this one list object; the JSON output is unaffected by the sharing.
    q_axis = Q.tolist()

    # Each row of data.npy is the composition followed by the XRD pattern
    # (see the file description above); skip the M = 3 composition columns.
    n_comp_columns = 3

    Instance_data_Bi_Cu_V = [
        creat_Instance_data_Bi_Cu_V(
            i,
            q_axis,
            data_com_xrd[i][n_comp_columns:].tolist(),
            composition[i].tolist(),
        )
        for i in range(len(composition))
    ]

    # Keep the serialized text under its own name so Instance_data_Bi_Cu_V
    # stays a list of records rather than being rebound to a JSON string.
    instance_json = json.dumps(Instance_data_Bi_Cu_V, cls=MontyEncoder)
    # The file is only written here, so plain 'w' is enough (no read access).
    with open('Instance_data_Bi_Cu_V.json', 'w') as file:
        file.write(instance_json)

In [10]:
# Read the 307 Bi-Cu-V sample records back from the JSON file.
with open('Instance_data_Bi_Cu_V.json') as instance_file:
    Instance_data_Bi_Cu_V = json.load(instance_file, cls=MontyDecoder)

In [13]:
# Separate every ICDD stick pattern in sticks_lib into its scattering-vector
# (q) values and the matching intensities: one tuple of each per library entry.
qs, amps = [], []
for stick_pattern in sticks_lib:
    # zip(*pattern) transposes the (q, intensity) pairs into two tuples.
    q_values, intensities = zip(*stick_pattern)
    qs.append(q_values)
    amps.append(intensities)

In [14]:
# Split each library file name into the phase name, the ICDD entry id and the
# crystal system. Names look like '0+Bi(VO4)_04-010-5710_Tetragonal.txt',
# i.e. '<prefix>+<name>_<entry id>_<crystal system>.txt'.
bases_name_copy = deepcopy(bases_name)
name_fields = [file_name.split('_') for file_name in bases_name_copy]

# Phase name: the text after '+' in the first '_'-separated field.
names = [fields[0].split('+')[1] for fields in name_fields]
# ICDD entry id: the second '_'-separated field.
entry_ids = [fields[1] for fields in name_fields]
# Crystal system: the third field with everything from the first '.' removed.
crystal_systems = [fields[2].split('.')[0] for fields in name_fields]

In [15]:


def creat_ICDD_entries_Bi_Cu_V(index, q, amp, xrd, comp, base_name, name, entry_id, crystal_system):
    """Bundle one ICDD library entry into a nested record.

    Parameters
    ----------
    index
        Stored under the top-level ``'index'`` key.
    q, amp, xrd, comp, base_name, name, entry_id, crystal_system
        Each is stored under the key of the same name inside the
        ``'entries_info'`` sub-dictionary.

    Returns
    -------
    dict
        ``{'index': index, 'entries_info': {...}}`` where the inner dict
        holds the remaining eight arguments in signature order. The
        arguments are stored as passed; nothing is copied or converted.
    """
    return {
        'index': index,
        'entries_info': {
            'q': q,
            'amp': amp,
            'xrd': xrd,
            'comp': comp,
            'base_name': base_name,
            'name': name,
            'entry_id': entry_id,
            'crystal_system': crystal_system,
        },
    }


if __name__ == '__main__':
    # Build one record per ICDD library entry and write them all to
    # 'ICDD_entries_Bi_Cu_V.json'.
    ICDD_entries_Bi_Cu_V = []
    for i in range(len(bases_comp)):
        entry = creat_ICDD_entries_Bi_Cu_V(
            i,
            # np.array(...).tolist() turns the tuples of NumPy scalars into
            # lists of plain Python numbers that json can serialize.
            np.array(qs[i]).tolist(),
            np.array(amps[i]).tolist(),
            sticks_lib[i].tolist(),
            bases_comp[i].tolist(),
            # str() yields a plain Python str, also for NumPy string scalars.
            str(bases_name[i]),
            str(names[i]),
            str(entry_ids[i]),
            str(crystal_systems[i]),
        )
        ICDD_entries_Bi_Cu_V.append(entry)

    # Keep the serialized text under its own name so ICDD_entries_Bi_Cu_V
    # stays a list of records rather than being rebound to a JSON string.
    entries_json = json.dumps(ICDD_entries_Bi_Cu_V)
    # The file is only written here, so plain 'w' is enough (no read access).
    with open('ICDD_entries_Bi_Cu_V.json', 'w') as file:
        file.write(entries_json)

In [16]:
# Read the pool of 100 ICDD entries back from the JSON file.
with open('ICDD_entries_Bi_Cu_V.json') as entries_file:
    entries_Bi_Cu_V = json.load(entries_file, cls=MontyDecoder)