In [1]:
# Import CSPML module.
import CSPML

# Import libraries.
import pandas as pd
import numpy as np
import pickle
import os

In [2]:
# The 38 benchmark compositions (chemical formulas) used as queries for crystal structure prediction.
query_formula = [
    'Ag8GeS6', 'Al2O3', 'BN', 'Ba(FeAs)2', 'Ba2CaSi4(BO7)2', 'Bi2Te3', 'C', 'Ca14MnSb11',
    'CaCO3', 'Cd3As2', 'CoSb3', 'CsPbI3', 'Cu12Sb4S13', 'Fe3O4', 'GaAs', 'GeH4',
    'La2CuO4', 'Li3PS4', 'Li4Ti5O12', 'LiBF4', 'LiCoO2', 'LiFePO4', 'LiPF6', 'MgB7',
    'Mn(FeO2)2', 'NaCaAlPHO5F2', 'Si', 'Si3N4', 'SiO2', 'SrTiO3', 'TiO2', 'V2O5',
    'VO2', 'Y3Al5O12', 'ZnO', 'ZnSb', 'ZrO2', 'ZrTe5',
]  # (N=38)

# Read the candidate compounds (N=33,115) used in the paper (https://doi.org/10.1016/j.commatsci.2022.111496).
# NOTE: unpickling can execute arbitrary code, so only load this file from a trusted source.
with open("./data_set/candidates_paper.pkl", "rb") as pkl_file:
    candidates_paper = pickle.load(pkl_file)

In [3]:
# Run the structure prediction for every query formula, drawing on the candidate
# compounds used in the paper. Here up to 12 structures are suggested per query formula.
predicted = CSPML.Structure_prediction(query_formula, 12, candidates_paper)

# Pick which result to show: i selects the query formula, j the rank (j = 0 is the top-1 structure).
# Both names are reused by the cells below.
i, j = 10, 0
print(
    f"The top-{j+1}th predicted structure for {query_formula[i]} is shown below;\n{predicted[i][j]}"
)

None of the candidates had the class probabilities greater than 0.5 at Ba2CaSi4(BO7)2.
None of the candidates had the class probabilities greater than 0.5 at MgB7.
None of the candidates had the same composition ratio as NaCaAlPHO5F2.
The top-1th predicted structure for CoSb3 is shown below;
Full Formula (Co4 Sb12)
Reduced Formula: CoSb3
abc   :   7.948651   7.948651   7.948651
angles: 109.471221 109.471221 109.471221
Sites (16)
  #  SP           a         b         c    magmom
---  ----  --------  --------  --------  --------
  0  Co    0.5       0.5       0.5          1.047
  1  Co    0.5       0         0            0.95
  2  Co    0         0         0.5          0.979
  3  Co    0         0.5       0            0.994
  4  Sb    0.669087  0.840293  0.50938     -0.001
  5  Sb    0.840293  0.50938   0.669087    -0.007
  6  Sb    0.50938   0.669087  0.840293    -0.007
  7  Sb    0.669087  0.159707  0.828795    -0.002
  8  Sb    0.840293  0.171205  0.330913    -0.01
  9  Sb    0.159707

In [4]:
# With SI=True the call returns a second value holding supplementary information
# about the predicted structures. Here up to 5 structures are suggested per query formula.
predicted, SI = CSPML.Structure_prediction(query_formula, 5, candidates_paper, SI=True)

# Show the selected structure, followed by the template it was derived from
# (i and j were chosen in the previous cell).
print(
    f"The top-{j+1}th predicted structure for {query_formula[i]} is shown below; \n{predicted[i][j]}",
    f"This predicted structure was generated by element-substitution of the template structure; \nformula = {SI[i]['topK_formula'][j]}, material id = {SI[i]['topK_id'][j]}",
    sep="\n",
)

None of the candidates had the class probabilities greater than 0.5 at Ba2CaSi4(BO7)2.
None of the candidates had the class probabilities greater than 0.5 at MgB7.
None of the candidates had the same composition ratio as NaCaAlPHO5F2.
The top-1th predicted structure for CoSb3 is shown below; 
Full Formula (Co4 Sb12)
Reduced Formula: CoSb3
abc   :   7.948651   7.948651   7.948651
angles: 109.471221 109.471221 109.471221
Sites (16)
  #  SP           a         b         c    magmom
---  ----  --------  --------  --------  --------
  0  Co    0.5       0.5       0.5          1.047
  1  Co    0.5       0         0            0.95
  2  Co    0         0         0.5          0.979
  3  Co    0         0.5       0            0.994
  4  Sb    0.669087  0.840293  0.50938     -0.001
  5  Sb    0.840293  0.50938   0.669087    -0.007
  6  Sb    0.50938   0.669087  0.840293    -0.007
  7  Sb    0.669087  0.159707  0.828795    -0.002
  8  Sb    0.840293  0.171205  0.330913    -0.01
  9  Sb    0.15970

In [5]:
# Create the directory that will receive the .cif files of the predicted structures.
# exist_ok=True keeps this cell re-runnable: a plain os.mkdir raises FileExistsError
# as soon as the directory is left over from an earlier run.
os.makedirs("predicted_structures_paper", exist_ok=True)

# If save_cif = True, the .cif files of the predicted structures are automatically saved into save_cif_filename.
# In the example below, up to 5 structures are suggested for each query formula.
predicted = CSPML.Structure_prediction(query_formula, 5, candidates_paper, save_cif = True,
                                  save_cif_filename = "predicted_structures_paper")

# i and j (query index and rank) are the ones selected in the earlier display cell.
print(f"The top-{j+1}th predicted structure for {query_formula[i]} is saved as a {query_formula[i]}_{j+1}.cif.")

None of the candidates had the class probabilities greater than 0.5 at Ba2CaSi4(BO7)2.
None of the candidates had the class probabilities greater than 0.5 at MgB7.
None of the candidates had the same composition ratio as NaCaAlPHO5F2.
The top-1th predicted structure for CoSb3 is saved as a CoSb3_1.cif.


In [6]:
# For general use.

# Create the directory that will receive the .cif files of the predicted structures.
# exist_ok=True keeps this cell re-runnable: a plain os.mkdir raises FileExistsError
# as soon as the directory is left over from an earlier run.
os.makedirs("predicted_structures", exist_ok=True)

# Perform structure prediction with the candidate compounds embedded in the CSPML module.
# Since the embedded candidate compounds (=CSPML.MP_candidates; N=33,153) contain the true structures for
# all query formulas defined in this program, their true structures are suggested as the top-1 predicted structures.
# In the example below, up to 6 structures are suggested for each query formula.

predicted, SI = CSPML.Structure_prediction(query_formula, 6, SI = True, save_cif = True,
                                  save_cif_filename = "predicted_structures")

# The simplest form is "predicted = CSPML.Structure_prediction(query_formula, 6)".
# Since the candidate set is embedded in the module, the user only needs to set "query_formula" and "top_K".
# candidates_paper is a subset of the embedded candidate compounds and exists only for reproducing
# the results of the paper, so for general use you should use this form.

In [7]:
# For reproducing the results of the crystal structure prediction of the 50 randomly selected benchmark sets
# presented in the paper.

# The 50 randomly selected query compositions.
random50 = ['Ac2SnHg','Al6Ru','Ba2MgTl','Ba3Rh(CN)3','BaMg','BaNaP','Be17Ru3','Ca2Al2Sn2O9','Ca2As4Xe5F34','Cd3TeO6',
 'CdPtF6','Cr2GaC','CsAg2(B5O8)3','Er4MgNi','Eu2Mo(WO6)2','EuCoO3','Ho(Al5Ru)2','Ho6Al7Cu16','HoFeSi','K3Y3(BO3)4',
 'K4U5Te2O21','Li2BPt3','Li2DyIn','Li2SO4','Li7PN4','LiCeHg2','LiGdO2','LiIr','LuBiPd','Mg2Ga5Cu6','MgSn4Ru',
 'Mn3Sb5(IO3)3','Na2Ga2As3','NaCaVO4','NdEuCuS3','NpAs2','Pr9Ga5S21','Rb12Sn2As6O','Rb2ScInCl6','Rb3H(SO4)2','SmH3',
 'SrHI','SrNdVO4','Tb3Ni','TiMnSi2','TmRhO3','Y2GeRh3','Y2Te5O13','ZnH2SO5','Zr5CuSb3'] # (N=50)

# Load preset 33,153 candidate compounds.
# NOTE: unpickling can execute arbitrary code, so only load this file from a trusted source.
with open("./data_set/MP_candidates.pkl", "rb") as f:
    MP_candidates = pickle.load(f)

# Prepare the candidate templates for the 50 query compositions.
# Every candidate whose pretty_formula is one of the queries is dropped, so a query can never be
# answered with its own entry. The row mask is computed once and applied to all three tables
# so that they stay aligned row by row.
not_in_random50 = np.logical_not(MP_candidates["property"]["pretty_formula"].isin(random50))

random50_property = MP_candidates["property"][not_in_random50].reset_index(drop=True)
random50_composition = MP_candidates["composition"][not_in_random50].reset_index(drop=True)
random50_descriptor = MP_candidates["descriptor"][not_in_random50].reset_index(drop=True)

candidate_random50 = {'property':random50_property, 'composition':random50_composition, 'descriptor':random50_descriptor}

# Crystal structure prediction for the 50 query compositions.
# Create the directory that will receive the .cif files of the predicted structures.
# exist_ok=True keeps this cell re-runnable: a plain os.mkdir raises FileExistsError
# as soon as the directory is left over from an earlier run.
os.makedirs("predicted_random50", exist_ok=True)

# If save_cif = True, the .cif files of the predicted structures are automatically saved into save_cif_filename.
# In the example below, up to 5 structures are suggested for each query formula.
predicted_50, SI_50 = CSPML.Structure_prediction(random50, 5, candidate_random50, save_cif = True, SI = True,
                                  save_cif_filename = "predicted_random50")

# Note that the crystal structure data (CIF files) provided in the Supplementary Data (https://doi.org/10.1016/j.commatsci.2022.111496)
# are locally optimized structures of the above prediction results using DFT calculations.

None of the candidates had the same composition ratio as Ca2As4Xe5F34.
None of the candidates had the same composition ratio as CsAg2(B5O8)3.
None of the candidates had the same composition ratio as K4U5Te2O21.
None of the candidates had the same composition ratio as Mn3Sb5(IO3)3.
