In [628]:
import pandas as pd
import numpy as np
import plotly as pt
import seaborn as sns
!pip install pymatgen
!pip install mp_api
import requests
import json
!pip install pymatgen nglview



In [629]:
from google.colab import output
output.enable_custom_widget_manager()

In [630]:
df = pd.read_excel("/content/drive/MyDrive/University/Artificial intelligence in chemistry/Perovskite project/Perovskite-liked-oxides-bandgap-prediction/Data/Perovskite dataset export.xlsx",sheet_name='Photocatalytic dataset')

In [631]:
df.columns

Index(['Perovskite', 'Hill formula', 'Interlayer space composition',
       'Bandgap, eV', 'DOI', 'Materials Project ID', 'COD_ID', 'Springer_ID',
       'MP_CIF_modifier', 'COD_CIF_modifier', 'Springer_CIF_modifier',
       'Materials Project verification', 'COD verification',
       'Springer verification', 'General verification', 'MP_CIF_modified',
       'COD_CIF_modified', 'Springer_CIF_modified', 'Z', 'Z_MP', 'Z_COD',
       'Z_Springer', 'a, A', 'b, A', 'c, A', 'Symmetry group', 'd,A', 'a_MP',
       'b_MP', 'c_MP', 'a_COD', 'b_COD', 'c_COD', 'a_Springer', 'b_Springer',
       'c_Springer', 'Number of octahedrons on a layer', 'Valence electrons',
       'Volume', 'Volume_MP', 'Volume_COD', 'Volume_Springer',
       'Valence Electrons Density', 'Valence Electrons Density_MP',
       'Valence Electrons Density_COD', 'Springer_Valence Electrons Density',
       'avg s valence electrons', 'avg p valence electrons',
       'avg d valence electrons', 'avg f valence electrons',
       

In [632]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1089 entries, 0 to 1088
Data columns (total 88 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   Perovskite                            1089 non-null   object 
 1   Hill formula                          730 non-null    object 
 2   Interlayer space composition          5 non-null      object 
 3   Bandgap, eV                           1054 non-null   float64
 4   DOI                                   1084 non-null   object 
 5   Materials Project ID                  1031 non-null   object 
 6   COD_ID                                953 non-null    object 
 7   Springer_ID                           248 non-null    object 
 8   MP_CIF_modifier                       332 non-null    object 
 9   COD_CIF_modifier                      197 non-null    object 
 10  Springer_CIF_modifier                 107 non-null    object 
 11  Materials Project

In [633]:
from pymatgen.core.structure import Structure
from pymatgen.core import Composition
from pymatgen.core.periodic_table import Element
import os
import re
import nglview as nv
from pymatgen.io.ase import AseAtomsAdaptor


In [634]:
subs_map = {
    "Ph": "C6H5",
    "Bn": "C7H7",
    #"Pr": "C3H7",
    "Bu": "C4H9",
    "Hx": "C6H13",
    "Me": "CH3",
    "Et": "C2H5",
    "Oc": "C8H17",
    "Dc": "C10H21",
}

import re

def expand_substituents(formula):
    if pd.isna(formula):
        return formula

    for abbr, full in subs_map.items():
        formula = re.sub(rf'{abbr}', full, formula)
    return formula

In [635]:
print(df.shape[0])
#df = df[~df['Perovskite'].str.contains("Nx", na=False)]
#df = df[~df['Perovskite'].str.contains("Ox", na=False)]
print(df.shape[0])
df['Perovskite'] = df['Perovskite'].apply(expand_substituents)

1089
1089


In [636]:
def getStructureFromCIF(cif_file_name):
  if(cif_file_name==-1):
    return 0
  file_path=f"/content/drive/MyDrive/University/Artificial intelligence in chemistry/Perovskite project/Perovskite-liked-oxides-bandgap-prediction/Data/CIF/{cif_file_name}.cif"
  if os.path.exists(file_path):
    try:
      structure = Structure.from_file(file_path)
    except:
      print('ERROR: Invalid structure for ',cif_file_name)
      return None
  else:
    return None

  if(structure == None):
    return None
  return structure

In [637]:
def eliminateAsterisksDromFormula(formula):
  parts = formula.split('*')
  main_formula = parts[0]
  if len(parts) == 1:
    return main_formula
  print("------------")
  print(formula)
  hydrate_part = parts[1]
  print(hydrate_part)
  #match = re.match(r'(\d+)([A-Za-z0-9]+)', hydrate_part)
  #match = re.match(r'([0-9]*\.?[0-9]+)([A-Za-z0-9]+)', hydrate_part)
  match = re.match(r'([0-9]*\.?[0-9]*)?([A-Za-z0-9]+)', hydrate_part)
  if not match:
     raise ValueError(f"Cannot parse hydrate: {hydrate_part}")
  number_str = match.group(1)
  n = float(number_str) if number_str else 1.0
  molecule = match.group(2)
  print(n)
  print(molecule)
  comp = Composition(molecule)
  comp *= n
  total_formula = Composition(main_formula) + comp
  print(total_formula)
  print("------------")
  return total_formula.reduced_composition

In [638]:
print(eliminateAsterisksDromFormula("CuSO4*5H2O"))
print(eliminateAsterisksDromFormula("CuSO4*0.25H2O"))

------------
CuSO4*5H2O
5H2O
5.0
H2O
Cu1 S1 O9 H10
------------
Cu1 S1 O9 H10
------------
CuSO4*0.25H2O
0.25H2O
0.25
H2O
Cu1 S1 O4.25 H0.5
------------
Cu1 S1 O4.25 H0.5


In [639]:
composition_relative_tolerance = 0.05

def checkCompositionStructureMatching(formula,cif_file_name):
  print('checkCompositionStructureMatching: entry')
  structure = getStructureFromCIF(cif_file_name)
  print('checkCompositionStructureMatching: structure is get')
  #print(structure)
  if(structure == None or structure==0):
    return False
  composition = structure.composition
  formula = eliminateAsterisksDromFormula(formula)
  try:
    composition_formula = Composition(formula)
  except:
    return False
  #print(type(composition))
  #print(type(composition_formula))
  #print(composition," || ", composition_formula, " = ")
  #print(composition_formula)
  #same = composition.reduced_composition == composition_formula.reduced_composition

  print(composition.items())
  factors = []
  for el, amt in composition.items():
    amt2= composition_formula[el];
    print(el," comp1: ",amt, " comp2: ",amt2)
    factor= amt2/amt
    factors.append(factor)

  factors_std = np.std(factors)
  print("Compositino scaling factors: ",factor," std:",factors_std)
  #for el, amt in comp.items():
  #      if el.symbol == from_el:
   #         for new_el, frac in to_dict.items():
  #              new_dict[Element(new_el)] = amt * frac

  #same = composition_formula.almost_equals(composition,rtol=0.4)
  same=False
  if(composition_relative_tolerance>factors_std):
    same=True

  print("CIF file:",cif_file_name," || " ,composition," || ", composition_formula, " = ",same)
  return same

In [640]:
#checkCompositionStructureMatching("Nb6K4O15OO","mp-560692")
checkCompositionStructureMatching("Bi14W2O27","sd_1922152")
#checkCompositionStructureMatching("K4La1.332Ta4O14","sd_1810747")

checkCompositionStructureMatching: entry
ERROR: Invalid structure for  sd_1922152
checkCompositionStructureMatching: structure is get


  block = CifBlock.from_str(f"data_{block_str}")
  block = CifBlock.from_str(f"data_{block_str}")
'_atom_site_label'
  struct = parser.parse_structures(primitive=primitive)[0]
  self.symmetry_operations = self.get_symops(data)  # type:ignore[assignment]
  symbol = self._parse_symbol(label)
could not convert string to float: 't'
  struct = parser.parse_structures(primitive=primitive)[0]
  self.symmetry_operations = self.get_symops(data)  # type:ignore[assignment]
could not convert string to float: 't'
  struct = parser.parse_structures(primitive=primitive)[0]
'_atom_site_label'
No _symmetry_equiv_pos_as_xyz type key found. Spacegroup from _symmetry_space_group_name_H-M used.
No _symmetry_equiv_pos_as_xyz type key found. Spacegroup from _symmetry_space_group_name_H-M used.
t parsed as T
No structure parsed for section 3 in CIF.
could not convert string to float: 't'
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
t parsed as T
No structure parsed for section 4 in CIF.
cou

False

#CIF modifier


In [641]:
def parse_stoichiometric_replacement(expr):
  expr = expr.replace(" ", "")
  if "->" not in expr:
        raise ValueError(f"Invalid expression (missing ->): {expr}")
  lhs, rhs = expr.split("->")
  print("LHS: ",lhs," RHS: ", rhs)

  # --- Parse LHS ---
  m = re.fullmatch(r"(?:(\d+(?:\.\d+)?))?([A-Z][a-z]?)", lhs)
  if not m:
      raise ValueError(f"Invalid LHS: {lhs}")

  lhs_coeff = float(m.group(1)) if m.group(1) else 1.0
  lhs_elem = m.group(2)

  # --- Parse RHS ---
  terms = rhs.split(",")
  rhs_counts = {}
  for term in terms:
        print('Term: ', term)
        #m = re.fullmatch(r"(\d+(?:\.\d+)?)([A-Z][a-z]?)", term)
        m = re.fullmatch(r"(?:(\d+(?:\.\d+)?))?([A-Z][a-z]?)", term)
        if not m:
            raise ValueError(f"Invalid RHS term: {term}")
        print("Term goups: ", m.group(1), "  ; ", m.group(2))
        coeff = 1
        if(m.group(1) is not None):
          coeff = float(m.group(1))
        elem = m.group(2)

        rhs_counts[elem] = rhs_counts.get(elem, 0.0) + coeff
  # --- Normalize RHS ---
  total_rhs = sum(rhs_counts.values())
  if total_rhs == 0:
      raise ValueError("RHS total stoichiometry is zero")

  rhs_fractions = {
      elem: coeff / total_rhs
      for elem, coeff in rhs_counts.items()
  }

  return {
      "from": lhs_elem,
      "total": lhs_coeff,
      "to": rhs_fractions
  }

In [642]:
inp = "Ta->Nb"
com = parse_stoichiometric_replacement(inp)
print(com)

inp = "Ta->0.5Nb,0.5Ta"
com = parse_stoichiometric_replacement(inp)
print(com)

LHS:  Ta  RHS:  Nb
Term:  Nb
Term goups:  None   ;  Nb
{'from': 'Ta', 'total': 1.0, 'to': {'Nb': 1.0}}
LHS:  Ta  RHS:  0.5Nb,0.5Ta
Term:  0.5Nb
Term goups:  0.5   ;  Nb
Term:  0.5Ta
Term goups:  0.5   ;  Ta
{'from': 'Ta', 'total': 1.0, 'to': {'Nb': 0.5, 'Ta': 0.5}}


In [643]:
def replace_element(comp, from_el, to_dict):
    print("Element replacement start: From ",from_el," To: ",to_dict)
    new_dict = {}

    for el, amt in comp.items():
        if el.symbol == from_el:
            for new_el, frac in to_dict.items():
                new_dict[Element(new_el)] = amt * frac
        else:
            new_dict[el] = amt
    output = Composition(new_dict)
    print("New comp: ", output)
    print("Element replacement is done!")
    return output

def modify_structure(structure, instruction):
  print("Start structure modification!")
  if(structure is None):
    print("Null structure")
    return None
  try:
    instructions = [cmd.strip() for cmd in instruction.split(";") if cmd.strip()]
        #old, new = instruction.split("->")
        #old = old.strip()
        #new = new.strip()
  except ValueError:
    raise ValueError("Failed to separte instructinos")
        #raise ValueError("Instruction must be of the form 'A->B', e.g. 'K->H'")
  output = structure
  for command in instructions:
    parsed_command = parse_stoichiometric_replacement(command)
    print("Parsed command: ", parsed_command)
    for site in structure:
      if site.is_ordered:
        print("Ordered site:", site.specie)
        if site.specie.symbol == parsed_command["from"]:
          site.species = {
              Element(el): frac
              for el, frac in parsed_command["to"].items()  #[TO DO]: not always 1:1 replacement
          }
      else:
        print("Disordered site:", site.species)
        print(site.species)
        print(type(site.species))
        species_comp = site.species;
        new_species_comp = replace_element(species_comp, parsed_command["from"], parsed_command["to"])
        site.species = new_species_comp
  print("Finish structure modification!")
  print("-------------------------------")
  print("-------------------------------")
  print("-------------------------------")
  print("-------------------------------")
  print("-------------------------------")
  return output


In [644]:
#s = getStructureFromCIF("sd_1810747")
#s
#s_new = modify_structure(s, "K->H")
s = getStructureFromCIF("sd_1958942")
s
s_new = modify_structure(s, "2Sr->Sr,Pb")
s_new.to("new_cif.cif","cif")

Start structure modification!
LHS:  2Sr  RHS:  Sr,Pb
Term:  Sr
Term goups:  None   ;  Sr
Term:  Pb
Term goups:  None   ;  Pb
Parsed command:  {'from': 'Sr', 'total': 2.0, 'to': {'Sr': 0.5, 'Pb': 0.5}}
Disordered site: Bi0.5 Sr0.5
Bi0.5 Sr0.5
<class 'pymatgen.core.composition.Composition'>
Element replacement start: From  Sr  To:  {'Sr': 0.5, 'Pb': 0.5}
New comp:  Bi0.5 Sr0.25 Pb0.25
Element replacement is done!
Disordered site: Bi0.5 Sr0.5
Bi0.5 Sr0.5
<class 'pymatgen.core.composition.Composition'>
Element replacement start: From  Sr  To:  {'Sr': 0.5, 'Pb': 0.5}
New comp:  Bi0.5 Sr0.25 Pb0.25
Element replacement is done!
Disordered site: Bi0.5 Sr0.5
Bi0.5 Sr0.5
<class 'pymatgen.core.composition.Composition'>
Element replacement start: From  Sr  To:  {'Sr': 0.5, 'Pb': 0.5}
New comp:  Bi0.5 Sr0.25 Pb0.25
Element replacement is done!
Disordered site: Bi0.5 Sr0.5
Bi0.5 Sr0.5
<class 'pymatgen.core.composition.Composition'>
Element replacement start: From  Sr  To:  {'Sr': 0.5, 'Pb': 0.5}
New

  if struct := self._get_structure(data, primitive, symmetrized, check_occu=check_occu):
No structure parsed for section 1 in CIF.
'_atom_site_label'
No _symmetry_equiv_pos_as_xyz type key found. Spacegroup from _symmetry_space_group_name_H-M used.
No _symmetry_equiv_pos_as_xyz type key found. Spacegroup from _symmetry_space_group_name_H-M used.
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
  struct = parser.parse_structures(primitive=primitive)[0]
  writer: Any = CifWriter(self, **kwargs)


"# generated using pymatgen\ndata_SrTa2Bi3PbClO11\n_symmetry_space_group_name_H-M   'P 1'\n_cell_length_a   3.91000000\n_cell_length_b   3.91000000\n_cell_length_c   18.49600000\n_cell_angle_alpha   90.00000000\n_cell_angle_beta   90.00000000\n_cell_angle_gamma   90.00000000\n_symmetry_Int_Tables_number   1\n_chemical_formula_structural   SrTa2Bi3PbClO11\n_chemical_formula_sum   'Sr1 Ta2 Bi3 Pb1 Cl1 O11'\n_cell_volume   282.76869760\n_cell_formula_units_Z   1\nloop_\n _symmetry_equiv_pos_site_id\n _symmetry_equiv_pos_as_xyz\n  1  'x, y, z'\nloop_\n _atom_site_type_symbol\n _atom_site_label\n _atom_site_symmetry_multiplicity\n _atom_site_fract_x\n _atom_site_fract_y\n _atom_site_fract_z\n _atom_site_occupancy\n  Sr  Sr_fix1  1  0.50000000  0.50000000  0.73255000  0.25\n  Bi  Sr_fix1  1  0.50000000  0.50000000  0.73255000  0.5\n  Pb  Sr_fix1  1  0.50000000  0.50000000  0.73255000  0.25\n  Sr  Sr_fix1  1  0.50000000  0.50000000  0.26745000  0.25\n  Bi  Sr_fix1  1  0.50000000  0.50000000  

In [645]:
def modify_CIF(cif_file_name, instruction):
  print('modify CIF: entry', cif_file_name)
  structure = getStructureFromCIF(cif_file_name)
  print('modify CIF: structure is get')
  if(structure == None or structure==0):
    return None
  new_structure = modify_structure(structure, instruction)
  return new_structure

def modify_all_CIFs(cif_input_column, instruction_column, cif_output_column, prefix):
  results = []
  counter=0
  col_idx = {name: i for i, name in enumerate(df.columns)}
  cif_i = col_idx[cif_input_column]
  instr_i = col_idx[instruction_column]

  for i, row in enumerate(df.itertuples(index=False, name=None), start=1):
    cif_input = row[cif_i]
    instruction = row[instr_i]
    print("CIF input: ", cif_input, " instruction: ", instruction)
    if pd.isna(instruction):
      print("No instruction")
      results.append(cif_input)
      continue
    new_CIF = modify_CIF(cif_input,instruction)
    if(new_CIF is None):
      results.append(("Invalid structure to modify: "+cif_input))
      continue
    new_CIF_name = "M_"+ prefix +str(counter)
    counter = counter +1
    file_path=f"/content/drive/MyDrive/University/Artificial intelligence in chemistry/Perovskite project/Perovskite-liked-oxides-bandgap-prediction/Data/CIF/{new_CIF_name}.cif"
    new_CIF.to(file_path,"cif")
    results.append(new_CIF_name)

  df[cif_output_column] = results
  print("Modified CIFs: ", counter)


In [646]:
modify_all_CIFs("Materials Project ID", "MP_CIF_modifier", "MP_CIF_modified", "MP")

CIF input:  mp-560692  instruction:  nan
No instruction
CIF input:  mp-1223501  instruction:  nan
No instruction
CIF input:  mp-553965  instruction:  nan
No instruction
CIF input:  mp-553248  instruction:  nan
No instruction
CIF input:  mp-557195  instruction:  nan
No instruction
CIF input:  mp-20396  instruction:  nan
No instruction
CIF input:  mp-581330  instruction:  nan
No instruction
CIF input:  mp-557195  instruction:  Ca->Sr
modify CIF: entry mp-557195
modify CIF: structure is get
Start structure modification!
LHS:  Ca  RHS:  Sr
Term:  Sr
Term goups:  None   ;  Sr
Parsed command:  {'from': 'Ca', 'total': 1.0, 'to': {'Sr': 1.0}}
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Orde

  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  6Nb  RHS:  3Nb,3Ta
Term:  3Nb
Term goups:  3   ;  Nb
Term:  3Ta
Term goups:  3   ;  Ta
Parsed command:  {'from': 'Nb', 'total': 6.0, 'to': {'Nb': 0.5, 'Ta': 0.5}}
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site

  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)


CIF input:  mp-1104930  instruction:  Ti->0.5Zr, 0.5Ti
modify CIF: entry mp-1104930
modify CIF: structure is get
Start structure modification!
LHS:  Ti  RHS:  0.5Zr,0.5Ti
Term:  0.5Zr
Term goups:  0.5   ;  Zr
Term:  0.5Ti
Term goups:  0.5   ;  Ti
Parsed command:  {'from': 'Ti', 'total': 1.0, 'to': {'Zr': 0.5, 'Ti': 0.5}}
Ordered site: K
Ordered site: K
Ordered site: La
Ordered site: La
Ordered site: Ti
Ordered site: Ti
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  mp-1104930  instruction:  Ti->0.7Zr, 0.3Ti
modify CIF: entry mp-1104930
modify CIF: structure is get
Start structure modification!
LHS:  Ti  RHS:  0.7Zr,0.3Ti
Term:  0.7Zr
Term goups:  0.7   ;  Zr
Term:  0.3Ti
Term goups:  0.3   ;  Ti
Parsed comm

  writer: Any = CifWriter(self, **kwargs)


CIF input:  mp-6548  instruction:  3Ti->2.6Ti, 0.4Fe
modify CIF: entry mp-6548
modify CIF: structure is get
Start structure modification!
LHS:  3Ti  RHS:  2.6Ti,0.4Fe
Term:  2.6Ti
Term goups:  2.6   ;  Ti
Term:  0.4Fe
Term goups:  0.4   ;  Fe
Parsed command:  {'from': 'Ti', 'total': 3.0, 'to': {'Ti': 0.8666666666666667, 'Fe': 0.13333333333333333}}
Ordered site: K
Ordered site: K
Ordered site: La
Ordered site: La
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  mp-6548  instruction:  3Ti->2.5Ti, 0.5Fe
modify CIF: entry mp-6548
modify CIF: structure is get
Start structure modification!
LHS:  3Ti  RHS:  2.5Ti,0.5Fe
Term:  2.5Ti
Te

  writer: Any = CifWriter(self, **kwargs)


CIF input:  mp-10347  instruction:  Cs->K; Ca->Sr
modify CIF: entry mp-10347
modify CIF: structure is get
Start structure modification!
LHS:  Cs  RHS:  K
Term:  K
Term goups:  None   ;  K
Parsed command:  {'from': 'Cs', 'total': 1.0, 'to': {'K': 1.0}}
Ordered site: Cs
Ordered site: Ca
Ordered site: Ca
Ordered site: Ta
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
LHS:  Ca  RHS:  Sr
Term:  Sr
Term goups:  None   ;  Sr
Parsed command:  {'from': 'Ca', 'total': 1.0, 'to': {'Sr': 1.0}}
Ordered site: K
Ordered site: Ca
Ordered site: Ca
Ordered site: Ta
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
----------

  writer: Any = CifWriter(self, **kwargs)


CIF input:  mp-1223520  instruction:  K->H
modify CIF: entry mp-1223520
modify CIF: structure is get
Start structure modification!
LHS:  K  RHS:  H
Term:  H
Term goups:  None   ;  H
Parsed command:  {'from': 'K', 'total': 1.0, 'to': {'H': 1.0}}
Ordered site: K
Ordered site: Ca
Ordered site: Ca
Ordered site: Ta
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  mp-553965  instruction:  nan
No instruction
CIF input:  mp-1222828  instruction:  nan
No instruction
CIF input:  mp-553965  instruction:  nan
No instruction
CIF input:  mp-1222828  instruction:  nan
No instruction
CIF input:  mp-553965  instruction:  nan
No instruction
CIF input:  mp-122282

  writer: Any = CifWriter(self, **kwargs)


CIF input:  mp-13664  instruction:  2Ta->1.7Ta, 0.3Nb
modify CIF: entry mp-13664
modify CIF: structure is get
Start structure modification!
LHS:  2Ta  RHS:  1.7Ta,0.3Nb
Term:  1.7Ta
Term goups:  1.7   ;  Ta
Term:  0.3Nb
Term goups:  0.3   ;  Nb
Parsed command:  {'from': 'Ta', 'total': 2.0, 'to': {'Ta': 0.85, 'Nb': 0.15}}
Ordered site: Sr
Ordered site: Sr
Ordered site: Sr
Ordered site: Sr
Ordered site: Ta
Ordered site: Ta
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  mp-13664  instruction:  2Ta->1.5Ta, 0.5Nb
modify CIF: entry mp-13664
modify CIF: structure is get
Start structure 

  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  Nd  RHS:  Pr
Term:  Pr
Term goups:  None   ;  Pr
Parsed command:  {'from': 'Nd', 'total': 1.0, 'to': {'Pr': 1.0}}
Ordered site: Nd
Ordered site: Nd
Ordered site: Nd
Ordered site: Nd
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  mp-4423  instruction:  2La-> Pr, La
modify CIF: entry mp-4423
modify CIF: structure is get
Start structure modification!
LHS:  2La  RHS:  Pr,La
Term:  Pr
Term goups:  None   ;  Pr
Term:  La
Term goups:  None   ;  La
Parsed command:  {'from': 'La', 'total': 2

  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)


CIF input:  mp-6548  instruction:  nan
No instruction
CIF input:  mp-1223501  instruction:  K-> 0.65Cu,0.35K
modify CIF: entry mp-1223501
modify CIF: structure is get
Start structure modification!
LHS:  K  RHS:  0.65Cu,0.35K
Term:  0.65Cu
Term goups:  0.65   ;  Cu
Term:  0.35K
Term goups:  0.35   ;  K
Parsed command:  {'from': 'K', 'total': 1.0, 'to': {'Cu': 0.65, 'K': 0.35}}
Ordered site: K
Ordered site: La
Ordered site: Nb
Ordered site: Nb
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  mp-6680  instruction:  Rb->0.1Rb, 0.9Cu
modify CIF: entry mp-6680
modify CIF: structure is get
Start structure modification!
LHS:  Rb  RHS:  0.1Rb,0.9Cu
Term:  0.1Rb
Term goups:  0.1   ;  Rb
Term:  0.9Cu
Term goups:  0.9   ;  Cu
Parsed com

  writer: Any = CifWriter(self, **kwargs)


CIF input:  mp-1205881  instruction:  2Nb->1.9Nb, 0.1Mo
modify CIF: entry mp-1205881
modify CIF: structure is get
Start structure modification!
LHS:  2Nb  RHS:  1.9Nb,0.1Mo
Term:  1.9Nb
Term goups:  1.9   ;  Nb
Term:  0.1Mo
Term goups:  0.1   ;  Mo
Parsed command:  {'from': 'Nb', 'total': 2.0, 'to': {'Nb': 0.95, 'Mo': 0.05}}
Ordered site: La
Ordered site: Nb
Ordered site: Nb
Ordered site: H
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  mp-1205881  instruction:  2Nb->1.85Nb, 0.15Mo
modify CIF: entry mp-1205881
modify CIF: structure is get
Start structure modification!
LHS:  2Nb  RHS:  1.85Nb,0.15Mo
Term:  1.85Nb
Term goups:  1.85   ;  Nb
Term:  0.15Mo
Term goups:  0.15   ;  Mo
Parsed command:  {'from': 'Nb', 'total': 2.0, 

  writer: Any = CifWriter(self, **kwargs)


CIF input:  mp-1179025  instruction:  3Ti->2.7Ti, 0.3Cr
modify CIF: entry mp-1179025
modify CIF: structure is get
Start structure modification!
LHS:  3Ti  RHS:  2.7Ti,0.3Cr
Term:  2.7Ti
Term goups:  2.7   ;  Ti
Term:  0.3Cr
Term goups:  0.3   ;  Cr
Parsed command:  {'from': 'Ti', 'total': 3.0, 'to': {'Ti': 0.9, 'Cr': 0.09999999999999999}}
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  mp-1179025  instruction:  3Ti->2.6Ti, 0.4Cr
modify CIF: entry mp-1179025
modify CIF: structure is get
Start structure modification!
LHS:  3Ti  RH

  struct = parser.parse_structures(primitive=primitive)[0]
  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)


CIF input:  mp-1021511  instruction:  Cd->0.3Zn, 0.7Cd
modify CIF: entry mp-1021511
modify CIF: structure is get
Start structure modification!
LHS:  Cd  RHS:  0.3Zn,0.7Cd
Term:  0.3Zn
Term goups:  0.3   ;  Zn
Term:  0.7Cd
Term goups:  0.7   ;  Cd
Parsed command:  {'from': 'Cd', 'total': 1.0, 'to': {'Zn': 0.3, 'Cd': 0.7}}
Ordered site: Cd
Ordered site: S
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  mp-1021511  instruction:  Cd->0.4Zn, 0.6Cd
modify CIF: entry mp-1021511
modify CIF: structure is get
Start structure modification!
LHS:  Cd  RHS:  0.4Zn,0.6Cd
Term:  0.4Zn
Term goups:  0.4   ;  Zn
Term:  0.6Cd
Term goups:  0.6   ;  Cd
Parsed command:  {'from': 'Cd', 'total': 1.0, 'to': {'Zn': 0.4, 'Cd': 0.6}}
Ordered site: Cd
Ordered site: S
Finish structure modification!
-------------------------------
-------------------------------
--

  writer: Any = CifWriter(self, **kwargs)


CIF input:  mp-557195  instruction:  3Nb->2.99Nb, 0.01Rh; K->H
modify CIF: entry mp-557195
modify CIF: structure is get
Start structure modification!
LHS:  3Nb  RHS:  2.99Nb,0.01Rh
Term:  2.99Nb
Term goups:  2.99   ;  Nb
Term:  0.01Rh
Term goups:  0.01   ;  Rh
Parsed command:  {'from': 'Nb', 'total': 3.0, 'to': {'Nb': 0.9966666666666667, 'Rh': 0.0033333333333333335}}
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Or

  writer: Any = CifWriter(self, **kwargs)


CIF input:  mp-557195  instruction:  K->H; 3Nb->1.5Nb, 1.5Ta
modify CIF: entry mp-557195
modify CIF: structure is get
Start structure modification!
LHS:  K  RHS:  H
Term:  H
Term goups:  None   ;  H
Parsed command:  {'from': 'K', 'total': 1.0, 'to': {'H': 1.0}}
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: 

  writer: Any = CifWriter(self, **kwargs)


CIF input:  mp-6548  instruction:  K->H
modify CIF: entry mp-6548
modify CIF: structure is get
Start structure modification!
LHS:  K  RHS:  H
Term:  H
Term goups:  None   ;  H
Parsed command:  {'from': 'K', 'total': 1.0, 'to': {'H': 1.0}}
Ordered site: K
Ordered site: K
Ordered site: La
Ordered site: La
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  mp-1245098  instruction:  nan
No instruction
CIF input:  -1  instruction:  nan
No instruction
CIF input:  -1  instruction:  nan
No instruction
CIF input:  -1  instruction:  nan
No instruction
CIF input:  -1  instruction:  nan
No instruction
CIF input:  -1  instruction:  nan
No ins

  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  3Nb  RHS:  Nb,2Ta
Term:  Nb
Term goups:  None   ;  Nb
Term:  2Ta
Term goups:  2   ;  Ta
Parsed command:  {'from': 'Nb', 'total': 3.0, 'to': {'Nb': 0.3333333333333333, 'Ta': 0.6666666666666666}}
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
O

  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  2H  RHS:  1.4H,0.6Na
Term:  1.4H
Term goups:  1.4   ;  H
Term:  0.6Na
Term goups:  0.6   ;  Na
Parsed command:  {'from': 'H', 'total': 2.0, 'to': {'H': 0.7, 'Na': 0.3}}
Ordered site: Na
Ordered site: Na
Ordered site: La
Ordered site: La
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
LHS:  3Ti  RHS:  2.99Ti,0.02Mn
Term:  2.99Ti
Term goups:  2.99   ;  Ti
Term:  0.02Mn
Term goups:  0.02   ;  Mn
Parsed command:  {'from': 'Ti', 'total': 3.0, 'to': {'Ti': 0.9933554817275747, 'Mn': 0.0066445182724252485}}
Ordered site: Na
Ordered site: Na
Ordered site: La
Ordered site: La
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Orde

In [647]:
modify_all_CIFs("COD_ID", "COD_CIF_modifier", "COD_CIF_modified","COD")

CIF input:  1001842  instruction:  nan
No instruction
CIF input:  1545643  instruction:  nan
No instruction
CIF input:  -1  instruction:  nan
No instruction
CIF input:  2004917  instruction:  nan
No instruction
CIF input:  1521061  instruction:  nan
No instruction
CIF input:  2238958  instruction:  nan
No instruction
CIF input:  1518045  instruction:  nan
No instruction
CIF input:  -1  instruction:  nan
No instruction
CIF input:  -1  instruction:  nan
No instruction
CIF input:  1010942  instruction:  nan
No instruction
CIF input:  1001842  instruction:  nan
No instruction
CIF input:  1545643  instruction:  nan
No instruction
CIF input:  -1  instruction:  nan
No instruction
CIF input:  2004917  instruction:  nan
No instruction
CIF input:  1521061  instruction:  nan
No instruction
CIF input:  2238958  instruction:  nan
No instruction
CIF input:  1518045  instruction:  nan
No instruction
CIF input:  -1  instruction:  nan
No instruction
CIF input:  -1  instruction:  nan
No instruction
CIF 

  writer: Any = CifWriter(self, **kwargs)


CIF input:  1522043  instruction:  Cs->H; Ta->Nb
modify CIF: entry 1522043
modify CIF: structure is get
Start structure modification!
LHS:  Cs  RHS:  H
Term:  H
Term goups:  None   ;  H
Parsed command:  {'from': 'Cs', 'total': 1.0, 'to': {'H': 1.0}}
Ordered site: Cs
Ordered site: Ca
Ordered site: Ca
Ordered site: Ta
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
LHS:  Ta  RHS:  Nb
Term:  Nb
Term goups:  None   ;  Nb
Parsed command:  {'from': 'Ta', 'total': 1.0, 'to': {'Nb': 1.0}}
Ordered site: H
Ordered site: Ca
Ordered site: Ca
Ordered site: Ta
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
------------

  writer: Any = CifWriter(self, **kwargs)


CIF input:  1531664  instruction:  nan
No instruction
CIF input:  1531664  instruction:  2Nb-> 1.9Nb, 0.1Ti
modify CIF: entry 1531664
modify CIF: structure is get
Start structure modification!
LHS:  2Nb  RHS:  1.9Nb,0.1Ti
Term:  1.9Nb
Term goups:  1.9   ;  Nb
Term:  0.1Ti
Term goups:  0.1   ;  Ti
Parsed command:  {'from': 'Nb', 'total': 2.0, 'to': {'Nb': 0.95, 'Ti': 0.05}}
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Pb
Ordered site: Pb
Ordered site: Pb
Ordered site: Pb
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site

  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  Nb  RHS:  Ta
Term:  Ta
Term goups:  None   ;  Ta
Parsed command:  {'from': 'Nb', 'total': 1.0, 'to': {'Ta': 1.0}}
Ordered site: K+
Ordered site: K+
Ordered site: K+
Ordered site: K+
Ordered site: K+
Ordered site: K+
Ordered site: K+
Ordered site: K+
Ordered site: K+
Ordered site: K+
Ordered site: K+
Ordered site: K+
Ordered site: K+
Ordered site: K+
Ordered site: K+
Ordered site: K+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: Nb5+
Ordered site: O2-
Ordered site: O2-
Ordered site: O2-
Ordered site: O2-
Ordered site: O2-
Ord

  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)


CIF input:  1522043  instruction:  Cs->H; Ca->Sr; Ta->Nb
modify CIF: entry 1522043
modify CIF: structure is get
Start structure modification!
LHS:  Cs  RHS:  H
Term:  H
Term goups:  None   ;  H
Parsed command:  {'from': 'Cs', 'total': 1.0, 'to': {'H': 1.0}}
Ordered site: Cs
Ordered site: Ca
Ordered site: Ca
Ordered site: Ta
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
LHS:  Ca  RHS:  Sr
Term:  Sr
Term goups:  None   ;  Sr
Parsed command:  {'from': 'Ca', 'total': 1.0, 'to': {'Sr': 1.0}}
Ordered site: H
Ordered site: Ca
Ordered site: Ca
Ordered site: Ta
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
LHS:  Ta  RHS:  Nb
Term:  Nb
Term goups:  None   ;  Nb
Parsed command:  {'from': 'Ta', 'total': 1.0

  writer: Any = CifWriter(self, **kwargs)


CIF input:  1522043  instruction:  Cs->H; Ca->Sr; 3Ta->Ta, 2Nb
modify CIF: entry 1522043
modify CIF: structure is get
Start structure modification!
LHS:  Cs  RHS:  H
Term:  H
Term goups:  None   ;  H
Parsed command:  {'from': 'Cs', 'total': 1.0, 'to': {'H': 1.0}}
Ordered site: Cs
Ordered site: Ca
Ordered site: Ca
Ordered site: Ta
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
LHS:  Ca  RHS:  Sr
Term:  Sr
Term goups:  None   ;  Sr
Parsed command:  {'from': 'Ca', 'total': 1.0, 'to': {'Sr': 1.0}}
Ordered site: H
Ordered site: Ca
Ordered site: Ca
Ordered site: Ta
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
LHS:  3Ta  RHS:  Ta,2Nb
Term:  Ta
Term goups:  None   ;  Ta
Term:  2Nb
Term goups:  2   ;  Nb

  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  Rb  RHS:  Ag
Term:  Ag
Term goups:  None   ;  Ag
Parsed command:  {'from': 'Rb', 'total': 1.0, 'to': {'Ag': 1.0}}
Ordered site: Rb
Ordered site: Ca
Ordered site: Ca
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  1526803  instruction:  nan
No instruction
CIF input:  2238958  instruction:  Rb->Ag; Ca->Sr
modify CIF: entry 2238958
modify CIF: structure is get
Start structure modification!
LHS:  Rb  RHS:  Ag
Term:  Ag
Term goups:  None   ;  Ag
Parsed command:  {'from': 'Rb', 'total': 1.0, 'to': {'Ag': 1.0}}
Ordered site: Rb
Ordered site: Ca
Ordered site: Ca
Ordered 

  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  Nd  RHS:  Pr
Term:  Pr
Term goups:  None   ;  Pr
Parsed command:  {'from': 'Nd', 'total': 1.0, 'to': {'Pr': 1.0}}
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Nd3+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: Ti4+
Ordered site: O2-
Ordered site: O2-
Ordered site: O2-
Ordered site: O2-
Ordered site: O2-
Ordered site: O2-
Ordered site: O2-
Ordered site: O2-
Ordered site: O2-
Ordered site: O2-
Ordered site: O2-
Ordered site: O

  writer: Any = CifWriter(self, **kwargs)
  CIF={'K': 1.0, 'La': 1.0, 'Nb': 2.0, 'O': 7.0}
  PMG={'K': 2.188, 'La': 1.936, 'Nb': 4.0, 'O': 14.0}
  ratios={'Nb': 2.0, 'La': 1.936, 'K': 2.188, 'O': 2.0}
  if struct := self._get_structure(data, primitive, symmetrized, check_occu=check_occu):
  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  3Ti  RHS:  2.5Ti,0.5Cr
Term:  2.5Ti
Term goups:  2.5   ;  Ti
Term:  0.5Cr
Term goups:  0.5   ;  Cr
Parsed command:  {'from': 'Ti', 'total': 3.0, 'to': {'Ti': 0.8333333333333334, 'Cr': 0.16666666666666666}}
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: Bi
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site:

  struct = parser.parse_structures(primitive=primitive)[0]
  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  Cd  RHS:  0.2Zn,0.8Cd
Term:  0.2Zn
Term goups:  0.2   ;  Zn
Term:  0.8Cd
Term goups:  0.8   ;  Cd
Parsed command:  {'from': 'Cd', 'total': 1.0, 'to': {'Zn': 0.2, 'Cd': 0.8}}
Ordered site: Cd2+
Ordered site: Cd2+
Ordered site: S2-
Ordered site: S2-
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  1011054  instruction:  Cd->0.3Zn, 0.7Cd
modify CIF: entry 1011054
modify CIF: structure is get
Start structure modification!
LHS:  Cd  RHS:  0.3Zn,0.7Cd
Term:  0.3Zn
Term goups:  0.3   ;  Zn
Term:  0.7Cd
Term goups:  0.7   ;  Cd
Parsed command:  {'from': 'Cd', 'total': 1.0, 'to': {'Zn': 0.3, 'Cd': 0.7}}
Ordered site: Cd2+
Ordered site: Cd2+
Ordered site: S2-
Ordered site: S2-
Finish structure modification!
-------------------------------
-------------------------------
----------

  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)


CIF input:  1521061  instruction:  K->H
modify CIF: entry 1521061
modify CIF: structure is get
Start structure modification!
LHS:  K  RHS:  H
Term:  H
Term goups:  None   ;  H
Parsed command:  {'from': 'K', 'total': 1.0, 'to': {'H': 1.0}}
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: K
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Ca
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Order

  writer: Any = CifWriter(self, **kwargs)


CIF input:  1001022  instruction:  nan
No instruction
CIF input:  1001022  instruction:  nan
No instruction
CIF input:  1522039  instruction:  nan
No instruction
CIF input:  1522043  instruction:  Cs->H; Ta->Nb
modify CIF: entry 1522043
modify CIF: structure is get
Start structure modification!
LHS:  Cs  RHS:  H
Term:  H
Term goups:  None   ;  H
Parsed command:  {'from': 'Cs', 'total': 1.0, 'to': {'H': 1.0}}
Ordered site: Cs
Ordered site: Ca
Ordered site: Ca
Ordered site: Ta
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
LHS:  Ta  RHS:  Nb
Term:  Nb
Term goups:  None   ;  Nb
Parsed command:  {'from': 'Ta', 'total': 1.0, 'to': {'Nb': 1.0}}
Ordered site: H
Ordered site: Ca
Ordered site: Ca
Ordered site: Ta
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered s

  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  3Ti  RHS:  2.3Ti,0.7Mn
Term:  2.3Ti
Term goups:  2.3   ;  Ti
Term:  0.7Mn
Term goups:  0.7   ;  Mn
Parsed command:  {'from': 'Ti', 'total': 3.0, 'to': {'Ti': 0.7666666666666666, 'Mn': 0.2333333333333333}}
Ordered site: Na
Ordered site: Na
Ordered site: Na
Ordered site: Na
Ordered site: La
Ordered site: La
Ordered site: La
Ordered site: La
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: Ti
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------

In [648]:
modify_all_CIFs("Springer_ID", "Springer_CIF_modifier", "Springer_CIF_modified","Springer")

CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction


'_atom_site_label'
No _symmetry_equiv_pos_as_xyz type key found. Spacegroup from _symmetry_space_group_name_H-M used.
No _symmetry_equiv_pos_as_xyz type key found. Spacegroup from _symmetry_space_group_name_H-M used.
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
  struct = parser.parse_structures(primitive=primitive)[0]
  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  Pr  RHS:  Sm
Term:  Sm
Term goups:  None   ;  Sm
Parsed command:  {'from': 'Pr', 'total': 1.0, 'to': {'Sm': 1.0}}
Ordered site: Rb
Ordered site: Pr
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  sd_1050391  instruction:  Pr->La
modify CIF: entry sd_1050391
modify CIF: structure is get
Start structure modification!
LHS:  Pr  RHS:  La
Term:  La
Term goups:  None   ;  La
Parsed command:  {'from': 'Pr', 'total': 1.0, 'to': {'La': 1.0}}
Ordered site: Rb
Ordered site: Pr
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure 

  writer: Any = CifWriter(self, **kwargs)


CIF input:  sd_0376709  instruction:  6Nb->2Nb,4Ta
modify CIF: entry sd_0376709
modify CIF: structure is get
Start structure modification!
LHS:  6Nb  RHS:  2Nb,4Ta
Term:  2Nb
Term goups:  2   ;  Nb
Term:  4Ta
Term goups:  4   ;  Ta
Parsed command:  {'from': 'Nb', 'total': 6.0, 'to': {'Nb': 0.3333333333333333, 'Ta': 0.6666666666666666}}
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb

  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  K  RHS:  Rb
Term:  Rb
Term goups:  None   ;  Rb
Parsed command:  {'from': 'K', 'total': 1.0, 'to': {'Rb': 1.0}}
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Rb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: Nb
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O

Pauling file corrections applied.
No structure parsed for section 1 in CIF.
'_atom_site_label'
No _symmetry_equiv_pos_as_xyz type key found. Spacegroup from _symmetry_space_group_name_H-M used.
No _symmetry_equiv_pos_as_xyz type key found. Spacegroup from _symmetry_space_group_name_H-M used.
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
  struct = parser.parse_structures(primitive=primitive)[0]
  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  K  RHS:  H
Term:  H
Term goups:  None   ;  H
Parsed command:  {'from': 'K', 'total': 1.0, 'to': {'H': 1.0}}
Disordered site: K0.833
K0.833
<class 'pymatgen.core.composition.Composition'>
Element replacement start: From  K  To:  {'H': 1.0}
New comp:  H0.833
Element replacement is done!
Disordered site: K0.833
K0.833
<class 'pymatgen.core.composition.Composition'>
Element replacement start: From  K  To:  {'H': 1.0}
New comp:  H0.833
Element replacement is done!
Disordered site: K0.833
K0.833
<class 'pymatgen.core.composition.Composition'>
Element replacement start: From  K  To:  {'H': 1.0}
New comp:  H0.833
Element replacement is done!
Disordered site: K0.833
K0.833
<class 'pymatgen.core.composition.Composition'>
Element replacement start: From  K  To:  {'H': 1.0}
New comp:  H0.833
Element replacement is done!
Disordered site: La0.665 K0.335
La0.665 K0.335
<class 'pymatgen.core.composition.Composition'>
Element replacement 

  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  Nd  RHS:  Pr
Term:  Pr
Term goups:  None   ;  Pr
Parsed command:  {'from': 'Nd', 'total': 1.0, 'to': {'Pr': 1.0}}
Ordered site: Cs
Ordered site: Nd
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  sd_1150217  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  nan  instruction:  nan
No instruction
CIF input:  sd_1150217  instruction:  Nd->Sm
modify CIF: entry sd_1150217
modify CIF: structu

'_atom_site_label'
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
  struct = parser.parse_structures(primitive=primitive)[0]
No structure parsed for section 1 in CIF.
'_atom_site_label'
No _symmetry_equiv_pos_as_xyz type key found. Spacegroup from _symmetry_space_group_name_H-M used.
No _symmetry_equiv_pos_as_xyz type key found. Spacegroup from _symmetry_space_group_name_H-M used.
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
  struct = parser.parse_structures(primitive=primitive)[0]


modify CIF: structure is get
Start structure modification!
LHS:  Cd  RHS:  0.5Zn,0.5Cd
Term:  0.5Zn
Term goups:  0.5   ;  Zn
Term:  0.5Cd
Term goups:  0.5   ;  Cd
Parsed command:  {'from': 'Cd', 'total': 1.0, 'to': {'Zn': 0.5, 'Cd': 0.5}}
Ordered site: Cd
Ordered site: Cd
Ordered site: S
Ordered site: S
Finish structure modification!
-------------------------------
-------------------------------
-------------------------------
-------------------------------
-------------------------------
CIF input:  sd_0304039  instruction:  Cd->0.6Zn, 0.4Cd
modify CIF: entry sd_0304039
modify CIF: structure is get
Start structure modification!
LHS:  Cd  RHS:  0.6Zn,0.4Cd
Term:  0.6Zn
Term goups:  0.6   ;  Zn
Term:  0.4Cd
Term goups:  0.4   ;  Cd
Parsed command:  {'from': 'Cd', 'total': 1.0, 'to': {'Zn': 0.6, 'Cd': 0.4}}
Ordered site: Cd
Ordered site: Cd
Ordered site: S
Ordered site: S
Finish structure modification!
-------------------------------
-------------------------------
--------------------

  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)


modify CIF: structure is get
Start structure modification!
LHS:  2Sr  RHS:  1.5Sr,0.5Ba
Term:  1.5Sr
Term goups:  1.5   ;  Sr
Term:  0.5Ba
Term goups:  0.5   ;  Ba
Parsed command:  {'from': 'Sr', 'total': 2.0, 'to': {'Sr': 0.75, 'Ba': 0.25}}
Ordered site: Cs
Ordered site: Sr
Ordered site: Sr
Ordered site: Ta
Ordered site: Ta
Ordered site: Ta
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
Ordered site: O
LHS:  Cs  RHS:  H
Term:  H
Term goups:  None   ;  H
Parsed command:  {'from': 'Cs', 'total': 1.0, 'to': {'H': 1.0}}
Ordered site: Cs
Disordered site: Sr0.75 Ba0.25
Sr0.75 Ba0.25
<class 'pymatgen.core.composition.Composition'>
Element replacement start: From  Cs  To:  {'H': 1.0}
New comp:  Sr0.75 Ba0.25
Element replacement is done!
Disordered site: Sr0.75 Ba0.25
Sr0.75 Ba0.25
<class 'pymatgen.core.composition.Composition'>
Element replacement start: From  Cs  To:  {'H': 1.0}
New comp:  Sr0.75

  writer: Any = CifWriter(self, **kwargs)
  writer: Any = CifWriter(self, **kwargs)


CIF input:  sd_1958947  instruction:  Nb->Ta
modify CIF: entry sd_1958947
modify CIF: structure is get
Start structure modification!
LHS:  Nb  RHS:  Ta
Term:  Ta
Term goups:  None   ;  Ta
Parsed command:  {'from': 'Nb', 'total': 1.0, 'to': {'Ta': 1.0}}
Disordered site: Nb0.5 Ti0.5
Nb0.5 Ti0.5
<class 'pymatgen.core.composition.Composition'>
Element replacement start: From  Nb  To:  {'Ta': 1.0}
New comp:  Ta0.5 Ti0.5
Element replacement is done!
Disordered site: Nb0.5 Ti0.5
Nb0.5 Ti0.5
<class 'pymatgen.core.composition.Composition'>
Element replacement start: From  Nb  To:  {'Ta': 1.0}
New comp:  Ta0.5 Ti0.5
Element replacement is done!
Disordered site: Bi0.75 Sr0.25
Bi0.75 Sr0.25
<class 'pymatgen.core.composition.Composition'>
Element replacement start: From  Nb  To:  {'Ta': 1.0}
New comp:  Bi0.75 Sr0.25
Element replacement is done!
Disordered site: Bi0.75 Sr0.25
Bi0.75 Sr0.25
<class 'pymatgen.core.composition.Composition'>
Element replacement start: From  Nb  To:  {'Ta': 1.0}
New comp:

In [649]:
df.to_excel("checkpoint_CIF_modification.xlsx")

#CIF Verification

In [650]:
#def verifyCIFFilesColumn(column):
#df["Materials Project verification"] = df.apply(lambda row: checkCompositionStructureMatching(row['Perovskite'], row['Materials Project ID']), axis=1)
df["Materials Project verification"] = df.apply(lambda row: checkCompositionStructureMatching(row['Perovskite'], row['MP_CIF_modified']), axis=1)

checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element K, 16.0), (Element Nb, 24.0), (Element O, 68.0)])
K  comp1:  16.0  comp2:  4.0
Nb  comp1:  24.0  comp2:  6.0
O  comp1:  68.0  comp2:  17.0
Compositino scaling factors:  0.25  std: 0.0
CIF file: mp-560692  ||  K16 Nb24 O68  ||  K4 Nb6 O17  =  True
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element K, 1.0), (Element La, 1.0), (Element Nb, 2.0), (Element O, 7.0)])
K  comp1:  1.0  comp2:  1.0
La  comp1:  1.0  comp2:  1.0
Nb  comp1:  2.0  comp2:  2.0
O  comp1:  7.0  comp2:  7.0
Compositino scaling factors:  1.0  std: 0.0
CIF file: mp-1223501  ||  K1 La1 Nb2 O7  ||  K1 La1 Nb2 O7  =  True
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element Rb, 2.0), (Element La, 2.0), (Element Nb, 4.0), (Element O, 14.0)])
Rb  comp1:  2.0  comp2:  1.0
La  comp1:  2.0  comp2

  struct = parser.parse_structures(primitive=primitive)[0]
  struct = parser.parse_structures(primitive=primitive)[0]


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element Rb, 1.0), (Element Ca, 2.0), (Element Ta, 3.0), (Element O, 10.0)])
Rb  comp1:  1.0  comp2:  1.0
Ca  comp1:  2.0  comp2:  2.0
Ta  comp1:  3.0  comp2:  3.0
O  comp1:  10.0  comp2:  10.0
Compositino scaling factors:  1.0  std: 0.0
CIF file: mp-6680  ||  Rb1 Ca2 Ta3 O10  ||  Rb1 Ca2 Ta3 O10  =  True
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element Na, 8.0), (Element Ta, 8.0), (Element O, 24.0)])
Na  comp1:  8.0  comp2:  1.0
Ta  comp1:  8.0  comp2:  1.0
O  comp1:  24.0  comp2:  3.0
Compositino scaling factors:  0.125  std: 0.0
CIF file: mp-676280  ||  Na8 Ta8 O24  ||  Na1 Ta1 O3  =  True
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element Li, 2.0), (Element Ta, 2.0), (Element O, 6.0)])
Li 

In [651]:
#df["COD verification"] = df.apply(lambda row: checkCompositionStructureMatching(row['Perovskite'], row['COD_ID']), axis=1)
df["COD verification"] = df.apply(lambda row: checkCompositionStructureMatching(row['Perovskite'], row['COD_CIF_modified']), axis=1)

checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Species K+, 16.0), (Species Nb5+, 24.0), (Species O2-, 68.0)])
K+  comp1:  16.0  comp2:  0
Nb5+  comp1:  24.0  comp2:  0
O2-  comp1:  68.0  comp2:  0
Compositino scaling factors:  0.0  std: 0.0
CIF file: 1001842  ||  K+16 Nb5+24 O2-68  ||  K4 Nb6 O17  =  True
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element K, 2.188), (Element La, 1.936), (Element Nb, 4.0), (Element O, 14.0)])
K  comp1:  2.188  comp2:  1.0
La  comp1:  1.936  comp2:  1.0
Nb  comp1:  4.0  comp2:  2.0
O  comp1:  14.0  comp2:  7.0
Compositino scaling factors:  0.5  std: 0.022046731764576926
CIF file: 1545643  ||  K2.188 La1.936 Nb4 O14  ||  K1 La1 Nb2 O7  =  True
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_item

  struct = parser.parse_structures(primitive=primitive)[0]
  struct = parser.parse_structures(primitive=primitive)[0]
  struct = parser.parse_structures(primitive=primitive)[0]


checkCompositionStructureMatching: structure is get
dict_items([(Element K, 16.0), (Element Ta, 7.9999999999999964), (Element Nb, 15.999999999999993), (Element O, 68.0)])
K  comp1:  16.0  comp2:  4.0
Ta  comp1:  7.9999999999999964  comp2:  2.0
Nb  comp1:  15.999999999999993  comp2:  4.0
O  comp1:  68.0  comp2:  17.0
Compositino scaling factors:  0.25  std: 5.551115123125783e-17
CIF file: M_COD12  ||  K16 Ta8 Nb16 O68  ||  K4 Nb4 Ta2 O17  =  True
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element K, 16.0), (Element Ta, 12.0), (Element Nb, 12.0), (Element O, 68.0)])
K  comp1:  16.0  comp2:  4.0
Ta  comp1:  12.0  comp2:  3.0
Nb  comp1:  12.0  comp2:  3.0
O  comp1:  68.0  comp2:  17.0
Compositino scaling factors:  0.25  std: 0.0
CIF file: M_COD13  ||  K16 Ta12 Nb12 O68  ||  K4 Nb3 Ta3 O17  =  True
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element K, 16.0), (Element

  CIF={'Ba': 1.0, 'Bi': 4.0, 'O': 15.0, 'Ti': 4.0}
  PMG={'Ti': 16.0, 'Ba': 3.76, 'Bi': 16.239999999999995, 'O': 60.0}
  ratios={'Ba': 3.76, 'Bi': 4.059999999999999, 'Ti': 4.0, 'O': 4.0}
  if struct := self._get_structure(data, primitive, symmetrized, check_occu=check_occu):


checkCompositionStructureMatching: structure is get
dict_items([(Element Na, 2.0), (Element La, 2.0), (Element Ta, 4.0), (Element O, 14.0)])
Na  comp1:  2.0  comp2:  1.0
La  comp1:  2.0  comp2:  1.0
Ta  comp1:  4.0  comp2:  2.0
O  comp1:  14.0  comp2:  7.0
Compositino scaling factors:  0.5  std: 0.0
CIF file: 1544432  ||  Na2 La2 Ta4 O14  ||  Na1 La1 Ta2 O7  =  True
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatchin

In [652]:
#df["Springer verification"] = df.apply(lambda row: checkCompositionStructureMatching(row['Perovskite'], row['Springer_ID']), axis=1)
df["Springer verification"] = df.apply(lambda row: checkCompositionStructureMatching(row['Perovskite'], row['Springer_CIF_modified']), axis=1)

checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatc

No structure parsed for section 1 in CIF.
'_atom_site_label'
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
t parsed as T
No structure parsed for section 3 in CIF.
could not convert string to float: 't'
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
t parsed as T
No structure parsed for section 4 in CIF.
could not convert string to float: 't'
  struct = parser.parse_structures(primitive=primitive)[0]


checkCompositionStructureMatching: structure is get
dict_items([(Element Rb, 1.0), (Element Pr, 1.0), (Element Ta, 2.0), (Element O, 7.0)])
Rb  comp1:  1.0  comp2:  1.0
Pr  comp1:  1.0  comp2:  1.0
Ta  comp1:  2.0  comp2:  2.0
O  comp1:  7.0  comp2:  7.0
Compositino scaling factors:  1.0  std: 0.0
CIF file: sd_1050391  ||  Rb1 Pr1 Ta2 O7  ||  Rb1 Pr1 Ta2 O7  =  True
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element Rb, 1.0), (Element Nd, 1.0), (Element Ta, 2.0), (Element O, 7.0)])
Rb  comp1:  1.0  comp2:  1.0
Nd  comp1:  1.0  comp2:  1.0
Ta  comp1:  2.0  comp2:  2.0
O  comp1:  7.0  comp2:  7.0
Compositino scaling factors:  1.0  std: 0.0
CIF file: M_Springer1  ||  Rb1 Nd1 Ta2 O7  ||  Rb1 Nd1 Ta2 O7  =  True
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element Rb, 1.0), (Element Sm, 1.0), (Element Ta, 2.0), (Element O, 7.0)])
Rb  comp1:  1.0  comp2:  1.0
Sm  comp1:

10 fractional coordinates rounded to ideal values to avoid issues with finite precision.
No structure parsed for section 1 in CIF.
'_atom_site_label'
No _symmetry_equiv_pos_as_xyz type key found. Spacegroup from _symmetry_space_group_name_H-M used.
No _symmetry_equiv_pos_as_xyz type key found. Spacegroup from _symmetry_space_group_name_H-M used.
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
  struct = parser.parse_structures(primitive=primitive)[0]


checkCompositionStructureMatching: structure is get
dict_items([(Element Rb, 16.0), (Element Ta, 7.9999999999999964), (Element Nb, 15.999999999999993), (Element O, 68.0)])
Rb  comp1:  16.0  comp2:  0
Ta  comp1:  7.9999999999999964  comp2:  2.0
Nb  comp1:  15.999999999999993  comp2:  4.0
O  comp1:  68.0  comp2:  17.0
Compositino scaling factors:  0.25  std: 0.10825317547305487
CIF file: M_Springer9  ||  Rb16 Ta8 Nb16 O68  ||  K4 Nb4 Ta2 O17  =  False
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element Rb, 16.0), (Element Ta, 12.0), (Element Nb, 12.0), (Element O, 68.0)])
Rb  comp1:  16.0  comp2:  0
Ta  comp1:  12.0  comp2:  3.0
Nb  comp1:  12.0  comp2:  3.0
O  comp1:  68.0  comp2:  17.0
Compositino scaling factors:  0.25  std: 0.10825317547305482
CIF file: M_Springer10  ||  Rb16 Ta12 Nb12 O68  ||  K4 Nb3 Ta3 O17  =  False
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([

  symbol = self._parse_symbol(data["_atom_site_type_symbol"][idx])
'_atom_site_label'
No _symmetry_equiv_pos_as_xyz type key found. Spacegroup from _symmetry_space_group_name_H-M used.
OH parsed as 
No _symmetry_equiv_pos_as_xyz type key found. Spacegroup from _symmetry_space_group_name_H-M used.
OH parsed as 
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
  struct = parser.parse_structures(primitive=primitive)[0]


checkCompositionStructureMatching: structure is get
dict_items([(Element K, 4.002), (Element La, 1.33), (Element Ta, 4.0), (Element O, 14.0)])
K  comp1:  4.002  comp2:  6.0
La  comp1:  1.33  comp2:  2.0
Ta  comp1:  4.0  comp2:  6.0
O  comp1:  14.0  comp2:  21.0
Compositino scaling factors:  1.5  std: 0.0017628337366642317
CIF file: sd_1810747  ||  K4.002 La1.33 Ta4 O14  ||  K6 La2 Ta6 O21  =  True
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element La, 1.33), (Element H, 4.002000000000001), (Element Ta, 4.0), (Element O, 14.0)])
La  comp1:  1.33  comp2:  2.0
H  comp1:  4.002000000000001  comp2:  6.0
Ta  comp1:  4.0  comp2:  6.0
O  comp1:  14.0  comp2:  21.0
Compositino scaling factors:  1.5  std: 0.0017628337366643262
CIF file: M_Springer18  ||  La1.33 H4.002 Ta4 O14  ||  H6 La2 Ta6 O21  =  True
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: ent

'_atom_site_label'
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
t parsed as T
No structure parsed for section 3 in CIF.
could not convert string to float: 't'
No _symmetry_equiv_pos_as_xyz type key found. Defaulting to P1.
t parsed as T
No structure parsed for section 4 in CIF.
could not convert string to float: 't'
  struct = parser.parse_structures(primitive=primitive)[0]


ERROR: Invalid structure for  sd_1241787
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element Nd, 4.0), (Element Ta, 8.0), (Element H, 4.0), (Element O, 28.0)])
Nd  comp1:  4.0  comp2:  1.0
Ta  comp1:  8.0  comp2:  2.0
H  comp1:  4.0  comp2:  1.0
O  comp1:  28.0  comp2:  7.0
Compositino scaling factors:  0.25  std: 0.0
CIF file: M_Springer38  ||  Nd4 Ta8 H4 O28  ||  H1 Nd1 Ta2 O7  =  True
checkCompositionStructureMatching: entry
checkCompositionStructureMatching: structure is get
dict_items([(Element Li, 4.0), (Element Nd, 4.0), (Element Ta, 8.0), (Element O, 28.0)])
Li  comp1:  4.0  comp2:  1.0
Nd  comp1:  4.0  comp2:  1.0
Ta  comp1:  8.0  comp2:  2.0
O  comp1:  28.0  comp2:  7.0
Compositino scaling factors:  0.25  std: 0.0
CIF file: sd_1955780  ||  Li4 Nd4 Ta8 O28  ||  Li1 Nd1 Ta2 

In [653]:
def markEntriesWithoutVerifiedCIF(ver1, ver2, ver3):
  if(ver1 or ver2 or ver3):
    return False
  return True

In [654]:
df["General verification"] = df.apply(lambda row: markEntriesWithoutVerifiedCIF(row['Materials Project verification'], row['COD verification'],row['Springer verification']), axis=1)
df_filtered = df[df['General verification'] != True]
df_filtered.info()

<class 'pandas.core.frame.DataFrame'>
Index: 752 entries, 0 to 1088
Data columns (total 88 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   Perovskite                            752 non-null    object 
 1   Hill formula                          529 non-null    object 
 2   Interlayer space composition          4 non-null      object 
 3   Bandgap, eV                           722 non-null    float64
 4   DOI                                   747 non-null    object 
 5   Materials Project ID                  734 non-null    object 
 6   COD_ID                                687 non-null    object 
 7   Springer_ID                           193 non-null    object 
 8   MP_CIF_modifier                       279 non-null    object 
 9   COD_CIF_modifier                      152 non-null    object 
 10  Springer_CIF_modifier                 62 non-null     object 
 11  Materials Project verif

In [655]:
df_filtered.to_excel("checkpoint_CIF_verification.xlsx")

In [656]:
df.to_excel("checkpoint_CIF_verification_labels.xlsx")

In [657]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1089 entries, 0 to 1088
Data columns (total 88 columns):
 #   Column                                Non-Null Count  Dtype  
---  ------                                --------------  -----  
 0   Perovskite                            1089 non-null   object 
 1   Hill formula                          730 non-null    object 
 2   Interlayer space composition          5 non-null      object 
 3   Bandgap, eV                           1054 non-null   float64
 4   DOI                                   1084 non-null   object 
 5   Materials Project ID                  1031 non-null   object 
 6   COD_ID                                953 non-null    object 
 7   Springer_ID                           248 non-null    object 
 8   MP_CIF_modifier                       332 non-null    object 
 9   COD_CIF_modifier                      197 non-null    object 
 10  Springer_CIF_modifier                 107 non-null    object 
 11  Materials Project