Remove bad-listed reactions

In [5]:
import json
from classes import ReactionEntry, from_dict
from exclude import BAD_DOI, BAD_TARGETS, BAD_PRECURSORS


# Substrings that disqualify a reaction when they occur anywhere inside one of
# its target strings or precursor formulas.
bad_list = ['*', '-', 'x', '+', '/', 'ac', '(2N)', '(3N)', '(4N)', '(5N)', '(6N)', '7LiOH', '2Ni(OH)2']

# Decode the JSON as UTF-8 explicitly so the result does not depend on the
# platform's locale encoding (formulas contain non-ASCII characters such as '·').
with open('./solid-state_dataset_20200713.json', encoding='utf-8') as f:
    ss_data = json.load(f)
reactions = ss_data['reactions']

# Reactions with fewer precursors than this are dropped.
min_pre = 2

# Name kept as-is (sic) in case other cells refer to it.
filetered_reactions = []
for rxn in reactions:
    if rxn['doi'] in BAD_DOI:
        continue
    if len(rxn['precursors']) < min_pre:
        continue

    # Exact-match exclusions against the curated lists from `exclude`.
    targets = rxn['targets_string']
    if any(target in BAD_TARGETS for target in targets):
        continue
    precursor_formulas = [precursor['material_formula'] for precursor in rxn['precursors']]
    if any(formula in BAD_PRECURSORS for formula in precursor_formulas):
        continue

    # Substring exclusions: one hit in any target or precursor formula is
    # enough, and any() stops scanning at the first hit.
    candidate_strings = [*targets, *precursor_formulas]
    if any(bad in candidate for bad in bad_list for candidate in candidate_strings):
        continue

    filetered_reactions.append(rxn)

# Turn the surviving raw dicts into ReactionEntry objects via from_dict.
reactions = [from_dict(reaction, ReactionEntry) for reaction in filetered_reactions]

Define data structures and helper functions

In [24]:
import pandas as pd
import re
from typing import List, NamedTuple, Optional

# Define the necessary structures
class FormulaPart(NamedTuple):
    """One term of a reaction formula.

    Used as the element type of ``Formula.left_side`` and
    ``Formula.right_side``.
    """
    # Amount attached to the material, stored as text.
    # NOTE(review): presumably the stoichiometric coefficient — confirm against the dataset.
    amount: str
    # The material this term refers to, stored as text.
    material: str

class Composition(NamedTuple):
    """One entry of ``Material.composition``."""
    # Formula of this component, stored as text.
    formula: str
    # Amount of this component, stored as text.
    amount: str
    # Mapping describing the elements of this component.
    # NOTE(review): presumably element symbol -> amount; key/value schema is not visible here — confirm.
    elements: dict

class Material(NamedTuple):
    """Description of a single material.

    Used for ``ReactionEntry.target`` and as the element type of
    ``ReactionEntry.precursors``.
    """
    # Textual forms of the material.
    # NOTE(review): the difference between string, formula and name is not
    # visible in this notebook — confirm against the dataset documentation.
    material_string: str
    material_formula: str
    material_name: str
    # Optional phase label; None when absent.
    phase: Optional[str]
    # Flag marking the material as an acronym.
    is_acronym: bool
    # Component breakdown of the material.
    composition: List[Composition]
    # Mappings for variables appearing in amounts and in element positions.
    # NOTE(review): key/value schema is not visible here — confirm.
    amount_vars: dict
    element_vars: dict
    # Additives listed for the material, as text.
    additives: List[str]
    # Optional oxygen-deficiency marker, as text; None when absent.
    oxygen_deficiency: Optional[str]

class OperationValue(NamedTuple):
    """A quantity attached to an operation condition.

    Element type of ``OperationConditions.heating_temperature`` and
    ``OperationConditions.heating_time``.
    """
    # Optional lower and upper bounds of the quantity.
    min_value: Optional[float]
    max_value: Optional[float]
    # Individual values; extract_temp returns the first one when this list is non-empty.
    values: List[float]
    # Unit of the values, as text.
    units: str

class OperationConditions(NamedTuple):
    """Conditions recorded for one ``Operation``; every field may be None."""
    # Temperature entries; extract_temp scans these for the first non-empty ``values``.
    heating_temperature: Optional[List[OperationValue]]
    # Duration entries.
    heating_time: Optional[List[OperationValue]]
    # Atmosphere labels, as text.
    heating_atmosphere: Optional[List[str]]
    # Mixing device and mixing media, as text.
    mixing_device: Optional[str]
    mixing_media: Optional[str]

class Operation(NamedTuple):
    """One synthesis operation of a reaction entry."""
    # Operation category; extract_temperatures only looks at 'HeatingOperation'.
    type: str
    # Token associated with the operation; extract_temperatures matches it
    # against 'sintered' and 'calcined'.
    token: str
    # Conditions recorded for this operation.
    conditions: OperationConditions

class Formula(NamedTuple):
    """Structured form of a reaction, stored in ``ReactionEntry.reaction``."""
    # Terms on the left-hand and right-hand side of the reaction.
    left_side: List[FormulaPart]
    right_side: List[FormulaPart]
    # Mapping describing element substitutions.
    # NOTE(review): key/value schema is not visible here — confirm.
    element_substitution: dict

# NOTE(review): this definition rebinds the name ReactionEntry that the first
# cell imports from `classes`; objects built there keep the imported class.
# Confirm that the two definitions are meant to coexist.
class ReactionEntry(NamedTuple):
    """One reaction record."""
    # DOI of the source publication.
    doi: str
    # Text of the paragraph the record refers to.
    paragraph_string: str
    # Synthesis type label (the recorded output shows 'solid-state').
    synthesis_type: str
    # Reaction written as text, with '==' between the two sides
    # (split_reaction relies on that separator).
    reaction_string: str
    # Structured form of the reaction.
    reaction: Formula
    # Target formulas, as text.
    targets_string: List[str]
    # Target material description.
    target: Material
    # Precursor material descriptions.
    precursors: List[Material]
    # Synthesis operations; extract_temperatures reads temperatures from these.
    operations: List[Operation]

# Helpers for splitting and parsing reaction strings
def split_reaction(reaction_string):
    """Split a reaction string at '==' into its two sides.

    Args:
        reaction_string: Text of the form '<reactants> == <products>'.

    Returns:
        A ``(reactants, products)`` tuple with surrounding whitespace removed
        from each side.

    Raises:
        ValueError: If the string does not contain exactly one '=='.
    """
    lhs, rhs = reaction_string.split('==')
    return lhs.strip(), rhs.strip()



def parse_chemical_formula(formula):
    """Extract (element, multiplicity, atomic_number) triples from a formula string.

    An element token is an uppercase letter followed by any lowercase letters.
    It may be followed by a count such as ``2``, ``0.95`` or ``.5``. A count
    must contain at least one digit, so a bare '.' after an element (e.g. the
    full stop in ``'H2O.'``) is not mistaken for a multiplicity.

    Parentheses, leading coefficients and any other characters are ignored;
    tokens are reported in the order they appear.

    Args:
        formula: Formula text, e.g. ``'Li4Ti5O12'``.

    Returns:
        A list of ``(element, multiplicity, atomic_number)`` tuples of strings.
        ``multiplicity`` defaults to ``'1'`` when no count follows the element.

        NOTE(review): ``atomic_number`` is the first run of digits of the
        multiplicity (``'0.95'`` -> ``'0'``), not a periodic-table atomic
        number. The behaviour is kept for existing consumers; confirm whether
        a real atomic-number lookup was intended.
    """
    pattern = r'([A-Z][a-z]*)(\d+(?:\.\d+)?|\.\d+)?'
    element_details = []
    for element, multiplicity in re.findall(pattern, formula):
        # An unmatched optional group comes back as '' from findall.
        multiplicity = multiplicity or '1'
        # multiplicity always contains a digit here, so the search succeeds.
        atomic_number = re.search(r'\d+', multiplicity).group()
        element_details.append((element, multiplicity, atomic_number))
    return element_details

def extract_element_details(reaction):
    """Collect element details for every '+'-separated term of one reaction side.

    Args:
        reaction: One side of a reaction, e.g. ``'2 Li2CO3 + 5 TiO2'``.

    Returns:
        A flat list with the tuples produced by ``parse_chemical_formula`` for
        each term, in order of appearance.
    """
    return [
        detail
        for term in reaction.split('+')
        for detail in parse_chemical_formula(term.strip())
    ]

def expand_element_details(element_details, prefix):
    """Flatten element triples into a dict of numbered column values.

    Args:
        element_details: Sequence of ``(element, multiplicity, atomic_number)``
            triples.
        prefix: Text placed in front of every generated key, e.g. ``'input'``.

    Returns:
        A dict with three keys per triple, numbered from 1:
        ``{prefix}_element_{n}``, ``{prefix}_multiplicity_{n}`` and
        ``{prefix}_atomic_number_{n}``.
    """
    columns = {}
    for position, (element, multiplicity, atomic_number) in enumerate(element_details, start=1):
        columns.update({
            f'{prefix}_element_{position}': element,
            f'{prefix}_multiplicity_{position}': multiplicity,
            f'{prefix}_atomic_number_{position}': atomic_number,
        })
    return columns

def extract_temperatures(operations):
    """Return the sintering and calcination temperatures found in the operations.

    Only operations whose ``type`` is 'HeatingOperation' are considered. The
    ``token`` decides which slot is filled ('sintered' or 'calcined'); when a
    token occurs more than once, the last occurrence wins.

    Args:
        operations: Iterable of objects exposing ``type``, ``token`` and
            ``conditions``.

    Returns:
        A ``(sintering_temp, calcination_temp)`` tuple; a slot is None when no
        matching operation was seen or ``extract_temp`` returned None.
    """
    latest = {'sintered': None, 'calcined': None}
    for step in operations:
        if step.type != 'HeatingOperation':
            continue
        if step.token in latest:
            latest[step.token] = extract_temp(step.conditions)
    return latest['sintered'], latest['calcined']

def extract_temp(conditions):
    """Return the first available heating-temperature value, or None.

    Args:
        conditions: Object exposing ``heating_temperature``, an optional list
            of entries that each expose ``values``.

    Returns:
        The first item of the first entry whose ``values`` is non-empty; None
        when there are no entries or none of them has values.
    """
    readings = conditions.heating_temperature
    if not readings:
        return None
    return next((reading.values[0] for reading in readings if reading.values), None)

Display input elements and output elements individually

In [25]:
# Build one record per reaction. extract_temperatures is evaluated once per
# entry and its (sintering, calcination) pair unpacked, rather than being
# called separately for each of the two columns.
records = []
for entry in reactions:
    sintering_temp, calcination_temp = extract_temperatures(entry.operations)
    records.append({
        'doi': entry.doi,
        'paragraph_string': entry.paragraph_string,
        'synthesis_type': entry.synthesis_type,
        'reaction_string': entry.reaction_string,
        'targets_string': entry.targets_string,
        'sintering_temp': sintering_temp,
        'calcination_temp': calcination_temp,
    })
df = pd.DataFrame(records)

# Split every reaction string into its input (left) and output (right) side.
reaction_sides = [split_reaction(reaction_string) for reaction_string in df['reaction_string']]
df['input_reaction'] = [sides[0] for sides in reaction_sides]
df['output_reaction'] = [sides[1] for sides in reaction_sides]

# Expand the parsed element details into numbered columns. Building each frame
# from a list of dicts avoids creating one pd.Series per row; rows with fewer
# elements than the widest row are padded with NaN.
input_expanded = pd.DataFrame(
    [expand_element_details(extract_element_details(side), 'input') for side in df['input_reaction']],
    index=df.index,
)
output_expanded = pd.DataFrame(
    [expand_element_details(extract_element_details(side), 'output') for side in df['output_reaction']],
    index=df.index,
)

# Concatenate the expanded details with the base columns. No temporary
# element-list columns are added to df, so nothing needs dropping afterwards.
df = pd.concat([df, input_expanded, output_expanded], axis=1)

In [26]:
# Render the assembled frame as the cell output; the recorded output below
# shows pandas eliding the middle rows and columns with '...'.
df

Unnamed: 0,doi,paragraph_string,synthesis_type,reaction_string,targets_string,sintering_temp,calcination_temp,input_reaction,output_reaction,input_element_1,...,output_atomic_number_71,output_element_72,output_multiplicity_72,output_atomic_number_72,output_element_73,output_multiplicity_73,output_atomic_number_73,output_element_74,output_multiplicity_74,output_atomic_number_74
0,10.1149/1.1383553,High surface area activated carbons were obtai...,solid-state,2 Li2CO3 + 5 TiO2 == 1 Li4Ti5O12 + 2 CO2,[Li4Ti5O12],,,2 Li2CO3 + 5 TiO2,1 Li4Ti5O12 + 2 CO2,Li,...,,,,,,,,,,
1,10.1149/1.1455647,PNb9O25 was easily obtained by a solid-state r...,solid-state,1 NH4H2PO4 + 4.5 Nb2O5 == 1 PNb9O25 + 1.5 H2O ...,[Nb9PO25],,,1 NH4H2PO4 + 4.5 Nb2O5,1 PNb9O25 + 1.5 H2O + 1 NH3,N,...,,,,,,,,,,
2,10.1149/1.1377593,"LiFexMn2-xO4 (x = 0, 0.1, 0.3, 0.5) were prepa...",solid-state,x FeOOH + 1 LiOH·H2O + 2-x MnCO3 + 0.75-0.25*x...,"[LiMn2O4, LiMn1.9Fe0.1O4, LiMn1.7Fe0.3O4, LiMn...",,,x FeOOH + 1 LiOH·H2O + 2-x MnCO3 + 0.75-0.25*x O2,1 LiFexMn2-xO4 + 2-x CO2 + 1.5+0.5*x H2O,Fe,...,,,,,,,,,,
3,10.1149/1.1614797,The LiNi0.95Ti0.05O2 material was prepared by ...,solid-state,1 LiOH + 0.95 Ni(OH)2 + 0.225 O2 + 0.05 TiO2 =...,[LiTi0.05Ni0.95O2],,550.0,1 LiOH + 0.95 Ni(OH)2 + 0.225 O2 + 0.05 TiO2,1 LiNi0.95Ti0.05O2 + 1.45 H2O,Li,...,,,,,,,,,,
4,10.1149/1.1511190,"For the Y0.9Ca0.1FeO3 cathode, a combustion sy...",solid-state,0.1 Ca(NO3)2 + 1 Fe(NO3)3·9H2O + 0.475 O2 + 0....,[Ca0.1Y0.9FeO3],1000.0,800.0,0.1 Ca(NO3)2 + 1 Fe(NO3)3·9H2O + 0.475 O2 + 0....,1 Y0.9Ca0.1FeO3 + 16.45 H2O + 4.1 [NO3-],Ca,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
27467,10.1016/s0167-2738(97)00198-7,La2Zr2O7 and La2Zr1.68Y0.32O6.84 were prepared...,solid-state,1 La2O3 + 0.16 Y2O3 + 1.68 ZrO2 == 1 La2Zr1.68...,[La2Y0.32Zr1.68O6.84],1650.0,1100.0,1 La2O3 + 0.16 Y2O3 + 1.68 ZrO2,1 La2Zr1.68Y0.32O6.84,La,...,,,,,,,,,,
27468,10.1016/j.matdes.2016.07.119,BaFe7(MnTi)2.5O19 was prepared by a convention...,solid-state,1 BaCO3 + 3.5 Fe2O3 + 2.5 MnO2 + 2.5 TiO2 == 1...,[BaTi2.5Mn2.5Fe7O19],1100.0,,1 BaCO3 + 3.5 Fe2O3 + 2.5 MnO2 + 2.5 TiO2,1 BaFe7(MnTi)2.5O19 + 1 CO2 + 1.25 O2,Ba,...,,,,,,,,,,
27469,10.1016/j.ceramint.2003.09.008,Pb(Fe1/2Nb1/2)O3 (PFN) ceramics were prepared ...,solid-state,0.25 Fe2O3 + 0.25 Nb2O5 + 1 PbO == 1 Pb(Fe1/2N...,[Nb0.5Fe0.5PbO3],,1000.0,0.25 Fe2O3 + 0.25 Nb2O5 + 1 PbO,1 Pb(Fe1/2Nb1/2)O3,Fe,...,,,,,,,,,,
27470,10.1016/j.jeurceramsoc.2015.02.029,In our experiment the Ga2-xFexO3 polycrystalli...,solid-state,0.5*x Fe2O3 + 1-0.5*x Ga2O3 == 1 Ga2-xFexO3,"[Fe1.4Ga0.6O3, Fe0.9Ga1.1O3]",,,0.5*x Fe2O3 + 1-0.5*x Ga2O3,1 Ga2-xFexO3,Fe,...,,,,,,,,,,


Export to Excel

In [28]:
# Destination of the spreadsheet export, relative to the working directory.
excel_path = './reaction_entries.xlsx'
# Installs openpyxl into the kernel's environment on every run of this cell.
# NOTE(review): presumably needed by df.to_excel for .xlsx output. Consider
# moving the install to its own cell at the top of the notebook and pinning a
# version; pip's recorded output warns that a kernel restart may be required.
%pip install openpyxl
# Write the frame without the index column.
df.to_excel(excel_path, index=False)

Note: you may need to restart the kernel to use updated packages.
