In [1]:
import pandas as pd
import numpy as np
import json
from util.classes import *

# Load the raw reaction dataset. The context manager guarantees the file
# handle is closed, and the explicit encoding keeps decoding independent of
# the platform's default locale (JSON text is UTF-8).
filename = r'solid-state_dataset_20200713.json'
with open(filename, mode='r', encoding='utf-8') as dataset_file:
    filedata = dataset_file.read()
jsonParse = json.loads(filedata)

import glob
# Define the path to the folder containing the CSV files
folder_path = 'sinter_dataset/*.csv'

# glob returns matches in arbitrary filesystem order; sort them so the
# concatenation (and therefore the aggregated lists) is reproducible.
csv_files = sorted(glob.glob(folder_path))
if not csv_files:
    # Fail early with a clear message instead of the opaque
    # "No objects to concatenate" error raised by pd.concat.
    raise FileNotFoundError(f"No CSV files matched {folder_path!r}")

# Read every CSV file into its own dataframe
dfs = []
for file in csv_files:
    df = pd.read_csv(file)
    dfs.append(df)

# Concatenate all dataframes, then collect the distinct values of every
# remaining column per formula (the result is indexed by 'formula').
merged_df = (
    pd.concat(dfs, ignore_index=True)
    # 'Unnamed: 0' is the row index saved with the CSVs; drop it before
    # aggregating, and tolerate files that do not carry it.
    .drop(columns=['Unnamed: 0'], errors='ignore')
    .groupby('formula')
    # dict.fromkeys de-duplicates while keeping first-seen order, unlike
    # set(), whose iteration order is not stable between runs.
    .agg(lambda x: list(dict.fromkeys(x)))
)

# Preview the merged dataframe
merged_df.head()

In [2]:
# Convert each raw reaction dictionary from the parsed JSON into a
# ReactionEntry (from_dict handles the nested dictionaries).
reactions = [
    from_dict(raw_entry, ReactionEntry)
    for raw_entry in jsonParse['reactions']
]

In [5]:
# Inspect the precursor materials parsed for the first reaction entry
reactions[0].precursors

[Material(material_string='TiO2', material_formula='TiO2', material_name='', phase=None, is_acronym=False, composition=[Composition(formula='TiO2', amount='1', elements={'Ti': '1', 'O': '2'})], amount_vars={}, element_vars={}, additives=[], oxygen_deficiency=None),
 Material(material_string='Li2CO3', material_formula='Li2CO3', material_name='', phase=None, is_acronym=False, composition=[Composition(formula='Li2CO3', amount='1', elements={'Li': '2', 'C': '1', 'O': '3'})], amount_vars={}, element_vars={}, additives=[], oxygen_deficiency=None)]

In [16]:
# Keep only the formulas whose aggregated 'target' list holds more than one entry
has_multiple_targets = merged_df['target'].map(len) > 1
filtered_df = merged_df[has_multiple_targets]

# Display the filtered DataFrame
print(filtered_df)

Empty DataFrame
Columns: [target]
Index: []


In [8]:
# Inspect the synthesis operations recorded for the reaction at index 5
reactions[5].operations

[Operation(type='StartingSynthesis', token='synthesized', conditions=OperationConditions(heating_temperature=None, heating_time=None, heating_atmosphere=None, mixing_device=None, mixing_media=None)),
 Operation(type='HeatingOperation', token='calcined', conditions=OperationConditions(heating_temperature=[OperationValue(min_value=800.0, max_value=800.0, values=[800.0], units='°C')], heating_time=[OperationValue(min_value=24.0, max_value=24.0, values=[24.0], units='h')], heating_atmosphere=['air'], mixing_device=None, mixing_media=None)),
 Operation(type='MixingOperation', token='mixed', conditions=OperationConditions(heating_temperature=None, heating_time=None, heating_atmosphere=None, mixing_device=None, mixing_media=None)),
 Operation(type='MixingOperation', token='stirred', conditions=OperationConditions(heating_temperature=None, heating_time=None, heating_atmosphere=None, mixing_device=None, mixing_media='water')),
 Operation(type='MixingOperation', token='added', conditions=Operati

In [21]:
# Number of parsed reaction entries
len(reactions)

31782

In [20]:
# Precursor lists of every reaction whose target material string mentions "titanate"
[
    rxn.precursors
    for rxn in reactions
    if "titanate" in rxn.target.material_string
]

[[Material(material_string='SrCO3', material_formula='SrCO3', material_name='', phase=None, is_acronym=False, composition=[Composition(formula='SrCO3', amount='1', elements={'Sr': '1', 'C': '1', 'O': '3'})], amount_vars={}, element_vars={}, additives=[], oxygen_deficiency=None),
  Material(material_string='TiO2', material_formula='TiO2', material_name='', phase=None, is_acronym=False, composition=[Composition(formula='TiO2', amount='1', elements={'Ti': '1', 'O': '2'})], amount_vars={}, element_vars={}, additives=[], oxygen_deficiency=None)],
 [Material(material_string='SrCO3', material_formula='SrCO3', material_name='', phase=None, is_acronym=False, composition=[Composition(formula='SrCO3', amount='1', elements={'Sr': '1', 'C': '1', 'O': '3'})], amount_vars={}, element_vars={}, additives=[], oxygen_deficiency=None),
  Material(material_string='TiO2', material_formula='TiO2', material_name='', phase=None, is_acronym=False, composition=[Composition(formula='TiO2', amount='1', elements={'

In [None]:
def PreprocessElementCombinationData(data: List[FormulaPart]):
    """Return a new list of the formula parts ordered by their ``material``.

    The input is left untouched; the sort is stable, so parts with equal
    ``material`` values keep their original relative order.
    """
    ordered_parts = list(data)
    ordered_parts.sort(key=lambda part: part.material)
    return ordered_parts


def GetElementInfoForIndex(data: List[FormulaPart], index: int, property : str = "Element"):
    """Return one field of the formula part at ``index``, or ``None`` if absent.

    Args:
        data: Sequence of formula parts; each part is read through its
            ``material`` or ``amount`` attribute.
        index: Position in ``data`` to read. Values at or beyond
            ``len(data)`` yield ``None``; negative values follow normal
            Python sequence indexing.
        property: Field selector. Any value starting with "e"/"E"
            (e.g. "Element") returns ``material``; every other value
            (e.g. "Amount", or an empty string) returns ``amount``.

    Returns:
        The selected attribute of ``data[index]``, or ``None`` when
        ``index`` is past the end of ``data``.
    """
    if index >= len(data):
        return None
    # startswith() is safe for an empty selector, unlike indexing [0].
    if property.lower().startswith('e'):
        return data[index].material
    return data[index].amount

def flatten(data: ReactionEntry, max_lhs: int = 10, max_rhs: int = 5):
    """Flatten a reaction entry into a single-level dict of column values.

    Both sides of the reaction are sorted by material (via
    ``PreprocessElementCombinationData``) and spread over a fixed number of
    numbered slots. Slots without a corresponding part hold ``None``;
    parts beyond the slot count are not included.

    Args:
        data: Reaction entry to flatten. Reads ``doi``,
            ``paragraph_string``, ``synthesis_type``, ``reaction_string``
            and ``reaction`` (``element_substitution``, ``left_side``,
            ``right_side``).
        max_lhs: Number of left-hand-side slots to emit (default 10).
        max_rhs: Number of right-hand-side slots to emit (default 5).

    Returns:
        dict with the metadata keys first, followed by
        ``'Rxn LHS <n> Material'``, ``'Rxn LHS <n> Amount'``,
        ``'Rxn RHS <n> Material'`` and ``'Rxn RHS <n> Amount'`` keys,
        in that insertion order.
    """
    # Pre-processing
    rxn_lhs = PreprocessElementCombinationData(data.reaction.left_side)
    rxn_rhs = PreprocessElementCombinationData(data.reaction.right_side)

    payload = {
        'DOI': data.doi,
        'Paragraph String': data.paragraph_string,
        'Synthesis Type': data.synthesis_type,
        'Rxn String': data.reaction_string,
        'Rxn Element Substitution': data.reaction.element_substitution,
    }

    # (column suffix, selector understood by GetElementInfoForIndex)
    fields = (('Material', 'Element'), ('Amount', 'Amount'))
    sides = (('LHS', rxn_lhs, max_lhs), ('RHS', rxn_rhs, max_rhs))

    # All Material columns of a side are emitted before its Amount columns
    # so the resulting column order stays grouped by side, then by field.
    for side_label, parts, slot_count in sides:
        for column_suffix, selector in fields:
            for slot in range(slot_count):
                key = f'Rxn {side_label} {slot + 1} {column_suffix}'
                payload[key] = GetElementInfoForIndex(parts, slot, selector)
    return payload