In [10]:
import pandas as pd
from discomat.cuds.cuds import Cuds
from rdflib import Graph, Namespace, Literal
from discomat.ontology.namespaces import CUDS
from Chemical_formula_Parser import ChemicalFormulaParser
from discomat.visualisation.cuds_vis import gvis

CE = Namespace('http://materials-discovery.org/chemical-elements#')
MIO = Namespace('http://www.ddmd.io/mio/')

In [11]:
# Reading the csv file
csv_file = 'nasicon.csv'  # Change it if you want to run
data = pd.read_csv(csv_file)
print(data.head())


gall = Graph()
cuds_objects = set()

                      Materials   name  \
0       Li1.33Al0.33Ti1.67P3O12   LATP   
1  Li1.3Al0.3Ti1.7P3S0.17O11.83  LATPS   
2                    LiZr2P3O12    LZP   
3          Li1.2Al0.2Zr1.8P3O12   LAZP   
4                    LiTi2P3O12    LTP   

                                     doi_reference    worktype  \
0  https://doi.org/10.1016/j.electacta.2023.142872  simulation   
1  https://doi.org/10.1016/j.electacta.2023.142872  simulation   
2         https://doi.org/10.1021/acs.jpcc.1c07314  simulation   
3   https://doi.org/10.1016/j.ceramint.2022.01.075  simulation   
4              https://doi.org/10.20964/2022.01.03  simulation   

  experiment_method  sintering_temperature  sintering_time simulation_method  \
0               NaN                    NaN             NaN               DFT   
1               NaN                    NaN             NaN               DFT   
2               NaN                    NaN             NaN              AIMD   
3               NaN           

In [12]:
# Iterate through each row to extract material and relevant properties
for index,row in data.iterrows():
    material = row['Materials']
    name_value = row['name']
    doi_value = row['doi_reference']
    work = row['worktype']
    exp_method = row['experiment_method']
    sin_temperature = row['sintering_temperature']
    sin_time = row['sintering_time']
    sim_method = row['simulation_method']
    sim_software = row['simulation_software']
    force_field = row['force_field']
    time_step = row['time_step']
    sim_temperature = row['simulation_temperature']
    approx_method = row['approximation_method']
    cutoff_energy = row['cutoff_energy']
    kpoints = row['Kpoints']
    total_atoms = row['total_atoms']
    lattice_a_value = row['Lattice_a']
    lattice_b_value = row['Lattice_b']
    lattice_c_value = row['Lattice_c']
    lattice_volume_value = row['Lattice_Volume']
    sec_phase = row['secondary_phase']
    sec_phase_weight =row['secondary_phase_weight']
    grain_size = row['grain_size']
    rela_density = row['relative_density']
    total_energy = row['Total_energy']
    band_gap_energy = row['band_gap_energy']
    activation_energy = row['activation_energy']
    li_conductivity = row['Li_ion_conductivity']


    # Creating CUDs for the material
    if pd.notna(material):
        m = Cuds(ontology_type=MIO.Material, description=f'{material}')
        gall += m.graph
        cuds_objects.add(m)


    # Create CUDS for Materials' name
    if pd.notna(name_value):
        mn = Cuds(ontology_type=MIO.Material_Name, description=f'material name: {name_value}')
        mn.add(MIO.name,Literal(name_value))
        m.add(CUDS.has, mn)
        gall +=  m.graph + mn.graph
        cuds_objects.add(mn)

    # Create the CUDS for doi Reference
    if pd.notna(doi_value):
        doi = Cuds(ontology_type=MIO.DoiReference,description=f'The cuds about doi reference of {material}')
        doi.add(MIO.Doi, Literal(doi_value))
        m.add(CUDS.has, doi)
        gall +=  m.graph + doi.graph
        cuds_objects.add(doi)

    # Creating Cuds for chemical composition and formula
    cc = Cuds(ontology_type=MIO.ChemicalComposition,description=f'The cuds of chemical composition of {material}')
    f = Cuds(ontology_type=MIO.ChemicalFormula,description=f'The cuds of chemical formula of {material}')
    cc.add(CUDS.has, f)
    print(f"Added chemical formula to chemical composition.")
    m.add(CUDS.has, cc)
    print(f"Added chemical composition to material.")
    gall += m.graph + cc.graph + f.graph

    cuds_objects.add(cc)
    cuds_objects.add(f)

    # Parsing Chemical Formulas to 3 lists using Chemical_formula_Parser
    parser = ChemicalFormulaParser(material)
    fu_list = parser.fu_list
    e_list = parser.elements
    s_list = parser.stoichiometry

    print(f"Formula Units: {fu_list}")
    print(f"Elements List: {e_list}")
    print(f"Stoichiometry List: {s_list}")

    # Iterate over the formula units, elements, and stoichiometry lists
    for i, fu_item in enumerate(fu_list):
        # create a new formula unit for each iteration
        fu = Cuds(ontology_type=MIO.ChemicalFormulaUnit,description=f'The cuds of formula unit {fu_item}')
        fu.add(MIO.value, Literal(fu_item))
        # Add the formula unit to the larger structure (f)
        f.add(CUDS.has, fu)
        cuds_objects.add(fu)
        gall += f.graph + fu.graph


        element_symbol = e_list[i]
        stoichiometry_value = s_list[i]

        # Dynamically create the CUDS instance for the element and store it in the dictionary
        es = Cuds(ontology_type=CE[element_symbol],description=f'The cuds of the {element_symbol} element')  # Dynamically get the ontology type for the element
        fu.add(CUDS.has, es)  # Add the element CUDS instance to the formula unit
        cuds_objects.add(es)

        # Create and add the stoichiometry value to the element
        st = Cuds(ontology_type=MIO.Stoichiometry,description=f'The cuds of the stoichiometry for {element_symbol}')
        st.add(MIO.Value, Literal(stoichiometry_value))
        fu.add(CUDS.has, st)
        cuds_objects.add(st)

        gall +=  fu.graph + st.graph + es.graph

    # Create the cuds of each material's work
    w = Cuds(ontology_type=MIO.Work,description=f'the cuds of work for {material}')
    m.add(CUDS.has,w)
    cuds_objects.add(w)

    gall +=  m.graph + w.graph


    # Simulation Work for some materials
    if work == 'simulation':
        sm = Cuds(ontology_type=MIO.Simulation,description=f'simulation for {material}')
        w.add(CUDS.has, sm)
        cuds_objects.add(sm)

        # Create the cuds for simulation method
        smm = Cuds(ontology_type=MIO.Simulation_method,description=f'{sim_method} simulation for {material}')
        smm.add(MIO.Value, Literal(sim_method))
        sm.add(CUDS.has, smm)
        cuds_objects.add(smm)

        # Create the cuds for simulation software
        sms = Cuds(ontology_type=MIO.Simulation_Software,description=f'{sim_software} for {material} simulation')
        sms.add(MIO.Value, Literal(sim_software))
        sm.add(CUDS.has, sms)
        cuds_objects.add(sms)

        # Create the cuds for Simulation settings (conditions)
        sims = Cuds(ontology_type=MIO.Simulation_Setting,description=f'simulation setting for {material}')
        sm.add(CUDS.has, sims)
        cuds_objects.add(sims)

        # Create the cuds for simulation result
        smr = Cuds(ontology_type=MIO.simulation_result,description=f'{sim_method} simulation result for {material}')
        sm.add(CUDS.has, smr)
        cuds_objects.add(smr)

        # Add all of the cuds to the graph
        gall += w.graph + sm.graph + smm.graph + sms.graph +sims.graph + smr.graph


        # Create the Cuds for simulation settings
        if pd.notna(approx_method):
            am = Cuds(ontology_type=MIO.Approximation_Method,description=f'approximation method of simulation for {material}')
            am.add(MIO.Value, Literal(approx_method))
            sims.add(CUDS.has, am)
            cuds_objects.add(am)
            gall += sims.graph + am.graph

        if pd.notna(cutoff_energy):
            ce = Cuds(ontology_type=MIO.cutoff_energy, description=f'cutoff energy of simulation for {material}')
            ce.add(MIO.Value, Literal(cutoff_energy))
            ce.add(MIO.Unit, Literal('eV'))
            sims.add(CUDS.has, ce)
            cuds_objects.add(ce)
            gall += sims.graph + ce.graph

        if pd.notna(kpoints):
            kp = Cuds(ontology_type=MIO.Kpoints, description=f'kpoints of dft simulation for {material}')
            kp.add(MIO.Value, Literal(kpoints))
            sims.add(CUDS.has, kp)
            cuds_objects.add(kp)
            gall += sims.graph + kp.graph

        if pd.notna(total_atoms):
            ta = Cuds(ontology_type=MIO.Total_atoms, description=f'total atoms of simulation for {material}')
            ta.add(MIO.Value, Literal(total_atoms))
            sims.add(CUDS.has, ta)
            cuds_objects.add(ta)
            gall += sims.graph + ta.graph

        if pd.notna(force_field):
            ff = Cuds(ontology_type=MIO.ForceField, description=f'force field of simulation for {material}')
            ff.add(MIO.Value, Literal(force_field))
            sims.add(CUDS.has, ff)
            cuds_objects.add(ff)
            gall += sims.graph + ff.graph

        if pd.notna(time_step):
            ts = Cuds(ontology_type=MIO.Timestep, description=f'time step of simulation for {material}')
            ts.add(MIO.Value, Literal(time_step))
            ts.add(MIO.Unit, Literal('fs'))
            sims.add(CUDS.has, ts)
            cuds_objects.add(ts)
            gall += sims.graph + ts.graph

        if pd.notna(sim_temperature):
            simt = Cuds(ontology_type=MIO.Simulation_temperature, description=f'simulation temperature for {material}')
            simt.add(MIO.Value, Literal(sim_temperature))
            simt.add(MIO.Unit, Literal('K'))
            sims.add(CUDS.has, simt)
            cuds_objects.add(simt)
            gall += sims.graph + simt.graph


        # Create the cuds for simulation result
        if pd.notna(total_energy):
            te = Cuds(ontology_type=MIO.total_energy, description=f'total energy for {material}')
            te.add(MIO.Value, Literal(total_energy))
            te.add(MIO.Unit, Literal('eV'))
            smr.add(CUDS.has, te)
            cuds_objects.add(te)
            gall += smr.graph + te.graph

        if pd.notna(band_gap_energy):
            bge = Cuds(ontology_type=MIO.band_gap_energy, description=f'band gap energy for {material} in the simulation')
            bge.add(MIO.Value, Literal(band_gap_energy))
            bge.add(MIO.Unit, Literal('eV'))
            smr.add(CUDS.has, bge)
            cuds_objects.add(bge)
            gall += smr.graph + bge.graph

        if pd.notna(activation_energy):
            ae = Cuds(ontology_type=MIO.activation_energy, description=f'activation energy for {material} in the simulation')
            ae.add(MIO.Value, Literal(activation_energy))
            ae.add(MIO.Unit, Literal('eV'))
            smr.add(CUDS.has, ae)
            cuds_objects.add(ae)
            gall += smr.graph + ae.graph

        if pd.notna(li_conductivity):
            lic = Cuds(ontology_type=MIO.Li_ion_conductivity, description=f'Li ion conductivity for {material} in the simulation')
            lic.add(MIO.Value, Literal(li_conductivity))
            lic.add(MIO.Unit, Literal('S/cm'))
            smr.add(CUDS.has, lic)
            cuds_objects.add(lic)
            gall += smr.graph + lic.graph

        # Create the cuds for lattice parameters in the simulation result
        if lattice_a_value or lattice_b_value or lattice_c_value or lattice_volume_value:
            Lp = Cuds(ontology_type=MIO.Lattice_parameter, description=f'lattice parameter for {material} in the simulation')
            smr.add(CUDS.has, Lp)
            cuds_objects.add(Lp)
            gall += smr.graph + Lp.graph

            lattice_cuds_dict = {}
            lattice_parameters = [
                (MIO.Lattice_a, lattice_a_value, "angstrom", 'Lattice a', 'La'),
                (MIO.Lattice_b, lattice_b_value, "angstrom", 'Lattice_b', 'Lb'),
                (MIO.Lattice_c, lattice_c_value, "angstrom", 'Lattice_c', 'Lc'),
                (MIO.Lattice_volume, lattice_volume_value, "angstrom^3", 'Lattice Volume', 'Lv'),
            ]

            for lattice_type, value, unit, lattice_name, var_name in lattice_parameters:
                if pd.notna(value):
                    # Add a specific description for each lattice parameter CUDS
                    lattice_cuds_dict[var_name] = Cuds(ontology_type=lattice_type,description=f'{lattice_name} in the simulation for {material}')
                    lattice_cuds_dict[var_name].add(MIO.Value, Literal(value))
                    lattice_cuds_dict[var_name].add(MIO.Unit, Literal(unit))
                    Lp.add(CUDS.has, lattice_cuds_dict[var_name])
                    cuds_objects.add(lattice_cuds_dict[var_name])
                    gall += Lp.graph + lattice_cuds_dict[var_name].graph

    # Experiment Work for materials
    elif work == 'experiment':
        exp = Cuds(ontology_type=MIO.Experiment, description=f'Experiments of {material}')
        w.add(CUDS.has, exp)
        cuds_objects.add(exp)

        # Create the Cuds for experiment method
        expm = Cuds(ontology_type=MIO.Experiment_method, description=f'Experiments method of {material}')
        expm.add(MIO.Value, Literal(exp_method))
        exp.add(CUDS.has, expm)
        cuds_objects.add(expm)

        # Create the Cuds for experiment condition
        expc= Cuds(ontology_type=MIO.Experiment_condition, description=f'Experiments condition of {material}')
        exp.add(CUDS.has, expc)
        cuds_objects.add(expc)

        # Create the Cuds for experiment result
        expr =Cuds(ontology_type=MIO.Experiment_result, description=f'Experiments result of {material}')
        exp.add(CUDS.has, expr)
        cuds_objects.add(expr)

        gall += w.graph + exp.graph+ expm.graph + expr.graph +expc.graph


        # Add experiment conditions
        if pd.notna(sin_time):
            sint = Cuds(ontology_type=MIO.Sin_time, description=f'Sintering time of {material}')
            sint.add(MIO.Value, Literal(sin_time))
            sint.add(MIO.Unit, Literal('h'))
            expc.add(CUDS.has, sint)
            cuds_objects.add(sint)

        if pd.notna(sin_temperature):
            sinT = Cuds(ontology_type=MIO.Sin_temperature, description=f'Sintering temperature of {material}')
            sinT.add(MIO.Value, Literal(sin_temperature))
            sinT.add(MIO.Unit, Literal('K'))
            expc.add(CUDS.has, sinT)
            cuds_objects.add(sinT)

        gall += expc.graph + sint.graph +sinT.graph


        # add experiment results
        if sec_phase is not None:
            sp = Cuds(ontology_type=MIO.Sec_phase, description=f'Second phase of {material}')
            sp.add(MIO.Value, Literal(sec_phase))
            expr.add(CUDS.has, sp)
            cuds_objects.add(sp)

        if sec_phase_weight is not None:
            spw = Cuds(ontology_type=MIO.Sec_phase_weight, description=f'Second phase weight of {material}')
            spw.add(MIO.Value, Literal(sec_phase_weight))
            spw.add(MIO.Unit, Literal('%'))
            expr.add(CUDS.has, spw)
            cuds_objects.add(spw)

        if grain_size is not None:
            gs = Cuds(ontology_type=MIO.Grain_size, description=f'Grainsize of {material}')
            gs.add(MIO.Value, Literal(grain_size))
            gs.add(MIO.Unit, Literal('µm'))
            expr.add(CUDS.has, gs)
            cuds_objects.add(gs)

        if rela_density is not None:
            rd = Cuds(ontology_type=MIO.RelaDensity, description=f'Relative density of {material}')
            rd.add(MIO.Value, Literal(rela_density))
            rd.add(MIO.Unit, Literal('%'))
            expr.add(CUDS.has, rd)
            cuds_objects.add(rd)

        if activation_energy is not None:
            ae =Cuds(ontology_type=MIO.Activation_energy, description=f'Activation energy of {material} in the experiment')
            ae.add(MIO.Value, Literal(activation_energy))
            ae.add(MIO.Unit, Literal('eV'))
            expr.add(CUDS.has, ae)
            cuds_objects.add(ae)

        if li_conductivity is not None:
            lic = Cuds(ontology_type=MIO.Li_ion_conductivity, description=f'Conductivity of {material} in the experiment')
            lic.add(MIO.Value, Literal(li_conductivity))
            lic.add(MIO.Unit, Literal('S/cm'))
            expr.add(CUDS.has, lic)
            cuds_objects.add(lic)

        gall += expr.graph + ae.graph + lic.graph +sp.graph + spw.graph + gs.graph + rd.graph

        # Create the cuds for lattice parameters in the experiment result
        if lattice_a_value or lattice_b_value or lattice_c_value or lattice_volume_value:
            Lp = Cuds(ontology_type=MIO.Lattice_parameter, description='The cuds of lattice parameter')
            expr.add(CUDS.has, Lp)
            cuds_objects.add(Lp)
            gall += expr.graph + Lp.graph

            lattice_cuds_dict = {}
            lattice_parameters = [
                (MIO.Lattice_a, lattice_a_value, "angstrom", 'Lattice a', 'La'),
                (MIO.Lattice_b, lattice_b_value, "angstrom", 'Lattice_b', 'Lb'),
                (MIO.Lattice_c, lattice_c_value, "angstrom", 'Lattice_c', 'Lc'),
                (MIO.Lattice_volume, lattice_volume_value, "angstrom^3", 'Lattice Volume', 'Lv'),
            ]

            for lattice_type, value, unit, lattice_name, var_name in lattice_parameters:
                if pd.notna(value):
                    lattice_cuds_dict[var_name] = Cuds(ontology_type=lattice_type,description=f'{lattice_name} in the simulation for {material}')
                    lattice_cuds_dict[var_name].add(MIO.Value, Literal(value))
                    lattice_cuds_dict[var_name].add(MIO.Unit, Literal(unit))
                    Lp.add(CUDS.has, lattice_cuds_dict[var_name])
                    cuds_objects.add(lattice_cuds_dict[var_name])
                    gall += Lp.graph + lattice_cuds_dict[var_name].graph

Added chemical formula to chemical composition.
Added chemical composition to material.
Formula Units: ['Li1.33', 'Al0.33', 'Ti1.67', 'P3', 'O12']
Elements List: ['Li', 'Al', 'Ti', 'P', 'O']
Stoichiometry List: [1.33, 0.33, 1.67, 3.0, 12.0]
Added chemical formula to chemical composition.
Added chemical composition to material.
Formula Units: ['Li1.3', 'Al0.3', 'Ti1.7', 'P3', 'S0.17', 'O11.83']
Elements List: ['Li', 'Al', 'Ti', 'P', 'S', 'O']
Stoichiometry List: [1.3, 0.3, 1.7, 3.0, 0.17, 11.83]
Added chemical formula to chemical composition.
Added chemical composition to material.
Formula Units: ['Li', 'Zr2', 'P3', 'O12']
Elements List: ['Li', 'Zr', 'P', 'O']
Stoichiometry List: [1, 2.0, 3.0, 12.0]
Added chemical formula to chemical composition.
Added chemical composition to material.
Formula Units: ['Li1.2', 'Al0.2', 'Zr1.8', 'P3', 'O12']
Elements List: ['Li', 'Al', 'Zr', 'P', 'O']
Stoichiometry List: [1.2, 0.2, 1.8, 3.0, 12.0]
Added chemical formula to chemical composition.
Added che

In [13]:
# print the number of cuds
print(f"Total number of unique CUDS objects in the graph: {len(cuds_objects)}")


gvis(gall,'nasicon.html')

gall.serialize('nasicon.ttl',format='ttl')

Total number of unique CUDS objects in the graph: 455
Graph saved to file:///Users/currystep/Desktop/hackathon-3/hack_nasicon/nasicon.html


<Graph identifier=N95d96033e97a4e5a89ea58c566b0700c (<class 'rdflib.graph.Graph'>)>