# Import the libs and base ontologies

In [1]:
from owlready2 import *
import pandas as pd
from neo4j import GraphDatabase, basic_auth
from owlready2 import get_ontology
# from emmopy import get_emmo
from neo4j import GraphDatabase, basic_auth
from sentence_transformers import SentenceTransformer, util
import torch
import re
onto_path.append("")
onto_process=get_ontology("C://Abhishek Thesis//ontology//manufacturing.owl").load()

# onto_matter=get_ontology("C://Abhishek Thesis//ontology//matter.owl").load()
onto_matter=get_ontology("https://raw.githubusercontent.com/IEK-13/MatGraphAI/master/Ontology/matter.owl").load()

onto_quantity=get_ontology("C://Abhishek Thesis//ontology//quantities.owl").load()



In [2]:
# results = onto_quantity.search_one(label="properties.Quantity")
# print(results)

# Creating node-class 'EMMO_IS_A' relations between exisiting classes.

## map the 3 ontologies on neo4j

In [3]:



# Define Neo4j connection details
uri = "bolt://localhost:7687"
user = "neo4j"
password = "password"

# Create Neo4j driver and session
driver = GraphDatabase.driver(uri, auth=(user, password))
session = driver.session()

# Define function to create nodes
def create_node(tx, label, name,iri):
    tx.run("MERGE (n:" + label + " {name: $name, uri:$uri})", name=name, uri=iri)
    

# Define function to create EMMO_IS_A relationships
def create_emmo_is_a_rel(tx,class_, subclass, superclass):
    tx.run("MATCH (c1:" + class_ + " {name: $subclass}), (c2:" + class_ + " {name: $superclass}) "
           "MERGE (c1)-[:EMMO_IS_A]->(c2)",class_=class_ ,subclass=subclass, superclass=superclass)

# Create nodes for each class in Matter ontology
with session.begin_transaction() as tx:
    for clazz in onto_matter.classes():
        create_node(tx, "EMMOMatter", clazz.name,clazz.iri)

# Create nodes for each class in Process ontology
with session.begin_transaction() as tx:
    for clazz in onto_process.classes():
        create_node(tx, "EMMOProcess", clazz.name,clazz.iri)

# Create nodes for each class in Quantity ontology
with session.begin_transaction() as tx:
    for clazz in onto_quantity.classes():
        create_node(tx, "EMMOQuantity", clazz.name,clazz.iri)

# Create EMMO_IS_A relationships for each class
with session.begin_transaction() as tx:
    for clazz in onto_matter.classes():
        for parent in clazz.is_a:
            create_emmo_is_a_rel(tx,'EMMOMatter', clazz.name, parent.name)
    for clazz in onto_process.classes():
        for parent in clazz.is_a:
            create_emmo_is_a_rel(tx,'EMMOProcess', clazz.name, parent.name)
    for clazz in onto_quantity.classes():
        for parent in clazz.is_a:
            create_emmo_is_a_rel(tx,'EMMOQuantity', clazz.name, parent.name)

# Close Neo4j session and driver
session.close()
driver.close()


In [4]:
#To extend the imaging techniques in EMMOProcess-

def create_subclass(driver, node_label, superclass_name, subclass_name,relationship_name, url):
    with driver.session() as session:
        result = session.run(
            "MATCH (superclass:" + node_label + ") "
            "WHERE superclass.name = $superclassName "
            "MERGE (subclass:" + node_label + " { name: $subclassName }) "
            "ON CREATE SET subclass.url = $url "
            "MERGE (subclass)-[:"+relationship_name+"]->(superclass) "
            "RETURN subclass, superclass",
            superclassName=superclass_name,
            subclassName=subclass_name,
            url=url + subclass_name  # Append subclass_name to the URL
        )

with driver.session() as session:
    with session.begin_transaction() as tx:
        node_label = 'EMMOProcess'
        url_manuf = 'http://www.example.com/manufacturing#'
        relation_emmo_is_a='EMMO_IS_A'
        combinations = [
            ['ElectronMicroscopy', 'FIBImaging'],
            ['ElectronMicroscopy', 'IC_SEMImaging'],
            ['Microscopy', 'ScanningProbeMicroscopy'],
            ['Imaging', 'Neutron'],
            ['ScanningProbeMicroscopy', 'AFMImaging'],
            ['Imaging', 'XRay'],
            ['XRay', 'Synchrotron'],
            ['Synchrotron', 'SynchrotronRadiography'],
            ['Synchrotron', 'SynchrotronTomography']
        ]
        
        for combination in combinations:
            superclass_name = combination[0]
            subclass_name = combination[1]
            
            create_subclass(driver, node_label, superclass_name, subclass_name, relation_emmo_is_a,url_manuf)

# Neo4j codes

In [5]:
# Match(n) optional match(n)-[r]-() delete n,r

#to search for a node
# MATCH (n:Person)
# WHERE n.name = 'John'
# RETURN n


# MATCH (superclass:EMMOMatter)
# WHERE superclass.name = 'Device'
#     return superclass

# Get the Organizational Data from file

In [6]:

# query = f"MATCH (m:{node1} {{name: '{domain_cls}'}}), (ma:{node2} {{name: '{range_cls}'}}) CREATE (m)-[:{rel_type}]->(ma)"
#             result = session.run(query)

In [7]:


# Read the Excel file
excel_file = 'C://Abhishek Thesis//OneDrive_1_5-11-2023//test file-DataCatalog//DataCatalog2.xlsx'
df = pd.read_excel(excel_file, header=None)

# Find the row index where 'Organizational Data' is located
start_row = df[df.iloc[:, 0] == 'Organizational Data'].index[0]

# Find the row index where 'Characterization Data' is located
end_row = df[df.iloc[:, 0] == 'Characterization Data'].index[0]

# Extract the organizational data rows
organizational_data = df.iloc[start_row + 1: end_row]

# Remove trailing spaces from attribute names
organizational_data.loc[:, 0] = organizational_data.loc[:, 0].str.strip()

# Extract attributes and values
attributes = organizational_data.iloc[:, 0].tolist()
values = organizational_data.iloc[:, 1].tolist()

# Create a dictionary of attributes and values
data_dict = dict(zip(attributes, values))

# Generate variable names dynamically and assign values
for attribute, value in zip(attributes, values):
    var_name = attribute.lower().replace(' ', '_').replace('-', '').replace(':', '')
    globals()[var_name] = value

# Print the variables to verify they contain the expected values
for attribute, value in zip(attributes, values):
    var_name = attribute.lower().replace(' ', '_').replace('-', '').replace(':', '')
    print(f"{var_name}: {globals()[var_name]}")


experiment_title: Elucidating the Influence of the d-Band Center on the Synthesis of Isobutanol
experimentid: 000000-1
measurementid: 000000-1-3
upload_date: 15/11/2022
measurement_date: 16/12/2022
institution: FZJ IEK-15, FZJ IEK-16
founding_body: HIP, ABBC
country: Germany, France
authors_list: Gupta, Max
orcid: IUHB567B56, OIJDH98475
access_conditions: public
published: https://doi.org/10.3390/catal11030406
topic: Fuel Cells
device: ElectrochemicalCell
component: WorkingElectrode
subcomponent: ClothGDL
material: CarbonBlack
granularity_level: Nanostructure


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  organizational_data.loc[:, 0] = organizational_data.loc[:, 0].str.strip()


# Load file data to the neo4j ontology and make relationships

In [8]:
# def create_or_get_node(tx, label, **properties):
#     """ check for existing nodes, if not there, create new"""
#     query = "MERGE (n:%s {name: $name}) RETURN n" % label
#     result = tx.run(query, name=properties['name'])
#     if not result.peek():
#         create_node(tx, label, **properties)
     
    
    
def create_nodes(tx, label, **properties):
    query = f"CREATE (n:{label}) SET n += $properties RETURN n"
    return tx.run(query, properties=properties)

def create_or_get_node(tx, label, match_property, properties):
    query = f"MERGE (n:{label} {{{match_property}: $value}}) SET n += $properties RETURN n"
    result = tx.run(query, value=properties[match_property], properties=properties)
    if not result.peek():
        create_nodes(tx, label, **properties)
        
def create_or_get_node_smp(tx, label, match_property, properties):
    query = f"MERGE (n:{label} {{{match_property}: $value}}) SET n += $properties RETURN n"
#     result = tx.run(query, value=properties[match_property], properties=properties)
#     if not result.peek():
    create_nodes(tx, label, **properties)
        
def create_relation(tx, node1, node2, relation, property1, property2, label1, label2):
    query = f"MATCH (n1:{label1}), (n2:{label2}) WHERE n1.{property1} = $name1 AND n2.{property2} = $name2 " \
            f"MERGE (n1)-[r:{relation}]->(n2) " \
            f"RETURN count(r)"
    result = tx.run(query, name1=node1, name2=node2)
    count = result.single()[0]
    return count

# Function to create the relationship if it doesn't exist
# def create_relation(tx, node1, node2, relation, property1, property2):
#     query = f"MATCH (n1), (n2) WHERE n1.{property1} = $name1 AND n2.{property2} = $name2 " \
#             f"MERGE (n1)-[r:{relation}]->(n2) " \
#             f"RETURN count(r)"
#     result = tx.run(query, name1=node1, name2=node2)
#     count = result.single()[0]
#     return count


founding_body_list = founding_body.split(",")    
country_list = country.split(",")
institution_list = institution.split(",")
orcid_list = orcid.split(",")
authors_list_list = authors_list.split(",")

# Create nodes for Researcher, Institution and Country
with driver.session() as session:
    with session.begin_transaction() as tx:
    
        for country_ in country_list:
            if country_ not in ('','-','nan'):
                properties = {'name': country_}
                create_or_get_node(tx, "Country",'name', properties)
                
        for founding_body_ in founding_body_list:
            if founding_body_ not in ('','-','nan'):
                properties = {'name': founding_body_}
                create_or_get_node(tx, "FundingBody",'name', properties)
        
    
        for institution_ in institution_list:
            if institution_ not in ('','-','nan'):
                properties = {'name': institution_}
                create_or_get_node(tx, "Institution",'name', properties)

        for authors_list_,orcid_ in zip(authors_list_list,orcid_list):
            if orcid_ not in ('', '-', 'nan'):
                properties = {'name': authors_list_, 'orcid': orcid_}
                create_or_get_node(tx, "Researcher", 'orcid', properties)

        
#         if orcid not in ('','-','nan'): 
#             create_or_get_node(tx, "Researcher", name=authors_list, orcid=orcid)
        
                
        
        if subcomponent not in ('','-','nan'):    
            properties = {'name': subcomponent}
            create_or_get_node(tx, "Subcomponent",'name', properties)

        
#         if access_conditions not in ('','-','nan'):    
#             properties = {'name': access_conditions}
#             create_or_get_node(tx, "Parameter",'name', properties)

        
                
        if component not in ('','-','nan'):    
            properties = {'name': component}
            create_or_get_node(tx, "Component",'name', properties)
  
        if device not in ('','-','nan'): 
            properties = {'name': device}
            create_or_get_node(tx, "Device",'name', properties)

        

        if material not in ('','-','nan'):  
            properties = {'name': material}
            create_or_get_node(tx, "Material", 'name',properties)

                     

        properties = {'name': measurementid,'experiment_start':measurement_date,
                    'date_added':upload_date, 'run_title' :experiment_title, 'experiment_id':experimentid, 'access_conditions':access_conditions}
        create_or_get_node(tx, "Measurement",'name',properties)

        
        
# config_rel_lst=[{'node1':orcid, 'node2':institution,'relation': 'AFFILIATED_TO', 'property_name1':'orcid','property_name2': 'name'},
#       {'node1':institution, 'node2':country,'relation': 'IN', 'property_name1':'name','property_name2': 'name'},
#       {'node1':orcid, 'node2':country,'relation': 'IN', 'property_name1':'orcid','property_name2': 'name'},
#       {'node1':measurement_id, 'node2':orcid,'relation': 'BY', 'property_name1':'name','property_name2': 'orcid'} 
#      ]
           
# Create relations between the nodes  
    with session.begin_transaction() as tx:  
    # Check and create relationship
        for orcid_,institution_  in zip(orcid_list,institution_list):
            count = create_relation(tx, orcid_, institution_, 'AFFILIATED_TO', 'orcid', 'name','Researcher', 'Institution')

            if count == 0:
#                 print("Relationship AFFILIATED_TO doesn't exist, creating it...")
                create_relation(tx, orcid_, institution_, 'AFFILIATED_TO', 'orcid', 'name','Researcher', 'Institution')
            else:
                pass

        for institution_, country_ in zip(institution_list,country_list):

            count = create_relation(tx, institution_, country_, 'IN', 'name', 'name','Institution', 'Country')

            if count == 0:
#                 print("Relationship IN doesn't exist, creating it...")
                create_relation(tx, institution_, country_, 'IN', 'name', 'name','Institution', 'Country')
            else:
                pass
        
        
        for orcid_, country_ in zip(orcid_list,country_list):
            count = create_relation(tx, orcid_, country_, 'IN', 'orcid', 'name','Researcher', 'Country')

            if count == 0:
#                 print("Relationship IN_ orcid doesn't exist, creating it...")
                create_relation(tx, orcid_, country_, 'IN', 'orcid', 'name','Researcher', 'Country')
            else:
                pass
        
        
        for orcid_ in orcid_list:
            count = create_relation(tx, measurementid, orcid_, 'BY', 'name', 'orcid','Measurement', 'Researcher')

            if count == 0:
#                 print("Relationship BY doesn't exist, creating it...")
                create_relation(tx, measurementid, orcid_, 'BY', 'name', 'orcid','Measurement', 'Researcher')
            else:
                pass
        
        
        count = create_relation(tx, material, measurementid, 'IS_MEASUREMENT_INPUT', 'name', 'name','Material', 'Measurement')
        if count == 0:
#                 print("Relationship IN_ orcid doesn't exist, creating it...")
            create_relation(tx,material, measurementid, 'IS_MEASUREMENT_INPUT', 'name', 'name','Material', 'Measurement')
        else:
            pass
        
      
        count = create_relation(tx, component, subcomponent, 'HAS_PART', 'name', 'name','Component', 'Subcomponent')
        if count == 0:
#                 print("Relationship IN_ orcid doesn't exist, creating it...")
            create_relation(tx,component, subcomponent, 'HAS_PART', 'name', 'name','Component', 'Subcomponent')
        else:
            pass
            
            
        count = create_relation(tx, device, component, 'HAS_PART', 'name', 'name','Device', 'Component')
        if count == 0:
#                 print("Relationship IN_ orcid doesn't exist, creating it...")
            create_relation(tx,  device, component, 'HAS_PART', 'name', 'name','Device', 'Component')
        else:
            pass

        
        count = create_relation(tx, component, material, 'HAS_PART', 'name', 'name','Component', 'Material')
        if count == 0:
#                 print("Relationship IN_ orcid doesn't exist, creating it...")
            create_relation(tx,  component, material, 'HAS_PART', 'name', 'name','Component', 'Material')
        else:
            pass

        count = create_relation(tx, component, measurementid, 'IS_MEASUREMENT_INPUT', 'name', 'name','Component', 'Measurement')
        if count == 0:
#                 print("Relationship IN_ orcid doesn't exist, creating it...")
            create_relation(tx,  component, measurementid, 'IS_MEASUREMENT_INPUT', 'name', 'name','Component', 'Measurement')
        else:
            pass
            
          
        for founding_body_ in founding_body_list:
            count = create_relation(tx,measurementid , founding_body_,  'FUNDED_BY', 'name', 'name','Measurement', 'FundingBody')
        if count == 0:
#                 print("Relationship BY doesn't exist, creating it...")
            create_relation(tx, measurementid , founding_body_,  'FUNDED_BY', 'name', 'name','Measurement', 'FundingBody')
        else:
            pass
        
        
#         create_relation4(tx, component, 'Imaging', 'new_rel', 'name', 'name', 'EMMOMatter', 'EMMOProcess')

# To get the classes and subclasses from ontology, so as to create further IS_A relations with file data

In [9]:
from neo4j import GraphDatabase


def get_all_subclasses(driver, target_class_name,node_lab):
    with driver.session() as session:
#        result = session.run(
#            "MATCH (target {name: $targetClassName})<-[:$relationshipType*]-(subclass: {name: $targetClassName}) RETURN DISTINCT subclass.name",
#                    targetClassName=target_class_name,
#                    relationshipType=relationship_type)
        
        result = session.run(
            "MATCH (target:{node_lab} {{name: $targetClassName}})<-[:EMMO_IS_A*]-(subclass:{node_lab}) "
            "RETURN DISTINCT subclass.name".format(node_lab=node_lab),
            targetClassName=target_class_name
        )
        return set([record["subclass.name"] for record in result])




In [10]:
# Example usage
target_class_name = "Component"
node_lab='EMMOMatter'
# Create a Neo4j driver instance
# driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))

# Call the function to retrieve all subclasses
all_subclasses = get_all_subclasses(driver, target_class_name,node_lab)

# Include the target class itself in the set of subclasses
all_subclasses.add(target_class_name)

# Convert the set to a list
component_lst = list(all_subclasses)

# Print the subclasses
print(component_lst)

['ReferenceElectrode', 'NafionMembrane', 'CCM', 'Aquivion', 'Component', 'AquivionD72-25BS', 'ConversionElectrode', 'NonPolarizableElectrode', 'NegativeElectrode', 'Flemion', '5LayerMEA', 'RotationDiskElectrode', 'InertElectrode', 'AquivionD79-25BS', 'FuelCellComponent', 'PartiallyFluorinatedMembrane', 'PolarizableElectrode', 'Asiplex', 'Gore-Select', 'CatalystCoatedMembrane', 'Cathode', 'AquivionD98-25BS', 'NonFluorinatedCompositeMembrane', 'HydrogenElectrode', 'ElectrochemicalComponent', 'AquivionSO3H-Form', 'GasDiffusionElectrode', 'NonFluorinatedMembrane', 'Neosepta-F', 'WorkingElectrode', 'Membrane', 'IndicatorElectrode', 'GoldElectrode', 'Electrode', 'Anode', 'CompositeElectrode', 'CarbonPasterElectrode', 'IntercalationElectrode', 'LithiumIntercalationElectrode', 'MembraneElectrodeAssembly', 'NRE211', 'NormalHydrogenElectrode', 'SimpleElectrode', 'PorousElectrode', 'PerfluorinatedMembrane', 'MetalElectrode', 'NRE212', 'BimetallicElectrode', 'CounterElectrode', 'PlatinumElectrode'

In [11]:
target_class_name = "Device"
node_lab='EMMOMatter'
# driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
all_subclasses = get_all_subclasses(driver, target_class_name,node_lab)
all_subclasses.add(target_class_name)
device_lst = list(all_subclasses)
print(device_lst)

['ElectrolyticElectrochemicalCell', 'ReplacementBattery', 'Battery', 'PilotBatteryCell', 'AcidicElectrochemicalCell', 'ElectrochemicalHalfcell', 'DryBatteryCell', 'FuelCell', 'HybridElectrochemicalCell', 'ElectrochemicalCapacitor', 'AlkalineElectrochemicalCell', 'ConcentrationElectrochemicalCell', 'NearneutralElectrochemicalCell', 'CoinCell', 'WestonStandardVoltageCell', 'MetalAirElectrodeElectrochemicalCell', 'ConversionElectrochemicalCell', 'ReserveBatteryCell', 'ThreeElectrodeElectrochemicalCell', 'MoltenSaltElectrochemicalCell', 'AqueousElectrochemicalCell', 'CylindricalCell', 'AlkalineZincAirElectrochemicalCell', 'PaperLinedCell', 'NonAqueousElectrochemicalCell', 'LihtiumElectrochemicalCell', 'IntercalationElectrochemicalCell', 'SolidElectrolyteElectrochemicalCell', 'BufferBattery', 'ElectrochemicalCell', 'OEMBattery', 'GalvanicElectrochemicalCell', 'LithiumCarbonMonofluorideElectrochemicalCell', 'PhotoelectrolyticElectrochemicalCell', 'BatteryCell', 'EmergencyBattery', 'PasteLine

In [12]:
target_class_name = "Subcomponent"
node_lab='EMMOMatter'
# driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
all_subclasses = get_all_subclasses(driver, target_class_name,node_lab)
all_subclasses.add(target_class_name)
subcomponent_lst = list(all_subclasses)
print(subcomponent_lst)

['BipolarPlate', 'StainlessSteelBasedMetalNitrideCoatedBiploarPlate', 'StainlessSteelBasedOrganicSelfAssembleMonoPolymerCoatedBipolarPlate', 'NickelBasedMetalNitrideCoatedBipolarPlate', 'CompositeBipolarPlate', 'ElectricCurrentCollector', 'CoinCellHousing', 'GasDiffusionLayer', 'StainlessSteelBasedDiamondLikeCarbonCoatedBipolarPlate', 'NickelBasedMetalBasedCoatedBipolarPlate', 'NickelBasedNobleMetalCoatedBipolarPlate', 'FuelCellStation', 'StainlessSteelBasedBiploarPlate', 'NonPorousGraphiteBipolarPlate', 'IonicSubcomponent', 'Binder', 'AluminiumBasedMetalNitrideCoatedBipolarPlate', 'ElectrochemicalSubcomponent', 'StainlessSteelBasedCarbonBasedCoatedBipolarPlate', 'CoolingPlate', 'CurrentCollector', 'StainlessSteelBasedConductivePolymerCoatedBipolarPlate', 'TitaniumBasedMtealNitrideCoatedBipolarPlate', 'BatteryCellContainer', 'StainlessSteelBasedNobleMetalCoatedBipolarPlate', 'ClothGDL', 'StructuralSubcomponent', 'SwagelokCellHousing', 'TitaniumBasedMetalCarbideCoatedBipolarPlate', 'Tit

In [13]:
target_class_name = "Material"
node_lab='EMMOMatter'
# driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
all_subclasses = get_all_subclasses(driver, target_class_name,node_lab)
all_subclasses.add(target_class_name)
material_lst = list(all_subclasses)
print(material_lst)

['ActiveMaterial', 'NaturalMaterial', 'EA30E', 'TernaryMaterial', 'Copolymer', 'AquivionSO3H-Form', 'LampBlack', 'F30E-HT', 'Ceramic', 'PTFE', 'AcetyleneBlack', 'ReactiveMaterial', 'Metal', 'Electrolyte', 'SolidElectrolyte', 'ElectrolyteSolution', 'CarbonSupportedPlatinumCatalyst', 'IsotropicMaterial', 'IntercalationMaterial', 'Polyepoxide', 'Nafion_D2021CS', 'AquivionD72-25BS', 'SupportingElectrolyte', 'Catalyst', 'HeterogenousMaterial', 'LiquidElectrolyte', 'GraphitizedFurnaceBlackCarbonSupportedPlatinumCatalyst', 'NearNeutralElectrolyte', 'NanoMaterial', 'EnergyMaterial', '800EWIonomer', 'ElectrochemicalIntercalationMaterial', 'FurnaceBlack', 'AlkalineElectrolyte', 'PlatinumCatalyst', '3DNanoMaterial', 'AqueousElectrolyte', 'ChannelBlack', 'Anolyte', 'SecundaryCatalyst', 'Nafion', 'ManufacturedMaterial', 'CarbonBlack', 'ConductingPolymerSupportedPlatinumCatalyst', 'TernaryCatalyst', 'ElectroCatalyst', '0DNanoMaterial', 'AquivionD98-25BS', 'LeclancheElectrolyte', 'HomoGenousMaterial'

In [14]:
target_class_name = "Manufacturing"
node_lab='EMMOProcess'
# driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
all_subclasses = get_all_subclasses(driver, target_class_name,node_lab)
all_subclasses.add(target_class_name)
manufacturing_lst = list(all_subclasses)
print(manufacturing_lst)

['AdditiveManufacturing', 'FuelCellAssembly', 'MEAManufacturing', 'CCLManufacturing', 'MaterialsManufacturing', 'CCMManufacturing', 'SamplePreparation', 'ComponentManufacturing', 'FuelCellManufacturing', 'Manufacturing', 'CatalystInkManufacturing', 'DeviceManufacturing', '5LayerMEAManufacturing']


In [15]:
target_class_name = "Imaging"
node_lab='EMMOProcess'
# driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
all_subclasses = get_all_subclasses(driver, target_class_name,node_lab)
all_subclasses.add(target_class_name)
imaging_lst = list(all_subclasses)
print(imaging_lst)

['AFMImaging', 'FIBImaging', 'DynamicLightScattering', 'SynchrotronRadiography', 'SynchrotronTomography', 'XRay', 'Scattering', 'Synchrotron', 'Microscopy', 'ElectronMicroscopy', 'IC_SEMImaging', 'ScanningProbeMicroscopy', 'Imaging', 'TEMImaging', 'SEMImaging', 'Neutron']


In [16]:
target_class_name = "Quantity"
node_lab='EMMOQuantity'
# driver = GraphDatabase.driver("bolt://localhost:7687", auth=("neo4j", "password"))
all_subclasses = get_all_subclasses(driver, target_class_name,node_lab)
all_subclasses.add(target_class_name)
quantity_lst = list(all_subclasses)
print(quantity_lst)

['ElectricFieldStrength', 'HydrodynamicDiameter', 'StandardAmountConcentration', 'TemperatureCoefficientOfTheOpenCircuitVolatage', 'FilledVolume', 'IonicCurrent', 'SpecificEnergy', 'ResidualActiveMass', 'DischargeCurrent', 'EnergyDensity', 'LinearMassDensity', 'DiffusionLimitedCurrent', 'RelativePermittivity', 'Black', 'SpecificEnthalpy', 'VolumeFlowRate', 'PureNumberQuantity', 'MolarHeatCapacity', 'PositiveCharge', 'MolarMass', 'Density', 'ModulusOfRigidity', 'ParticleConcentration', 'LuminousFlux', 'IsobaricHeastCapacity', 'ElectricResistance', 'AcceptorDensity', 'DimensionlessQuantity', 'AngularReciprocalLatticeVector', 'DiffuseLayerPotential', 'RelativePermeability', 'SpecificHeatCapacity', 'FermiEnergy', 'ThermodynamicGruenEisenParameter', 'CarrierLifetime', 'UpperCriticalMagneticFluxDensity', 'EnergyDensityOfStates', 'ActivationEnergy', 'Length', 'ExperimentalCapacity', 'AngularVelocity', 'Weight', 'DonorDensity', 'ServiceMass', 'MassConcentrationOfWater', 'SpecificVolumeVoid', '

In [17]:
my_list =quantity_lst
query = 'ElectricCurrent'
matching_strings = [item for item in my_list if query in item]
matching_strings

['LinearElectricCurrentDensity', 'ElectricCurrentDensity', 'ElectricCurrent']

# Creation of IS_A relationship of meta data (organizational data) to EMMO classes.

In [18]:
from sentence_transformers import SentenceTransformer, util
"""
This is a simple application for sentence embeddings: semantic search

We have a corpus with various sentences. Then, for a given query sentence,
we want to find the most similar sentence in this corpus.

This script outputs for various queries the top 3 most similar sentences in the corpus.
"""

def modify_strings(string):
    modified_lst=''
   

    if len(string.split())==1:
        print('string is ',string)
        if string.isupper()==True:         
            modified_lst=string
            
        elif string[0].isupper()==True and string[1].isupper()==False:
            modified_lst=string
#                 print(modified_string)

        else:
            modified_lst=string.title()

    else:     

        modified_string = ''.join(word.capitalize() for word in string.split())
        modified_lst=modified_string
    return modified_lst



def emmo_cls_onto(queries,emmo_lst,node_label,superclass_name,url):
    
    embedder = SentenceTransformer('all-MiniLM-L6-v2')

    # Corpus with example sentences
    cls_lst_onto = emmo_lst
    cls_str_onto = []
    for n in cls_lst_onto:
        cls_str_onto.append(str(n))
    #print(abc_str)


    corpus = cls_str_onto
    corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
    cls_dict ={}
    # Query sentences:
    # queries = class_list
    # Find the closest 3 sentences of the corpus for each query sentence based on cosine similarity
    top_k = min(3, len(corpus))

    subclass_name = queries
#         words = subclass_name.split()
#         capitalized_words = [word.capitalize() for word in words]
#         query = ''.join(capitalized_words)
    query = modify_strings(subclass_name)
    query_embedding = embedder.encode(query, convert_to_tensor=True)

    c_lst=[]
    scr_lst=[]

    # We use cosine-similarity and torch.topk to find the highest 3 scores
    cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
    top_results = torch.topk(cos_scores, k=top_k)
#     print(top_results)
    print("\n\n======================\n\n")
    print("Query:", query)
    print("\nTop 3 most similar sentences in corpus:")

    for score, idx in zip(top_results[0], top_results[1]):
        print('c_lst ',corpus[idx], "(Score: {:.4f})".format(score))
        c_lst.append(corpus[idx].replace('mason.', ''))
        scr_lst.append("{:.4f}".format(score)) 
#         print('c_lst ',c_lst)


    with driver.session() as session:
        with session.begin_transaction() as tx:

            if float(scr_lst[0])>=0.9:
                spr_cls = c_lst[0]  
                count = create_relation(tx,subclass_name ,spr_cls, "IS_A", 'name', 'name',superclass_name,node_label)
                if count == 0:
                    create_relation(tx,subclass_name ,spr_cls, "IS_A", 'name', 'name',superclass_name,node_label)
                else:
                    pass
                
                print(query,c_lst[0],superclass_name)

#                 with session.begin_transaction() as tx:   
#                     if device not in ('','-','nan'):  
#                         properties = {'name': material}
#                         create_or_get_node(tx, "Material", properties)


            elif float(scr_lst[0])<0.6:     
                # Example usage

                # Create a Neo4j driver instance
                # Call the function to create the subclass and "IS_A" relationship
                url_mat='http://www.semanticweb.org/maxdreger/ontologies/2022/10/matter/#'
                create_subclass(driver, node_label, superclass_name, query,relation_emmo_is_a, url_mat)
                count = create_relation(tx, subclass_name ,query, "IS_A",  'name', 'name',superclass_name,node_label)
                if count == 0:
                    create_relation(tx, subclass_name ,query, "IS_A",  'name', 'name',superclass_name,node_label)
                else:
                    pass
                

                print('outside func ',superclass_name ,node_label, query, url_mat)
#            createNewClass(query,spr_cls)

In [19]:
node_label = 'EMMOMatter'
superclass_name='Device'
url_mat='http://www.semanticweb.org/maxdreger/ontologies/2022/10/matter/#'
# d_lst=['Catalysis','Lithium Battery','PEMFC','Fuel Cells','FUELCELLS','FuelCells','Vanadium redox flow battery','VanadiumRedoxFlowBattery','Proton Exchange Membrane Fuel Cell','LithiumBattery','Polymer electrolyte fuel cell','PolymerElectrolyteFuelCell']
cls_dict=emmo_cls_onto(device,device_lst,node_label,superclass_name,url_mat)


string is  ElectrochemicalCell




Query: ElectrochemicalCell

Top 3 most similar sentences in corpus:
c_lst  ElectrochemicalCell (Score: 1.0000)
c_lst  ElectrolyticElectrochemicalCell (Score: 0.9099)
c_lst  GalvanicElectrochemicalCell (Score: 0.8110)
ElectrochemicalCell ElectrochemicalCell Device


In [20]:
# with driver.session() as session:
#     with session.begin_transaction() as tx:
#         node_label = 'EMMOMatter'
#         superclass_name='Device'
#         spr_cls='FuelCell' 
#         query='FuelCells'
#         create_relation(tx,query ,spr_cls, "IS_A", 'name', 'name')

In [21]:
node_label = 'EMMOMatter'
superclass_name='Component'
# c_lst_=['Catalyst layer','Membrane electrode assembly','MEA','Battery Electrode']
cls_dict=emmo_cls_onto(component,component_lst,node_label,superclass_name,url_mat)


string is  WorkingElectrode




Query: WorkingElectrode

Top 3 most similar sentences in corpus:
c_lst  WorkingElectrode (Score: 1.0000)
c_lst  SimpleElectrode (Score: 0.7683)
c_lst  ReferenceElectrode (Score: 0.7392)
WorkingElectrode WorkingElectrode Component


In [22]:
node_label = 'EMMOMatter'
superclass_name='Subcomponent'
# s_lst_=['Catalyst','Gas diffusion electrode','Catalyst Layer']
cls_dict=emmo_cls_onto(subcomponent,subcomponent_lst,node_label,superclass_name,url_mat)



string is  ClothGDL




Query: ClothGDL

Top 3 most similar sentences in corpus:
c_lst  ClothGDL (Score: 1.0000)
c_lst  FeltGDL (Score: 0.5906)
c_lst  PaperGDL (Score: 0.5872)
ClothGDL ClothGDL Subcomponent


In [23]:
node_label = 'EMMOMatter'
superclass_name='Material'
# lst_=['30 wt% Pd/C','5 wt% Pd/C','Gas diffusion layer','0.2 mg/cm2 Pt MEA','platinum','Carbon felt electrode','NMC111 electrode','LixMnO2','Pt']
cls_dict=emmo_cls_onto(material,material_lst,node_label,superclass_name,url_mat)

string is  CarbonBlack




Query: CarbonBlack

Top 3 most similar sentences in corpus:
c_lst  CarbonBlack (Score: 1.0000)
c_lst  AcetyleneBlack (Score: 0.6581)
c_lst  ThermalBlack (Score: 0.6580)
CarbonBlack CarbonBlack Material


In [24]:
# print(Measurement_ID,Experiment_ID=Experiment_ID, Experiment_Title=Experiment_Title, Measurement_date=Measurement_date)

# Get the Characterization Data from file

In [25]:
# Find the row index where 'Organizational Data' is located
start_row = df[df.iloc[:, 0] == 'Characterization Data'].index[0]

# Find the row index where 'Characterization Data' is located
end_row = df[df.iloc[:, 0] == 'Sample preparation'].index[0]

# Extract the organizational data rows
Characterization_measurement_data = df.iloc[start_row + 1: end_row]

# Remove trailing spaces from attribute names
Characterization_measurement_data.loc[:, 0] = Characterization_measurement_data.loc[:, 0].str.strip()

# Extract attributes and values
attributes = Characterization_measurement_data.iloc[:, 0].tolist()
values = Characterization_measurement_data.iloc[:, 1].tolist()

# Create a dictionary of attributes and values
data_dict = dict(zip(attributes, values))

# Generate variable names dynamically and assign values
for attribute, value in zip(attributes, values):
    var_name = attribute.lower().replace(' ', '_').replace('-', '').replace(':', '')
    globals()[var_name] = value

# Print the variables to verify they contain the expected values
for attribute, value in zip(attributes, values):
    var_name = attribute.lower().replace(' ', '_').replace('-', '').replace(':', '')
    print(f"{var_name}: {globals()[var_name]}")


measurement_type: SEMImaging
specimen: homogeneous powder


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  Characterization_measurement_data.loc[:, 0] = Characterization_measurement_data.loc[:, 0].str.strip()


In [26]:
measurement_type

'SEMImaging'

In [27]:
specimen

'homogeneous powder'

In [28]:
def emmo_cls_mapping(queries,emmo_lst,node_label,superclass_name):
    
    embedder = SentenceTransformer('all-MiniLM-L6-v2')

    # Corpus with example sentences
    cls_lst_onto = emmo_lst
    cls_str_onto = []
    for n in cls_lst_onto:
        cls_str_onto.append(str(n))
    #print(abc_str)


    corpus = cls_str_onto
    corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
    cls_dict ={}
    # Query sentences:
    # queries = class_list
    # Find the closest 3 sentences of the corpus for each query sentence based on cosine similarity
    top_k = min(3, len(corpus))

    subclass_name = queries
#         words = subclass_name.split()
#         capitalized_words = [word.capitalize() for word in words]
#         query = ''.join(capitalized_words)
    query = subclass_name
    query_embedding = embedder.encode(query, convert_to_tensor=True)

    c_lst=[]
    scr_lst=[]

    # We use cosine-similarity and torch.topk to find the highest 3 scores
    cos_scores = util.cos_sim(query_embedding, corpus_embeddings)[0]
    top_results = torch.topk(cos_scores, k=top_k)
#     print(top_results)
    print("\n\n======================\n\n")
    print("Query:", query)
    print("\nTop 3 most similar sentences in corpus:")

    for score, idx in zip(top_results[0], top_results[1]):
        print('c_lst ',corpus[idx], "(Score: {:.4f})".format(score))
        c_lst.append(corpus[idx].replace('mason.', ''))
        scr_lst.append("{:.4f}".format(score)) 
#         print('c_lst ',c_lst)


    with driver.session() as session:
        with session.begin_transaction() as tx:

            if float(scr_lst[0])>=0.9:
                spr_cls = c_lst[0]  
                return spr_cls
    


In [29]:
name_property='name'
is_a_rel='IS_A'

with driver.session() as session:
    with session.begin_transaction() as tx:
        if measurement_type not in ('','-','nan'):    
            properties = {'name': measurement_type}
            create_or_get_node(tx, "MeasurementType",'name', properties)


        count = create_relation(tx, measurement_type, measurement_type, is_a_rel, name_property, name_property,'MeasurementType', 'EMMOProcess')

        if count == 0:
        #                 print("Relationship IN doesn't exist, creating it...")
            create_relation(tx, institution_, country_, is_a_rel, name_property, name_property,'MeasurementType', 'EMMOProcess')
        else:
            pass
        
        
        count = create_relation(tx, measurementid, measurement_type, 'HAS_MEASUREMENT_TYPE', name_property, name_property,'Measurement', 'MeasurementType')

        if count == 0:
        #                 print("Relationship IN doesn't exist, creating it...")
            create_relation(tx, institution_, country_, 'HAS_MEASUREMENT_TYPE', name_property, name_property,'MeasurementType', 'EMMOProcess')
        else:
            pass

In [30]:
# Find the row index where 'Organizational Data' is located

# Find the row index where 'Sample preparation' is located
# Find the row index where 'Sample preparation' is located
start_row = 'Sample preparation'
end_row = 'Characterization environment'

start_index = df[df.iloc[:, 0] == start_row].index[0]
end_index = df[df.iloc[:, 0] == end_row].index[0]

# Extract the desired table based on the row indices
table_data = df.iloc[start_index+1:end_index]

# Assign the first row as column names
table_data.columns = table_data.iloc[0]
table_data = table_data.iloc[1:].reset_index(drop=True)

# Remove the unwanted numerical column
table_data = table_data.loc[:, ~table_data.columns.astype(str).str.isnumeric()]
table_data


23,Step,Precursor,Amount,Technique,Condition,Target,Amount.1,NaN
0,1,"CarbonBlack, WorkingElectrode, AcidicElectroch...","41 mm, 34 mm, 24 mm, 87mm",SamplePreparation,"Duration:24 hour, Temperature: 20 °C",F30E,5 mm,
1,2,"F30E, StainlessSteelBasedCarbonBasedCoatedBipo...","5mm, 25 mm, 56 mm",MaterialsManufacturing,"Temperature: 50 °C, Duration: 10 min, Electric...",PTFESubstrate,6 mm,
2,3,"PTFESubstrate, PaperLinedCell, BatteryCell","6mm, 3 mm, 33 mm",5LayerMEAManufacturing,Duration:24 hour,Polyepoxide,26 mm,
3,4,"Polyepoxide, IonicLiquidElectrolyte, NafionMem...","26mm, 2 mm, 8 mm",AdditiveManufacturing,Duration:34 min,ClothGDL,16 mm,
4,5,"ClothGDL, ElectricCurrentCollector","16mm, 30 mm",FuelCellManufacturing,Temperature:12 mm,Sample,7 mm,


In [31]:
# Loop through each row and print the column values
for index, row in table_data.iterrows():
    print(f"Row {index+1}:")
    for column, value in row.items():
        if not pd.isna(value):
            print(f"{column}: {value}")
    print()

Row 1:
Step: 1
Precursor: CarbonBlack, WorkingElectrode, AcidicElectrochemicalCell, Cathode
Amount: 41 mm, 34 mm, 24 mm, 87mm
Technique: SamplePreparation
Condition: Duration:24 hour, Temperature: 20 °C
Target: F30E
Amount: 5 mm

Row 2:
Step: 2
Precursor: F30E, StainlessSteelBasedCarbonBasedCoatedBipolarPlate, LampBlack
Amount: 5mm, 25 mm, 56 mm
Technique: MaterialsManufacturing
Condition: Temperature: 50 °C, Duration: 10 min, ElectricCurrent:250 pA, Energy:5 keV
Target: PTFESubstrate
Amount: 6 mm

Row 3:
Step: 3
Precursor: PTFESubstrate, PaperLinedCell, BatteryCell
Amount: 6mm, 3 mm, 33 mm
Technique: 5LayerMEAManufacturing
Condition: Duration:24 hour
Target: Polyepoxide
Amount: 26 mm

Row 4:
Step: 4
Precursor: Polyepoxide, IonicLiquidElectrolyte, NafionMembrane
Amount: 26mm, 2 mm, 8 mm
Technique: AdditiveManufacturing
Condition: Duration:34 min
Target: ClothGDL
Amount: 16 mm

Row 5:
Step: 5
Precursor: ClothGDL, ElectricCurrentCollector
Amount: 16mm, 30 mm
Technique: FuelCellManufactur

In [32]:
sample_name = "SamplePreparation_" + measurementid
with driver.session() as session:
    with session.begin_transaction() as tx:
        properties = {'name': sample_name}
        create_or_get_node(tx, "SamplePreparation",'name', properties)
        create_relation(tx, measurementid, sample_name, 'HAS_SAMPLEPREPARATION', 'name', 'name', 'Measurement', 'SamplePreparation')

In [33]:
def create_SamplePrepsubclass(tx, node_label, superclass_name, subclass_name):
    result = tx.run(
        "MATCH (superclass:" + node_label + ") "
        "WHERE superclass.name = $superclassName "
        "CREATE (subclass:" + node_label + " {name: $subclassName})"
        "MERGE (subclass)-[:IS_A]->(superclass) "
        "RETURN subclass",
        superclassName=superclass_name,
        subclassName=subclass_name
    )
    return result.single()


def create_subclass(driver, node_label, superclass_name, subclass_name,relationship_name, url):
    with driver.session() as session:
        result = session.run(
            "MATCH (superclass:" + node_label + ") "
            "WHERE superclass.name = $superclassName "
            "MERGE (subclass:" + node_label + " { name: $subclassName }) "
            "ON CREATE SET subclass.url = $url "
            "MERGE (subclass)-[:"+relationship_name+"]->(superclass) "
            "RETURN subclass, superclass",
            superclassName=superclass_name,
            subclassName=subclass_name,
            url=url + subclass_name  # Append subclass_name to the URL
        )

In [34]:
# relation_has_sampleprep = 'HAS_SAMPLEPREPARATION'
# url_mat = 'http://www.semanticweb.org/maxdreger/ontologies/2022/10/SamplePreparation/#'

# with driver.session() as session:
#     for index, row in table_data.iterrows():
#         step_number = row['Step']
#         sample_name = "SamplePreparation_" + measurementid
#         step_name = 'Step_' + str(step_number) + '_' + measurementid

#         create_subclass(driver, "SamplePreparation", sample_name, step_name, relation_has_sampleprep, url_mat)
#         with session.begin_transaction() as tx:
#             create_relation(tx, measurementid, sample_name, 'HAS_SAMPLEPREPARATION', 'name', 'name', 'Measurement', 'SamplePreparation')


In [35]:

# with driver.session() as session:
#     with session.begin_transaction() as tx:
#         for i in range(1, h):
#             create_relation(tx, i, i + 1, "FOLLOWED_BY", 'name', 'name', 'SamplePreparation', 'SamplePreparation')
            
#         tx.commit()

In [36]:
# # Define the column-to-variable mapping
# column_mapping = {
#     'Step': 'step',
#     'Precursor': 'precursor',
#     'Amount': 'amount',
#     'Technique': 'technique',
#     'Condition': 'condition',
#     'Target': 'target'
# }

# # Define variables
# variables = {variable: '' for variable in column_mapping.values()}

# # Loop through each row and store the column values dynamically
# for index, row in table_data.iterrows():
#     print(f"Row {index+1}:")
#     amounts = []
#     for column, value in row.items():
#         if not pd.isna(value):
#             if column == 'Amount':
#                 amounts.append(value)
#             else:
#                 variable_name = column_mapping.get(column)
#                 if variable_name:
#                     variables[variable_name] = value
    
#     # Store the amounts separately
#     amount1, amount2 = amounts[:2]
    
#     # Access the values from the variables dictionary
#     step = variables['step']
#     precursor = variables['precursor']
#     technique = variables['technique']
#     condition = variables['condition']
#     target = variables['target']
    
#     print(step, precursor, amount1, technique, condition, target, amount2)
    
#     print()


# get materials again-

In [37]:
node_label_mat = 'EMMOMatter'
node_label_tech ='EMMOProcess'
node_label_quantity ='EMMOQuantity'
superclass_step='SamplePreparation'
sample_name = "SamplePreparation_" + measurementid
superclass_mat = 'Material'
superclass_comp = 'Component'
superclass_subcomp = 'Subcomponent'
superclass_device = 'Device'
superclass_tech='Manufacturing'
superclass_para='Parameter'
is_manufacturing_property='HAS_FLOAT_VALUE'
has_manufacturing_input='HAS_MANUFACTURING_INPUT'
is_manufacturing_input='IS_MANUFACTURING_INPUT'
is_manufacturing_output='IS_MANUFACTURING_OUTPUT'
has_parameter_rel='HAS_PARAMETER'


In [38]:
# prec='BatteryCell'
# superclass_val = superclass_device
# emmo_cls_mapping(prec, device_lst, node_label_mat, superclass_val)

In [39]:
def create_rel_property(tx, node1, node2, relation, property1, property2, label1, label2, properties):
    query = f"MATCH (n1:{label1}), (n2:{label2}) WHERE n1.{property1} = $name1 AND n2.{property2} = $name2 " \
            f"MERGE (n1)-[r:{relation}]->(n2) " \
            f"SET r += $properties " \
            f"RETURN count(r)"
    result = tx.run(query, name1=node1, name2=node2, properties=properties)
    count = result.single()[0]
    return count


In [40]:
import re

def extract_numeric_value(item):
    pattern = r'(\d+(\.\d+)?)'  # Regex pattern to match numeric values
    
    matches = re.findall(pattern, item)
    if matches:
        numeric_value = float(matches[0][0])
        return numeric_value
    else:
        return None
    


def extract_parameter_names(item):
    pattern = r'(\w+):'  # Regex pattern to match variable names

    variable_names = []
    match = re.search(pattern, item)
    if match:
        variable_name = match.group(1)
        variable_names.append(variable_name)

    if len(variable_names) == 1:
        return variable_names[0]
    else:
        return variable_names


In [41]:
import random
from datetime import datetime

def generate_random_id():
    # Get the current date and time
    current_datetime = datetime.now()
    
    # Format the date and time as desired (e.g., YYYYMMDD_HHMMSS)
    datetime_string = current_datetime.strftime("%Y%m%d_%H%M%S")
    
    # Generate a random number or string
    random_part = str(random.randint(1000, 999999))
    
    # Combine the date, time, and random part to create the ID
    random_id = datetime_string + random_part
    
    return random_id


In [42]:
generate_random_id()

'20230621_161249991011'

In [43]:
# Define the column-to-variable mapping
column_mapping = {
    'Step': 'step',
    'Precursor': 'precursor',
    'Amount': 'amount',
    'Technique': 'technique',
    'Condition': 'condition',
    'Target': 'target'
}

# Define variables
variables = {variable: '' for variable in column_mapping.values()}
previous_node=sample_name



# Loop through each row and store the column values dynamically
for index, row in table_data.iterrows():
    print(f"Row {index+1}:")
    amounts = []
    for column, value in row.items():
        if not pd.isna(value):
            if column == 'Amount':
                amounts.append(value)
            else:
                variable_name = column_mapping.get(column)
                if variable_name:
                    variables[variable_name] = value
    
    # Store the amounts separately
    amount1, amount2 = amounts[:2]
    print(amount1)
    # Access the values from the variables dictionary
    step = variables['step']
    step_name='Step_'+str(step)+'_'+measurementid
    precursor = variables['precursor']
    technique = variables['technique']
    condition = variables['condition']
    target = variables['target']
    
    precursor_list = precursor.split(', ')
    amount1_list= amount1.split(', ')
    
    with driver.session() as session:
        with session.begin_transaction() as tx:
            
            if technique not in ('','-','nan'):
                properties = {name_property: technique}
                create_or_get_node_smp(tx,superclass_tech,name_property, properties)
                        
        

    print(precursor_list)        
    superclass_val=''                
    
#     for prec in precursor_list:
    for prec, amt in zip(precursor_list, amount1_list):
        print('This is the prec: ', prec, ' This is the amount: ', amt)
        with driver.session() as session:
            with session.begin_transaction() as tx:
                superclass_val = superclass_mat
                prec_val = emmo_cls_mapping(prec, material_lst, node_label_mat, superclass_val)
                if prec_val is not None:
                    properties = {name_property: prec}
                    create_or_get_node_smp(tx, superclass_step, name_property, properties)
                    create_relation(tx,prec ,prec_val, is_a_rel, name_property, name_property,superclass_step,node_label_mat)
                    print(prec_val, ' its here ', superclass_val)
                else:
                    superclass_val = superclass_comp
                    prec_val = emmo_cls_mapping(prec, component_lst, node_label_mat, superclass_val)
                    if prec_val is not None:
                        properties = {name_property: prec}
                        create_or_get_node_smp(tx, superclass_step, name_property, properties)
                        create_relation(tx,prec ,prec_val, is_a_rel,name_property, name_property,superclass_step,node_label_mat)
                        print(prec_val, ' its here ', superclass_val)
                    else:
                        superclass_val = superclass_device
                        prec_val = emmo_cls_mapping(prec, device_lst, node_label_mat, superclass_val)
                        if prec_val is not None:
                            properties = {name_property: prec}
                            create_or_get_node_smp(tx, superclass_step, name_property, properties)
                            create_relation(tx,prec ,prec_val, is_a_rel, name_property,name_property,superclass_step,node_label_mat)
                            print(prec_val,' ',prec, ' its here ', superclass_val)
                        else:
                            superclass_val = superclass_subcomp
                            prec_val = emmo_cls_mapping(prec, subcomponent_lst, node_label_mat, superclass_val)
                            if prec_val is not None:
                                properties = {name_property: prec}
                                create_or_get_node_smp(tx, superclass_step, name_property, properties)
                                create_relation(tx,prec ,prec_val, is_a_rel,name_property, name_property,superclass_step,node_label_mat)
                                print(prec_val, ' its here ', superclass_val)
                            else:
                                pass
        
                create_relation(tx, previous_node , prec_val, has_manufacturing_input, name_property, name_property,superclass_step,superclass_step)
                numeric_amt1=extract_numeric_value(amt)
                properties = {
                    is_manufacturing_property: numeric_amt1
                }
                create_rel_property(tx, prec_val , technique, is_manufacturing_input, name_property, name_property,superclass_step,superclass_tech,properties)

                
                
                
    tech_manuf=emmo_cls_mapping(technique,manufacturing_lst,node_label_tech, superclass_mat)   
    
    
    superclass_val = superclass_mat
    tar_var = emmo_cls_mapping(target, material_lst, node_label_mat, superclass_val)
    with driver.session() as session:
        with session.begin_transaction() as tx:
            if tar_var is not None:
                properties = {name_property: target}
                create_or_get_node_smp(tx, superclass_step,name_property, properties)
                create_relation(tx,target ,target, is_a_rel,name_property, name_property,superclass_step,node_label_mat)
            else:
                superclass_val = superclass_comp
                tar_var = emmo_cls_mapping(target, component_lst, node_label_mat, superclass_val)
                if tar_var is not None:
                    properties = {name_property: target}
                    create_or_get_node_smp(tx, superclass_step, name_property, properties)
                    create_relation(tx,target ,tar_var, is_a_rel, name_property, name_property,superclass_step,node_label_mat)
                else:
                    superclass_val = superclass_device
                    tar_var = emmo_cls_mapping(target, device_lst, node_label_mat, superclass_val)
                    if tar_var is not None:
                        properties = {name_property: target}
                        create_or_get_node_smp(tx, superclass_step, name_property, properties)
                        create_relation(tx,target ,tar_var,is_a_rel, name_property, name_property,superclass_step,node_label_mat)
                    else:
                        superclass_val = superclass_subcomp
                        tar_var = emmo_cls_mapping(target, subcomponent_lst, node_label_mat, superclass_val)
                        if tar_var is not None:
                            properties = {name_property: target}
                            create_or_get_node_smp(tx, superclass_step, name_property, properties)
                            create_relation(tx,target ,tar_var, is_a_rel, name_property, name_property,superclass_step,node_label_mat)
                        else:
                            pass
            numeric_amt2=extract_numeric_value(amount2)
            properties = {
                    is_manufacturing_property: numeric_amt2
                }
            create_rel_property(tx, technique ,tar_var , is_manufacturing_output, name_property ,name_property,superclass_tech,superclass_step,properties )
            create_relation(tx,technique ,tech_manuf, is_a_rel, name_property, name_property,superclass_tech,node_label_tech)
    
    
    condition_list = condition.split(', ') 
    for condition_ in condition_list:
        with driver.session() as session:
            with session.begin_transaction() as tx:
                 if condition_ not in ('','-','nan'):
                    condition=extract_parameter_names(condition_)
                    prop_name=condition+'_id'
                    unique_id=generate_random_id()
                    properties = {name_property: condition,'measurementid':measurementid, 'stepid':step, prop_name:  unique_id}
                    create_or_get_node_smp(tx, superclass_para,'prop_name', properties)       
                    numeric_condition=extract_numeric_value(condition_)
                    properties = {is_manufacturing_property: numeric_condition}
                    create_relation(tx, unique_id, condition, is_a_rel, prop_name, name_property,superclass_para,node_label_quantity)
                    create_rel_property(tx, tech_manuf, unique_id, has_parameter_rel, name_property, prop_name ,superclass_tech,superclass_para, properties)
    previous_node= tar_var          
#     print(step, precursor, mat, amount1, technique, condition, target, amount2)
    
#     print()

# create_relation                   (tx, orcid_, country_, 'IN', 'orcid', 'name','Researcher', 'Country')
               


Row 1:
41 mm, 34 mm, 24 mm, 87mm
['CarbonBlack', 'WorkingElectrode', 'AcidicElectrochemicalCell', 'Cathode']
This is the prec:  CarbonBlack  This is the amount:  41 mm




Query: CarbonBlack

Top 3 most similar sentences in corpus:
c_lst  CarbonBlack (Score: 1.0000)
c_lst  AcetyleneBlack (Score: 0.6581)
c_lst  ThermalBlack (Score: 0.6580)
CarbonBlack  its here  Material
This is the prec:  WorkingElectrode  This is the amount:  34 mm




Query: WorkingElectrode

Top 3 most similar sentences in corpus:
c_lst  ActiveMaterial (Score: 0.3826)
c_lst  FunctionalMaterial (Score: 0.3433)
c_lst  ManufacturedMaterial (Score: 0.3277)




Query: WorkingElectrode

Top 3 most similar sentences in corpus:
c_lst  WorkingElectrode (Score: 1.0000)
c_lst  SimpleElectrode (Score: 0.7683)
c_lst  ReferenceElectrode (Score: 0.7392)
WorkingElectrode  its here  Component
This is the prec:  AcidicElectrochemicalCell  This is the amount:  24 mm




Query: AcidicElectrochemicalCell

Top 3 most similar sentences in

In [44]:
# # Define the column-to-variable mapping
# column_mapping = {
#     'Step': 'step',
#     'Precursor': 'precursor',
#     'Amount': 'amount',
#     'Technique': 'technique',
#     'Condition': 'condition',
#     'Target': 'target'
# }

# # Define variables
# variables = {variable: '' for variable in column_mapping.values()}

# # Loop through each row and store the column values dynamically
# for index, row in table_data.iterrows():
#     print(f"Row {index+1}:")
#     amounts = []
#     for column, value in row.items():
#         if not pd.isna(value):
#             if column == 'Amount':
#                 amounts.append(value)
#             else:
#                 variable_name = column_mapping.get(column)
#                 if variable_name:
#                     variables[variable_name] = value
    
#     # Store the amounts separately
#     amount1, amount2 = amounts[:2]
    
#     # Access the values from the variables dictionary
#     step = variables['step']
#     step_name='Step_'+str(step)+'_'+measurementid
#     precursor = variables['precursor']
#     technique = variables['technique']
#     condition = variables['condition']
#     target = variables['target']
    
#     precursor_list = precursor.split(', ')
    
    
#     with driver.session() as session:
#         with session.begin_transaction() as tx:
#             condition_list = condition.split(', ') 
#             for condition_ in condition_list:
#                 if condition_ not in ('','-','nan'):
#                     properties = {'name': condition_}
#                     create_or_get_node(tx, "Parameter",'name', properties)

#             if technique not in ('','-','nan'):
#                 properties = {'name': technique}
#                 create_or_get_node(tx, "Manufacturing",'name', properties)
                        
# #             if amount1 not in ('','-','nan'):
# #                 properties = {'name': amount1}
# #                 create_or_get_node(tx, "Parameter",'name', properties)
                
# #             if amount2 not in ('','-','nan'):
# #                 properties = {'name': amount2}
# #                 create_or_get_node(tx, "Parameter",'name', properties)
        

#     print(precursor_list)        
#     superclass_val=''                
#     for prec in precursor_list:
#         print('This is the prec: ', prec)
#         with driver.session() as session:
#             with session.begin_transaction() as tx:
#                 superclass_val = superclass_mat
#                 prec_val = emmo_cls_mapping(prec, material_lst, node_label_mat, superclass_val)
#                 if prec_val is not None:
#                     properties = {'name': prec}
#                     create_or_get_node(tx, superclass_val, 'name', properties)
#                     create_relation(tx,prec ,prec, "IS_A", 'name', 'name',superclass_val,node_label_mat)
#                     print(prec_val, ' its here ', superclass_val)
#                 else:
#                     superclass_val = superclass_comp
#                     prec_val = emmo_cls_mapping(prec, component_lst, node_label_mat, superclass_val)
#                     if prec_val is not None:
#                         properties = {'name': prec}
#                         create_or_get_node(tx, superclass_val, 'name', properties)
#                         create_relation(tx,prec ,prec, "IS_A", 'name', 'name',superclass_val,node_label_mat)
#                         print(prec_val, ' its here ', superclass_val)
#                     else:
#                         superclass_val = superclass_device
#                         prec_val = emmo_cls_mapping(prec, device_lst, node_label_mat, superclass_val)
#                         if prec_val is not None:
#                             properties = {'name': prec}
#                             create_or_get_node(tx, superclass_val, 'name', properties)
#                             create_relation(tx,prec ,prec, "IS_A", 'name', 'name',superclass_val,node_label_mat)
#                             print(prec_val,' ',prec, ' its here ', superclass_val)
#                         else:
#                             superclass_val = superclass_subcomp
#                             prec_val = emmo_cls_mapping(prec, subcomponent_lst, node_label_mat, superclass_val)
#                             if prec_val is not None:
#                                 properties = {'name': prec}
#                                 create_or_get_node(tx, superclass_val, 'name', properties)
#                                 create_relation(tx,prec ,prec, "IS_A", 'name', 'name',superclass_val,node_label_mat)
#                                 print(prec_val, ' its here ', superclass_val)
#                             else:
#                                 pass
        
#                 create_relation(tx, step_name , prec_val, 'HAS_PART', 'name', 'name',superclass_step,superclass_val)
#                 print('Created relationship: ',step_name ,'HAS_PART', prec_val)
#                 create_relation(tx, prec_val, amount1, 'HAS_PARAMETER', 'name', 'name',superclass_val,superclass_para)
#                 print('Created relationship: ',prec_val ,'HAS_PARAMETER', amount1)
#                 create_relation(tx, prec_val , technique, 'IS_MANUFACTURING_INPUT', 'name', 'name',superclass_val,superclass_tech)
#                 print('Created relationship: ',prec_val ,'IS_MANUFACTURING_INPUT', technique)
                
                
# #         with driver.session() as session:
# #             with session.begin_transaction() as tx:
                
                
                
#     tech_manuf=emmo_cls_mapping(technique,manufacturing_lst,node_label_tech,superclass_tech)   
    
    
#     superclass_val = superclass_mat
#     tar_var = emmo_cls_mapping(target, material_lst, node_label_mat, superclass_val)
#     with driver.session() as session:
#         with session.begin_transaction() as tx:
#             if tar_var is not None:
#                 properties = {'name': target}
#                 create_or_get_node(tx, superclass_val, 'name', properties)
#                 create_relation(tx,target ,target, "IS_A", 'name', 'name',superclass_val,node_label_mat)
#             else:
#                 superclass_val = superclass_comp
#                 tar_var = emmo_cls_mapping(target, component_lst, node_label_mat, superclass_val)
#                 if tar_var is not None:
#                     properties = {'name': target}
#                     create_or_get_node(tx, superclass_val, 'name', properties)
#                     create_relation(tx,target ,target, "IS_A", 'name', 'name',superclass_val,node_label_mat)
#                 else:
#                     superclass_val = superclass_device
#                     tar_var = emmo_cls_mapping(target, device_lst, node_label_mat, superclass_val)
#                     if tar_var is not None:
#                         properties = {'name': target}
#                         create_or_get_node(tx, superclass_val, 'name', properties)
#                         create_relation(tx,target ,target, "IS_A", 'name', 'name',superclass_val,node_label_mat)
#                     else:
#                         superclass_val = superclass_subcomp
#                         tar_var = emmo_cls_mapping(target, subcomponent_lst, node_label_mat, superclass_val)
#                         if tar_var is not None:
#                             properties = {'name': target}
#                             create_or_get_node(tx, superclass_val, 'name', properties)
#                             create_relation(tx,target ,target, "IS_A", 'name', 'name',superclass_val,node_label_mat)
#                         else:
#                             pass

#             create_relation(tx, technique ,tar_var , 'IS_MANUFACTURING_OUTPUT', 'name', 'name',superclass_tech,superclass_val )
#             create_relation(tx, tar_var, amount2, 'HAS_PARAMETER', 'name', 'name',superclass_val,superclass_para)
            
    
#     for condition_ in condition_list:
#         with driver.session() as session:
#             with session.begin_transaction() as tx:
#                 create_relation(tx, technique, condition_, 'HAS_PARAMETER', 'name', 'name',superclass_tech,superclass_para)
                
# #     print(step, precursor, mat, amount1, technique, condition, target, amount2)
    
# #     print()
