## Query structure mirrors pif structure
* You can query subsystems, processing steps, properties, conditions of properties etc. by creating a query that matches the object hierarchy to the section you want to query


In [1]:
from citrination_client import CitrinationClient
from citrination_client import PifSystemReturningQuery, PifSystemQuery, FieldQuery, ValueQuery
from citrination_client import PropertyQuery, DataQuery, DatasetQuery, ChemicalFieldQuery, ChemicalFilter, Filter

from os import environ
from pypif import pif

client = CitrinationClient(environ['CITRINATION_API_KEY'], 'https://citrination.com')

## Flattening the PIF structure

extract_as creates a flattened dictionary structure mapping user supplied keys to objects in the PIF that match within the query

extract_all is an option for extract_as that pulls a list of all objects at the level in the hierarchy that match the query

Let's search for the "Enthalpy of Formation" property:

In [5]:
dataset_id = 150675
query_size = 10

query = PifSystemReturningQuery(
            size=query_size,
            query=DataQuery(
                dataset=DatasetQuery(
                    id=[Filter(equal=str(dataset_id))]
                ),
                chemical_formula=ChemicalFieldQuery(
                    extract_as='formula',
                    filter=ChemicalFilter(
                            equal='CdTe')),
                system=PifSystemQuery(
                    properties=PropertyQuery(
                        extract_all=True,
                        name=FieldQuery(
                            filter=[Filter(equal="Enthalpy of Formation")]),
                        value=FieldQuery(
                            extract_as="formation_enthalpy",
                            extract_all=True)
                    )
                )
            ))

query_result = client.search.pif_search(query)
print( "{} hits\n".format(query_result.total_num_hits))
print( "Extracted fields:")
for i in range(2):
    print( pif.dumps(query_result.hits[i].extracted, indent=2))

69640 hits

Extracted fields:
{
  "formation_enthalpy": [
    "0.0"
  ]
}
{
  "formation_enthalpy": [
    "0.1074050600000005"
  ]
}


## Chemical formula search
Citrine has developed specialized search functionality specifically for chemical formulas. The analyser parses the search string and recognizes chemical entities such as elements and stoichiometries to find chemically relevant results.

1. You can use a `simple_chemical_search` with a simple search string like "PbSe" or,
2. You can structure a `PifSystemReturningQuery` with more detailed elemental and stoichiometric strings.

Let's search over the Materials Project dataset using ```mp_dataset_id = 150675``` as the dataset_id.

In [22]:
mp_dataset_id = 150675
query = client.search.generate_simple_chemical_query(chemical_formula="PbSe", include_datasets=[mp_dataset_id])
search_result = client.search.pif_search(query)
print( "{} hits \n".format(search_result.total_num_hits))

for i in range(search_result.total_num_hits):
    print( pif.dumps(search_result.hits[i].extracted))
    print("\n")

4 hits 

{"property_units": "g/cm$^3$", "name": "Lead selenide - HP", "property_value": "8.767862185821011", "chemical_formula": "PbSe", "property_name": "Density"}


{"property_units": "g/cm$^3$", "name": "Lead selenide", "property_value": "4.062929739915243", "chemical_formula": "PbSe", "property_name": "Density"}


{"property_units": "g/cm$^3$", "name": "Clausthalite", "property_value": "7.872521935843158", "chemical_formula": "PbSe", "property_name": "Density"}


{"property_units": "g/cm$^3$", "name": "Nickel lead selenide (3/2/2)", "property_value": "9.207453481307569", "chemical_formula": "Ni3(PbSe)2", "property_name": "Density"}




Now let's explore the different filters we can apply to chemical formulas.

In [27]:

query = PifSystemReturningQuery(
            size=5,
            random_results=True,
            query=DataQuery(
                dataset=DatasetQuery(
                    id=[Filter(equal=str(mp_dataset_id))]
                ),
                system=PifSystemQuery(
                    chemical_formula=ChemicalFieldQuery(
                        extract_all=True,
                        extract_as='formula'),
                    )
                )
            )

search_result = client.search.pif_search(query)
print( "{} hits".format(search_result.total_num_hits))
for i in range(5):
    print( pif.dumps(search_result.hits[i].extracted))
    

query = PifSystemReturningQuery(
            size=5,
            random_results=True,
            query=DataQuery(
                dataset=DatasetQuery(
                    id=[Filter(equal=str(mp_dataset_id))]
                ),
                system=PifSystemQuery(
                    chemical_formula=ChemicalFieldQuery(
                        extract_all=True,
                        extract_as='formula',
                        filter=ChemicalFilter(
                            equal='Ga')
                    )
                )
            )
        )

search_result = client.search.pif_search(query)
print( "{} hits".format(search_result.total_num_hits))
for i in range(5):
    print( pif.dumps(search_result.hits[i].extracted))


query = PifSystemReturningQuery(
            size=5,
            random_results=True,
            query=DataQuery(
                dataset=DatasetQuery(
                    id=[Filter(equal=str(mp_dataset_id))]
                ),
                system=PifSystemQuery(
                    chemical_formula=ChemicalFieldQuery(
                        extract_all=True,
                        extract_as='formula',
                        filter=ChemicalFilter(
                            equal='?x?y?z')
                    )
                )
            )
        )

search_result = client.search.pif_search(query)
print( "{} hits".format(search_result.total_num_hits))
for i in range(5):
    print( pif.dumps(search_result.hits[i].extracted))


query = PifSystemReturningQuery(
            size=5,
            random_results=True,
            query=DataQuery(
                dataset=DatasetQuery(
                    id=[Filter(equal=str(mp_dataset_id))]
                ),
                system=PifSystemQuery(
                    chemical_formula=ChemicalFieldQuery(
                        extract_all=True,
                        extract_as='formula',
                        filter=ChemicalFilter(
                            equal='?xOy')
                    )
                )
            )
        )

search_result = client.search.pif_search(query)
print( "{} hits".format(search_result.total_num_hits))
for i in range(5):
    print( pif.dumps(search_result.hits[i].extracted))
    

query = PifSystemReturningQuery(
            size=5,
            random_results=True,
            query=DataQuery(
                dataset=DatasetQuery(
                    id=[Filter(equal=str(mp_dataset_id))]
                ),
                system=PifSystemQuery(
                    chemical_formula=ChemicalFieldQuery(
                        extract_all=True,
                        extract_as='formula',
                        filter=ChemicalFilter(
                            equal='?1O1')
                    )
                )
            )
        )

search_result = client.search.pif_search(query)
print( "{} hits".format(search_result.total_num_hits))
for i in range(5):
    print( pif.dumps(search_result.hits[i].extracted))

69640 hits
{"formula": ["Na(FeO2)2"]}
{"formula": ["CaV(SiO3)2"]}
{"formula": ["LuAgSn"]}
{"formula": ["La4C2Cl5"]}
{"formula": ["Li3V3(BO5)2"]}
23 hits
{"formula": ["Ga(MoSe2)4"]}
{"formula": ["Ga(MoSe2)4"]}
{"formula": ["Ga"]}
{"formula": ["GaS"]}
{"formula": ["Ga"]}
33017 hits
{"formula": ["Pr2NCl3"]}
{"formula": ["K7TaAs4"]}
{"formula": ["Er(MnGe)2"]}
{"formula": ["GdMnGe"]}
{"formula": ["Rb(MoS)3"]}
1577 hits
{"formula": ["CuO"]}
{"formula": ["ZrO2"]}
{"formula": ["SiO2"]}
{"formula": ["Fe3O4"]}
{"formula": ["SiO2"]}
121 hits
{"formula": ["CuO"]}
{"formula": ["NbO"]}
{"formula": ["FeO"]}
{"formula": ["RbO"]}
{"formula": ["TiO"]}


## Logical operations

We can also include the following logical operations on the filters: `SHOULD, MUST, OPTIONAL, MUST_NOT`

In [28]:
query = PifSystemReturningQuery(
            size=5,
            random_results=True,
            query=DataQuery(
                dataset=DatasetQuery(
                    id=[Filter(equal=str(mp_dataset_id))]
                ),
                system=PifSystemQuery(
                    chemical_formula=[
                        ChemicalFieldQuery(
                            extract_as='formula',
                            filter=ChemicalFilter(
                                equal='?1O1'),
                            logic="MUST_NOT"),
                        ChemicalFieldQuery(
                        extract_as='formula',
                            filter=ChemicalFilter(
                                equal='?xOy')
                        )]
                )
            )
        )

search_result = client.search.pif_search(query)
print( "{} hits\n".format(search_result.total_num_hits))
for i in range(5):
    print( pif.dumps(search_result.hits[i].extracted))


    
query = PifSystemReturningQuery(
            size=5,
            random_results=True,
            query=DataQuery(
                dataset=DatasetQuery(
                    id=[Filter(equal=str(mp_dataset_id))]
                ),
                system=PifSystemQuery(
                    chemical_formula=ChemicalFieldQuery(
                        extract_as='formula'
                    ),
                    properties=[
                        PropertyQuery(
                            name=FieldQuery(
                                filter=[Filter(equal="Enthalpy of Formation")]),
                            value=FieldQuery(
                                extract_as="H_f",
                                logic="MUST")
                        ),
                        PropertyQuery(
                            name=FieldQuery(
                                filter=[Filter(equal="Band Gap")]),
                            value=FieldQuery(
                                filter=[Filter(min=1E-5)],
                                extract_as="bandgap",
                                logic="MUST")
                        ),
                         PropertyQuery(
                            name=FieldQuery(
                                filter=[Filter(equal="Crystal System")]),
                            value=FieldQuery(
                                extract_as="crystal system",
                                logic="SHOULD")
                        )]
                )
            )
        )

search_result = client.search.pif_search(query)
print( "{} hits\n".format(search_result.total_num_hits))
for i in range(5):
    print( pif.dumps(search_result.hits[i].extracted, indent=2))

1456 hits

{"formula": "Tb5O9"}
{"formula": "TiO2"}
{"formula": "HoO3"}
{"formula": "VO2"}
{"formula": "P2O3"}
69640 hits

{
  "crystal system": "trigonal",
  "bandgap": "3.5443000000000002",
  "formula": "Si2Mo4P6O25",
  "H_f": "-2.6705706557609394"
}
{
  "crystal system": "triclinic",
  "bandgap": "0.6514000000000006",
  "formula": "KSbO3",
  "H_f": "-1.39231271975"
}
{
  "crystal system": "monoclinic",
  "bandgap": "2.2218",
  "formula": "Mg3(HO3)2",
  "H_f": "-1.8332794854309091"
}
{
  "crystal system": "orthorhombic",
  "bandgap": "0.6036000000000001",
  "formula": "PdS2",
  "H_f": "-0.7156774493750001"
}
{
  "crystal system": "monoclinic",
  "bandgap": "0.8796999999999999",
  "formula": "Li2Co(PO3)5",
  "H_f": "-2.5019288754011386"
}
