In [2]:
import pandas as pd
import numpy as np
import pymatgen as mg

from pymatgen import MPRester

# Credentials stay out of the notebook: called without an argument, MPRester
# falls back to the PMG_MAPI_KEY pymatgen setting (e.g. from ~/.pmgrc.yaml).
# The API key that used to be hardcoded on this line has been exposed and
# should be revoked and regenerated.
a = MPRester()

def get_atomic_info(element):
    """
    Look up the full list of tabulated properties for one element.

    :param element: element symbol as a string, capitalized as on the
        periodic table (e.g. "Ca").
    :return: list with one entry per name in ``keywords`` (25 entries),
        in that order; each entry is whatever the ``mg.Element`` attribute
        of that name holds.
    :raises TypeError: if ``element`` is not a string.
    :raises AssertionError: if ``element`` is empty or does not start with
        an upper-case letter.
    """
    # Reject non-strings up front with a readable message instead of the
    # incidental "object is not subscriptable" error from indexing them.
    if not isinstance(element, str):
        raise TypeError("Element must be a string, got "
                        + type(element).__name__)

    # Slicing instead of indexing lets the empty string fail this check
    # rather than raise IndexError.
    assert element[:1].isupper(), \
        "First letter must be capitalized, follow the periodic table"

    e = mg.Element(element)
    # Attribute names read from the element, in output order
    keywords = ["mendeleev_no", "electrical_resistivity",
                "velocity_of_sound", "reflectivity",
                "refractive_index", "poissons_ratio", "molar_volume",
                "electronic_structure", "thermal_conductivity",
                "boiling_point", "melting_point",
                "critical_temperature", "superconduction_temperature",
                "liquid_range", "bulk_modulus", "youngs_modulus",
                "brinell_hardness", "rigidity_modulus",
                "mineral_hardness", "vickers_hardness",
                "density_of_solid", "atomic_radius_calculated",
                "van_der_waals_radius", "atomic_orbitals",
                "coefficient_of_linear_thermal_expansion"]

    return [getattr(e, x) for x in keywords]

def get_short_atomic_info(element):
    """
    Look up a reduced list of tabulated properties for one element.

    Reads a 15-name subset of the attributes used by get_atomic_info; the
    names left out are reflectivity, refractive_index, poissons_ratio,
    molar_volume, electronic_structure, critical_temperature,
    superconduction_temperature, liquid_range, vickers_hardness and
    atomic_orbitals.

    :param element: element symbol as a string, capitalized as on the
        periodic table (e.g. "Al").
    :return: list with one entry per name in ``keywords`` (15 entries),
        in that order; each entry is whatever the ``mg.Element`` attribute
        of that name holds.
    :raises TypeError: if ``element`` is not a string.
    :raises AssertionError: if ``element`` is empty or does not start with
        an upper-case letter.
    """
    # Reject non-strings up front with a readable message instead of the
    # incidental "object is not subscriptable" error from indexing them.
    if not isinstance(element, str):
        raise TypeError("Element must be a string, got "
                        + type(element).__name__)

    # Slicing instead of indexing lets the empty string fail this check
    # rather than raise IndexError.
    assert element[:1].isupper(), \
        "First letter must be capitalized, follow the periodic table"

    e = mg.Element(element)
    # Attribute names read from the element, in output order
    keywords = ["mendeleev_no", "electrical_resistivity",
                "velocity_of_sound", "thermal_conductivity",
                "boiling_point", "melting_point",
                "bulk_modulus", "youngs_modulus",
                "brinell_hardness", "rigidity_modulus",
                "mineral_hardness", "density_of_solid",
                "atomic_radius_calculated", "van_der_waals_radius",
                "coefficient_of_linear_thermal_expansion"]

    # No debug print here: this function is called once per element by
    # compound_short_descriptors and must not write to stdout.
    return [getattr(e, x) for x in keywords]


def compound_to_descriptors(compound):
    """Flatten a compound into a single list of all available descriptors.

    The compound is split with get_empirical_formula; for every element in
    the resulting mapping the output holds its coefficient followed by the
    entries returned by get_atomic_info for that element.

    :param compound: formula in the form accepted by get_empirical_formula.
    :return: flat list ``[coeff_1, *props_1, coeff_2, *props_2, ...]``.
    :raises AssertionError: if the elements do not all contribute the same
        number of properties.
    """
    composition = get_empirical_formula(compound)
    descriptors = []
    # Distinct property-list lengths seen; more than one means a ragged row.
    widths = set()
    for symbol, amount in composition.items():
        info = get_atomic_info(symbol)
        widths.add(len(info))
        descriptors.append(amount)
        descriptors.extend(info)

    # Checked from the lists already fetched, so no extra reference lookup
    # (previously a get_atomic_info("H") call) is needed on every call.
    assert len(widths) <= 1, "Output is wrong length"
    return descriptors


def compound_short_descriptors(compound):
    """Flatten a compound into a single list of the shorter descriptor set.

    Same layout as compound_to_descriptors, but each element contributes
    the entries returned by get_short_atomic_info.

    :param compound: formula in the form accepted by get_empirical_formula.
    :return: flat list ``[coeff_1, *props_1, coeff_2, *props_2, ...]``.
    :raises AssertionError: if the elements do not all contribute the same
        number of properties.
    """
    # get_empirical_formula returns a dictionary with elements and
    # corresponding stoichiometry
    composition = get_empirical_formula(compound)
    descriptors = []
    # Distinct property-list lengths seen; more than one means a ragged row.
    widths = set()
    for symbol, amount in composition.items():
        info = get_short_atomic_info(symbol)
        widths.add(len(info))
        # stoichiometry first, then that element's properties
        descriptors.append(amount)
        descriptors.extend(info)

    # Checked from the lists already fetched, so no extra reference lookup
    # (previously a get_short_atomic_info("H") call) is needed on every call.
    assert len(widths) <= 1, "Output is wrong length"
    return descriptors


In [3]:

def test_get_atomic_info():

    # Test element inputs
    test_form1 = "Ca"
    test_form2 = "La"
    test_form3 = "ga"
    not_element = "Az"
    # Try passing a number
    try:
        get_atomic_info(1234)
    except Exception:
        pass
    else:
        raise Exception("Bad input allowed",
                        "Error not raised when numerical value is passed")

    output_1 = get_atomic_info(test_form1)
    output_2 = get_atomic_info(test_form2)

    assert len(output_1) == 25, \
        "Wrong output length "+ str(len(output_1))

    assert len(output_2) == 25, \
        "Wrong output length"
    # Try non-capitalized element name
    try:
        get_atomic_info(test_form3)
    except Exception:
        pass
    else:
        raise Exception("Did not catch case of non-capitalized element")
    try:
        get_short_atomic_info(not_element)
    except Exception:
        pass
    else:
        raise Exception("Did not catch case of non-element input")
    return True


def test_get_short_atomic_info():

    # Test non-string inputs and string that's not an element
    test1 = "H"
    a_integer = 2
    a_float = 2.5
    not_element = "Az"
    not_capitalize = "ga"
    # Ensure output is correct length
    results = get_short_atomic_info(test1)
    assert len(results) == 15, \
        "Wrong output length"
    # Try integer input
    try:
        get_short_atomic_info(a_integer)
    except Exception:
        pass
    else:
        raise Exception("Input must be a string and the name of an element")
    # Try float input
    try:
        get_short_atomic_info(a_float)
    except Exception:
        pass
    else:
        raise Exception("Input must be a string and the name of an element")
    # Try a non-element
    try:
        get_short_atomic_info(not_element)
    except Exception:
        pass
    else:
        raise Exception("Did not catch case of non-element input")
    # Try non-capitalized element
    try:
        get_short_atomic_info(not_capitalize)
    except Exception:
        pass
    else:
        raise Exception("Did not catch case of non-capitalized element")
    return True

def test_compound_to_descriptors():

    # make sure output is list
    test1 = compound_to_descriptors("Mn0.5B0.3C1.2")
    assert isinstance(test1, list),\
        "Output is not a list"
    # output should be 25 descriptors * number of elements + number of elements
    assert len(test1) == 78,\
        "Wrong output length"
    # ensure descriptors for each element is obtained

    return True

def test_compound_short_descriptors():


    return True

In [7]:
import re


def get_empirical_formula(formula):
    """
    Converts chemical formula to empirical formula and respective
    coefficients for doping and proportions.

    The formula is cut into groups that each start at a capital letter.
    A group without digits counts as coefficient 1; otherwise the number
    after the symbol is read as a float when it contains a "." and as an
    int when it does not. A symbol that occurs more than once has its
    coefficients added together.

    Example:
    >>> emp_form = get_empirical_formula("Ca0.98La0.02MnO3")
    >>> emp_form
    {'Ca': 0.98, 'La': 0.02, 'Mn': 1, 'O': 3}
    >>> get_empirical_formula("CH3COOH")
    {'C': 2, 'H': 4, 'O': 2}

    :param formula: chemical formula as a string.
    :return: dict mapping element symbol to its coefficient.
    :raises AssertionError: if ``formula`` is not a string.
    :raises ValueError: if a group contains a digit but no readable
        "<Symbol><number>" prefix (e.g. "Ca.5").
    """
    assert not isinstance(formula, list), \
        "Cannot pass a list. Input must be a string"

    assert isinstance(formula, str), "Must pass a string."

    elements = {}

    # Each group runs from one capital letter up to the next capital letter
    for pair in re.findall("[A-Z][^A-Z]*", formula):
        if re.search(r"\d", pair):
            split = re.match(r"([A-Z][a-z]*)([0-9]\.*[0-9]*)", pair)
            if split is None:
                # Without this guard the failure surfaced as an
                # AttributeError on None further down.
                raise ValueError(
                    "Cannot read element and coefficient from '%s'" % pair)
            symbol, number = split.groups()
            coefficient = float(number) if "." in number else int(number)
        else:
            symbol, coefficient = pair, 1

        # Add rather than assign so a repeated symbol keeps earlier counts
        elements[symbol] = elements.get(symbol, 0) + coefficient

    return elements


In [9]:
# Sanity check: "CaO" has 2 elements, each contributing 1 stoichiometry value
# plus 25 properties, so the expected length is 2 * 26 = 52.
len(compound_to_descriptors("CaO"))

52

In [33]:
# Inspect the 15-entry short property list for hydrogen.
get_short_atomic_info('H')

15


[103.0,
 None,
 1270.0,
 0.1805,
 20.28,
 14.01,
 None,
 None,
 None,
 None,
 None,
 None,
 0.53,
 1.2,
 None]

In [37]:
# Short descriptor vector for Al2O3: for each element, its stoichiometry
# value followed by that element's 15 short properties.
compound_short_descriptors("Al2O3")

15
15
15


[2,
 80.0,
 2.7e-08,
 5100.0,
 235.0,
 2792.0,
 933.47,
 76.0,
 70.0,
 245.0,
 26.0,
 2.75,
 2700.0,
 1.18,
 1.84,
 2.31e-05,
 3,
 101.0,
 None,
 317.5,
 0.02658,
 90.2,
 54.8,
 None,
 None,
 None,
 None,
 None,
 None,
 0.48,
 1.52,
 None]