In [1]:
import pandas as pd
import json
import pymatgen as mg

In [2]:
# Load the contributor-metadata region of the filled template:
# spreadsheet columns A-F, at most 4 rows below the header row.
metaDF = pd.read_excel(
    'templateSampleFilled.xlsx',
    usecols="A:F",
    nrows=4,
)
# Round-trip through JSON to obtain the cell values as plain nested lists
# (the 'data' entry of the "split" layout holds one inner list per row).
meta = metaDF.to_json(orient="split")
metaParsed = json.loads(meta)['data']

In [3]:
# Pick the individual metadata fields out of the parsed header rows.
# Each lookup is metaParsed[row][column] within the region read above.
metaData = dict(
    name=metaParsed[0][1],
    email=metaParsed[1][1],
    directFetch=metaParsed[2][1],
    handFetch=metaParsed[3][1],
    comment=metaParsed[0][5],
)
print(metaData)

{'name': 'Happy Researcher', 'email': 'happy@psu.edu', 'directFetch': 'T', 'handFetch': 'F', 'comment': 'This is just a test!'}


In [4]:
# Load the data table from the same workbook: skip the first 8 rows, then
# read spreadsheet columns A-K for at most 4 records.
df2 = pd.read_excel(
    'templateSampleFilled.xlsx',
    usecols="A:K",
    skiprows=8,
    nrows=4,
)


In [5]:
# Convert the table into a list of per-row dicts (column name -> cell value)
# by serialising to JSON and parsing it back, then pretty-print the result
# for inspection.
result = df2.to_json(orient="records")
parsed = json.loads(result)
print(json.dumps(parsed, indent=4))

[
    {
        "id/nickname": 1,
        "Composition": "Fe60 Ni20 Cr10 Zr10 ",
        "Structure": "BCC+FCC",
        "Processing": "HIP+A",
        "Material Comment": null,
        "Name": "UTS",
        "Source": "EXP",
        "Temperature [K]": 298,
        "Value [SI]": 750000000.0,
        "Pointer": "T7",
        "DOI": "10.1557/jmr.2018.153"
    },
    {
        "id/nickname": 2,
        "Composition": "Fe60 Ni10 Cr20 Zr10",
        "Structure": "BCC+FCC",
        "Processing": "HIP+A",
        "Material Comment": null,
        "Name": "UTS",
        "Source": "EXP",
        "Temperature [K]": 298,
        "Value [SI]": 650000000.0,
        "Pointer": "T7",
        "DOI": "10.1557/jmr.2018.153"
    },
    {
        "id/nickname": 3,
        "Composition": "Fe60 Ni10 Cr10 Zr20",
        "Structure": "BCC",
        "Processing": "HIP+A",
        "Material Comment": null,
        "Name": "UTS",
        "Source": "EXP",
        "Temperature [K]": 298,
        "Value [SI]": 7000

In [6]:
# Use the first parsed record as the sample datapoint.
datapoint = parsed[0]

In [72]:
# Convert a composition string into four representations: (1) IUPAC
# standardized formula, (2) pymatgen reduced composition object,
# (3) reduced formula, and (4) chemical system.

def compStr2compList(s):
    """Parse a composition string into four pymatgen-derived representations.

    Args:
        s: Composition string, e.g. ``'Fe60 Ni20 Cr10 Zr10'``.

    Returns:
        A 4-element list ``[iupac_formula, reduced_composition,
        reduced_formula, chemical_system]``. If ``s`` cannot be parsed, a
        message is printed and ``['', mg.Composition(), '', '']`` is
        returned instead.

    Notes:
        A composition that parses but is not reported as ``valid`` only
        triggers a printed warning; its representations are still returned.
    """
    # NOTE(review): assumes the installed pymatgen exposes ``Composition`` at
    # the package root (``mg.Composition``) -- confirm against the version in use.
    try:
        compObj = mg.Composition(s)
        if not compObj.valid:
            print("Composition invalid")
        return [compObj.iupac_formula, compObj.reduced_composition,
                compObj.reduced_formula, compObj.chemical_system]
    except Exception:
        # Convert explicitly before concatenating: ``s`` may not be a string
        # (e.g. None for an empty cell), and "..." + s would then raise a
        # TypeError inside this handler instead of returning the fallback.
        print("Can't parse composition!: " + str(s))
        return ['', mg.Composition(), '', '']

In [77]:
# Try the parser on a few formulas, including two orderings of the same
# elements and a formula with a parenthesised group.
for s in ['Fe2Ni3', 'Fe10 Ni10 Al80', 'NiZrNdTaB', 'BTaNdZrNi', 'Zr3 (AlTaNdTi)2']:
    print(compStr2compList(s))


['Fe2 Ni3', Comp: Fe2 Ni3, 'Fe2Ni3', 'Fe-Ni']
['Fe10 Ni10 Al80', Comp: Al8 Fe1 Ni1, 'Al8FeNi', 'Al-Fe-Ni']
['Nd1 Zr1 Ta1 Ni1 B1', Comp: Nd1 Zr1 Ta1 Ni1 B1, 'NdZrTaNiB', 'B-Nd-Ni-Ta-Zr']
['Nd1 Zr1 Ta1 Ni1 B1', Comp: Nd1 Zr1 Ta1 Ni1 B1, 'NdZrTaNiB', 'B-Nd-Ni-Ta-Zr']
['Nd2 Zr3 Ti2 Ta2 Al2', Comp: Nd2 Zr3 Ta2 Ti2 Al2, 'Nd2Zr3Ta2(TiAl)2', 'Al-Nd-Ta-Ti-Zr']


In [57]:
# If composition -> keep as is
# if all uppercase (e.g. BCC, FCC) -> keep as is
# otherwise -> make all lowercase

def phaseNameUnifier(s):
    """Normalise the spelling of a phase name.

    Rules, applied in order:
        * a string that pymatgen parses as a valid composition is kept as is;
        * an all-uppercase string (e.g. ``'BCC'``) is kept as is;
        * anything else is lower-cased (e.g. ``'Laves'`` -> ``'laves'``).

    Args:
        s: Phase name string.

    Returns:
        The normalised phase name.
    """
    try:
        isComp = mg.Composition(s).valid
    except Exception:
        # Not parseable as a composition -> treat it as a plain phase name.
        # Only ordinary errors are caught, so KeyboardInterrupt / SystemExit
        # are no longer swallowed here.
        isComp = False

    if isComp or s.isupper():
        return s
    return s.lower()

In [62]:
# Check the three normalisation cases: all-uppercase names, compositions,
# and mixed-case names.
for s in ['BCC', 'FCC', 'Fe3Ni', 'Al2O3', 'Laves', 'Sigma']:
    print(phaseNameUnifier(s))

BCC
FCC
Fe3Ni
Al2O3
laves
sigma


In [64]:
def structStr2list(s):
    """Split a '+'-separated structure string into a list of phase names.

    A phase may carry a leading integer multiplier, which expands into that
    many repeated entries, e.g. ``'2BCC+FCC'`` -> ``['BCC', 'BCC', 'FCC']``.
    Every phase name is passed through ``phaseNameUnifier``.

    Args:
        s: Structure string such as ``'BCC+FCC+Laves'``; spaces are ignored.

    Returns:
        ``[phases, count]`` where ``phases`` is the expanded list of phase
        names and ``count`` is its length. ``[]`` is returned when no phase
        results, or (after printing a message) when the input cannot be
        parsed, e.g. a non-string input or a token without a phase name.
    """
    phases = []
    try:
        for phase in s.replace(' ', '').split('+'):
            # Consume the whole leading digit run so multi-digit multipliers
            # ('12BCC') are honoured rather than only the first digit.
            name = phase.lstrip('0123456789')
            multiplier = phase[:len(phase) - len(name)]
            if not name:
                # Empty token or a bare number: there is no phase to record.
                raise ValueError('missing phase name in ' + repr(phase))
            repeat = int(multiplier) if multiplier else 1
            phases.extend([phaseNameUnifier(name)] * repeat)
    except Exception:
        print('Error parsing structure list.')
        return []

    if phases:
        return [phases, len(phases)]
    return []

In [66]:
# Exercise the parser on single phases, '+'-joined mixtures, and phases
# with a leading count.
for s in ['BCC', 'FeC3', 'BCC+TiAl5', 'FCC+BCC', 'BCC+FCC+Laves', '3BCC', '2BCC+2FCC+Sigma+Laves']:
    print(structStr2list(s))

[['BCC'], 1]
[['FeC3'], 1]
[['BCC', 'TiAl5'], 2]
[['FCC', 'BCC'], 2]
[['BCC', 'FCC', 'laves'], 3]
[['BCC', 'BCC', 'BCC'], 3]
[['BCC', 'BCC', 'FCC', 'FCC', 'sigma', 'laves'], 6]


In [10]:
def datapoint2entry(metaD, dataP):
    """Assemble one nested entry from contributor metadata and a data record.

    Args:
        metaD: Metadata dict; stored as-is (same object) under ``'meta'``.
        dataP: One parsed table record keyed by the template column names
            (``'Composition'``, ``'Structure'``, ``'Processing'``,
            ``'Material Comment'``, ``'Name'``, ``'Source'``,
            ``'Temperature [K]'``, ``'Value [SI]'``, ``'Pointer'``, ``'DOI'``).

    Returns:
        Dict with the sections ``'meta'``, ``'material'`` and ``'property'``;
        the composition-derived fields come from ``compStr2compList``.
    """
    formula, composition, reducedFormula, system = compStr2compList(
        dataP['Composition'])

    material = {
        'formula': formula,
        'composition': composition,
        'reducedFormula': reducedFormula,
        'system': system,
        'structure': dataP['Structure'],
        'processing': dataP['Processing'],
        'comment': dataP['Material Comment'],
    }

    # A 'unitName' field read from a 'Unit [SI]' column is currently disabled.
    measured = {
        'name': dataP['Name'],
        'source': dataP['Source'],
        'temperature': dataP['Temperature [K]'],
        'value': dataP['Value [SI]'],
    }
    measured['reference'] = {
        'pointer': dataP['Pointer'],
        'doi': dataP['DOI'],
    }

    return {'meta': metaD, 'material': material, 'property': measured}

In [11]:
# Build a full entry from the sample metadata and the first data record.
datapoint2entry(metaData, datapoint)


{'meta': {'name': 'Happy Researcher',
  'email': 'happy@psu.edu',
  'directFetch': 'T',
  'handFetch': 'F',
  'comment': 'This is just a test!'},
 'material': {'formula': 'Zr10 Cr10 Fe60 Ni20',
  'composition': Comp: Zr1 Cr1 Fe6 Ni2,
  'reducedFormula': 'ZrCr(Fe3Ni)2',
  'system': 'Cr-Fe-Ni-Zr',
  'structure': 'BCC+FCC',
  'processing': 'HIP+A',
  'comment': None},
 'property': {'name': 'UTS',
  'source': 'EXP',
  'temperature': 298,
  'value': 750000000.0,
  'reference': {'pointer': 'T7', 'doi': '10.1557/jmr.2018.153'}}}