In [119]:
import pandas as pd
import json
import pymatgen as mg

In [120]:
# Load the uploader-metadata area of the template workbook: only columns A:F
# and at most 4 data rows are read.
metaDF = pd.read_excel('templateSampleFilled3.xlsx',
                   usecols="A:F",nrows=4)
# Serialise in "split" orientation and keep only the 'data' part, i.e. the
# cell values as a list of rows (column labels and index are dropped).
meta = metaDF.to_json(orient="split")
metaParsed = json.loads(meta)['data']

In [121]:
# Pick the metadata values out of fixed [row][column] positions of metaParsed.
# NOTE(review): the positions are hard-coded to the current template layout --
# confirm them whenever the template spreadsheet changes.
metaData = {
    'name' : metaParsed[0][1],
    'email' :  metaParsed[1][1],
    'directFetch' :  metaParsed[2][1],
    'handFetch' :  metaParsed[3][1],
    'comment' : metaParsed[0][5]
    }
print(metaData)

{'name': 'Happy Researcher', 'email': 'happy@psu.edu', 'directFetch': 'T', 'handFetch': 'T', 'comment': None}


In [130]:
# Load the datapoint table from the same workbook: columns A:N, skipping the
# first 8 sheet rows and reading at most 4 data rows.
# NOTE(review): nrows=4 caps the number of datapoints read -- presumably sized
# for this sample file; confirm before using on a real upload.
df2 = pd.read_excel('templateSampleFilled3.xlsx',
                   usecols="A:N",nrows=4,skiprows=8)


In [131]:
# Convert the table to a list of per-row dicts keyed by column header
# (round-tripping through JSON), then pretty-print it for inspection.
result = df2.to_json(orient="records")
parsed = json.loads(result)
print(json.dumps(parsed, indent=4))

[
    {
        "id/nickname": 1,
        "Composition": "Fe60 Ni20 Cr10 Zr10 ",
        "Structure": "BCC+FCC",
        "Processing": "HIP+A",
        "Material Comment": null,
        "Name": "UTS",
        "Source": "EXP",
        "Temperature [K]": 298,
        "Value [SI]": 750000000,
        "Uncertainty [SI]": 25000000,
        "Unit [SI]": "Pa",
        "Pointer": "T7",
        "DOI": "10.1557/jmr.2018.153",
        "special": null
    },
    {
        "id/nickname": 2,
        "Composition": "Fe60 Ni10 Cr20 Zr10",
        "Structure": "BCC+FCC",
        "Processing": "HIP+A",
        "Material Comment": null,
        "Name": "UTS",
        "Source": "EXP",
        "Temperature [K]": 298,
        "Value [SI]": 650000000,
        "Uncertainty [SI]": 25000000,
        "Unit [SI]": "Pa",
        "Pointer": "T7",
        "DOI": "10.1557/jmr.2018.153",
        "special": null
    },
    {
        "id/nickname": 3,
        "Composition": "Fe60 Ni10 Cr10 Zr20",
        "Structure": "3

In [6]:
# Use the first parsed record as the example datapoint for the cells below.
# Requires `parsed` from the table-loading cells, so run the notebook in order.
datapoint = parsed[0]

In [78]:
def compStr2compList(s):
    """Parse a composition string into its standard representations.

    Parameters
    ----------
    s : str
        Composition / chemical formula text, e.g. 'Fe10 Ni10 Al80'.

    Returns
    -------
    list
        Five items, in this order:
        (1) IUPAC standardized formula (str),
        (2) pymatgen reduced composition object,
        (3) reduced formula (str),
        (4) chemical system (str),
        (5) number of components (int).
        If ``s`` cannot be parsed, a warning is printed and
        ``['', mg.Composition(), '', '', 0]`` is returned instead.
    """
    try:
        compObj = mg.Composition(s)
        if not compObj.valid:
            print("Composition invalid")
        return [compObj.iupac_formula, compObj.reduced_composition,
                compObj.reduced_formula, compObj.chemical_system, len(compObj)]
    except Exception:
        # str() keeps the warning itself from raising TypeError when ``s`` is
        # not a string (e.g. None coming from an empty spreadsheet cell);
        # otherwise the fallback value below would never be returned.
        print("Warning! Can't parse composition!: " + str(s))
        return ['', mg.Composition(), '', '', 0]

In [79]:
# Smoke test of compStr2compList on several formula styles; the 3rd and 4th
# inputs list the same elements in reverse order.
for s in ['Fe2Ni3', 'Fe10 Ni10 Al80', 'NiZrNdTaB', 'BTaNdZrNi', 'Zr3 (AlTaNdTi)2']:
    print(compStr2compList(s))


['Fe2 Ni3', Comp: Fe2 Ni3, 'Fe2Ni3', 'Fe-Ni', 2]
['Fe10 Ni10 Al80', Comp: Al8 Fe1 Ni1, 'Al8FeNi', 'Al-Fe-Ni', 3]
['Nd1 Zr1 Ta1 Ni1 B1', Comp: Nd1 Zr1 Ta1 Ni1 B1, 'NdZrTaNiB', 'B-Nd-Ni-Ta-Zr', 5]
['Nd1 Zr1 Ta1 Ni1 B1', Comp: Nd1 Zr1 Ta1 Ni1 B1, 'NdZrTaNiB', 'B-Nd-Ni-Ta-Zr', 5]
['Nd2 Zr3 Ti2 Ta2 Al2', Comp: Nd2 Zr3 Ta2 Ti2 Al2, 'Nd2Zr3Ta2(TiAl)2', 'Al-Nd-Ta-Ti-Zr', 5]


In [144]:
def phaseNameUnifier(s):
    """Normalise the spelling of a single phase name.

    Rules, applied in this order:
      1. one of the short labels in ``exceptionToUpper`` -> upper-cased
         (e.g. 'b2' -> 'B2');
      2. parses as a valid composition (e.g. 'Fe3Ni')    -> kept as is;
      3. already all upper case (e.g. 'BCC', 'FCC')      -> kept as is;
      4. anything else (e.g. 'Laves')                    -> lower-cased.

    Parameters
    ----------
    s : str
        Phase name with no surrounding whitespace.

    Returns
    -------
    str
        The unified phase name.
    """
    exceptionToUpper = ('b0', 'b1', 'b2', 'a0', 'a1', 'a2')

    # Checked first: these labels never need the composition parse.
    if s in exceptionToUpper:
        return s.upper()

    try:
        isComp = mg.Composition(s).valid
    except Exception:
        # Not parseable as a formula. Only ordinary errors are treated this
        # way, so KeyboardInterrupt / SystemExit still propagate.
        isComp = False

    if isComp or s.isupper():
        return s
    return s.lower()

In [147]:
# Smoke test of phaseNameUnifier: upper-case names, formulas, mixed-case
# names and one of the always-upper-cased short labels.
for s in ['BCC', 'FCC', 'Fe3Ni', 'Al2O3', 'Laves', 'Sigma', 'b2']:
    print(phaseNameUnifier(s))

BCC
FCC
Fe3Ni
Al2O3
laves
sigma
B2


In [150]:
def structStr2list(s):
    """Split a structure string into a sorted list of unified phase names.

    The string lists individual phases separated by '+'. Spaces are ignored.
    It handles (1) multiple phases of the same type, written with a leading
    count such as '2BCC', (2) composition-defined phases and (3) named phases;
    every name is passed through ``phaseNameUnifier``.

    Parameters
    ----------
    s : str
        Structure description, e.g. '2BCC+FCC+Laves'.

    Returns
    -------
    list
        ``[phases, nPhases]`` where ``phases`` is the sorted list of names
        (repeated according to their counts). ``[]`` if nothing could be
        parsed; a warning is printed when parsing fails.
    """
    ls = []
    try:
        for phase in s.replace(' ', '').split('+'):
            # The whole leading run of digits is the multiplicity, so
            # '12BCC' means twelve BCC phases rather than one '2BCC'.
            nDigits = 0
            while nDigits < len(phase) and phase[nDigits].isdigit():
                nDigits += 1
            count = int(phase[:nDigits]) if nDigits else 1
            name = phase[nDigits:]
            if not name:
                # Empty segment ('BCC++FCC', '') or a bare count ('3').
                raise ValueError('empty phase name')
            ls.extend([phaseNameUnifier(name)] * count)
        ls.sort()
    except Exception:
        print('Warning! Error parsing structure list.')
        return []

    return [ls, len(ls)] if ls else []

In [152]:
# Smoke test of structStr2list: single phases, '+'-joined phases and
# count-prefixed phases.
for s in ['BCC', 'FeC3', 'BCC+TiAl5', 'FCC+BCC', 'BCC+FCC+Laves+b2', '3BCC', '2BCC+2FCC+Sigma+Laves']:
    print(structStr2list(s))

[['BCC'], 1]
[['FeC3'], 1]
[['BCC', 'TiAl5'], 2]
[['BCC', 'FCC'], 2]
[['B2', 'BCC', 'FCC', 'laves'], 4]
[['BCC', 'BCC', 'BCC'], 3]
[['BCC', 'BCC', 'FCC', 'FCC', 'laves', 'sigma'], 6]


In [153]:
def processNameUnifier(s):
    """Normalise the spelling of a single processing-step name.

    Names listed in ``keepAsIs`` (currently none) and names that are already
    all upper case (e.g. 'HIP', 'SPS') are returned unchanged; every other
    name is lower-cased.
    """
    keepAsIs = ()

    unchanged = s in keepAsIs or s.isupper()
    return s if unchanged else s.lower()

In [154]:
# Smoke test of processNameUnifier on all-upper-case process names.
for s in ['AC', 'A', 'HIP', 'SPS']:
    print(processNameUnifier(s))


AC
A
HIP
SPS


In [158]:
def processStr2list(s):
    """Split a processing string into an ordered list of unified step names.

    Steps are separated by '+'. Spaces are ignored. A leading count repeats
    the step (e.g. '2A' gives two 'A' steps). Every name is passed through
    ``processNameUnifier``. The order of the steps is preserved.

    Parameters
    ----------
    s : str
        Processing description, e.g. 'anneal + HIP'.

    Returns
    -------
    list
        ``[steps, nSteps]``, or ``[]`` if nothing could be parsed; a warning
        is printed when parsing fails.
    """
    ls = []
    try:
        for process in s.replace(' ', '').split('+'):
            # The whole leading run of digits is the repeat count, so
            # '10A' means ten 'A' steps rather than one '0A'.
            nDigits = 0
            while nDigits < len(process) and process[nDigits].isdigit():
                nDigits += 1
            repeats = int(process[:nDigits]) if nDigits else 1
            name = process[nDigits:]
            if not name:
                # Empty segment ('A++HIP', '') or a bare count ('3').
                raise ValueError('empty process name')
            ls.extend([processNameUnifier(name)] * repeats)
    except Exception:
        print('Warning! Error parsing process list.')
        return []

    return [ls, len(ls)] if ls else []

In [163]:
# Smoke test of processStr2list: single steps and '+'-joined steps, with and
# without spaces around the separator.
for s in ['AC', 'A', 'anneal + HIP', 'SPS+anneal']:
    print(processStr2list(s))

[['AC'], 1]
[['A'], 1]
[['anneal', 'HIP'], 2]
[['SPS', 'anneal'], 2]


In [164]:
def _parseField(dataP, key, emptyWarning, parser):
    """Return parser(dataP[key]); print a warning and return None otherwise.

    None is returned when the field is missing or null (``emptyWarning`` is
    printed) or when ``parser`` raises (a generic warning is printed).
    """
    try:
        raw = dataP[key]
    except (KeyError, TypeError):
        raw = None
    if raw is None:
        print(emptyWarning)
        return None
    try:
        return parser(raw)
    except Exception as err:
        print('Warning! Could not parse field ' + repr(key) + ': ' + repr(err))
        return None


def datapoint2entry(metaD, dataP):
    """Assemble one entry from the upload metadata and one parsed data row.

    Parameters
    ----------
    metaD : dict
        Uploader metadata. Stored under 'meta' as the same object (no copy).
    dataP : dict
        One record keyed by the column headers read below ('Composition',
        'Structure', 'Processing', 'Material Comment', 'Name', 'Source',
        'Temperature [K]', 'Value [SI]', 'Pointer', 'DOI').

    Returns
    -------
    dict
        ``{'meta', 'material', 'property', 'reference'}``. A section whose
        source fields are missing, null or unparseable is left without the
        corresponding keys and a warning is printed; building the entry is
        best effort and does not raise for such input.
    """
    # metadata
    entry = {'meta' : metaD, 'material' : {}, 'property' : {}, 'reference' : {}}
    material = entry['material']

    # composition
    compList = _parseField(
        dataP, 'Composition',
        'Warning. Parsing an entry with an empty composition field.',
        compStr2compList)
    if compList is not None and len(compList) >= 5:
        material.update({
            'formula' : compList[0],
            'composition' : compList[1],
            'reducedFormula' : compList[2],
            'system' : compList[3],
            'nComponents' : compList[4]})

    # structure (the parser returns [] on failure, after printing its own warning)
    structList = _parseField(
        dataP, 'Structure',
        'Warning! Parsing an entry with an empty structure field.',
        structStr2list)
    if structList is not None and len(structList) >= 2:
        material.update({
            'structure' : structList[0],
            'nPhases' : structList[1]})

    # processing (same [] convention as the structure parser)
    processingList = _parseField(
        dataP, 'Processing',
        'Warning! Parsing an entry with an empty processing field.',
        processStr2list)
    if processingList is not None and len(processingList) >= 2:
        material.update({
            'processes' : processingList[0],
            'nProcessSteps' : processingList[1]})

    # comment -- copied verbatim when the column exists; absence is not an error
    try:
        material['comment'] = dataP['Material Comment']
    except (KeyError, TypeError):
        pass

    # property -- all-or-nothing: the dict literal is built before update(),
    # so a missing column leaves entry['property'] empty
    try:
        entry['property'].update({
            'name' : dataP['Name'],
            'source' : dataP['Source'],
            'temperature' : dataP['Temperature [K]'],
            'value' : dataP['Value [SI]'],
            #'unitName' : dataP['Unit [SI]']
            })
    except (KeyError, TypeError):
        print('Warning! Parsing an entry with incomplete property fields.')

    # reference -- handled separately so that a missing property column does
    # not also drop the reference
    try:
        entry['reference'].update({
            'pointer' : dataP['Pointer'],
            'doi' : dataP['DOI']})
    except (KeyError, TypeError):
        print('Warning! Parsing an entry with incomplete reference fields.')

    return entry

In [165]:
# Build and display the entry for the example datapoint selected above.
datapoint2entry(metaData, datapoint)


{'meta': {'name': 'Happy Researcher',
  'email': 'happy@psu.edu',
  'directFetch': 'T',
  'handFetch': 'T',
  'comment': None},
 'material': {'formula': 'Zr10 Cr10 Fe60 Ni20',
  'composition': Comp: Zr1 Cr1 Fe6 Ni2,
  'reducedFormula': 'ZrCr(Fe3Ni)2',
  'system': 'Cr-Fe-Ni-Zr',
  'nComponents': 4,
  'structure': ['BCC', 'FCC'],
  'nPhases': 2,
  'processes': ['HIP', 'A'],
  'nProcessSteps': 2,
  'comment': None},
 'property': {'name': 'UTS',
  'source': 'EXP',
  'temperature': 298,
  'value': 750000000.0},
 'reference': {'pointer': 'T7', 'doi': '10.1557/jmr.2018.153'}}