In [16]:
import pandas as pd
import json
import pymatgen as mg
from datetime import datetime

In [17]:
# Read columns A-F of the template (at most 20 rows after the header row);
# the cells indexed further down hold the upload-level metadata.
# The frame is round-tripped through JSON so that the values become plain
# Python objects; with orient="split" the cell values are found under 'data'
# as a list of rows.
metaDF = pd.read_excel('templateSampleFilled3.xlsx',
                   usecols="A:F",nrows=20)
meta = metaDF.to_json(orient="split")
metaParsed = json.loads(meta)['data']

In [18]:
# Timestamp of this run; it is stored with the upload metadata.
# NOTE(review): the pattern is year-day-month(abbreviated)-hour-minute-second,
# e.g. 2021-30-Mar-14-08-04 - confirm that day-before-month is intended.
now = datetime.now()
dateString = format(now, '%Y-%d-%b-%H-%M-%S')
print(dateString)

2021-30-Mar-14-08-04


In [19]:
# Upload-level metadata, picked from fixed cell positions of the parsed
# metadata rows, plus the timestamp of this run.
metaData = dict(
    name=metaParsed[0][1],
    email=metaParsed[1][1],
    directFetch=metaParsed[2][1],
    handFetch=metaParsed[3][1],
    comment=metaParsed[0][5],
    timeStamp=dateString,
)
print(metaData)

{'name': 'Happy Researcher', 'email': 'happy@psu.edu', 'directFetch': 'T', 'handFetch': 'T', 'comment': None, 'timeStamp': '2021-30-Mar-14-08-04'}


In [20]:
# Read the datapoint table: skip the first 8 rows of the sheet, use the next
# row as the header, and load 4 data rows from columns A-N.
# NOTE(review): the skipped rows are presumably the metadata block read
# earlier - confirm against the template layout.
df2 = pd.read_excel('templateSampleFilled3.xlsx',
                   usecols="A:N",nrows=4,skiprows=8)


In [21]:
# Convert the table into a list of per-row dicts keyed by column header
# (via a JSON round trip) and pretty-print it for inspection.
result = df2.to_json(orient="records")
parsed = json.loads(result)
print(json.dumps(parsed, indent=4))

[
    {
        "id/nickname": 1,
        "Composition": "Fe60 Ni20 Cr10 Zr10 ",
        "Structure": "BCC+FCC",
        "Processing": "HIP+A",
        "Material Comment": null,
        "Name": "UTS",
        "Source": "EXP",
        "Temperature [K]": 298,
        "Value [SI]": 750000000,
        "Uncertainty [SI]": 25000000,
        "Unit [SI]": "Pa",
        "Pointer": "T7",
        "DOI": "10.1557/jmr.2018.153",
        "special": null
    },
    {
        "id/nickname": 2,
        "Composition": "Fe60 Ni10 Cr20 Zr10",
        "Structure": "BCC+FCC",
        "Processing": "HIP+A",
        "Material Comment": null,
        "Name": "UTS",
        "Source": "EXP",
        "Temperature [K]": 298,
        "Value [SI]": 650000000,
        "Uncertainty [SI]": 25000000,
        "Unit [SI]": "Pa",
        "Pointer": "T7",
        "DOI": "10.1557/jmr.2018.153",
        "special": null
    },
    {
        "id/nickname": 3,
        "Composition": "Fe60 Ni10 Cr10 Zr20",
        "Structure": "3

In [22]:
# First parsed record, kept as a sample input for datapoint2entry.
datapoint = parsed[0]

In [23]:
# Load the datapoint table of a second workbook with the same layout
# parameters (skip 8 rows, 4 data rows, columns A-N).
# NOTE(review): judging by the file name this is presumably a deliberately
# malformed sheet used to exercise the parsers - confirm.
df3 = pd.read_excel('wrong_senkov_table_1_mongoDB.xlsx',
                   usecols="A:N",nrows=4,skiprows=8)


In [24]:
# Same conversion as for the first table: list of per-row dicts keyed by
# column header, pretty-printed for inspection.
result3 = df3.to_json(orient="records")
parsed3 = json.loads(result3)
print(json.dumps(parsed3, indent=4))

[
    {
        "id/nickname": 0,
        "Composition": "MoNbTaW",
        "Structure": "BCC",
        "Processing": "AC+A",
        "Material Comment": "Test",
        "Name": "UTS",
        "Source": "ML",
        "Temperature [K]": 298.0,
        "Value [SI]": 10000000.0,
        "Unit [SI]": "Pa",
        "Pointer": "T1",
        "DOI": "10.1557/jmr.2018.153",
        "Col1": null,
        "Col2": null
    },
    {
        "id/nickname": 1,
        "Composition": "MoNbTaVW",
        "Structure": "BCC",
        "Processing": null,
        "Material Comment": null,
        "Name": null,
        "Source": null,
        "Temperature [K]": null,
        "Value [SI]": null,
        "Unit [SI]": null,
        "Pointer": "T1",
        "DOI": "10.1557/jmr.2018.153",
        "Col1": null,
        "Col2": null
    },
    {
        "id/nickname": 2,
        "Composition": "HfNbTaTiZ",
        "Structure": "BCC",
        "Processing": null,
        "Material Comment": null,
        "Name": nul

In [None]:
# Representations derived from a composition string: (1) IUPAC standardized
# formula, (2) reduced composition dictionary, (3) anonymized formula,
# (4) reduced formula, (5) chemical system, and (6) number of components.

def compStr2compList(s):
    """Convert a composition string into a list of standardized representations.

    Args:
        s: Composition string, e.g. 'Fe2Ni3' or 'Fe10 Ni10 Al80'.

    Returns:
        A 6-element list [IUPAC formula, composition dict, anonymized formula,
        reduced formula, chemical system, number of components], all taken
        from the reduced composition. If `s` cannot be parsed (including
        non-string input such as None), a warning is printed and the
        placeholder ['', [], '', '', '', 0] is returned.
    """
    try:
        compObj = mg.Composition(s).reduced_composition
        if not compObj.valid:
            print("Composition invalid")
        return [compObj.iupac_formula, compObj.as_dict(), compObj.anonymized_formula,
                compObj.reduced_formula, compObj.chemical_system, len(compObj)]
    except Exception:
        # str() keeps the warning itself from raising when s is not a string
        # (an empty spreadsheet cell arrives here as None).
        print("Warning! Can't parse composition!: "+str(s))
        # The placeholder shape is kept as-is for existing callers.
        return ['', [], '', '', '', 0]

In [None]:
# Quick visual check of compStr2compList on a few composition spellings,
# including two orderings of the same elements and a bracketed formula.
for s in ['Fe2Ni3', 'Fe10 Ni10 Al80', 'NiZrNdTaB', 'BTaNdZrNi', 'Zr3 (AlTaNdTi)2']:
    print(compStr2compList(s))


In [None]:
# Phase-name normalization rules:
#   listed lowercase short names (b0, b1, b2, a0, a1, a2) -> upper-cased
#   valid composition (e.g. Fe3Ni)                        -> kept as is
#   all uppercase (e.g. BCC, FCC)                         -> kept as is
#   anything else (e.g. Laves)                            -> lower-cased

def phaseNameUnifier(s):
    """Normalize the spelling of a single phase name.

    Args:
        s: Phase name or composition string.

    Returns:
        The normalized name according to the rules listed above.
    """
    exceptionToUpper = ['b0', 'b1', 'b2', 'a0', 'a1', 'a2']

    # Checked first so that no composition parsing is attempted for names
    # whose result is already decided.
    if s in exceptionToUpper:
        return s.upper()

    try:
        isComp = mg.Composition(s).valid
    except Exception:
        # Anything that cannot be parsed is simply not a composition;
        # KeyboardInterrupt / SystemExit are deliberately not swallowed.
        isComp = False

    if isComp or s.isupper():
        return s
    return s.lower()

In [None]:
# Quick visual check of phaseNameUnifier: uppercase names, compositions,
# capitalized names, and one of the listed lowercase short names.
for s in ['BCC', 'FCC', 'Fe3Ni', 'Al2O3', 'Laves', 'Sigma', 'b2']:
    print(phaseNameUnifier(s))

In [None]:
# Splits a structure string into individual phases, interpreting (1) multiple
# phases of the same type, (2) composition-defined phases, and (3) named
# phases. Processes them in a unified way.

def structStr2list(s):
    """Parse a '+'-separated structure string into a sorted list of phases.

    A leading integer on a phase is its multiplicity, so '2BCC+FCC' yields
    ['BCC', 'BCC', 'FCC']. Spaces are ignored and each phase name is
    normalized with phaseNameUnifier. Empty fragments (e.g. from a trailing
    '+') are skipped.

    Args:
        s: Structure string, e.g. 'BCC+FCC+Laves'.

    Returns:
        [sorted list of phase names, number of phases], or [] when no phase
        could be extracted or `s` could not be processed (a warning is
        printed in the latter case).
    """
    ls = []
    try:
        for phase in s.replace(' ', '').split('+'):
            # All leading ASCII digits form the multiplicity, so '12BCC'
            # means twelve BCC phases rather than one '2BCC'.
            nDigits = len(phase) - len(phase.lstrip('0123456789'))
            count = int(phase[:nDigits]) if nDigits else 1
            name = phase[nDigits:]
            if not name:
                continue
            ls.extend([phaseNameUnifier(name)] * count)
    except Exception:
        print('Warning! Error parsing structure list.')
        return []

    ls.sort()
    return [ls, len(ls)] if ls else []

In [None]:
# Quick visual check of structStr2list: single phases, mixed named and
# composition-defined phases, and multiplicity prefixes.
for s in ['BCC', 'FeC3', 'BCC+TiAl5', 'FCC+BCC', 'BCC+FCC+Laves+b2', '3BCC', '2BCC+2FCC+Sigma+Laves']:
    print(structStr2list(s))

In [None]:
def processNameUnifier(s):
    """Normalize the capitalization of a processing-step name.

    Names found in the exception list (currently empty) and names written
    entirely in upper case (e.g. 'HIP') are returned unchanged; every other
    name is returned lower-cased.
    """
    keepAsIs = []
    unchanged = s in keepAsIs or s.isupper()
    return s if unchanged else s.lower()

In [None]:
# Quick visual check of processNameUnifier on all-uppercase process names.
for s in ['AC', 'A', 'HIP', 'SPS']:
    print(processNameUnifier(s))


In [None]:
def processStr2list(s):
    """Parse a '+'-separated processing string into an ordered list of steps.

    A leading integer on a step is its repetition count, so '2A+HIP' yields
    ['A', 'A', 'HIP']. Spaces are ignored and each step name is normalized
    with processNameUnifier. The order of steps is preserved. Empty fragments
    (e.g. from a trailing '+') are skipped.

    Args:
        s: Processing string, e.g. 'anneal + HIP'.

    Returns:
        [list of step names, number of steps], or [] when no step could be
        extracted or `s` could not be processed (a warning is printed in the
        latter case).
    """
    ls = []
    try:
        for process in s.replace(' ', '').split('+'):
            # All leading ASCII digits form the repetition count, so a
            # multi-digit prefix is not cut after its first digit.
            nDigits = len(process) - len(process.lstrip('0123456789'))
            count = int(process[:nDigits]) if nDigits else 1
            name = process[nDigits:]
            if not name:
                continue
            ls.extend([processNameUnifier(name)] * count)
    except Exception:
        print('Warning! Error parsing process list.')
        return []

    return [ls, len(ls)] if ls else []

In [None]:
# Quick visual check of processStr2list, including a string with spaces
# around the '+' separator.
for s in ['AC', 'A', 'anneal + HIP', 'SPS+anneal']:
    print(processStr2list(s))

In [None]:
def datapoint2entry(metaD, dataP):
    """Assemble one database entry from upload metadata and one datapoint row.

    Args:
        metaD: Metadata dictionary; stored unchanged under 'meta'.
        dataP: Dictionary mapping template column headers ('Composition',
            'Structure', 'Processing', 'Material Comment', 'Name', 'Source',
            'Temperature [K]', 'Value [SI]', 'Pointer', 'DOI') to the values
            of one row.

    Returns:
        A dict with the keys 'meta', 'material', 'property' and 'reference'.
        Parts whose source fields are missing, empty or unparsable are left
        out (with a printed warning) instead of raising.
    """
    entry = {'meta' : metaD, 'material' : {}, 'property' : {}, 'reference' : {}}
    material = entry['material']

    # composition
    compString = dataP.get('Composition')
    compList = None
    if compString is None:
        print('Warning. Parsing an entry with an empty composition field.')
    else:
        try:
            compList = compStr2compList(compString)
        except Exception as err:
            print('Warning! Composition could not be processed: '+repr(err))
    if compList:
        material.update({
                'formula' : compList[0],
                'compositionDictionary' : compList[1],
                'anonymizedFormula' : compList[2],
                'reducedFormula' : compList[3],
                'system' : compList[4],
                'nComponents' : compList[5]})

    # structure
    structString = dataP.get('Structure')
    if structString is None:
        print('Warning! Parsing an entry with an empty structure field.')
    else:
        structList = structStr2list(structString)
        # An empty list means nothing could be parsed; leave the keys out.
        if structList:
            material.update({
                    'structure' : structList[0],
                    'nPhases' : structList[1]})

    # processing
    processString = dataP.get('Processing')
    if processString is None:
        print('Warning! Parsing an entry with an empty processing field.')
    else:
        processingList = processStr2list(processString)
        if processingList:
            material.update({
                    'processes' : processingList[0],
                    'nProcessSteps' : processingList[1]})

    # comment (stored even when it is None, as long as the column exists)
    if 'Material Comment' in dataP:
        material['comment'] = dataP['Material Comment']

    # property; the 'Unit [SI]' column is currently not stored
    try:
        entry['property'].update({
            'name' : dataP['Name'],
            'source' : dataP['Source'],
            'temperature' : dataP['Temperature [K]'],
            'value' : dataP['Value [SI]']})
    except KeyError as err:
        print('Warning! Missing property column: '+str(err))

    # reference; handled separately so that a missing property column does
    # not also drop the reference
    try:
        entry['reference'].update({
                'pointer' : dataP['Pointer'],
                'doi' : dataP['DOI']})
    except KeyError as err:
        print('Warning! Missing reference column: '+str(err))

    return entry

In [None]:
# Build (and display) the entry for the sample datapoint to inspect its shape.
datapoint2entry(metaData, datapoint)


In [9]:
# Load the database credentials ('name' and 'dbKey' are read from this dict
# when the connection string is built). The context manager closes the file
# handle, which the bare open() call left open.
with open('credentialsAdam.json') as credFile:
    cred = json.load(credFile)

In [10]:
from pymongo import MongoClient
from urllib.parse import quote_plus

# User name and password are percent-escaped before being placed in the URI,
# so that reserved characters (':', '/', '@', '%', ...) in either of them do
# not break URI parsing.
# NOTE(review): assumes the credentials file stores the raw, unescaped values.
client_string=('mongodb+srv://'+quote_plus(cred['name'])+':'+quote_plus(cred['dbKey'])
               +'@testcluster.g3kud.mongodb.net/ULTREA_materials?retryWrites=true&w=majority')
database_name='ULTREA_materials'
# Each user uploads into a collection named after their own user name.
collection_name=cred['name']

In [11]:
# Create the client from the connection string and select the target
# collection inside the chosen database.
client = MongoClient(client_string)
collection = client[database_name][collection_name]


In [None]:
# Convert every parsed datapoint into an entry and upload it, reporting the
# outcome per datapoint; a failed upload does not stop the remaining ones.
for datapoint in parsed:
    uploadEntry = datapoint2entry(metaData, datapoint)
    # 'formula' is absent when the composition was empty or unparsable; fall
    # back to '' so that the status message itself cannot raise KeyError.
    comp = uploadEntry['material'].get('formula', '').replace(' ','')
    try:
        collection.insert_one(uploadEntry)
        print('Succesfully uploaded a datapoint for '+comp)
    except Exception as err:
        print('Upload of '+comp+' failed!')
        # Show the reason instead of discarding it.
        print(repr(err))

In [1]:
# Write two sample CSV rows to test.csv. The context manager closes the file
# even if one of the writes fails.
with open("test.csv", "w") as logger:
    logger.write('aaa,adwd,aada\n')
    logger.write('aaa,adwd,aada\n')


In [27]:
# Purge collection. All data from it will be lost!
# NOTE(review): Collection.remove() was deprecated in PyMongo 3 and removed in
# PyMongo 4; collection.delete_many({}) is the documented replacement.
# collection.remove({})


  


{'n': 8,
 'opTime': {'ts': Timestamp(1617128506, 8), 't': 10},
 'electionId': ObjectId('7fffffff000000000000000a'),
 'ok': 1.0,
 '$clusterTime': {'clusterTime': Timestamp(1617128506, 8),
  'signature': {'hash': b'=\x95\x94\xf5\xca\x07\xa96\x84=`\xaea\xdf\x16\x9a3\x0b\x00e',
   'keyId': 6929355718306824195}},
 'operationTime': Timestamp(1617128506, 8)}