In [1]:
import json
import pandas as pd
from crossref.restful import Works

# Crossref REST client; later cells use it to look up DOI metadata.
works = Works()

# Read the two per-year datasets into DataFrames.
df_2018 = pd.read_csv('2018_data/2018_data.csv')
df_2019 = pd.read_csv('2019_data/2019_data.csv')

# Stack both years and number the references: each distinct 'REFERENCE: tag'
# gets an integer ID equal to its category code + 1 (a missing tag has code -1
# and therefore ends up with ID 0).
df = pd.concat([df_2018, df_2019])
tag_codes = df['REFERENCE: tag'].astype('category').cat.codes
df['IDENTIFIER: Reference ID'] = tag_codes.astype('int64') + 1

# Write out all rows together with their reference ID - this file is the
# input for process.ipynb.
df.to_csv('combined_data.csv')

# For the reference CSV only the first row of every DOI is kept.
# NOTE(review): IDs are assigned per tag but rows are de-duplicated per DOI;
# presumably tag and DOI map one-to-one - confirm against the input data.
df = df.drop_duplicates(subset='REFERENCE: doi')
df

Unnamed: 0,REFERENCE: tag,REFERENCE: doi,FORMULA,PROPERTY: Type of phases,PROPERTY: Single/Multiphase,PROPERTY: synthesis method,PROPERTY: grain size ($\mu$m),PROPERTY: ROM Density (g/cm$^3$),PROPERTY: HV,PROPERTY: Type of test,...,PROPERTY: Exp. Young modulus (GPa),PROPERTY: O content (wppm),PROPERTY: N content (wppm),PROPERTY: C content (wppm),REFERENCE: comment,Internal Reference #,Original DOI,PROPERTY: Exp. Density (g/cm$^3$),Unnamed: 22,IDENTIFIER: Reference ID
0,4,10.1016/j.jmmm.2014.07.023,Al0.25CoFeNi,FCC,S,AC,,7.9,138,C,...,,,,,,,,,,27
9,5,10.1016/j.actamat.2014.08.026,CoCrFeNi,FCC,S,CR,24,8.2,,T,...,,,,,elongation is total,,,,,37
16,6,10.1016/j.intermet.2015.01.004,CoCrFeNb0.103Ni,FCC+Laves (C14),M,AC,150-200,8.2,,T,...,,,,,all YS recorded as +1 MPa,,,,,46
22,7,10.1016/j.jallcom.2009.08.090,Al0.25CoCrFeNi,FCC,S,A,,7.7,110,,...,,,,,,,,,,54
42,8,10.1007/s11837-013-0753-6,Al0.5CoCrFeMo0.5Ni,FCC+Sigma,M,AC,,7.7,425,,...,,,,,,,,,,63
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
904,J135-18,10.1016/j.matlet.2015.08.099,AlNbTiVZr0.5,,,,,,,C,...,,,,,,,,5.64,,132
905,J135-19,10.1179/1743284715Y.0000000032,AlNbTiZr,,,,,,539.6,C,...,,,,,,,,5.79,,133
906,J135-24,10.1016/j.actamat.2012.11.032,NbTiVZr,,,,,,335,C,...,,,,,,,,6.52,,134
908,new_ref_2,10.1002/maco.201709833,AlCrFeNiMo0.5,BCC+B2,M,AC,,,623.7,C,...,,,,,,,,,,265


In [2]:
# Query Crossref once per remaining row and store whatever works.doi() returns
# under that row's reference ID (only necessary to run if new references have
# been added). Only the two columns that are actually needed are iterated.
ref_data = {
    ref_id: works.doi(doi)
    for ref_id, doi in zip(df['IDENTIFIER: Reference ID'], df['REFERENCE: doi'])
}

# Cache the result on disk; the context manager guarantees the file is flushed
# and closed even if serialisation fails part-way.
with open('references/reference_data.json', 'w') as ref_file:
    json.dump(ref_data, ref_file)

# Load the cache instead if no new refs have been added. JSON object keys are
# always strings, so they are converted back to int here; otherwise the integer
# reference IDs used by the following cells would not match any key.
# with open('references/reference_data.json', 'r') as ref_file:
#     ref_data = {int(ref_id): record for ref_id, record in json.load(ref_file).items()}

In [3]:
# Hand-written metadata for a reference whose info is not found in Crossref.
# It carries the same three fields the following cell reads from a record:
# 'title', 'URL' and 'issued' -> 'date-parts'.
# NOTE(review): 48 is a hard-coded reference ID; IDs are derived from category
# codes of 'REFERENCE: tag', so presumably they can shift when references are
# added - confirm 48 still identifies this paper after any data change.
manual_reference = {
    'title': ['Solution strengthening of ductile refractory HfMoxNbTaTiZr high-entropy alloys'],
    'URL': 'https://doi.org/10.1016/j.matlet.2016.03.133',
    'issued': {'date-parts': [[2016]]},
}
ref_data[48] = manual_reference

In [4]:
def _ref_year(record):
    """Return the first 'issued' date-part of a record as a string, or None if absent."""
    date_parts = (record.get('issued') or {}).get('date-parts') or []
    # date-parts is a list of lists; the value used is the first element of the first list.
    first_part = date_parts[0] if date_parts else None
    year = first_part[0] if first_part else None
    # Avoid turning a missing/null year into the literal string 'None'.
    return None if year is None else str(year)


def _ref_title(record):
    """Return the first entry of the record's 'title' list, or None if there is none."""
    titles = record.get('title') or []
    return titles[0] if titles else None


def _ref_url(record):
    """Return the record's 'URL' value, or None if the key is absent."""
    return record.get('URL')


def _from_ref_id(extract):
    """Wrap `extract` so it takes a reference ID and looks the record up in ref_data.

    The wrapper returns None when ref_data holds None for that ID. An ID that is
    not in ref_data at all still raises KeyError, so a key mismatch is not
    silently turned into empty columns.
    """
    def lookup(ref_id):
        record = ref_data[ref_id]
        return None if record is None else extract(record)
    return lookup


# Add year, title and URL columns by looking each row's reference ID up in ref_data.
ref_ids = df['IDENTIFIER: Reference ID']
df['REFERENCE: year'] = ref_ids.apply(_from_ref_id(_ref_year))
df['REFERENCE: title'] = ref_ids.apply(_from_ref_id(_ref_title))
df['REFERENCE: url'] = ref_ids.apply(_from_ref_id(_ref_url))
df

Unnamed: 0,REFERENCE: tag,REFERENCE: doi,FORMULA,PROPERTY: Type of phases,PROPERTY: Single/Multiphase,PROPERTY: synthesis method,PROPERTY: grain size ($\mu$m),PROPERTY: ROM Density (g/cm$^3$),PROPERTY: HV,PROPERTY: Type of test,...,PROPERTY: C content (wppm),REFERENCE: comment,Internal Reference #,Original DOI,PROPERTY: Exp. Density (g/cm$^3$),Unnamed: 22,IDENTIFIER: Reference ID,REFERENCE: year,REFERENCE: title,REFERENCE: url
0,4,10.1016/j.jmmm.2014.07.023,Al0.25CoFeNi,FCC,S,AC,,7.9,138,C,...,,,,,,,27,2014,Effects of Al and Si addition on the structure...,http://dx.doi.org/10.1016/j.jmmm.2014.07.023
9,5,10.1016/j.actamat.2014.08.026,CoCrFeNi,FCC,S,CR,24,8.2,,T,...,,elongation is total,,,,,37,2014,Temperature dependence of the mechanical prope...,http://dx.doi.org/10.1016/j.actamat.2014.08.026
16,6,10.1016/j.intermet.2015.01.004,CoCrFeNb0.103Ni,FCC+Laves (C14),M,AC,150-200,8.2,,T,...,,all YS recorded as +1 MPa,,,,,46,2015,Effects of Nb additions on the microstructure ...,http://dx.doi.org/10.1016/j.intermet.2015.01.004
22,7,10.1016/j.jallcom.2009.08.090,Al0.25CoCrFeNi,FCC,S,A,,7.7,110,,...,,,,,,,54,2009,Microstructure and mechanical property of as-c...,http://dx.doi.org/10.1016/j.jallcom.2009.08.090
42,8,10.1007/s11837-013-0753-6,Al0.5CoCrFeMo0.5Ni,FCC+Sigma,M,AC,,7.7,425,,...,,,,,,,63,2013,Effect of Aluminum Content on Microstructure a...,http://dx.doi.org/10.1007/s11837-013-0753-6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
904,J135-18,10.1016/j.matlet.2015.08.099,AlNbTiVZr0.5,,,,,,,C,...,,,,,5.64,,132,2015,An AlNbTiVZr0.5 high-entropy alloy combining h...,http://dx.doi.org/10.1016/j.matlet.2015.08.099
905,J135-19,10.1179/1743284715Y.0000000032,AlNbTiZr,,,,,,539.6,C,...,,,,,5.79,,133,2015,Effect of Al on structure and mechanical prope...,http://dx.doi.org/10.1179/1743284715y.0000000032
906,J135-24,10.1016/j.actamat.2012.11.032,NbTiVZr,,,,,,335,C,...,,,,,6.52,,134,2013,"Low-density, refractory multi-principal elemen...",http://dx.doi.org/10.1016/j.actamat.2012.11.032
908,new_ref_2,10.1002/maco.201709833,AlCrFeNiMo0.5,BCC+B2,M,AC,,,623.7,C,...,,,,,,,265,2017,Structures and corrosion properties of the AlC...,http://dx.doi.org/10.1002/maco.201709833


In [5]:
# Columns that make up the reference table, in output order.
cols = [
    'IDENTIFIER: Reference ID',
    'REFERENCE: title',
    'REFERENCE: year',
    'REFERENCE: doi',
    'REFERENCE: url',
]
df = df.loc[:, cols]

# Order the rows by reference ID and write them out without the index column.
references_sorted = df.sort_values(by='IDENTIFIER: Reference ID')
references_sorted.to_csv('references/references.csv', index=False)