In [1]:
import os
import pandas as pd
import numpy as np

df = pd.read_csv(os.path.abspath('MPEA_dataset_for_stats.csv'))

df_2019 = df[df['REFERENCE: tag'].str.contains('J')]

print('TOTAL records = {}'.format(len(df)))
print('Total records (2018) = {}'.format(len(df) - len(df_2019)))
print('Total records (2019) = {}'.format(len(df_2019)))
print('\n')
print('TOTAL unique references: {}'.format(len(df['REFERENCE: doi'].unique())))
print('\n')
print('Total unique compositions = {}'.format(len(df['FORMULA'].unique())))
print('Total unique compositions (2018) = {}'.format(len(df['FORMULA'].unique())-len(df_2019['FORMULA'].unique())))
print('Total unique compositions (2019) = {}'.format(len(df_2019['FORMULA'].unique())))
print('\n')
print('TOTAL YS: {}'.format(len(df['PROPERTY: YS (MPa)'].dropna())))
print('TOTAL UTS: {}'.format(len(df['PROPERTY: UTS (MPa)'].dropna())))
print('TOTAL HV: {}'.format(len(df['PROPERTY: HV'].dropna())))
print('TOTAL Elongation: {}'.format(len(df['PROPERTY: Elongation (%)'].dropna())))

TOTAL records = 1654
Total records (2018) = 661
Total records (2019) = 993


TOTAL unique references: 215


Total unique compositions = 638
Total unique compositions (2018) = 293
Total unique compositions (2019) = 345


TOTAL YS: 1146
TOTAL UTS: 563
TOTAL HV: 564
TOTAL Elongation: 698


In [2]:
# db at-a-glance table
props = ['REFERENCE: doi', 'FORMULA', 'PROPERTY: YS (MPa)', 'PROPERTY: UTS (MPa)', 'PROPERTY: Elongation (%)']
df_table = df[df['PROPERTY: Test temperature ($^\circ$C)'] == 25]
df_table = df_table.sort_values(by='FORMULA')
df_table = df_table.drop_duplicates(subset=['FORMULA'])
df_table = df_table[props]
df_table = df_table.dropna()
# df_table['FORMULA'] = df_table['FORMULA'].apply(lambda x: Composition(x).get_integer_formula_and_factor()[0])
df_table = df_table.sort_values('PROPERTY: YS (MPa)', ascending=False)
df_table = df_table[df_table['REFERENCE: doi']!='10.1016/j.msea.2006.11.049']
df_table = df_table.head(25)
df_table['PROPERTY: YS (MPa)'] = df_table['PROPERTY: YS (MPa)'].astype(int)
df_table['PROPERTY: UTS (MPa)'] = df_table['PROPERTY: UTS (MPa)'].astype(int)
df_table['PROPERTY: Elongation (%)'] = df_table['PROPERTY: Elongation (%)'].astype(int)

# df_table['FORMULA'] = df_table['FORMULA'].apply(lambda x:  '\ce{' + x + '}')
latex_table = df_table.to_latex(index=False, escape=False)
print(latex_table) 
with open('prop_table.tex', 'w') as tf:
     tf.write(latex_table)

\begin{tabular}{llrrr}
\toprule
                 REFERENCE: doi &                                      FORMULA &  PROPERTY: YS (MPa) &  PROPERTY: UTS (MPa) &  PROPERTY: Elongation (%) \\
\midrule
         10.1002/maco.201709833 &                  Al1 Cr1 Fe1 Mo0.5 Ni1 Ti0.5 &                2228 &                 3166 &                        10 \\
              10.1063/1.2734517 &  Al0.667 Co0.667 Cr0.667 Fe0.667 Ni0.667 Ti1 &                2220 &                 2720 &                         6 \\
         10.1002/maco.201709833 &                  Al1 Cr1 Fe1 Mo0.5 Ni1 Ti0.4 &                2185 &                 3673 &                        14 \\
         10.1002/maco.201709833 &                 Al1 Cr1 Fe1 Mo0.5 Ni1 Ti0.25 &                2161 &                 3641 &                        14 \\
  10.1016/j.actamat.2016.01.018 &                              Hf1 Nb1 Ta1 Zr1 &                2100 &                 2200 &                         3 \\
      10.1007/s11837-014-1066

In [3]:
def write_prop_table(df):
    df['Alloy composition'] = df['FORMULA']
    cols = ['Alloy composition', 'PROPERTY: Processing method', 'PROPERTY: Microstructure', 'PROPERTY: grain size ($\mu$m)',
           'PROPERTY: ROM Density (g/cm$^3$)', 'PROPERTY: Exp. Density (g/cm$^3$)', 'PROPERTY: Test temperature ($^\circ$C)', 
            'PROPERTY: HV', 'PROPERTY: YS (MPa)', 'PROPERTY: UTS (MPa)', 'PROPERTY: Elongation (%)', 'PROPERTY: Elongation plastic (%)',
           'PROPERTY: Young modulus (GPa)', 'PROPERTY: O content (wppm)', 'PROPERTY: N content (wppm)', 'PROPERTY: C content (wppm)']
    df = df[cols]
    dft = df.describe(include='all').T
    dft['unique'] = [len(df[key].unique()) for key in df.keys()]
    dft = dft.drop(['25%', '50%', '75%', 'freq', 'top'], axis=1)
    #dft = dft.drop(['REFERENCE: tag', 'REFERENCE: doi', 'PROPERTY: Single/Multiphase', 'PROPERTY: BCC/FCC/other', 'REFERENCE: comment'])

    dft['mean'] = [np.round(v, 1) if not np.isnan(v) else '-' for v in dft['mean'] ]
    dft['std'] = [np.round(v, 1) if not np.isnan(v) else '-' for v in dft['std']]
    dft['min'] = [np.round(v, 3) if not np.isnan(v) else '-' for v in dft['min']]
    dft['max'] = [np.round(v, 2) if not np.isnan(v) else '-' for v in dft['max']]
    dft['count'] = [int(v) if not np.isnan(v) else '-' for v in dft['count']]
    dft['unique'] = [int(v) if not np.isnan(v) else '-' for v in dft['unique']]


    print(dft.head())
    tex_lines = ['\\begin{tabular}{lccccccc}']
    tex_lines.append('\\toprule')
    print(dft.index)
    header = ['Property']
    header.extend([c for c in dft.columns])
    print(header)
    
    tex_lines.append(' & '.join(header) + ' \\\\')
    tex_lines.append('\\midrule')
    
    
    for i in dft.index:
        
        prop_name = i.replace('%', '\%')
        prop_name = prop_name.replace('PROPERTY: ', '')
        cols = [prop_name]
        cols.extend([str(dft[prop][i]) for prop in dft.keys()])
        tex_lines.append(' & '.join(cols) + ' \\\\')
        
    tex_lines.append('\\bottomrule')
    tex_lines.append('\\end{tabular}')

    tex_file = 'prop_stats.tex'
    with open(tex_file, 'w') as fw:
        for line in tex_lines:
            fw.write(line)
            fw.write('\n')

write_prop_table(df)

                                  count  unique   mean    std    min   max
Alloy composition                  1654     638      -      -      -     -
PROPERTY: Processing method        1492       6      -      -      -     -
PROPERTY: Microstructure           1071      26      -      -      -     -
PROPERTY: grain size ($\mu$m)       313     179  105.7  246.5  0.018  2000
PROPERTY: ROM Density (g/cm$^3$)    633      71    7.8    1.8    2.7  13.8
Index(['Alloy composition', 'PROPERTY: Processing method',
       'PROPERTY: Microstructure', 'PROPERTY: grain size ($\mu$m)',
       'PROPERTY: ROM Density (g/cm$^3$)', 'PROPERTY: Exp. Density (g/cm$^3$)',
       'PROPERTY: Test temperature ($^\circ$C)', 'PROPERTY: HV',
       'PROPERTY: YS (MPa)', 'PROPERTY: UTS (MPa)', 'PROPERTY: Elongation (%)',
       'PROPERTY: Elongation plastic (%)', 'PROPERTY: Young modulus (GPa)',
       'PROPERTY: O content (wppm)', 'PROPERTY: N content (wppm)',
       'PROPERTY: C content (wppm)'],
      dtype='obje