In [1]:
import os
import numpy as np
import pandas as pd
import pymatgen as mg

# Load the MPEA dataset; the relative file name is resolved against the
# current working directory before it is handed to pandas.
dataset_path = os.path.abspath('MPEA_dataset.csv')
df = pd.read_csv(dataset_path)

In [2]:
def get_composition_df(formula):
    """Build a one-row DataFrame of atomic fractions for an alloy formula.

    Parameters
    ----------
    formula : str
        Formula string understood by pymatgen's ``Composition``
        (e.g. ``'Al0.5 Co1 Fe1 Ni1'``).

    Returns
    -------
    pandas.DataFrame
        A single row whose ``'Alloy name'`` column holds ``formula`` and whose
        remaining columns (one per element symbol in the composition) hold
        that element's atomic fraction rounded to 3 decimals.
    """
    # Import from the defining module rather than using `mg.Composition`:
    # recent pymatgen releases no longer export `Composition` at the package
    # root, while this module path is valid on old and new releases alike.
    from pymatgen.core.composition import Composition

    comp = Composition(formula)
    elements = comp.elements
    fractions = [round(comp.get_atomic_fraction(ele), 3) for ele in elements]
    symbols = [str(ele) for ele in elements]
    return pd.DataFrame([[formula] + fractions], columns=['Alloy name'] + symbols)

# Build one single-row composition frame per formula and stack them. The
# stacked frame's columns are the union of every element encountered, so an
# alloy that lacks an element gets NaN in that element's column.
composition_frames = [get_composition_df(formula) for formula in df['FORMULA']]
comp_df = pd.concat(composition_frames)

# Put the columns in sorted label order (note that 'Alloy name' is sorted in
# with the element symbols rather than kept first).
comp_df = comp_df.reindex(sorted(comp_df.columns), axis=1)

# Show and export each distinct composition once; `comp_df` itself keeps
# every row.
unique_compositions = comp_df.drop_duplicates()
print(unique_compositions)
unique_compositions.to_csv('stats_and_tables/compositions.csv', index=False)

    Ag     Al                                 Alloy name   B      C  Ca  \
0  NaN  0.077                         Al0.25 Co1 Fe1 Ni1 NaN    NaN NaN   
0  NaN  0.143                          Al0.5 Co1 Fe1 Ni1 NaN    NaN NaN   
0  NaN  0.200                         Al0.75 Co1 Fe1 Ni1 NaN    NaN NaN   
0  NaN  0.250                            Al1 Co1 Fe1 Ni1 NaN    NaN NaN   
0  NaN    NaN                                Co1 Fe1 Ni1 NaN    NaN NaN   
..  ..    ...                                        ...  ..    ...  ..   
0  NaN    NaN                        Hf1 Mo1 Nb1 Ta1 Ti1 NaN    NaN NaN   
0  NaN    NaN              C0.111 Co1 Cr1 Fe1 Ni1 W0.111 NaN  0.026 NaN   
0  NaN    NaN              C0.429 Co1 Cr1 Fe1 Ni1 W0.429 NaN  0.088 NaN   
0  NaN  0.077  Al0.267 Co1 Cr0.667 Fe0.667 Ni0.667 Ti0.2 NaN    NaN NaN   
0  NaN  0.250                            Al1 Nb1 Ti1 Zr1 NaN    NaN NaN   

       Co     Cr  Cu     Fe  ...  Sc  Si  Sn   Ta     Ti   V      W   Y  Zn  \
0   0.308    NaN NaN

In [4]:
# Group by alloy, reformat for table: keep only the columns that go into the
# grouped table, in the order they should appear.
# Labels containing LaTeX markup (`\mu`, `\circ`) are written as raw strings;
# in a normal literal `\m` and `\c` are invalid escape sequences, which Python
# warns about. The resulting string values are unchanged.
cols = ['FORMULA', 'IDENTIFIER: Reference ID', 'REFERENCE: doi', 'PROPERTY: Processing method', 'PROPERTY: Microstructure', 'PROPERTY: O content (wppm)', 
        'PROPERTY: N content (wppm)', 'PROPERTY: C content (wppm)', r'PROPERTY: grain size ($\mu$m)', 'PROPERTY: ROM Density (g/cm$^3$)', 
        'PROPERTY: Exp. Density (g/cm$^3$)', 'PROPERTY: HV', 'PROPERTY: Type of test', r'PROPERTY: Test temperature ($^\circ$C)', 'PROPERTY: YS (MPa)',
        'PROPERTY: UTS (MPa)', 'PROPERTY: Elongation (%)', 'PROPERTY: Elongation plastic (%)', 'PROPERTY: Young modulus (GPa)', 'REFERENCE: year']
df = df[cols]


import csv

# Write the dataset as a "grouped" table: every alloy gets a title row that
# holds only its formula, followed by one row per record of that alloy with
# the first column left blank. The same rows are collected in `rows` so the
# table can also be rebuilt as a DataFrame afterwards.
grouped = df.groupby('FORMULA')
header_row = ['Alloy composition'] + list(df.keys())
rows = []

with open('stats_and_tables/grouped_by_composition.csv', 'w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(header_row)

    for formula, records in grouped:
        # Title row: the formula followed by one empty cell per data column.
        title_row = [formula] + [''] * len(records.keys())
        rows.append(title_row)
        writer.writerow(title_row)

        for row_index, record in records.iterrows():
            # A value whose string form is 'nan' is treated as missing and
            # written as an empty cell.
            data_row = [''] + ['' if str(value) == 'nan' else value for value in record]
            rows.append(data_row)
            writer.writerow(data_row)

# Rebuild the table as a DataFrame; the per-record 'FORMULA' column is dropped
# because the formula already appears in each group's title row.
new_df = pd.DataFrame(rows, columns=header_row).drop(columns='FORMULA')

def highlight(s):
    """Return one CSS declaration per cell of a column, chosen by column name.

    Written as a column-wise callback for ``Styler.apply``: only ``s.name``
    and the number of items in ``s`` are used.

    Parameters
    ----------
    s : iterable with a ``name`` attribute
        The column being styled (``Styler.apply`` passes a pandas Series).

    Returns
    -------
    list of str
        ``'font-weight: bold'`` for the 'Alloy composition' column, a
        ``#cfc0a2`` background for columns listed in ``inputs``, a ``#a2cfc0``
        background for columns listed in ``outputs``, and ``''`` (no styling)
        for any other column - repeated once per cell.
    """
    inputs = ['IDENTIFIER: Reference ID', 'REFERENCE: doi', 'PROPERTY: Processing method', 'PROPERTY: Microstructure', 'PROPERTY: O content (wppm)', 
              'PROPERTY: N content (wppm)', 'PROPERTY: C content (wppm)']

    # Raw strings for the labels with LaTeX markup: `\m` and `\c` are invalid
    # escape sequences in a normal literal. The string values are unchanged.
    outputs = [r'PROPERTY: grain size ($\mu$m)', 'PROPERTY: ROM Density (g/cm$^3$)', 'PROPERTY: Exp. Density (g/cm$^3$)', 'PROPERTY: HV', 
               'PROPERTY: Type of test', r'PROPERTY: Test temperature ($^\circ$C)', 'PROPERTY: YS (MPa)', 'PROPERTY: UTS (MPa)', 'PROPERTY: Elongation (%)', 
               'PROPERTY: Elongation plastic (%)', 'PROPERTY: Young modulus (GPa)']

    # Trace which column is being styled.
    print(s.name)

    if s.name == 'Alloy composition':
        css = 'font-weight: bold'
    elif s.name in inputs:
        css = 'background-color: #cfc0a2'
    elif s.name in outputs:
        css = 'background-color: #a2cfc0'
    else:
        css = ''
    return [css for _cell in s]

# Apply `highlight` column by column (axis=0 is Styler.apply's default, spelled
# out here) and export the styled table. Note that `new_df` is rebound from a
# DataFrame to the resulting Styler.
new_df = new_df.style.apply(highlight, axis=0)
new_df.to_excel('stats_and_tables/grouped_by_composition.xlsx', engine='openpyxl')

Alloy composition
IDENTIFIER: Reference ID
REFERENCE: doi
PROPERTY: Processing method
PROPERTY: Microstructure
PROPERTY: O content (wppm)
PROPERTY: N content (wppm)
PROPERTY: C content (wppm)
PROPERTY: grain size ($\mu$m)
PROPERTY: ROM Density (g/cm$^3$)
PROPERTY: Exp. Density (g/cm$^3$)
PROPERTY: HV
PROPERTY: Type of test
PROPERTY: Test temperature ($^\circ$C)
PROPERTY: YS (MPa)
PROPERTY: UTS (MPa)
PROPERTY: Elongation (%)
PROPERTY: Elongation plastic (%)
PROPERTY: Young modulus (GPa)
REFERENCE: year
