In [17]:
import pickle
import pandas as pd
import os
import matminer
from ase.io import read as ase_read
import pymatgen
from pymatgen.io.ase import AseAtomsAdaptor
from pymatgen.core.structure import Structure
from pymatgen.io.cif import CifBlock
from matminer.featurizers.base import MultipleFeaturizer
from matminer.featurizers.structure import *
from matminer.featurizers.composition import *
from pymatgen.io.cif import CifParser
from pymatgen.io.ase import AseAtomsAdaptor
from tqdm import tqdm

In [18]:
def run_featurization(structure: pymatgen.core.structure.Structure) -> dict:
    """Computes features for one pymatgen structure"""
    feature_calculator = MultipleFeaturizer([
                                        DensityFeatures(),
                                        OrbitalFieldMatrix(),
                                      #  StructuralHeterogeneity(), 
                                        MaximumPackingEfficiency(),
                                        XRDPowderPattern(), 
                                        JarvisCFID(), 
                                        StructuralComplexity(),
                                        ])
    features = feature_calculator.featurize(structure)
    features = dict(zip(feature_calculator.feature_labels(), features)),
    return features

In [19]:
feature_calculator = MultipleFeaturizer([
                                        DensityFeatures(),
                                        OrbitalFieldMatrix(),
                                      #  StructuralHeterogeneity(), 
                                        MaximumPackingEfficiency(),
                                        XRDPowderPattern(), 
                                        JarvisCFID(), 
                                        StructuralComplexity(),
                                        ])

In [20]:
feature_labels = feature_calculator.feature_labels()


In [32]:
cif_atoms = list(filter(
    lambda x: x.endswith('cif'),
    (map(lambda x: os.path.join('WSe2_500', x),
    os.listdir("WSe2_500")))
))

In [33]:
cif_atoms

['WSe2_500/WSe2_Mo3W60Se123S2_0b48a326-5b68-4399-8512-6ab64a11cbcb.cif',
 'WSe2_500/WSe2_Mo2W60Se113S8_ac6af8a5-0ed0-4339-a7a0-4268906bba79.cif',
 'WSe2_500/WSe2_Mo3W61Se117S8_362351ce-a10e-4187-b464-9254d02a75cf.cif',
 'WSe2_500/WSe2_Mo2W61Se122S3_5bf89cd4-c80c-4527-9f94-2c03440ac44a.cif',
 'WSe2_500/WSe2_Mo3W61Se117S6_ec52c755-6a98-4ed8-9f60-8f08b62c00c8.cif',
 'WSe2_500/WSe2_Mo1W61Se117S5_072932ae-3309-4dce-ad0b-6125fd1fdabd.cif',
 'WSe2_500/WSe2_W60Se118S5_e30bffb0-3771-4e95-8c51-9fdf5a3136dd.cif',
 'WSe2_500/WSe2_Mo1W61Se122S4_9bfa3ae8-b236-4e0e-93e6-5d7e0936c30d.cif',
 'WSe2_500/WSe2_Mo1W63Se125_d5102f38-5e65-4281-92ed-899b27baed8b.cif',
 'WSe2_500/WSe2_Mo1W61Se122S4_30222e4f-9e5e-4314-87d2-123a22092732.cif',
 'WSe2_500/WSe2_Mo1W61Se122S4_2a3800af-d3a9-4774-93a0-3e1671b6c3ac.cif',
 'WSe2_500/WSe2_Mo4W56Se112S8_e9dfa18c-4a69-48b7-a1e7-10b666e32837.cif',
 'WSe2_500/WSe2_W63Se125S2_3480add9-1fc1-4b55-96a9-e7380d46b87d.cif',
 'WSe2_500/WSe2_W62Se126_fe433eb7-fdf9-43ec-9c95-cee110d5ab

In [34]:
df=pd.DataFrame({'files':cif_atoms})

In [35]:
df

Unnamed: 0,files
0,WSe2_500/WSe2_Mo3W60Se123S2_0b48a326-5b68-4399...
1,WSe2_500/WSe2_Mo2W60Se113S8_ac6af8a5-0ed0-4339...
2,WSe2_500/WSe2_Mo3W61Se117S8_362351ce-a10e-4187...
3,WSe2_500/WSe2_Mo2W61Se122S3_5bf89cd4-c80c-4527...
4,WSe2_500/WSe2_Mo3W61Se117S6_ec52c755-6a98-4ed8...
...,...
495,WSe2_500/WSe2_Mo4W58Se125S3_abf483f4-480d-43fb...
496,WSe2_500/WSe2_Mo2W59Se114S8_8547bfd8-e09d-4f0d...
497,WSe2_500/WSe2_Mo1W61Se122S4_3a9b2c27-67d9-4092...
498,WSe2_500/WSe2_Mo4W56Se112S8_159b0243-1408-4e90...


In [36]:
def read_ase(i):
    st=ase_read(i)
    st.set_pbc((True, True, False))
    structure=AseAtomsAdaptor.get_structure(st)
    return(structure)
df['structure']=df['files'].apply(read_ase)

In [37]:
features = feature_calculator.featurize_dataframe(df, col_id='structure', ignore_errors=True, pbar=True)


MultipleFeaturizer: 100% 500/500 [00:06<00:00, 79.54it/s] 


In [143]:
features['_id']=features['files'].str.replace(name+ '/', '')
features['_id']=features['_id'].str.replace('.cif', '')
features=features.drop(columns=['files', 'structure'])


  features['_id']=features['_id'].str.replace('.cif', '')


In [232]:
features.to_csv(name+'/matminer.csv')