In [4]:
import os

import numpy as np
from numpy import array
import pandas as pd
from matminer.featurizers.conversions import StrToComposition, CompositionToStructureFromMP
from matminer.featurizers.structure import DensityFeatures, SiteStatsFingerprint
from matminer.featurizers.composition import ElementProperty, CohesiveEnergyMP

# Featurization pipeline: read 'target.json', derive composition- and
# structure-based descriptors with matminer, drop the columns listed in
# `excluded`, and write the remaining columns to 'features.csv'.

# The Materials Project API key is read from the environment instead of being
# committed in the source. When the variable is unset, None is passed, which is
# the featurizers' own default.
# NOTE(review): pymatgen is expected to fall back to its configured
# PMG_MAPI_KEY in that case -- confirm for the installed version.
mapi_key = os.environ.get("PMG_MAPI_KEY")

# Load data
df = pd.read_json('target.json')

# Create composition objects from the "formula" column
df = StrToComposition().featurize_dataframe(df, "formula")

# Look up a structure for each composition (network call to Materials Project).
# ignore_errors=True keeps going when a row cannot be featurized.
CtoS_feat = CompositionToStructureFromMP(mapi_key=mapi_key)
df = CtoS_feat.featurize_dataframe(df, "composition", ignore_errors=True)

# Create density, vpa, packing fraction features
df_feat = DensityFeatures()
df = df_feat.featurize_dataframe(df, "structure", ignore_errors=True)

# Create various elemental properties (min / mean / max over the composition)
element_stats = ["minimum", "mean", "maximum"]

ep_boiling_feat = ElementProperty("pymatgen", ["boiling_point"], element_stats)
df = ep_boiling_feat.featurize_dataframe(df, "composition")

ep_molar_feat = ElementProperty("pymatgen", ["molar_volume"], element_stats)
df = ep_molar_feat.featurize_dataframe(df, "composition", ignore_errors=True)

ep_ionization_feat = ElementProperty("magpie", ["FirstIonizationEnergy"], element_stats)
df = ep_ionization_feat.featurize_dataframe(df, "composition")

# Create bond length and angle features
ssf_bondlength_feat = SiteStatsFingerprint.from_preset("BondLength-dejong2016")
df = ssf_bondlength_feat.featurize_dataframe(df, "structure", ignore_errors=True)

ssf_bondangle_feat = SiteStatsFingerprint.from_preset("BondAngle-dejong2016")
df = ssf_bondangle_feat.featurize_dataframe(df, "structure", ignore_errors=True)

# Example for cohesive energy (second Materials Project lookup, same key)
CE_feat = CohesiveEnergyMP(mapi_key=mapi_key)
df = CE_feat.featurize_dataframe(df, "composition", ignore_errors=True)

# Excluded columns: the input/intermediate columns plus, for both bond
# quantities, every Holder-mean order from -4 to 4 (0 is not in the list) and
# the two standard-deviation statistics. The generated names are exactly the
# strings that were previously spelled out by hand.
holder_orders = (-4, -3, -2, -1, 1, 2, 3, 4)
excluded = ["kL", "formula", "composition", "structure"]
for quantity in ("Average bond length", "Average bond angle"):
    excluded.extend(f"holder_mean::{order} {quantity}" for order in holder_orders)
    excluded.append(f"std_dev {quantity}")
    excluded.append(f"geom_std_dev {quantity}")

# Drop excluded columns (raises KeyError if any of them is missing)
X = df.drop(excluded, axis=1)

# Save features to CSV
X.to_csv('features.csv', index=False)


StrToComposition:   0%|          | 0/1900 [00:00<?, ?it/s]



CompositionToStructureFromMP:   0%|          | 0/1900 [00:00<?, ?it/s]

DensityFeatures:   0%|          | 0/1900 [00:00<?, ?it/s]

ElementProperty:   0%|          | 0/1900 [00:00<?, ?it/s]

ElementProperty:   0%|          | 0/1900 [00:00<?, ?it/s]

ElementProperty:   0%|          | 0/1900 [00:00<?, ?it/s]

SiteStatsFingerprint:   0%|          | 0/1900 [00:00<?, ?it/s]

SiteStatsFingerprint:   0%|          | 0/1900 [00:00<?, ?it/s]

  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.isnan(np.arccos(dot)):
  if np.

CohesiveEnergyMP:   0%|          | 0/1900 [00:00<?, ?it/s]

