# Test other features

*Authors: Enze Chen (University of California, Berkeley)*

### Import Python packages

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

# Load the API key that is later handed to MPRester. The key is taken from
# the second line (index 1) of the text file, with surrounding whitespace removed.
with open('../assets/files/mp_api_key.txt', 'r') as key_file:
    key_lines = key_file.readlines()
api_key = key_lines[1].strip()
    
from pymatgen.ext.matproj import MPRester

  in "/home/jovyan/.pmgrc.yaml", line 5, column 1
could not find expected ':'
  in "/home/jovyan/.pmgrc.yaml", line 6, column 17. You may need to reconfigure your yaml file.


### Make a query

I've kept this very simple for demonstration.

In [2]:
# Query filter, written with MongoDB-style operators:
#   - the 'diel' field must exist
#   - 'nelements' must be less than 3
criteria = {
    'diel': {'$exists': True},
    'nelements': {'$lt': 3},
}

# Fields to return for every matching entry.
props = ['material_id', 'pretty_formula', 'structure']

# The context manager closes the REST session once the query has finished.
with MPRester(api_key) as mpr:
    res = mpr.query(criteria=criteria, properties=props)

100%|██████████| 1079/1079 [00:02<00:00, 374.84it/s]


### Convert to a DataFrame

But _because_ my query was so simple, I am able to take advantage of built-in converters from `list(dict)` to `DataFrame`. 😎
See [here](https://stackoverflow.com/questions/20638006/convert-list-of-dictionaries-to-a-pandas-dataframe) for more info (try using Google for other answers).

In [3]:
# The query result is handed straight to the DataFrame constructor;
# the requested properties show up as the columns.
df = pd.DataFrame(data=res)
df

Unnamed: 0,material_id,pretty_formula,structure
0,mp-1000,BaTe,"[[0. 0. 0.] Ba, [3.544879 3.544879 3.544879] Te]"
1,mp-1008492,BrCl,"[[0. 0. 0.] Br, [0. 0. 3.9053505..."
2,mp-1008559,BP,"[[ 1.600141 -0.92384351 2.65049818] B, [1.6..."
3,mp-1009129,MgO,"[[0. 0. 0.] Mg, [1.46901 0.84813536 1.33984..."
4,mp-1009894,ZrC,"[[0. 0. 0.] Zr, [1.276538 1.276538 1.276538] C]"
...,...,...,...
1074,mp-8938,Si2Mo,"[[2.308237 2.68706104 5.51457779] Si, [ 1.13..."
1075,mp-9063,RbSe,"[[1.47222948 2.54997625 3.1826505 ] Rb, [6.477..."
1076,mp-971,K2O,"[[1.6219385 1.6219385 1.6219385] K, [4.8658155..."
1077,mvc-11115,TiO2,"[[3.25103177 0. 1.18713198] Ti, [ 5.77..."


### Other featurizers

In addition to `ElementProperty`, matminer has [many more featurizers](https://hackingmaterials.lbl.gov/matminer/matminer.featurizers.html).
Below we import a few and demonstrate how they work, starting with **composition** featurizers (you've seen `ElementProperty` already).

In [4]:
from matminer.featurizers.conversions import StrToComposition

from matminer.featurizers.composition.composite import ElementProperty
from matminer.featurizers.composition.element import Stoichiometry, BandCenter
from matminer.featurizers.composition.orbital import AtomicOrbitals, ValenceOrbital
from matminer.featurizers.composition.packing import AtomicPackingEfficiency
from matminer.featurizers.composition.thermo import CohesiveEnergy   # I include this, but unfortunately it doesn't seem to work. API key error.

In [5]:
# Convert the formula strings in 'pretty_formula' with StrToComposition.
# The result is stored back into `df`, which gains a 'composition' column
# that the composition featurizers below read from.
str_comp = StrToComposition()
df = str_comp.featurize_dataframe(df, col_id='pretty_formula')

StrToComposition:   0%|          | 0/1079 [00:00<?, ?it/s]

#### Demos

Now we'll demonstrate using a few featurizers that we've imported.
We don't have to do all of them because they all follow the same template format.

In [6]:
# Stoichiometry featurizer: reads the 'composition' column and returns a
# frame with the `*-norm` feature columns added.
featurizer = Stoichiometry()
df2 = featurizer.featurize_dataframe(df, col_id='composition')
df2

Stoichiometry:   0%|          | 0/1079 [00:00<?, ?it/s]

Unnamed: 0,material_id,pretty_formula,structure,composition,0-norm,2-norm,3-norm,5-norm,7-norm,10-norm
0,mp-1000,BaTe,"[[0. 0. 0.] Ba, [3.544879 3.544879 3.544879] Te]","(Ba, Te)",2,0.707107,0.629961,0.574349,0.552045,0.535887
1,mp-1008492,BrCl,"[[0. 0. 0.] Br, [0. 0. 3.9053505...","(Br, Cl)",2,0.707107,0.629961,0.574349,0.552045,0.535887
2,mp-1008559,BP,"[[ 1.600141 -0.92384351 2.65049818] B, [1.6...","(B, P)",2,0.707107,0.629961,0.574349,0.552045,0.535887
3,mp-1009129,MgO,"[[0. 0. 0.] Mg, [1.46901 0.84813536 1.33984...","(Mg, O)",2,0.707107,0.629961,0.574349,0.552045,0.535887
4,mp-1009894,ZrC,"[[0. 0. 0.] Zr, [1.276538 1.276538 1.276538] C]","(Zr, C)",2,0.707107,0.629961,0.574349,0.552045,0.535887
...,...,...,...,...,...,...,...,...,...,...
1074,mp-8938,Si2Mo,"[[2.308237 2.68706104 5.51457779] Si, [ 1.13...","(Si, Mo)",2,0.745356,0.693361,0.670782,0.667408,0.666732
1075,mp-9063,RbSe,"[[1.47222948 2.54997625 3.1826505 ] Rb, [6.477...","(Rb, Se)",2,0.707107,0.629961,0.574349,0.552045,0.535887
1076,mp-971,K2O,"[[1.6219385 1.6219385 1.6219385] K, [4.8658155...","(K, O)",2,0.745356,0.693361,0.670782,0.667408,0.666732
1077,mvc-11115,TiO2,"[[3.25103177 0. 1.18713198] Ti, [ 5.77...","(Ti, O)",2,0.745356,0.693361,0.670782,0.667408,0.666732


In [7]:
# ValenceOrbital featurizer: same call pattern as above; the result carries
# the avg/frac s, p, d, f valence-electron columns.
featurizer = ValenceOrbital()
df2 = featurizer.featurize_dataframe(df, col_id='composition')
df2

ValenceOrbital:   0%|          | 0/1079 [00:00<?, ?it/s]

Unnamed: 0,material_id,pretty_formula,structure,composition,avg s valence electrons,avg p valence electrons,avg d valence electrons,avg f valence electrons,frac s valence electrons,frac p valence electrons,frac d valence electrons,frac f valence electrons
0,mp-1000,BaTe,"[[0. 0. 0.] Ba, [3.544879 3.544879 3.544879] Te]","(Ba, Te)",2.000000,2.000000,5.000000,0.0,0.222222,0.222222,0.555556,0.000000
1,mp-1008492,BrCl,"[[0. 0. 0.] Br, [0. 0. 3.9053505...","(Br, Cl)",2.000000,5.000000,5.000000,0.0,0.166667,0.416667,0.416667,0.000000
2,mp-1008559,BP,"[[ 1.600141 -0.92384351 2.65049818] B, [1.6...","(B, P)",2.000000,2.000000,0.000000,0.0,0.500000,0.500000,0.000000,0.000000
3,mp-1009129,MgO,"[[0. 0. 0.] Mg, [1.46901 0.84813536 1.33984...","(Mg, O)",2.000000,2.000000,0.000000,0.0,0.500000,0.500000,0.000000,0.000000
4,mp-1009894,ZrC,"[[0. 0. 0.] Zr, [1.276538 1.276538 1.276538] C]","(Zr, C)",2.000000,1.000000,1.000000,0.0,0.500000,0.250000,0.250000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...
1074,mp-8938,Si2Mo,"[[2.308237 2.68706104 5.51457779] Si, [ 1.13...","(Si, Mo)",1.666667,1.333333,1.666667,0.0,0.357143,0.285714,0.357143,0.000000
1075,mp-9063,RbSe,"[[1.47222948 2.54997625 3.1826505 ] Rb, [6.477...","(Rb, Se)",1.500000,2.000000,5.000000,0.0,0.176471,0.235294,0.588235,0.000000
1076,mp-971,K2O,"[[1.6219385 1.6219385 1.6219385] K, [4.8658155...","(K, O)",1.333333,1.333333,0.000000,0.0,0.500000,0.500000,0.000000,0.000000
1077,mvc-11115,TiO2,"[[3.25103177 0. 1.18713198] Ti, [ 5.77...","(Ti, O)",2.000000,2.666667,0.666667,0.0,0.375000,0.500000,0.125000,0.000000


### Structural featurizers

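To use these featurizers, we need the pymatgen [`Structure`](https://pymatgen.org/pymatgen.core.structure.html) object for each material, which is why we requested the `structure` property in our query above.
Featurization doesn't always work, so we add some extra cleanup (ignoring errors and dropping rows with missing values).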

First, some imports. 
There are [many more](https://hackingmaterials.lbl.gov/matminer/featurizer_summary.html#structure) available.

In [8]:
from matminer.featurizers.structure.order import DensityFeatures, ChemicalOrdering, StructuralComplexity

In [9]:
# DensityFeatures works on the 'structure' column. ignore_errors=True is
# passed so that a failing structure does not abort the whole run; dropna
# then removes every row that contains at least one missing value.
featurizer = DensityFeatures()
df3 = (
    featurizer
    .featurize_dataframe(df, 'structure', ignore_errors=True)
    .dropna(axis=0, how='any')
)
df3

DensityFeatures:   0%|          | 0/1079 [00:00<?, ?it/s]

Unnamed: 0,material_id,pretty_formula,structure,composition,density,vpa,packing fraction
0,mp-1000,BaTe,"[[0. 0. 0.] Ba, [3.544879 3.544879 3.544879] Te]","(Ba, Te)",4.937886,44.545542,0.596286
1,mp-1008492,BrCl,"[[0. 0. 0.] Br, [0. 0. 3.9053505...","(Br, Cl)",2.527162,37.899200,0.139309
2,mp-1008559,BP,"[[ 1.600141 -0.92384351 2.65049818] B, [1.6...","(B, P)",2.953047,11.748073,0.287759
3,mp-1009129,MgO,"[[0. 0. 0.] Mg, [1.46901 0.84813536 1.33984...","(Mg, O)",3.341014,10.015975,0.750898
4,mp-1009894,ZrC,"[[0. 0. 0.] Zr, [1.276538 1.276538 1.276538] C]","(Zr, C)",5.150550,16.641452,0.511833
...,...,...,...,...,...,...,...
1074,mp-8938,Si2Mo,"[[2.308237 2.68706104 5.51457779] Si, [ 1.13...","(Si, Mo)",6.204201,13.570712,0.587555
1075,mp-9063,RbSe,"[[1.47222948 2.54997625 3.1826505 ] Rb, [6.477...","(Rb, Se)",3.347843,40.778314,0.744663
1076,mp-971,K2O,"[[1.6219385 1.6219385 1.6219385] K, [4.8658155...","(K, O)",2.291176,22.756312,1.319916
1077,mvc-11115,TiO2,"[[3.25103177 0. 1.18713198] Ti, [ 5.77...","(Ti, O)",3.548800,12.456820,0.355992


In [10]:
# StructuralComplexity follows the same recipe: featurize the 'structure'
# column with errors ignored, then discard any row that has a missing value.
featurizer = StructuralComplexity()
df3 = (
    featurizer
    .featurize_dataframe(df, 'structure', ignore_errors=True)
    .dropna(axis=0, how='any')
)
df3

StructuralComplexity:   0%|          | 0/1079 [00:00<?, ?it/s]

Unnamed: 0,material_id,pretty_formula,structure,composition,structural complexity per atom,structural complexity per cell
0,mp-1000,BaTe,"[[0. 0. 0.] Ba, [3.544879 3.544879 3.544879] Te]","(Ba, Te)",1.000000,2.000000
1,mp-1008492,BrCl,"[[0. 0. 0.] Br, [0. 0. 3.9053505...","(Br, Cl)",1.000000,4.000000
2,mp-1008559,BP,"[[ 1.600141 -0.92384351 2.65049818] B, [1.6...","(B, P)",1.000000,4.000000
3,mp-1009129,MgO,"[[0. 0. 0.] Mg, [1.46901 0.84813536 1.33984...","(Mg, O)",1.000000,2.000000
4,mp-1009894,ZrC,"[[0. 0. 0.] Zr, [1.276538 1.276538 1.276538] C]","(Zr, C)",1.000000,2.000000
...,...,...,...,...,...,...
1074,mp-8938,Si2Mo,"[[2.308237 2.68706104 5.51457779] Si, [ 1.13...","(Si, Mo)",0.918296,8.264663
1075,mp-9063,RbSe,"[[1.47222948 2.54997625 3.1826505 ] Rb, [6.477...","(Rb, Se)",1.959148,23.509775
1076,mp-971,K2O,"[[1.6219385 1.6219385 1.6219385] K, [4.8658155...","(K, O)",0.918296,2.754888
1077,mvc-11115,TiO2,"[[3.25103177 0. 1.18713198] Ti, [ 5.77...","(Ti, O)",2.396241,28.754888


### DOS featurizers?

I've never tried this before, but since we're working with electronic materials (dielectrics), it's conceivable that the [band structure](https://en.wikipedia.org/wiki/Electronic_band_structure) and [density of states (DOS)](https://en.wikipedia.org/wiki/Density_of_states) of the material will matter.
So we'll try to get the DOS of all of our materials and then [featurize that](https://hackingmaterials.lbl.gov/matminer/featurizer_summary.html#dos). 🤞🏼
Since the DOS is memory intensive, I'll only pull a few. 

Here I use the [`df.apply`](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.apply.html) method to create a new column by _calling a function_ on an existing column.

In [11]:
# Keep only the first 10 rows, as an independent copy so that adding the
# 'dos' column leaves `df` untouched.
slices = df.head(10).copy()

# Look up the DOS for each material id, one request per row.
with MPRester(api_key) as mpr:
    fetch_dos = mpr.get_dos_by_material_id
    slices['dos'] = slices['material_id'].apply(fetch_dos)

In [12]:
from matminer.featurizers.dos import DOSFeaturizer, Hybridization, DosAsymmetry
# DOSFeaturizer reads the 'dos' column and adds the cbm_*/vbm_* columns;
# only the first five rows are displayed.
featurizer = DOSFeaturizer()
slices2 = featurizer.featurize_dataframe(slices, col_id='dos')
slices2.head(n=5)

DOSFeaturizer:   0%|          | 0/10 [00:00<?, ?it/s]

Unnamed: 0,material_id,pretty_formula,structure,composition,dos,cbm_hybridization,cbm_character_1,cbm_specie_1,cbm_location_1,cbm_score_1,vbm_hybridization,vbm_character_1,vbm_specie_1,vbm_location_1,vbm_score_1
0,mp-1000,BaTe,"[[0. 0. 0.] Ba, [3.544879 3.544879 3.544879] Te]","(Ba, Te)",Complete DOS for Full Formula (Ba1 Te1)\nReduc...,0.554323,d,Ba,0.0;0.0;0.0,0.84599,0.45697,p,Te,0.5;0.5;0.5,0.88316
1,mp-1008492,BrCl,"[[0. 0. 0.] Br, [0. 0. 3.9053505...","(Br, Cl)",Complete DOS for Full Formula (Br2 Cl2)\nReduc...,1.56546,p,Br,0.0;0.0;0.0,0.276138,1.380442,p,Br,0.0;0.0;0.0,0.298244
2,mp-1008559,BP,"[[ 1.600141 -0.92384351 2.65049818] B, [1.6...","(B, P)",Complete DOS for Full Formula (B2 P2)\nReduced...,2.067295,p,B,0.333333;0.666667;0.000274,0.156324,1.401559,p,B,0.333333;0.666667;0.000274,0.263503
3,mp-1009129,MgO,"[[0. 0. 0.] Mg, [1.46901 0.84813536 1.33984...","(Mg, O)",Complete DOS for Full Formula (Mg1 O1)\nReduce...,1.137726,s,Mg,0.0;0.0;0.0,0.402906,0.08443,p,O,0.333333;0.666667;0.5,0.985728
4,mp-1009894,ZrC,"[[0. 0. 0.] Zr, [1.276538 1.276538 1.276538] C]","(Zr, C)",Complete DOS for Full Formula (Zr1 C1)\nReduce...,0.89708,d,Zr,0.0;0.0;0.0,0.684281,1.045257,p,C,0.25;0.25;0.25,0.518033


In [13]:
# Hybridization featurizer on the same 'dos' column; it starts again from
# `slices`, so the DOSFeaturizer columns are not carried over.
featurizer = Hybridization()
slices2 = featurizer.featurize_dataframe(slices, col_id='dos')
slices2.head(n=5)

Hybridization:   0%|          | 0/10 [00:00<?, ?it/s]

Unnamed: 0,material_id,pretty_formula,structure,composition,dos,cbm_s,cbm_p,cbm_d,cbm_f,cbm_sp,...,vbm_s,vbm_p,vbm_d,vbm_f,vbm_sp,vbm_sd,vbm_sf,vbm_pd,vbm_pf,vbm_df
0,mp-1000,BaTe,"[[0. 0. 0.] Ba, [3.544879 3.544879 3.544879] Te]","(Ba, Te)",Complete DOS for Full Formula (Ba1 Te1)\nReduc...,0.003823,0.07996,0.916217,0.0,0.001223,...,0.002449,0.929172,0.068379,0.0,0.009102,0.00067,0.0,0.254143,0.0,0.0
1,mp-1008492,BrCl,"[[0. 0. 0.] Br, [0. 0. 3.9053505...","(Br, Cl)",Complete DOS for Full Formula (Br2 Cl2)\nReduc...,0.050917,0.946114,0.002969,0.0,0.192693,...,0.000421,0.998468,0.001112,0.0,0.00168,2e-06,0.0,0.00444,0.0,0.0
2,mp-1008559,BP,"[[ 1.600141 -0.92384351 2.65049818] B, [1.6...","(B, P)",Complete DOS for Full Formula (B2 P2)\nReduced...,0.4578,0.5422,0.0,0.0,0.992877,...,0.00239,0.99761,0.0,0.0,0.009538,0.0,0.0,0.0,0.0,0.0
3,mp-1009129,MgO,"[[0. 0. 0.] Mg, [1.46901 0.84813536 1.33984...","(Mg, O)",Complete DOS for Full Formula (Mg1 O1)\nReduce...,0.778699,0.221301,0.0,0.0,0.689307,...,0.003194,0.996806,0.0,0.0,0.012735,0.0,0.0,0.0,0.0,0.0
4,mp-1009894,ZrC,"[[0. 0. 0.] Zr, [1.276538 1.276538 1.276538] C]","(Zr, C)",Complete DOS for Full Formula (Zr1 C1)\nReduce...,0.019491,0.296228,0.684281,0.0,0.023095,...,0.012587,0.66802,0.319393,0.0,0.033633,0.016081,0.0,0.853443,0.0,0.0
