# THE EFFECT OF PURE METAL'S ATOMIC PROPERTIES AND SURFACE CHARACTERISTICS ON ITS WORK FUNCTION: AN ANALYSIS USING SUPPORT VECTOR REGRESSION MODEL


## Requirement

This prediction  model is coded using Python 3.13.9. All the libraries that required can be installed using this cell below

In [1]:
%pip install -r requirements.txt

Note: you may need to restart the kernel to use updated packages.


## Importing Libraries

In [2]:
from mp_api.client import MPRester
from pymatgen.core.periodic_table import Element
from tqdm.auto import tqdm
import numpy as np
import pandas as pd
import re

MP_API_KEY = "oaqaUyUUgOtqC6jLsILaljDuuUEvrX89"

## Data Acquisition

### Data from Materials Project

In [3]:
with MPRester(MP_API_KEY) as mpr:
    summary_docs = mpr.materials.summary._search(
        is_metal = True,
        theoretical = False,
        nelements = [1, 1],
        fields = ["material_id", "volume", "structure", "symmetry"]
    )
    
    material_id = [doc.material_id for doc in summary_docs]
    
    surface_properties_docs = mpr.materials.surface_properties.search(
        material_ids = material_id,
        fields = ["material_id", "pretty_formula", "surfaces"]
    )
    
    unique_elements = list({doc.pretty_formula for doc in surface_properties_docs})

Retrieving SummaryDoc documents:   0%|          | 0/305 [00:00<?, ?it/s]

Retrieving SurfacePropDoc documents:   0%|          | 0/99 [00:00<?, ?it/s]

### Data from pymatgen

In [4]:
def valence_electrons_count(conf:str)->int: #Parsing function to count valence electrons
    patterns = r'(\d+)([spdfgh])(\d{1,2})'
    subshells = re.findall(patterns,str(conf))
    if not subshells:
        return 0
    
    parsed_subshells = []
    for n, l, e in subshells:
        try:
            parsed_subshells.append((int(n), l, int(e)))
        except ValueError:
            continue
    
    if not parsed_subshells:
        return 0
    
    max_n=max(n for n, l, e in parsed_subshells)
    
    valence = 0
    for n, l, e in parsed_subshells:
        if n == max_n:
            valence+=e
        if l == 'd' and n == max_n - 1:
            valence+=e
        if l == 'f' and n == max_n - 2:
            valence+=e
        if l == 'g' and n == max_n - 3:
            valence+=e
        if l == 'h' and n == max_n - 4:
            valence+=e
    
    return valence

sifat_atomik = []

for simbol in tqdm(unique_elements):
    try:
        unsur = Element(simbol)
        
        config = unsur.electronic_structure
        
        data_unsur = {
            'formula_pretty': simbol,
            'atomic_number': unsur.Z,
            'atomic_radius': unsur.atomic_radius,
            '1st_ionization_energy_eV': unsur.ionization_energy, # Ionisasi pertama
            'electron_affinity_eV': unsur.electron_affinity,
            'electronegativity': unsur.X, # Skala Pauling
            'valence_electrons': valence_electrons_count(config),
            'Youngs_modulus': unsur.youngs_modulus,
            'shear_modulus': unsur.rigidity_modulus,
            'bulk_modulus': unsur.bulk_modulus
        }
        
        # 4. Tambahkan dictionary ke list utama
        sifat_atomik.append(data_unsur)
        
    except Exception as e:
        # Menangani jika ada data yang hilang di pymatgen (jarang terjadi)
        print(f"Gagal mengambil data Pymatgen untuk unsur '{simbol}': {e}")

  0%|          | 0/65 [00:00<?, ?it/s]

  'Youngs_modulus': unsur.youngs_modulus,
  'shear_modulus': unsur.rigidity_modulus,
  'bulk_modulus': unsur.bulk_modulus
  'Youngs_modulus': unsur.youngs_modulus,
  'bulk_modulus': unsur.bulk_modulus
  'Youngs_modulus': unsur.youngs_modulus,
  'bulk_modulus': unsur.bulk_modulus
  'shear_modulus': unsur.rigidity_modulus,
  'bulk_modulus': unsur.bulk_modulus
  'shear_modulus': unsur.rigidity_modulus,
  'Youngs_modulus': unsur.youngs_modulus,
  'shear_modulus': unsur.rigidity_modulus,
  'bulk_modulus': unsur.bulk_modulus
  'Youngs_modulus': unsur.youngs_modulus,
  'shear_modulus': unsur.rigidity_modulus,
  'Youngs_modulus': unsur.youngs_modulus,
  'shear_modulus': unsur.rigidity_modulus,
  'bulk_modulus': unsur.bulk_modulus
  'Youngs_modulus': unsur.youngs_modulus,
  'shear_modulus': unsur.rigidity_modulus,
  'bulk_modulus': unsur.bulk_modulus
  'Youngs_modulus': unsur.youngs_modulus,
  'shear_modulus': unsur.rigidity_modulus,
  'bulk_modulus': unsur.bulk_modulus
  'shear_modulus': unsur

### Create Dataframe using pandas

In [5]:
print(summary_docs)

[[4m[1mMPDataDoc<SummaryDoc>[0;0m[0;0m(
[1mvolume[0;0m=46.205987384126566,
[1msymmetry[0;0m=SymmetryData(crystal_system=<CrystalSystem.cubic: 'Cubic'>, symbol='Fm-3m', hall=None, number=225, point_group='m-3m', symprec=0.1, angle_tolerance=5.0, version='2.5.0'),
[1mmaterial_id[0;0m=MPID(mp-10018),
[1mstructure[0;0m=Structure Summary
Lattice
    abc : 4.027829901198107 4.0278292285621005 4.02782982
 angles : 59.99999514264196 60.00000066686326 59.999994872937556
 volume : 46.205987384126566
      A : np.float64(3.48820304) np.float64(-0.0) np.float64(2.01391491)
      B : np.float64(1.16273435) np.float64(3.28870854) np.float64(2.01391491)
      C : np.float64(0.0) np.float64(-0.0) np.float64(4.02782982)
    pbc : True True True
PeriodicSite: Ac (0.0, 0.0, 0.0) [-0.0, -0.0, 0.0],
), [4m[1mMPDataDoc<SummaryDoc>[0;0m[0;0m(
[1mvolume[0;0m=34.77213082803506,
[1msymmetry[0;0m=SymmetryData(crystal_system=<CrystalSystem.hex_: 'Hexagonal'>, symbol='P6_3/mmc', hall=None, numb

In [6]:
summary_docs_list = []
for doc in summary_docs:
    summary_docs_list.append({
        "material_id": doc.material_id,
        "symmetry_crystal_system": doc.symmetry.crystal_system if doc.symmetry else None,
        "symmetry_symbol": doc.symmetry.symbol if doc.symmetry else None,
        "symmetry_number": doc.symmetry.number if doc.symmetry else None,
        "symmetry_point_group": doc.symmetry.point_group if doc.symmetry else None,
        "volume": doc.volume,
        "lattice_a": doc.structure.lattice.a if doc.structure else None,
        "lattice_b": doc.structure.lattice.b if doc.structure else None,
        "lattice_c": doc.structure.lattice.c if doc.structure else None,
        "lattice_alpha": doc.structure.lattice.alpha if doc.structure else None,
        "lattice_beta": doc.structure.lattice.beta if doc.structure else None,
        "lattice_gamma": doc.structure.lattice.gamma if doc.structure else None
    })

summary_docs_df = pd.DataFrame(summary_docs_list)

summary_docs_df

Unnamed: 0,material_id,symmetry_crystal_system,symmetry_symbol,symmetry_number,symmetry_point_group,volume,lattice_a,lattice_b,lattice_c,lattice_alpha,lattice_beta,lattice_gamma
0,mp-10018,Cubic,Fm-3m,225,m-3m,46.205987,4.027830,4.027829,4.027830,59.999995,60.000001,59.999995
1,mp-10597,Hexagonal,P6_3/mmc,194,6/mmm,34.772131,2.922299,2.922300,4.701662,90.000000,90.000000,120.000012
2,mp-124,Cubic,Fm-3m,225,m-3m,17.285231,2.902219,2.902218,2.902217,60.000010,60.000015,59.999991
3,mp-8566,Hexagonal,P6_3/mmc,194,6/mmm,69.456950,2.911132,2.911131,9.463709,90.000000,90.000000,119.999983
4,mp-134,Cubic,Fm-3m,225,m-3m,16.471718,2.855954,2.855954,2.855955,60.000003,60.000003,60.000002
...,...,...,...,...,...,...,...,...,...,...,...,...
300,mp-162,Cubic,Fm-3m,225,m-3m,29.820905,3.480796,3.480796,3.480796,60.000000,60.000000,60.000000
301,mp-71,Cubic,Im-3m,229,m-3m,30.294211,3.401422,3.401422,3.401422,109.471221,109.471221,109.471221
302,mp-79,Hexagonal,P6_3/mmc,194,6/mmm,28.845109,2.614360,2.614359,4.873161,90.000000,90.000000,119.999983
303,mp-131,Hexagonal,P6_3/mmc,194,6/mmm,46.999319,3.239231,3.239232,5.172220,90.000000,90.000000,120.000002


In [7]:
print(surface_properties_docs)

[[4m[1mMPDataDoc<SurfacePropDoc>[0;0m[0;0m(
[1msurfaces[0;0m=[SurfaceEntry(miller_index=[2, 1, 0], surface_energy_EV_PER_ANG2=0.004497312664608207, surface_energy=0.0720548893422641, is_reconstructed=False, structure="# generated using pymatgen\ndata_Cs\n_symmetry_space_group_name_H-M   'P 1'\n_cell_length_a   6.14898100\n_cell_length_b   13.75341998\n_cell_length_c   27.50684041\n_cell_angle_alpha   89.99999814\n_cell_angle_beta   90.00000000\n_cell_angle_gamma   90.00000000\n_symmetry_Int_Tables_number   1\n_chemical_formula_structural   Cs\n_chemical_formula_sum   Cs10\n_cell_volume   2326.24023909\n_cell_formula_units_Z   10\nloop_\n _symmetry_equiv_pos_site_id\n _symmetry_equiv_pos_as_xyz\n  1  'x, y, z'\nloop_\n _atom_site_type_symbol\n _atom_site_label\n _atom_site_symmetry_multiplicity\n _atom_site_fract_x\n _atom_site_fract_y\n _atom_site_fract_z\n _atom_site_occupancy\n  Cs  Cs1  1  0.000000  0.597893  0.402021  1\n  Cs  Cs2  1  0.000000  0.207324  0.295275  1\n  Cs  Cs

In [8]:
# Extract surface properties for all materials
all_surface_properties = []
for doc in surface_properties_docs:
    for surface in doc.surfaces:
        all_surface_properties.append({
            'material_id': doc.material_id,
            'pretty_formula': doc.pretty_formula,
            'work_function': surface.work_function,
            'miller_index': surface.miller_index,
            'surface_energy': surface.surface_energy,
            'fermi_energy': surface.efermi
        })

surface_properties_df = pd.DataFrame(all_surface_properties)

surface_properties_df

Unnamed: 0,material_id,pretty_formula,work_function,miller_index,surface_energy,fermi_energy
0,mp-1,Cs,1.770087,"[2, 1, 0]",0.072055,-0.712500
1,mp-1,Cs,2.041152,"[1, 1, 0]",0.060331,-0.762000
2,mp-1,Cs,1.856153,"[2, 1, 1]",0.069580,-0.693000
3,mp-1,Cs,1.970907,"[1, 0, 0]",0.072652,-0.683500
4,mp-1,Cs,1.940531,"[2, 2, 1]",0.075009,-0.695800
...,...,...,...,...,...,...
1153,mp-95,Sr,2.402942,"[3, 3, 2]",0.416642,-0.316600
1154,mp-95,Sr,2.056210,"[3, 2, 2]",0.422999,-0.266700
1155,mp-95,Sr,2.267086,"[3, 1, 1]",0.400933,-0.311000
1156,mp-95,Sr,2.245611,"[3, 2, 1]",0.403423,-0.279600


In [9]:
# Merge the two dataframes
merged_summary_surface_df_filtered = pd.merge(
    surface_properties_df,
    summary_docs_df,
    left_on=['material_id'],
    right_on=['material_id'],
    how='inner'  # Use 'inner' merge to keep only rows that match in both dataframes
)

merged_summary_surface_df_filtered

Unnamed: 0,material_id,pretty_formula,work_function,miller_index,surface_energy,fermi_energy,symmetry_crystal_system,symmetry_symbol,symmetry_number,symmetry_point_group,volume,lattice_a,lattice_b,lattice_c,lattice_alpha,lattice_beta,lattice_gamma
0,mp-1,Cs,1.770087,"[2, 1, 0]",0.072055,-0.712500,Cubic,Im-3m,229,m-3m,122.476825,5.418661,5.418661,5.418660,109.471226,109.471227,109.471213
1,mp-1,Cs,2.041152,"[1, 1, 0]",0.060331,-0.762000,Cubic,Im-3m,229,m-3m,122.476825,5.418661,5.418661,5.418660,109.471226,109.471227,109.471213
2,mp-1,Cs,1.856153,"[2, 1, 1]",0.069580,-0.693000,Cubic,Im-3m,229,m-3m,122.476825,5.418661,5.418661,5.418660,109.471226,109.471227,109.471213
3,mp-1,Cs,1.970907,"[1, 0, 0]",0.072652,-0.683500,Cubic,Im-3m,229,m-3m,122.476825,5.418661,5.418661,5.418660,109.471226,109.471227,109.471213
4,mp-1,Cs,1.940531,"[2, 2, 1]",0.075009,-0.695800,Cubic,Im-3m,229,m-3m,122.476825,5.418661,5.418661,5.418660,109.471226,109.471227,109.471213
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1153,mp-95,Sr,2.402942,"[3, 3, 2]",0.416642,-0.316600,Cubic,Im-3m,229,m-3m,55.449151,4.160756,4.160756,4.160757,109.471235,109.471234,109.471186
1154,mp-95,Sr,2.056210,"[3, 2, 2]",0.422999,-0.266700,Cubic,Im-3m,229,m-3m,55.449151,4.160756,4.160756,4.160757,109.471235,109.471234,109.471186
1155,mp-95,Sr,2.267086,"[3, 1, 1]",0.400933,-0.311000,Cubic,Im-3m,229,m-3m,55.449151,4.160756,4.160756,4.160757,109.471235,109.471234,109.471186
1156,mp-95,Sr,2.245611,"[3, 2, 1]",0.403423,-0.279600,Cubic,Im-3m,229,m-3m,55.449151,4.160756,4.160756,4.160757,109.471235,109.471234,109.471186


In [10]:
print(sifat_atomik)

[{'formula_pretty': 'Na', 'atomic_number': 11, 'atomic_radius': 1.8, '1st_ionization_energy_eV': 5.13907696, 'electron_affinity_eV': 0.54792625, 'electronegativity': 0.93, 'valence_electrons': 1, 'Youngs_modulus': 10.0, 'shear_modulus': 3.3, 'bulk_modulus': 6.3}, {'formula_pretty': 'Ru', 'atomic_number': 44, 'atomic_radius': 1.3, '1st_ionization_energy_eV': 7.3605, 'electron_affinity_eV': 1.046272, 'electronegativity': 2.2, 'valence_electrons': 8, 'Youngs_modulus': 447.0, 'shear_modulus': 173.0, 'bulk_modulus': 220.0}, {'formula_pretty': 'Ho', 'atomic_number': 67, 'atomic_radius': 1.75, '1st_ionization_energy_eV': 6.0215, 'electron_affinity_eV': 0.338, 'electronegativity': 1.23, 'valence_electrons': 13, 'Youngs_modulus': 65.0, 'shear_modulus': 26.0, 'bulk_modulus': 40.0}, {'formula_pretty': 'Ga', 'atomic_number': 31, 'atomic_radius': 1.3, '1st_ionization_energy_eV': 5.999302, 'electron_affinity_eV': 0.30116615, 'electronegativity': 1.81, 'valence_electrons': 13, 'Youngs_modulus': None,

In [11]:
atomic_properties_df = pd.DataFrame(sifat_atomik)

atomic_properties_df

Unnamed: 0,formula_pretty,atomic_number,atomic_radius,1st_ionization_energy_eV,electron_affinity_eV,electronegativity,valence_electrons,Youngs_modulus,shear_modulus,bulk_modulus
0,Na,11,1.80,5.139077,0.547926,0.93,1,10.0,3.3,6.3
1,Ru,44,1.30,7.360500,1.046272,2.20,8,447.0,173.0,220.0
2,Ho,67,1.75,6.021500,0.338000,1.23,13,65.0,26.0,40.0
3,Ga,31,1.30,5.999302,0.301166,1.81,13,,,
4,Al,13,1.25,5.985769,0.432835,1.61,3,70.0,26.0,76.0
...,...,...,...,...,...,...,...,...,...,...
60,Sb,51,1.45,8.608389,1.047401,2.05,15,55.0,20.0,42.0
61,Sm,62,1.85,5.643710,0.162000,1.17,8,50.0,20.0,38.0
62,Eu,63,1.85,5.670385,0.116130,1.20,9,18.0,7.9,8.3
63,Yb,70,1.75,6.254160,-0.020000,1.10,16,24.0,9.9,31.0


In [12]:
all_merged_df = pd.merge(
    merged_summary_surface_df_filtered,
    atomic_properties_df,
    left_on = ['pretty_formula'],
    right_on = ['formula_pretty'],
    how = 'left'
)

all_merged_df = all_merged_df.drop(columns = ['formula_pretty'])

all_merged_df

Unnamed: 0,material_id,pretty_formula,work_function,miller_index,surface_energy,fermi_energy,symmetry_crystal_system,symmetry_symbol,symmetry_number,symmetry_point_group,...,lattice_gamma,atomic_number,atomic_radius,1st_ionization_energy_eV,electron_affinity_eV,electronegativity,valence_electrons,Youngs_modulus,shear_modulus,bulk_modulus
0,mp-1,Cs,1.770087,"[2, 1, 0]",0.072055,-0.712500,Cubic,Im-3m,229,m-3m,...,109.471213,55,2.6,3.893906,0.471598,0.79,1,1.7,,1.6
1,mp-1,Cs,2.041152,"[1, 1, 0]",0.060331,-0.762000,Cubic,Im-3m,229,m-3m,...,109.471213,55,2.6,3.893906,0.471598,0.79,1,1.7,,1.6
2,mp-1,Cs,1.856153,"[2, 1, 1]",0.069580,-0.693000,Cubic,Im-3m,229,m-3m,...,109.471213,55,2.6,3.893906,0.471598,0.79,1,1.7,,1.6
3,mp-1,Cs,1.970907,"[1, 0, 0]",0.072652,-0.683500,Cubic,Im-3m,229,m-3m,...,109.471213,55,2.6,3.893906,0.471598,0.79,1,1.7,,1.6
4,mp-1,Cs,1.940531,"[2, 2, 1]",0.075009,-0.695800,Cubic,Im-3m,229,m-3m,...,109.471213,55,2.6,3.893906,0.471598,0.79,1,1.7,,1.6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1153,mp-95,Sr,2.402942,"[3, 3, 2]",0.416642,-0.316600,Cubic,Im-3m,229,m-3m,...,109.471186,38,2.0,5.694867,0.052066,0.95,2,,6.1,
1154,mp-95,Sr,2.056210,"[3, 2, 2]",0.422999,-0.266700,Cubic,Im-3m,229,m-3m,...,109.471186,38,2.0,5.694867,0.052066,0.95,2,,6.1,
1155,mp-95,Sr,2.267086,"[3, 1, 1]",0.400933,-0.311000,Cubic,Im-3m,229,m-3m,...,109.471186,38,2.0,5.694867,0.052066,0.95,2,,6.1,
1156,mp-95,Sr,2.245611,"[3, 2, 1]",0.403423,-0.279600,Cubic,Im-3m,229,m-3m,...,109.471186,38,2.0,5.694867,0.052066,0.95,2,,6.1,
