# THE EFFECT OF PURE METAL'S ATOMIC PROPERTIES AND SURFACE CHARACTERISTICS ON ITS WORK FUNCTION: AN ANALYSIS USING SUPPORT VECTOR REGRESSION MODEL


## Requirement

This prediction model is written in Python 3.13.9. All required libraries can be installed by running the cell below.

In [None]:
%pip install -r requirements.txt

## Importing Libraries

In [None]:
import os
import re

import numpy as np
import pandas as pd
from mp_api.client import MPRester
from pymatgen.core import Structure, Lattice
from pymatgen.core.periodic_table import Element
from tqdm.auto import tqdm

# Materials Project API key. A non-empty ``MP_API_KEY`` environment variable
# takes precedence, so each user can supply their own key without editing the
# notebook.
# SECURITY NOTE(review): the literal fallback below is a credential committed
# to source. It is kept only so existing runs keep working; rotate the key and
# remove the fallback.
MP_API_KEY = os.environ.get("MP_API_KEY") or "oaqaUyUUgOtqC6jLsILaljDuuUEvrX89"

## Data Acquisition

### Data from Materials Project

In [None]:
# Fetch summary and surface-property documents from the Materials Project.
# All names bound inside this block are reused by later cells.
with MPRester(MP_API_KEY) as mpr:
    # Select metallic materials made of a single element that have been
    # observed experimentally. ``elements = ['Li']`` additionally restricts
    # the query to lithium.
    # NOTE(review): ``_search`` is an underscore-prefixed method; confirm
    # whether the public ``search`` (and its parameter names) is what is intended.
    summary_docs = mpr.materials.summary._search(
        is_metal = True,
        theoretical = False,
        elements = ['Li'],
        nelements = [1, 1],
        fields = ["material_id", "nsites", "volume", "structure", "symmetry"]
    )
    # Map material id (as a string) -> structure from the summary document
    structures = {str(doc.material_id): doc.structure for doc in summary_docs}
    
    # Convert each structure to its conventional cell
    conventional = {mid: s.to_conventional() for mid, s in structures.items()}
    
    # Material ids in the same order as summary_docs
    material_id = [doc.material_id for doc in summary_docs]
    
    # Keep the materials from summary_docs whose surface properties have been computed
    surface_properties_docs = mpr.materials.surface_properties.search(
        material_ids = material_id,
        fields = ["material_id", "pretty_formula", "surfaces"]
    )
    
    # Build a list of the distinct formulas collected (deduplicated through a
    # set, so the resulting order is not guaranteed)
    unique_elements = list({doc.pretty_formula for doc in surface_properties_docs})

### Data from pymatgen

In [None]:
def valence_electrons_count(conf: str) -> int:
    """Count valence electrons from an electron-configuration string.

    Subshell tokens of the form ``<n><l><count>`` (for example ``3d10``) are
    read from ``str(conf)``. Electrons are summed over every subshell in the
    outermost shell (largest ``n``), plus any ``d`` subshell one shell below
    it, ``f`` two below, ``g`` three below and ``h`` four below.

    Args:
        conf: Electron configuration text such as ``"[Ar].3d10.4s1"``. Any
            object is accepted; it is converted with ``str`` first.

    Returns:
        The number of valence electrons, or 0 when no subshell token is found.
    """
    tokens = re.findall(r'(\d+)([spdfgh])(\d{1,2})', str(conf))
    if not tokens:
        return 0

    shells = [(int(level), kind, int(count)) for level, kind, count in tokens]
    outer = max(level for level, _, _ in shells)

    # Number of shells below the outermost one at which each inner subshell
    # type still counts as valence.
    depth_below_outer = {'d': 1, 'f': 2, 'g': 3, 'h': 4}

    return sum(
        count
        for level, kind, count in shells
        if level == outer or outer - level == depth_below_outer.get(kind)
    )

# Collects one dictionary of atomic properties per element symbol in
# unique_elements; consumed later to build a DataFrame.
sifat_atomik = []

for simbol in tqdm(unique_elements):
    try:
        unsur = Element(simbol)
        
        # Electron configuration, passed to valence_electrons_count below
        config = unsur.electronic_structure
        
        data_unsur = {
            'formula_pretty': simbol,
            'atomic_number': unsur.Z,
            'atomic_radius': unsur.atomic_radius,
            '1st_ionization_energy_eV': unsur.ionization_energy, # First ionization
            'electron_affinity_eV': unsur.electron_affinity,
            'electronegativity': unsur.X, # Pauling scale
            'valence_electrons': valence_electrons_count(config),
            'Youngs_modulus': unsur.youngs_modulus,
            'shear_modulus': unsur.rigidity_modulus,
            'bulk_modulus': unsur.bulk_modulus
        }
        
        # Add the dictionary to the main list
        sifat_atomik.append(data_unsur)
        
    except Exception as e:
        # Handles data missing from pymatgen (rare): the failure is printed and
        # the element is left out of sifat_atomik. The message reads
        # "Failed to fetch Pymatgen data for element '<symbol>': <error>".
        print(f"Gagal mengambil data Pymatgen untuk unsur '{simbol}': {e}")

### Create Dataframe using pandas

In [None]:
# Inspect the raw summary documents returned by the summary query.
print(summary_docs)

In [None]:
# Inspect the conventional-cell structures keyed by material id.
print(conventional)

In [None]:
# Flatten each summary document into a plain dict (one record per material).
summary_docs_list = []
for doc in summary_docs:
    summary_docs_list.append({
        "material_id": doc.material_id,
        # Symmetry fields fall back to None when the document has no symmetry data
        "symmetry_crystal_system": doc.symmetry.crystal_system if doc.symmetry else None,
        "symmetry_symbol": doc.symmetry.symbol if doc.symmetry else None,
        "nsites": doc.nsites,
        "volume": doc.volume
    })

print(summary_docs_list)

In [None]:
# Attach the conventional-cell lattice lengths and angles to every summary
# record that has a matching entry in ``conventional``.
for doc_entry in summary_docs_list:
    material_id = str(doc_entry['material_id'])
    if material_id not in conventional:
        continue  # no conventional cell stored for this material id

    structure = conventional[material_id]
    doc_entry.update({
        'lattice_a': structure.lattice.abc[0],
        'lattice_b': structure.lattice.abc[1],
        'lattice_c': structure.lattice.abc[2],
        'angle_alpha': structure.lattice.angles[0],
        'angle_beta': structure.lattice.angles[1],
        'angle_gamma': structure.lattice.angles[2],
    })

print(summary_docs_list)

In [None]:
# Tabulate the per-material summary records (one row per material).
summary_docs_df = pd.DataFrame(summary_docs_list)

# Bare expression: displayed as the cell output in a notebook
summary_docs_df

In [None]:
# Inspect the raw surface-property documents returned by the query.
print(surface_properties_docs)

In [None]:
# Extract surface properties for all materials: one record per
# (material, surface) pair, repeating the material-level fields on each row.
surface_properties_lists = []
for doc in surface_properties_docs:
    for surface in doc.surfaces:
        surface_properties_lists.append({
            'material_id': doc.material_id,
            'pretty_formula': doc.pretty_formula,
            'work_function': surface.work_function,
            'miller_index': surface.miller_index,
            'surface_energy': surface.surface_energy,
            'fermi_energy': surface.efermi
        })

surface_properties_df = pd.DataFrame(surface_properties_lists)

# Bare expression: displayed as the cell output in a notebook
surface_properties_df

In [None]:
# Join the per-surface rows with the per-material summary rows on the shared
# ``material_id`` key. An inner join keeps only ids present in both frames.
merged_summary_surface_df_filtered = surface_properties_df.merge(
    summary_docs_df,
    on='material_id',
    how='inner',
)

merged_summary_surface_df_filtered

In [None]:
# Inspect the collected per-element atomic property records.
print(sifat_atomik)

In [None]:
# Tabulate the per-element atomic property records (one row per element).
atomic_properties_df = pd.DataFrame(sifat_atomik)

# Bare expression: displayed as the cell output in a notebook
atomic_properties_df

In [None]:
# Add the atomic properties to every surface row. The left join keeps all
# surface rows even when no atomic record exists for the formula. The
# right-hand key column is then dropped because it duplicates
# ``pretty_formula``.
all_merged_df = (
    merged_summary_surface_df_filtered
    .merge(
        atomic_properties_df,
        left_on='pretty_formula',
        right_on='formula_pretty',
        how='left',
    )
    .drop(columns=['formula_pretty'])
)

all_merged_df

In [None]:
# Summarize columns, non-null counts and dtypes of the merged table.
all_merged_df.info()

### Feature Engineering

In [None]:
# List the distinct crystal systems present in the merged table.
unique_crystal_system = all_merged_df['symmetry_crystal_system'].unique()
print(unique_crystal_system)

In [None]:
# List the distinct symmetry symbols present in the merged table.
unique_symbol = all_merged_df['symmetry_symbol'].unique()
print(unique_symbol)

In [None]:
# For each crystal system (in order of first appearance, since sort=False),
# collect the distinct symmetry symbols that occur with it.
grouping = all_merged_df.groupby('symmetry_crystal_system', sort=False)['symmetry_symbol'].unique()

print(grouping)

In [None]:
def classify_structure(row):
    # Membersihkan input
    system = str(row['symmetry_crystal_system']).split(':')[-1].replace("'>", "").strip()
    symbol = str(row['symmetry_symbol'])

    # Menggunakan match-case pada tuple (system, symbol)
    match (system, symbol):
        
        # --- Kategori Kubik ---
        case ('Cubic', s) if s.startswith('F'):
            return 'FCC'
        case ('Cubic', s) if s.startswith('I'):
            return 'BCC'
        case ('Cubic', s) if s.startswith('P'):
            return 'Simple Cubic'

        # --- Kategori Heksagonal ---
        case ('Hexagonal', 'P6_3/mmc'):
            return 'HCP' # Ini space group spesifik untuk HCP
        case ('Hexagonal', 'P6/mmm'):
            return 'Primitive Hexagonal'

        # --- Kategori Tetragonal ---
        case ('Tetragonal', s) if s.startswith('I'):
            return 'Body-Centered Tetragonal (BCT)'
        
        # --- Kategori Orthorhombic ---
        case ('Orthorhombic', s) if s.startswith('C'):
            return 'Base-Centered Orthorhombic'

        # --- Kategori Monoclinic ---
        case ('Monoclinic', s) if s.startswith('C'):
            return 'Base-Centered Monoclinic'
            
        # --- Kategori Trigonal ---
        case ('Trigonal', s) if s.startswith('R'):
            return 'Rhombohedral'

        # --- Default Case ---
        case _:
            return 'Other' # Untuk yang tidak terpetakan

# -------------------------------------------------------------------
# --- Apply the function to create a new column ---
# -------------------------------------------------------------------

# Make sure to use the original DataFrame, NOT the groupby result
all_merged_df['structure_type'] = all_merged_df.apply(classify_structure, axis=1)

# -------------------------------------------------------------------
# --- Look at the result ---
# -------------------------------------------------------------------
# Printed heading reads "DataFrame with the new 'structure_type' column:"
print("DataFrame dengan kolom 'structure_type' baru:")
print(all_merged_df[['symmetry_symbol', 'structure_type']].head())

# Printed heading reads "Summary of structures in your data"
print("\n--- Ringkasan Struktur di Data Anda ---")
print(all_merged_df['structure_type'].value_counts())

In [None]:
# Preview the first rows of the merged table, including the new column.
all_merged_df.head()

In [None]:
# Remove the crystal-system column from the merged table.
# NOTE: re-running this cell after the column is gone raises KeyError.
all_merged_df = all_merged_df.drop(columns=['symmetry_crystal_system'])

all_merged_df.info()

In [None]:
# Collect per-site data from the conventional-cell structures held in
# ``conventional``. The previous version tried to build a new
# ``Structure(species, coordinates)`` with no lattice and an empty coordinate
# list, which cannot be constructed, and it seeded ``species`` with the string
# "Li" before appending integers.
# NOTE(review): sites from all materials are flattened into the two parallel
# lists below; confirm whether per-material grouping is wanted instead.
coordinates = []  # Cartesian coordinates, one entry per site
species = []      # atomic numbers, parallel to ``coordinates``

for structure in conventional.values():
    for s in structure:
        coordinates.append(s.coords)  # Cartesian coordinates
        # coordinates.append(s.frac_coords) would give fractional coordinates instead
        species.append(s.specie.Z)
        # species.append(s.specie) would give element objects (e.g. Fe) instead of atomic numbers

print(coordinates)