# THE EFFECT OF PURE METAL'S ATOMIC PROPERTIES AND SURFACE CHARACTERISTICS ON ITS WORK FUNCTION: AN ANALYSIS USING SUPPORT VECTOR REGRESSION MODEL

## Requirement

### Installing Library Requirements

This prediction model is written in Python 3.13.9. All required libraries can be installed by running the cell below.

In [None]:
%pip install -r requirements.txt

### Importing Libraries Needed

In [None]:
import os
import re
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import shap
from mp_api.client import MPRester
from pymatgen.core import Structure
from pymatgen.core.periodic_table import Element
from PyALE import ale
from scipy.stats import loguniform
from sklearn.compose import ColumnTransformer
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.metrics import r2_score as r2, mean_absolute_error as mae, root_mean_squared_error as rmse
from sklearn.model_selection import GroupShuffleSplit, GroupKFold, GridSearchCV, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.svm import SVR
from tqdm.auto import tqdm

# Show every column and use a wide console so dataframes are not truncated.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 1000)

# Materials Project API key, read from the MP_API_KEY environment variable so the
# secret is never stored in the notebook. The key that used to be embedded here
# should be treated as leaked and regenerated.
# When the variable is unset this is None; MPRester is then expected to fall back
# to its own key lookup — TODO confirm against the installed mp_api version.
MP_API_KEY = os.environ.get("MP_API_KEY")

## Data Acquisition

### Data from Materials Project

In [None]:
with MPRester(MP_API_KEY) as mpr:
    # Select metallic materials made of a single element that were observed experimentally
    # NOTE(review): `_search` is an underscore-prefixed (non-public) method — confirm it is
    # intended here rather than the public `search`.
    summary_docs = mpr.materials.summary._search(
        is_metal = True,
        theoretical = False,
        nelements = [1, 1],
        fields = ["material_id", "nsites", "volume", "structure", "symmetry"]
    )
    summary_structures = {str(doc.material_id): doc.structure for doc in summary_docs}

    # Convert every structure to its conventional cell
    bulk_summary_conventional = {mid: s.to_conventional() for mid, s in summary_structures.items()}

    material_id = [doc.material_id for doc in summary_docs]

    # Keep only the materials from summary_docs whose surface properties have been computed
    surface_properties_docs = mpr.materials.surface_properties.search(
        material_ids = material_id,
        fields = ["material_id", "pretty_formula", "surfaces", "structure"]
    )

    # Build the list of distinct elements that were collected
    unique_elements = list({doc.pretty_formula for doc in surface_properties_docs})

### Data from *pymatgen*

In [None]:
def valence_electrons_count(conf:str)->int:
    """Count the valence electrons described by an electron-configuration string.

    Every ``<n><l><count>`` token (for example ``3d10`` or ``4s1``) found in
    ``str(conf)`` is considered. Electrons are counted when the subshell is in
    the outermost shell ``n_max``, or when it is a d, f, g or h subshell lying
    1, 2, 3 or 4 shells below ``n_max`` respectively.

    Returns 0 when no subshell token can be found.
    """
    # How many shells below the outermost one each inner subshell type may sit.
    inner_shell_gap = {'d': 1, 'f': 2, 'g': 3, 'h': 4}

    occupied = []
    for shell, kind, count in re.findall(r'(\d+)([spdfgh])(\d{1,2})', str(conf)):
        try:
            occupied.append((int(shell), kind, int(count)))
        except ValueError:
            continue

    if not occupied:
        return 0

    outer_shell = max(shell for shell, _, _ in occupied)

    return sum(
        count
        for shell, kind, count in occupied
        if shell == outer_shell or shell == outer_shell - inner_shell_gap.get(kind, 0)
    )

# One dict of atomic properties per element symbol in unique_elements.
sifat_atomik = []

for simbol in tqdm(unique_elements):
    try:
        unsur = Element(simbol)

        config = unsur.electronic_structure

        data_unsur = {
            'formula_pretty': simbol,
            'atomic_number': unsur.Z,
            'atomic_radius': unsur.atomic_radius,
            '1st_ionization_energy_eV': unsur.ionization_energy, # First ionization
            'electron_affinity_eV': unsur.electron_affinity,
            'electronegativity': unsur.X, # Pauling scale
            'valence_electrons': valence_electrons_count(config),
            'Youngs_modulus': unsur.youngs_modulus,
            'shear_modulus': unsur.rigidity_modulus,
            'bulk_modulus': unsur.bulk_modulus
        }

        # Append the dictionary to the main list
        sifat_atomik.append(data_unsur)

    except Exception as e:
        # Handle data that is missing from pymatgen (rare); the element is skipped
        print(f"Gagal mengambil data Pymatgen untuk unsur '{simbol}': {e}")

### Create Dataframe using pandas + Feature Engineering

In [None]:
print(summary_docs)

In [None]:
print(bulk_summary_conventional)

In [None]:
# Flatten every summary document into a plain dict of bulk descriptors.
# None is stored wherever the attribute a value is derived from is missing or falsy.
summary_docs_list = []
for doc in summary_docs:
    summary_docs_list.append({
        "material_id": doc.material_id,
        "symmetry_crystal_system": doc.symmetry.crystal_system if doc.symmetry else None,
        "symmetry_symbol": doc.symmetry.symbol if doc.symmetry else None,
        "bulk_atomic_density": doc.nsites / doc.volume if doc.volume else None,
        "c_over_a_ratio": (doc.structure.lattice.c / doc.structure.lattice.a) if doc.structure and doc.structure.lattice and doc.structure.lattice.a else None,
        "b_over_a_ratio": (doc.structure.lattice.b / doc.structure.lattice.a) if doc.structure and doc.structure.lattice and doc.structure.lattice.a else None,
        "lattice_alpha": doc.structure.lattice.alpha if doc.structure and doc.structure.lattice else None,
        "lattice_beta": doc.structure.lattice.beta if doc.structure and doc.structure.lattice else None,
        "lattice_gamma": doc.structure.lattice.gamma if doc.structure and doc.structure.lattice else None
    })

print(summary_docs_list)

In [None]:
# Replace the density and lattice descriptors of each entry with the values of its
# conventional cell, whenever one exists in bulk_summary_conventional.
# NOTE: the loop rebinds `material_id`, which previously held the list of ids
# passed to the surface-properties search.
for doc_entry in summary_docs_list:
    material_id = str(doc_entry['material_id'])
    if material_id in bulk_summary_conventional:
        struct = bulk_summary_conventional[material_id]
        doc_entry.update({
            "bulk_atomic_density": len(struct) / struct.volume,
            "c_over_a_ratio": struct.lattice.c / struct.lattice.a,
            "b_over_a_ratio": struct.lattice.b / struct.lattice.a,
            "lattice_alpha": struct.lattice.alpha,
            "lattice_beta": struct.lattice.beta,
            "lattice_gamma": struct.lattice.gamma
        })

print(summary_docs_list)

In [None]:
summary_docs_df = pd.DataFrame(summary_docs_list)

summary_docs_df

In [None]:
def classify_structure(row):
    system = str(row['symmetry_crystal_system']).split(':')[-1].replace("'>", "").strip()
    symbol = str(row['symmetry_symbol'])

    match (system, symbol): # Menggunakan match-case pada tuple (system, symbol)

        # Kubik
        case ('Cubic', s) if s.startswith('F'):
            return 'FCC'
        case ('Cubic', s) if s.startswith('I'):
            return 'BCC'
        case ('Cubic', s) if s.startswith('P'):
            return 'Simple Cubic'

        # Heksagonal
        case ('Hexagonal', 'P6_3/mmc'):
            return 'HCP' # Space group spesifik untuk HCP
        case ('Hexagonal', 'P6/mmm'):
            return 'Primitive Hexagonal'

        # Tetragonal
        case ('Tetragonal', s) if s.startswith('I'):
            return 'Body-Centered Tetragonal (BCT)'

        # Orthorhombic
        case ('Orthorhombic', s) if s.startswith('C'):
            return 'Base-Centered Orthorhombic'

        # Monoclinic
        case ('Monoclinic', s) if s.startswith('C'):
            return 'Base-Centered Monoclinic'

        # Trigonal
        case ('Trigonal', s) if s.startswith('R'):
            return 'Rhombohedral'

        # Default
        case _:
            return 'Other'

# Label every material with its structure type.
summary_docs_df['structure_type'] = summary_docs_df.apply(classify_structure, axis=1)

# Move 'structure_type' so that it sits immediately before 'symmetry_symbol'.
cols = summary_docs_df.columns.tolist()
structure_type_col = cols.pop(cols.index('structure_type'))
symmetry_symbol_idx = cols.index('symmetry_symbol')
cols.insert(symmetry_symbol_idx , structure_type_col)

summary_docs_df = summary_docs_df[cols]

# The raw crystal system is no longer needed once the label has been derived from it.
summary_docs_df = summary_docs_df.drop(columns=['symmetry_crystal_system',])

summary_docs_df

In [None]:
print(surface_properties_docs)

In [None]:
def get_planar_density(slab_structure: "Structure", *, tolerance: float = 1e-6) -> float:
    """Return the planar atomic density of the topmost layer of a slab.

    The in-plane area is the norm of the cross product of the first two
    lattice vectors. The top layer consists of every site whose z coordinate
    (``site.coords[2]``) lies within ``tolerance`` of the largest z coordinate.

    Args:
        slab_structure: Object exposing ``lattice.matrix`` and ``sites`` (each
            site exposing ``coords``).
        tolerance: Maximum distance below the highest z coordinate for a site
            to count as part of the top layer.

    Returns:
        Number of top-layer sites divided by the in-plane area. ``nan`` is
        returned when the area is zero, when there are no sites, or when the
        computation fails, so the result is always a float and can be stored
        directly in a numeric column.
    """
    try:
        # --- Area ---
        a_vec = slab_structure.lattice.matrix[0]
        b_vec = slab_structure.lattice.matrix[1]
        surface_area_2d = np.linalg.norm(np.cross(a_vec, b_vec))

        if surface_area_2d == 0:
            return np.nan

        # --- Top-layer atoms ---
        z_coords = [site.coords[2] for site in slab_structure.sites]
        if not z_coords:
            return np.nan
        max_z = max(z_coords)
        surface_atoms_count = sum(1 for z in z_coords if abs(z - max_z) < tolerance)

        # --- Density ---
        return float(surface_atoms_count / surface_area_2d)

    except Exception as e:
        # Deliberately best-effort: one malformed slab must not abort the whole loop.
        print(f"Error saat menghitung data: {e}")
        return np.nan

In [None]:
# One record per (material, surface) pair.
surface_properties_lists = []

for doc in tqdm(surface_properties_docs):
    for surface in doc.surfaces:
        cif_string = surface.structure
        
        # Build the slab Structure from the CIF text (required for the planar density)
        slab_structure = Structure.from_str(cif_string, fmt="cif")
        
        # Collect the results for this surface
        surface_properties_lists.append({
            'material_id': doc.material_id,
            'pretty_formula': doc.pretty_formula,
            'work_function': surface.work_function,
            'miller_index': surface.miller_index,
            'surface_energy': surface.surface_energy,
            'fermi_energy': surface.efermi,
            'planar_density': get_planar_density(slab_structure)
        })

surface_properties_df = pd.DataFrame(surface_properties_lists)

# Show the result
print("DataFrame berhasil dibuat (versi bersih):")
surface_properties_df

In [None]:
# Merge the two dataframes
# Attach the bulk descriptors to every surface record, joining on material_id.
merged_summary_surface_df_filtered = pd.merge(
    surface_properties_df,
    summary_docs_df,
    left_on=['material_id'],
    right_on=['material_id'],
    how='inner'  # Use 'inner' merge to keep only rows that match in both dataframes
)

merged_summary_surface_df_filtered

In [None]:
print(sifat_atomik)

In [None]:
atomic_properties_df = pd.DataFrame(sifat_atomik)

atomic_properties_df

In [None]:
# Add the per-element atomic properties; a left join keeps every surface record
# even when no atomic-property row exists for its formula.
all_merged_df = pd.merge(
    merged_summary_surface_df_filtered,
    atomic_properties_df,
    left_on = ['pretty_formula'],
    right_on = ['formula_pretty'],
    how = 'left'
)

# The right-hand join key duplicates 'pretty_formula'.
all_merged_df = all_merged_df.drop(columns = ['formula_pretty'])    

# Store the categorical descriptors as plain strings.
all_merged_df['miller_index'] = all_merged_df['miller_index'].astype(str)
all_merged_df['structure_type'] = all_merged_df['structure_type'].astype(str)
all_merged_df['symmetry_symbol'] = all_merged_df['symmetry_symbol'].astype(str)

all_merged_df

In [None]:
all_merged_df.info()

In [None]:
# Report how many distinct values each categorical descriptor takes.
# NOTE(review): 'miller_index' has already been cast to str in the merge cell, so
# apply(tuple) only splits each string into characters before counting.
print(f"Unique structure_type: {all_merged_df['structure_type'].nunique()}")
print(f"Unique miller_index: {all_merged_df['miller_index'].apply(tuple).nunique()}")
print(f"Unique symmetry_symbol: {all_merged_df['symmetry_symbol'].nunique()}")

In [None]:
# NOTE(review): the banner names work_function, but describe() is called on the
# whole dataframe rather than on the 'work_function' column — confirm the intent.
print("--- Statistik work_function ---")
print(all_merged_df.describe())

In [None]:
all_merged_df.to_csv("metal_surface_properties_full_dataset.csv", index=False)

### Data Splitting

In [None]:
# Target and grouping key: rows sharing a formula are kept on the same side of every split.
y = all_merged_df['work_function']
groups = all_merged_df['pretty_formula']

# Target, identifiers and the two dropped categorical descriptors are excluded from the features.
cols_to_drop = [
    'work_function',
    'material_id',
    'pretty_formula',
    'miller_index',
    'symmetry_symbol'
]

X_clean = all_merged_df.drop(columns=cols_to_drop, errors='ignore')

# Single grouped 80/20 hold-out split; the test part is only used for the final evaluation.
outer_cv = GroupShuffleSplit(n_splits=1, test_size=0.2, random_state=42)
train_idx, test_idx = next(outer_cv.split(X_clean, y, groups=groups))

X_train_outer = X_clean.iloc[train_idx]
y_train_outer = y.iloc[train_idx]
groups_train_outer = groups.iloc[train_idx]

X_test_outer = X_clean.iloc[test_idx]
y_test_outer = y.iloc[test_idx]

print("--- Data Split Selesai ---")
print(f"Jumlah data train (luar): {len(X_train_outer)}")
print(f"Jumlah data test (luar): {len(X_test_outer)}")

In [None]:
print("--- Statistik y_train_outer ---")
print(y_train_outer.describe())

## Training, Validation, Hyperparameter Tuning, Evaluation, and Interpretation

### Model's pipeline

#### Global Configuration

In [None]:
# One-hot encode categorical features; categories unseen during fit are ignored at transform time.
categorical_transformer = Pipeline(steps=[
    ('encoder', OneHotEncoder(handle_unknown='ignore', sparse_output=False))
])

neighbors = 5        # n_neighbors for the KNN imputer
iteration = 125      # n_iter for randomized search
random_state = 42

# Imputation strategies compared during the preliminary phase.
strats = ["median", "knn"]

# Grouped 10-fold cross-validation used inside every hyperparameter search.
inner_cv = GroupKFold(n_splits=10)

# Error metrics use sklearn's negated scorers; the sign is flipped back when results are summarised.
scoring_metrics = {
    'r2': 'r2',
    'mae': 'neg_mean_absolute_error',
    'rmse': 'neg_root_mean_squared_error'
}

# Summaries of the accepted models, appended to by the comparison functions.
results_history = []

#### Getting Numerical and Categorical Features

In [None]:
def feature_type(X):
    """Split the columns of ``X`` into numerical and categorical feature names.

    Categorical features are the columns of dtype ``object`` or ``category``;
    every other column is numerical. Numerical columns that contain missing
    values are listed first, followed by the complete ones, each group in
    original column order.

    A categorical column that contains missing values is reported only as
    categorical. It used to be returned in the numerical list as well, which
    would send strings through the numerical scaler.

    Args:
        X: pandas DataFrame of features.

    Returns:
        Tuple ``(num_features, cat_features)`` of column-name lists.
    """
    cat_features = X.select_dtypes(include=['object', 'category']).columns.tolist()
    categorical = set(cat_features)

    num_features_impute = []
    num_features_clean = []
    # Pair columns with their flags positionally so duplicate labels cannot confuse the lookup.
    for col, has_missing in zip(X.columns, X.isnull().any().to_numpy()):
        if col in categorical:
            continue
        (num_features_impute if has_missing else num_features_clean).append(col)

    return num_features_impute + num_features_clean, cat_features

#### Impute Strategy

In [None]:
def imputer_strategy(strat, n_neighbors):
    """Build the numerical preprocessing pipeline for an imputation strategy.

    Args:
        strat: 'knn' or 'median' (case-insensitive).
        n_neighbors: Neighbour count passed to the KNN imputer; unused for 'median'.

    Returns:
        A two-step Pipeline: scale-then-impute for 'knn', impute-then-scale for 'median'.

    Raises:
        ValueError: If ``strat`` is neither 'knn' nor 'median'.
    """
    kind = strat.lower()

    if kind == 'knn':
        steps = [
            ('scaler', StandardScaler()),
            ('imputer', KNNImputer(n_neighbors=n_neighbors)),
        ]
    elif kind == 'median':
        steps = [
            ('imputer', SimpleImputer(strategy='median')),
            ('scaler', StandardScaler()),
        ]
    else:
        raise ValueError(f"Strategy '{strat}' unrecognized. Only accept 'knn' or 'median'.")

    return Pipeline(steps=steps)

#### Full Pipeline

In [None]:
def full_pipeline(X, strategy, n_neighbors, categorical_transformer):
    """Assemble the preprocessing + RBF-kernel SVR pipeline for ``X``.

    Args:
        X: DataFrame whose columns determine the numerical/categorical split.
        strategy: Imputation strategy name forwarded to ``imputer_strategy``.
        n_neighbors: Neighbour count forwarded to ``imputer_strategy``.
        categorical_transformer: Transformer applied to the categorical columns.

    Returns:
        Pipeline with a 'preprocessor' ColumnTransformer followed by a 'model' SVR.
    """
    num_features, cat_features = feature_type(X)
    numeric_transformer = imputer_strategy(strategy, n_neighbors)

    # Columns that are neither numerical nor categorical are dropped.
    column_steps = [
        ('num', numeric_transformer, num_features),
        ('cat', categorical_transformer, cat_features),
    ]

    return Pipeline(steps=[
        ('preprocessor', ColumnTransformer(transformers=column_steps, remainder='drop')),
        ('model', SVR(kernel='rbf')),
    ])

### Model's Training, Validation, Hyperparameter Tuning, and Evaluation Functions

#### Hyperparameter Tuning Function

In [None]:
def train_valid_hyperparameter_tuning(search_mode, estimator, params, n_iter, scoring, cv, rand_state, X_train, y_train, groups_train):
    """Run a grouped hyperparameter search and return the fitted search object.

    Args:
        search_mode: 'random' (RandomizedSearchCV) or 'grid' (GridSearchCV), case-insensitive.
        estimator: Pipeline to tune.
        params: Parameter distributions ('random') or parameter grid ('grid').
        n_iter: Number of sampled candidates; only used in 'random' mode.
        scoring: Scoring mapping; must contain 'rmse', which selects the refit model.
        cv: Cross-validation splitter.
        rand_state: Random seed; only used in 'random' mode.
        X_train, y_train, groups_train: Training data and its group labels.

    Raises:
        ValueError: If ``search_mode`` is neither 'random' nor 'grid'.
    """
    mode = search_mode.lower()
    shared_kwargs = dict(estimator=estimator, scoring=scoring, n_jobs=-1, refit='rmse', cv=cv)

    if mode == 'random':
        search = RandomizedSearchCV(
            param_distributions=params,
            n_iter=n_iter,
            random_state=rand_state,
            **shared_kwargs,
        )
    elif mode == 'grid':
        search = GridSearchCV(param_grid=params, **shared_kwargs)
    else:
        raise ValueError(f"Search mode '{search_mode}' unrecognized. Only accept 'random' or 'grid'.")

    return search.fit(X_train, y_train, groups=groups_train)

In [None]:
def train_results(step_name, model, search_mode, strategy):
    """Summarise the best candidate of a fitted search as a flat dict.

    Args:
        step_name: Label of the tuning phase.
        model: Fitted search exposing ``best_params_``, ``best_index_`` and ``cv_results_``.
        search_mode: Search mode label stored in the summary.
        strategy: Imputation strategy label stored in the summary.

    Returns:
        Dict with the phase labels, the best C/gamma/epsilon, and the
        cross-validated mean and standard deviation of R², MAE and RMSE at the
        best index. Mean MAE and RMSE are sign-flipped because they are read
        from negated scorers.
    """
    chosen = model.best_params_
    best_C = chosen['model__C']
    best_gamma = chosen['model__gamma']
    best_epsilon = chosen['model__epsilon']

    winner = model.best_index_
    scores = model.cv_results_

    def at_winner(column):
        return scores[column][winner]

    return {
        'Tahap': step_name, 'Search Mode': search_mode, 'Imputasi': strategy,
        'Best C': best_C, 'Best gamma': best_gamma, 'Best epsilon': best_epsilon,
        'CV Mean R¬≤': at_winner('mean_test_r2'), 'CV Std R¬≤': at_winner('std_test_r2'),
        'CV Mean MAE': -at_winner('mean_test_mae'), 'CV Std MAE': at_winner('std_test_mae'),
        'CV Mean RMSE': -at_winner('mean_test_rmse'), 'CV Std RMSE': at_winner('std_test_rmse'),
    }

In [None]:
def single_run_exp(X, step_name, search_mode, strat, n_neighbors, cat_transformer, rand_state, n_iter, params, scoring, cv, X_train, y_train, groups_train):
    """Build the pipeline for one (search mode, imputer) pair, tune it and summarise it.

    Args:
        X: DataFrame used to derive the numerical/categorical column split.
        step_name: Label of the tuning phase, stored in the summary.
        search_mode: 'random' or 'grid' (case-insensitive).
        strat: Imputation strategy name.
        n_neighbors: Neighbour count for the KNN imputer.
        cat_transformer: Transformer for the categorical columns.
        rand_state: Seed for randomized search.
        n_iter: Number of randomized-search candidates.
        params: Parameter distributions or grid matching ``search_mode``.
        scoring, cv: Scoring mapping and cross-validation splitter.
        X_train, y_train, groups_train: Training data and its group labels.

    Returns:
        Tuple ``(model, ringkasan)``: the fitted search object and its summary dict.
    """
    estimator = full_pipeline(X, strat, n_neighbors, cat_transformer)

    # Compare case-insensitively, like the tuning function does; otherwise a
    # mode such as 'Random' would reach the randomized search with n_iter=0.
    current_iter = n_iter if search_mode.lower() == 'random' else 0

    print(f"\n{'='*40}")
    print(f"MODE: {search_mode} | IMPUTER: {strat}")
    print(f"{'='*40}")

    print(f"Starting hyperparameter tuning...")
    model = train_valid_hyperparameter_tuning(search_mode, estimator, params, current_iter, scoring, cv, rand_state, X_train, y_train, groups_train)
    print("Hyperparameter tuning completed.")

    ringkasan = train_results(step_name, model, search_mode, strat)
    return model, ringkasan

#### Displaying Results Function

In [None]:
def display_single_result(ringkasan):
    """Print the best hyperparameters and a MEAN/STD table of the CV metrics.

    Args:
        ringkasan: Summary dict. Entries whose key starts with 'Best ' are
            listed first; 'CV Mean <metric>' and 'CV Std <metric>' entries fill
            the table and default to 0 when absent.
    """
    print("Best Hyperparameter:")
    for name, value in ringkasan.items():
        if not name.startswith('Best '):
            continue
        label = name.replace('Best ', '').ljust(8)
        print(f" > {label} = {value}")

    col = 10
    metric_names = ['R¬≤', 'MAE', 'RMSE']

    # Width of one metric block: space + MEAN(col) + " | " + STD(col) + space
    block = 1 + col + 3 + col + 1

    def framed(cells):
        return "|" + "|".join(cells) + "|"

    def value_cell(metric):
        mean_val = ringkasan.get(f"CV Mean {metric}", 0)
        std_val = ringkasan.get(f"CV Std {metric}", 0)
        return f" {mean_val:^{col}.4f} | {std_val:^{col}.4f} "

    title_row = framed([f"{metric:^{block}}" for metric in metric_names])
    label_cell = f" {'MEAN':^{col}} | {'STD':^{col}} "
    label_row = framed([label_cell for _ in metric_names])
    value_row = framed([value_cell(metric) for metric in metric_names])

    rule = "-" * len(title_row)
    for text in (rule, title_row, rule, label_row, rule, value_row, rule):
        print(text)

In [None]:
def display_recap(history):
    """Print a recap table of recorded results, sorted by ascending CV mean RMSE.

    Args:
        history: Iterable of summary dicts. 'Tahap', 'Search Mode', 'Imputasi'
            and 'CV Mean RMSE' are required; the other metrics and the best
            parameters default to 0 when absent.
    """
    outer_rule = '=' * 115

    print(outer_rule)
    # Header: CV mean RMSE (primary metric), MAE and R2, then the best parameters
    header = f"{'TAHAP':<12} | {'MODE':<8} | {'STRAT':<8} | {'CV RMSE':^10} | {'CV MAE':^10} | {'CV R¬≤':^10} | {'Best Params (C/Gam/Eps)':<25}"
    print(header)
    print('-' * 115)

    def as_text(value):
        # Numbers get four decimals; anything else (e.g. gamma='scale') is shown as-is.
        return f"{value:.4f}" if isinstance(value, (int, float)) else str(value)

    for entry in sorted(history, key=lambda item: item['CV Mean RMSE']):
        phase = entry['Tahap'].upper()
        search = entry['Search Mode'].upper()
        imputer = entry['Imputasi'].upper()

        r2_val = entry.get('CV Mean R¬≤', 0)
        mae_val = entry.get('CV Mean MAE', 0)
        rmse_val = entry.get('CV Mean RMSE', 0)

        best = "/".join([
            "C:" + as_text(entry.get('Best C', 0)),
            "G:" + as_text(entry.get('Best gamma', 0)),
            "E:" + as_text(entry.get('Best epsilon', 0)),
        ])

        cells = [
            f"{phase:<12}",
            f"{search:<8}",
            f"{imputer:<8}",
            f"{rmse_val:^10.4f}",
            f"{mae_val:^10.4f}",
            f"{r2_val:^10.4f}",
            f"{best:<25}",
        ]
        print(" | ".join(cells) + " ")

    print(outer_rule + "\n")

#### Preliminary and Final Model Comparison Function

In [None]:
def prelim_comparison(X, step_name, search_mode, strats, n_neighbors, cat_transformer, rand_state, n_iter, params, scoring, cv, X_train, y_train, groups_train):
    """Tune every (search mode, imputation strategy) pair and keep the lowest CV RMSE.

    Args:
        search_mode: Iterable of search mode names to try.
        strats: Iterable of imputation strategy names to try.
        params: Mapping from search mode name to its parameter space.
        (remaining arguments are forwarded unchanged to ``single_run_exp``)

    Returns:
        Tuple ``(best_model, best_candidate_info)``; ``(None, {})`` when nothing was run.

    Raises:
        ValueError: If ``params`` has no entry for one of the search modes.

    Side effects:
        Prints each run's result and appends the winner's summary to the
        module-level ``results_history``.
    """
    lowest_rmse = float('inf')
    winner_model = None
    winner_info = {}

    for mode in search_mode:
        mode_params = params.get(mode)
        if mode_params is None:
            raise ValueError(f"No parameters found for {mode} search mode")

        for strat in strats:
            candidate, summary = single_run_exp(
                X, step_name, mode, strat,
                n_neighbors, cat_transformer, rand_state,
                n_iter, mode_params, scoring, cv,
                X_train, y_train, groups_train
            )

            display_single_result(summary)

            candidate_rmse = summary['CV Mean RMSE']
            if candidate_rmse < lowest_rmse:
                lowest_rmse = candidate_rmse
                winner_model, winner_info = candidate, summary

    if not winner_info:
        return winner_model, winner_info

    results_history.append(winner_info)
    print(f"\n>>> PRELIMINARY WINNER: {winner_info['Search Mode'].upper()} - {winner_info['Imputasi'].upper()}")
    print(f">>> Best CV Score (Mean Validation): {lowest_rmse:.4f}")

    return winner_model, winner_info

In [None]:
def comparison(d_rmse, d_mae, d_r2, d_std, tol):
    """Decide whether a new model beats the current one from metric deltas.

    Every delta is oriented so that a positive value means the new model is better.

    Selection: RMSE decides when it moves by more than ``tol`` in either
    direction. When RMSE is tied (within ``tol``), the tie-breakers are tried
    in order: MAE improved by more than ``tol``; MAE tied and R² improved by
    more than ``tol``; MAE and R² tied and the spread ``d_std`` improved at all.

    Safety net: a candidate is vetoed when R² dropped by more than ``tol``.

    Returns:
        Tuple ``(final_decision, final_reason)``.
    """
    mae_tied = abs(d_mae) <= tol
    r2_tied = abs(d_r2) <= tol

    # --- 1. Candidate selection ---
    if d_rmse > tol:
        is_candidate, reason = True, "RMSE improved significantly."
    elif d_rmse < -tol:
        is_candidate, reason = False, "RMSE worsened."
    elif d_mae > tol:
        is_candidate, reason = True, "RMSE similar, but MAE improved."
    elif mae_tied and d_r2 > tol:
        is_candidate, reason = True, "Error metrics similar, but R¬≤ improved."
    elif mae_tied and r2_tied and d_std > 0:
        is_candidate, reason = True, "Performance identical, but Stability improved."
    else:
        is_candidate, reason = False, "No significant improvement found."

    # --- 2. Safety net and final decision ---
    if not is_candidate:
        return False, reason

    r2_drop = -d_r2
    if r2_drop > tol:
        return False, f"VETOED: {reason} BUT R¬≤ dropped by {r2_drop:.4f} (Exceeds {tol})."

    return True, reason

In [None]:
def final_model(X, step_name, search_mode, strat, n_neighbors, cat_transformer, rand_state, n_iter, params, scoring, cv, X_train, y_train, groups_train, current_model, current_candidate_info, tol):
    """Run one refinement search and keep it only if it beats the current model.

    Args:
        current_model: Fitted search currently regarded as best.
        current_candidate_info: Summary dict of ``current_model``.
        tol: Tolerance forwarded to ``comparison``.
        (remaining arguments are forwarded unchanged to ``single_run_exp``)

    Returns:
        Tuple ``(best_model, best_candidate_info)``: the new pair when
        ``comparison`` accepts it, otherwise the pair passed in.

    Side effects:
        Prints the result and the decision; appends the new summary to the
        module-level ``results_history`` when the new model is accepted.
    """
    # Read the baseline before the (expensive) search so a bad baseline fails early.
    prev_rmse, prev_mae, prev_r2, prev_std = (
        current_candidate_info.get('CV Mean RMSE'),
        current_candidate_info.get('CV Mean MAE'),
        current_candidate_info.get('CV Mean R¬≤'),
        current_candidate_info.get('CV Std RMSE'),
    )

    challenger, challenger_info = single_run_exp(
        X, step_name, search_mode, strat,
        n_neighbors, cat_transformer, rand_state, n_iter,
        params, scoring, cv,
        X_train, y_train, groups_train
    )

    display_single_result(challenger_info)

    # Orient every delta so that a positive value favours the challenger.
    d_rmse = prev_rmse - challenger_info['CV Mean RMSE']
    d_mae = prev_mae - challenger_info['CV Mean MAE']
    d_r2 = challenger_info['CV Mean R¬≤'] - prev_r2
    d_std = prev_std - challenger_info['CV Std RMSE']

    is_better, reason = comparison(d_rmse, d_mae, d_r2, d_std, tol)

    if not is_better:
        print("\nCurrent model is better")
        print(f"Reason: {reason}")
        return current_model, current_candidate_info

    print("\nNew model is better")
    print(f"Reason: {reason}")
    results_history.append(challenger_info)
    return challenger, challenger_info

#### Full Implementation

In [None]:
def full_implementation(X, step_name, search_mode, strats, n_neighbors, cat_transformer, rand_state, params, n_iter, scoring, cv, X_train, y_train, groups_train, current_model, current_candidate_info, tol):
    """Run one tuning phase and print the recap of all recorded results.

    When ``step_name`` is "preliminary" (case-insensitive) every search mode and
    strategy is compared via ``prelim_comparison``; ``search_mode`` and
    ``strats`` are then iterables. For any other phase a single refinement is
    run via ``final_model`` against ``current_model``; ``search_mode`` and
    ``strats`` are then a single mode and a single strategy.

    Returns:
        Tuple ``(best_model, best_candidate_info)`` of the phase.
    """
    banner = '#' * 60
    print(banner)
    print(f"{step_name.upper()} PHASE")
    print(banner)

    shared_args = (
        n_neighbors, cat_transformer, rand_state, n_iter,
        params, scoring, cv,
        X_train, y_train, groups_train,
    )

    if step_name.lower() == "preliminary":
        chosen_model, chosen_info = prelim_comparison(
            X, step_name, search_mode, strats, *shared_args
        )
    else:
        chosen_model, chosen_info = final_model(
            X, step_name, search_mode, strats, *shared_args,
            current_model, current_candidate_info, tol
        )

    display_recap(results_history)
    return chosen_model, chosen_info

#### Model Evaluation Function

In [None]:
def model_eval(model, X_test, y_test):
    """Predict on a held-out set and score the predictions.

    Args:
        model: Fitted object exposing ``predict``.
        X_test: Held-out features.
        y_test: Held-out targets.

    Returns:
        Tuple ``(y_pred, results)``, where ``results`` maps the test R², MAE and
        RMSE labels to their values.
    """
    y_pred = model.predict(X_test)

    results = {
        'Test R¬≤': r2(y_test, y_pred),
        'Test MAE': mae(y_test, y_pred),
        'Test RMSE': rmse(y_test, y_pred),
    }

    return y_pred, results

### Applying Model

#### Preliminary Tuning

In [None]:
# Settings for the preliminary phase: both search modes are compared.
step = "Preliminary"
modes = ["random", "grid"]
tolerance = 0.001  # tolerance passed to the model-comparison step of every later phase

# No model has been selected yet.
current_model = None
current_candidate_info = None

##### RandomizedSearchCV vs GridSearchCV Comparison

In [None]:
# Log-uniform sampling ranges for the randomized search.
random_param_dist = {
    'model__C': loguniform(1e-2, 1e2),
    'model__gamma': loguniform(1e-2, 1e1),
    'model__epsilon': loguniform(1e-2, 1e0)
}

# Coarse grid over roughly the same ranges, plus gamma='scale'.
coarse_param_grid = {
    'model__C': [0.01, 0.1, 1, 10, 100],
    'model__gamma': [0.01, 0.1, 'scale', 1, 10],
    'model__epsilon': [0.01, 0.05, 0.1, 0.2, 1]
}

# Parameter space per search mode; the keys must match the entries of `modes`.
prelim_params = {
    'random': random_param_dist,
    'grid': coarse_param_grid
}

# Compare every (search mode, imputation strategy) pair and keep the best one.
prelim_best_model, prelim_best_results = full_implementation(
    X_clean,
    step, modes, strats,
    neighbors, categorical_transformer, random_state,
    prelim_params, iteration, scoring_metrics, inner_cv,
    X_train_outer, y_train_outer, groups_train_outer,
    current_model, current_candidate_info, tolerance
)

#### Fine Tuning

In [None]:
# Reuse the search mode and imputation strategy of the most recently recorded winner.
# NOTE(review): the refinement cells pass loguniform distributions; if the recorded
# mode is 'grid' they would be handed to GridSearchCV as a grid — confirm this is handled.
mode = results_history[-1]['Search Mode']
strat = results_history[-1]['Imputasi']

In [None]:
# First refinement: narrower log-uniform ranges, compared against the preliminary winner.
step = "Refine"

refine_param_dist = {
    'model__C': loguniform(1e-1, 1e0),
    'model__gamma': loguniform(5e-3, 5e-2),
    'model__epsilon': loguniform(1e-1, 1e0)
}

refine_model, refine_results = full_implementation(
    X_clean,
    step, mode, strat,
    neighbors, categorical_transformer, random_state,
    refine_param_dist, iteration, scoring_metrics, inner_cv,
    X_train_outer, y_train_outer, groups_train_outer,
    prelim_best_model, prelim_best_results, tolerance
)

In [None]:
# Second refinement, compared against the result of the first refinement.
step2 = "Refine 2nd"

refine_2nd_param_dist = {
    'model__C': loguniform(1e-1, 5e-1),
    'model__gamma': loguniform(1e-3, 1e-2),
    'model__epsilon': loguniform(1e-1, 3e-1)
}

refine_2nd_model, refine_2nd_results = full_implementation(
    X_clean,
    step2, mode, strat,
    neighbors, categorical_transformer, random_state,
    refine_2nd_param_dist, iteration, scoring_metrics, inner_cv,
    X_train_outer, y_train_outer, groups_train_outer,
    refine_model, refine_results, tolerance
)

In [None]:
# Third refinement, compared against the result of the second refinement.
step3 = "Refine 3rd"

refine_3rd_param_dist = {
    'model__C': loguniform(3e-1, 5e-1),
    'model__gamma': loguniform(8e-4, 2e-3),
    'model__epsilon': loguniform(6e-2, 2e-1)
}

refine_3rd_model, refine_3rd_results = full_implementation(
    X_clean,
    step3, mode, strat,
    neighbors, categorical_transformer, random_state,
    refine_3rd_param_dist, iteration, scoring_metrics, inner_cv,
    X_train_outer, y_train_outer, groups_train_outer,
    refine_2nd_model, refine_2nd_results, tolerance
)

In [None]:
# Fourth refinement, compared against the result of the third refinement.
step4 = "Refine 4th"

refine_4th_param_dist = {
    'model__C': loguniform(49e-2, 53e-2),
    'model__gamma': loguniform(1e-3, 16e-4),
    'model__epsilon': loguniform(5e-2, 9e-2)
}

refine_4th_model, refine_4th_results = full_implementation(
    X_clean,
    step4, mode, strat,
    neighbors, categorical_transformer, random_state,
    refine_4th_param_dist, iteration, scoring_metrics, inner_cv,
    X_train_outer, y_train_outer, groups_train_outer,
    refine_3rd_model, refine_3rd_results, tolerance
)

#### Final Model Evaluation

In [None]:
# The model returned by the last refinement phase is the best one kept so far;
# it is scored once on the held-out test split.
best_model = refine_4th_model

prediction, test_results = model_eval(best_model, X_test_outer, y_test_outer)

print("Model performance on test set:")
for key, value in test_results.items():
    if key.startswith('Test '):
        label = key.replace('Test ', '').ljust(8)
        print(f" > {label} = {value}")

In [None]:
# Set style agar plot terlihat rapi untuk publikasi
# Parity plot of predicted versus actual work function on the test split.
# Set the style so the plot looks tidy for publication
sns.set_theme(style="whitegrid")

# 1. Prepare the data and metrics
y_actual = y_test_outer
y_predicted = prediction
r2_score = test_results['Test R¬≤']
rmse_score = test_results['Test RMSE']
mae_score = test_results['Test MAE']

# 2. Create the canvas
plt.figure(figsize=(9, 8)) # Roughly square so the plot stays proportional

# 3. Scatter the points
# alpha=0.6 makes the points slightly transparent, which helps reveal dense regions
plt.scatter(y_actual, y_predicted,
    color='royalblue', alpha=0.6, edgecolors='w', s=60,
    label='Test Data Points'
)

# 4. Plot the ideal reference line (y=x)
# Take the min and max over all data so the line spans the whole plot
data_min = min(y_actual.min(), y_predicted.min())
data_max = max(y_actual.max(), y_predicted.max())
# Add a little padding so the points do not touch the frame
padding = (data_max - data_min) * 0.05
limits = [data_min - padding, data_max + padding]

plt.plot(limits, limits,
    color='crimson', linestyle='--', linewidth=2.5,
    label='Perfect Prediction ($y=x$)'
)

# 5. Labels and title
# NOTE(review): the x axis is labelled "Experimental", but the values are the
# work_function entries fetched from the surface-properties query — confirm the wording.
plt.xlabel('Experimental Work Function (eV)', fontsize=14, fontweight='bold')
plt.ylabel('Predicted Work Function (eV)', fontsize=14, fontweight='bold')

# Informative title including the metrics
title_text = f"Final SVR Model Performance on Unseen Test Set\n"
subtitle_text = f"$R^2$ = {r2_score:.3f} | RMSE = {rmse_score:.3f} eV | MAE = {mae_score:.3f} eV"
plt.title(title_text + subtitle_text, fontsize=16, pad=20)

# 6. Additional styling
plt.legend(fontsize=12, loc='upper left')
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.xlim(limits)
plt.ylim(limits)
plt.gca().set_aspect('equal', adjustable='box') # Ensure the X and Y axes share the same scale

plt.tight_layout()

# Save the plot as a high-resolution image (optional, uncomment if needed)
# plt.savefig('final_model_performance.png', dpi=300, bbox_inches='tight')

plt.show()

### Model's Interpretation

#### SHAP

##### Initialization

In [None]:
# Initialise SHAP's JavaScript support for SHAP plots in the notebook
shap.initjs()

# --- IMPORTANT ---
# Take the best estimator found by the refine_4th_model search.
# NOTE(review): the original comment referred to "3rd_fine_tune", while the
# code reads refine_4th_model; the test metrics above were computed with
# `best_model` — confirm all of these refer to the same pipeline.
best_shap_model = refine_4th_model.best_estimator_

# Split the pipeline into its 'preprocessor' step and its 'model' step so that
# SHAP can be run on the already-preprocessed feature matrix
preprocessor = best_shap_model.named_steps['preprocessor']
svr_model = best_shap_model.named_steps['model']

print("SHAP, model pemenang, dan preprocessor siap.")

In [None]:
def _strip_transformer_prefix(name):
    """Remove the 'num__' and 'cat__structure_type_' markers from one name."""
    return name.replace('num__', '').replace('cat__structure_type_', '')


print("Memproses data train dan test...")
# Push the raw outer splits through the fitted preprocessor
X_train_processed = preprocessor.transform(X_train_outer)
X_test_processed = preprocessor.transform(X_test_outer)

# Output column names AFTER preprocessing (needed to label the plots).
# They carry a transformer prefix such as 'num__' or 'cat__...'; the exact
# names depend on the preprocessor defined earlier in the notebook.
feature_names = preprocessor.get_feature_names_out()

# Shorter, human-readable names for plotting
clean_feature_names = list(map(_strip_transformer_prefix, feature_names))

print(f"Data diproses. Jumlah total fitur: {len(feature_names)}")

# --- IMPORTANT: DataFrame used by the plots ---
# Wrap the processed test matrix in a DataFrame so that the SHAP plots show
# the cleaned column names.
X_test_processed_df = pd.DataFrame(X_test_processed, columns=clean_feature_names)

In [None]:
print("Membuat 2 jenis ringkasan background data (K-Means)...")

# Summarise the preprocessed training data with shap.kmeans at two sizes
# (50 and 100); each summary is later used as the background set of its own
# KernelExplainer.
background_data1 = shap.kmeans(X_train_processed, 50)
background_data2 = shap.kmeans(X_train_processed, 100)

print("2 jenis ringkasan background data siap.")

##### SHAP values dan 3 jenis plot dari explainer1

In [None]:
print("Membuat KernelExplainer bernama explainer1...")
# Explain the 'model' step alone (it receives preprocessed inputs), using the
# 50-point k-means summary as background
explainer1 = shap.KernelExplainer(svr_model.predict, background_data1)

print("Mulai menghitung SHAP values menggunakan explainer1 untuk data test...")
# SHAP values for the preprocessed test matrix
shap_values1 = explainer1.shap_values(X_test_processed)

In [None]:
# --- PLOT 1: Global Feature Importance (Bar Plot) ---
# Shows the mean absolute impact per feature, i.e. which features matter MOST.
print("--- Global Feature Importance dari explainer1 (Bar) ---")
# show=False keeps the figure open so the bars can be annotated below
shap.summary_plot(shap_values1, X_test_processed_df, plot_type="bar", show=False)

ax = plt.gca()

# Write each bar's length next to the bar
for bar in ax.patches:
    bar_len = bar.get_width()
    ax.text(
        bar_len + (bar_len * 0.01),            # 1% past the bar tip so the text does not touch it
        bar.get_y() + bar.get_height() / 2,    # vertically centred on the bar
        f'{bar_len:.3f}',                      # three decimal places
        va='center', ha='left', fontsize=10, color='black',
    )

# Widen the right-hand limit by 10% so the labels are not clipped
xmin, xmax = ax.get_xlim()
ax.set_xlim(xmin, xmax * 1.1)

# Render the annotated figure
plt.show()

In [None]:
# --- PLOT 2: Beeswarm Summary Plot ---
# Shows EVERY data point.
# - X axis = SHAP value (impact on the prediction)
# - Colour = feature value (red = high, blue = low)
print("\n--- SHAP Summary Plot dari explainer1 (Beeswarm) ---")
shap.summary_plot(shap_values1, X_test_processed_df)

In [None]:
# --- PLOT 3: Waterfall Plot for a single test sample (explainer1) ---

# Change `idx_to_explain` (e.g. 5, 10, 20) to inspect another row of the
# test set.
idx_to_explain = 0

print(f"--- Menampilkan Waterfall Plot untuk data poin ke-{idx_to_explain} dari explainer1 ---")

# 1. Build the Explanation object by hand for this ONE sample
sample_row = X_test_processed_df.iloc[idx_to_explain]
exp_one_sample = shap.Explanation(
    values=shap_values1[idx_to_explain],
    base_values=explainer1.expected_value,
    data=sample_row.values,                               # the sample's feature values
    feature_names=X_test_processed_df.columns.tolist(),   # the cleaned feature names
)

# 2. waterfall_plot is given just that single Explanation
shap.waterfall_plot(exp_one_sample)

##### SHAP values dan 3 jenis plot dari explainer2

In [None]:
print("Membuat KernelExplainer bernama explainer2...")
# Explain the 'model' step alone (it receives preprocessed inputs), using the
# 100-point k-means summary as background
explainer2 = shap.KernelExplainer(svr_model.predict, background_data2)

print("Mulai menghitung SHAP values menggunakan explainer2 untuk data test...")
# SHAP values for the preprocessed test matrix
shap_values2 = explainer2.shap_values(X_test_processed)

In [None]:
# --- PLOT 1: Global Feature Importance (Bar Plot) ---
# Shows the mean absolute impact per feature, i.e. which features matter MOST.
print("--- Global Feature Importance dari explainer2 (Bar) ---")
shap.summary_plot(shap_values2, X_test_processed_df, plot_type="bar")

In [None]:
# --- PLOT 2: Beeswarm Summary Plot ---
# Shows EVERY data point.
# - X axis = SHAP value (impact on the prediction)
# - Colour = feature value (red = high, blue = low)
print("\n--- SHAP Summary Plot dari explainer2 (Beeswarm) ---")
shap.summary_plot(shap_values2, X_test_processed_df)

In [None]:
# --- PLOT 3: Waterfall Plot for a single test sample (explainer2) ---

# Change `idx_to_explain` (e.g. 5, 10, 20) to inspect another row of the
# test set.
idx_to_explain = 0

print(f"--- Menampilkan Waterfall Plot untuk data poin ke-{idx_to_explain} dari explainer2 ---")

# 1. Build the Explanation object by hand for this ONE sample
sample_row = X_test_processed_df.iloc[idx_to_explain]
exp_one_sample = shap.Explanation(
    values=shap_values2[idx_to_explain],
    base_values=explainer2.expected_value,
    data=sample_row.values,                               # the sample's feature values
    feature_names=X_test_processed_df.columns.tolist(),   # the cleaned feature names
)

# 2. waterfall_plot is given just that single Explanation
shap.waterfall_plot(exp_one_sample)

##### SHAP values dan 3 jenis plot dari explainer3

In [None]:
print("Membuat KernelExplainer bernama explainer3...")
# Explain the 'model' step alone (it receives preprocessed inputs); here the
# whole preprocessed training matrix is the background, with no k-means
# summarisation
explainer3 = shap.KernelExplainer(svr_model.predict, X_train_processed)

print("Mulai menghitung SHAP values menggunakan explainer3 untuk data test...")
# SHAP values for the preprocessed test matrix
shap_values3 = explainer3.shap_values(X_test_processed)

In [None]:
# --- PLOT 1: Global Feature Importance (Bar Plot) ---
# Shows the mean absolute impact per feature, i.e. which features matter MOST.
print("--- Global Feature Importance dari explainer3 (Bar) ---")
shap.summary_plot(shap_values3, X_test_processed_df, plot_type="bar")

In [None]:
# --- PLOT 2: Beeswarm Summary Plot ---
# Shows EVERY data point.
# - X axis = SHAP value (impact on the prediction)
# - Colour = feature value (red = high, blue = low)
print("\n--- SHAP Summary Plot dari explainer3 (Beeswarm) ---")
shap.summary_plot(shap_values3, X_test_processed_df)

In [None]:
# --- PLOT 3: Waterfall Plot for a single test sample (explainer3) ---

# Change `idx_to_explain` (e.g. 5, 10, 20) to inspect another row of the
# test set.
idx_to_explain = 0

print(f"--- Menampilkan Waterfall Plot untuk data poin ke-{idx_to_explain} dari explainer3 ---")

# 1. Build the Explanation object by hand for this ONE sample
sample_row = X_test_processed_df.iloc[idx_to_explain]
exp_one_sample = shap.Explanation(
    values=shap_values3[idx_to_explain],
    base_values=explainer3.expected_value,
    data=sample_row.values,                               # the sample's feature values
    feature_names=X_test_processed_df.columns.tolist(),   # the cleaned feature names
)

# 2. waterfall_plot is given just that single Explanation
shap.waterfall_plot(exp_one_sample)

#### ALE

In [None]:
# Full fitted pipeline from the refine_4th_model search; the ALE cells below
# pass it to ale() together with the raw X_train_outer columns.
best_pipeline = refine_4th_model.best_estimator_

##### ALE plot from explainer1 SHAP values

In [None]:
# ALE plots for the features ranked highest by explainer1's SHAP values.
#
# Suppress all warnings from here on so the progress log below stays readable.
# NOTE: this installs a global filter that is not restored after the cell.
warnings.filterwarnings('ignore')

print("Generating ALE Plots (Emergency Low-Res Mode)...")
print("-" * 60)

# 1. Rank the transformed features by mean absolute SHAP value and keep the
#    11 highest.
mean_shap_values = np.abs(shap_values1).mean(axis=0)
importance_df = pd.DataFrame({
    'feature': clean_feature_names,
    'importance': mean_shap_values
})
top_11_df = importance_df.sort_values(by='importance', ascending=False).head(11)
top_11_features = top_11_df['feature'].tolist()
n_ranked = len(top_11_features)  # real total for the progress counter

# ALE configurations, tried in order until one succeeds:
#   (status message, grid_size, include_CI, jitter the feature first?)
# Level 1 is the standard setting. Level 2 uses a much coarser grid without
# confidence intervals (original author's note: modulus-type features usually
# need this so the bins do not collide). Level 3 is the last resort: only
# 5 grid points, computed on a copy whose feature column has noise added.
ale_attempts = (
    ("‚úÖ Standard", 50, True, False),
    ("‚ö†Ô∏è Low-Res (Grid=10)", 10, False, False),
    ("üö® Ultra-Low (Grid=5)", 5, False, True),
)

# 2. One ALE plot per ranked feature
for i, (feature_name, imp_score) in enumerate(
        zip(top_11_features, top_11_df['importance']), 1):

    # The ranking uses post-preprocessing names; ALE runs on the raw frame,
    # so any name that is not a raw column is skipped.
    if feature_name not in X_train_outer.columns:
        continue

    print(f"[{i}/{n_ranked}] Processing: {feature_name}...", end=" ")

    last_error = None
    for status_msg, grid_size, include_ci, use_jitter in ale_attempts:
        # Create the figure ourselves and hand it to ale(); otherwise PyALE
        # opens its own figure and a pre-created one is left behind empty.
        fig, ax = plt.subplots(figsize=(8, 5))
        try:
            X_ale = X_train_outer
            if use_jitter:
                X_ale = X_train_outer.copy()
                std_val = X_ale[feature_name].std()
                if std_val == 0:
                    std_val = 1
                # Fairly coarse noise: 5% of the feature's standard
                # deviation, seeded so the plot is reproducible
                rng = np.random.RandomState(42)
                X_ale[feature_name] += rng.normal(0, std_val * 0.05, size=len(X_ale))

            ale(X=X_ale, model=best_pipeline, feature=[feature_name],
                grid_size=grid_size, include_CI=include_ci, fig=fig, ax=ax)
        except Exception as err:
            # Deliberate best-effort: discard this attempt's figure and fall
            # through to the next, coarser configuration.
            plt.close(fig)
            last_error = err
        else:
            break
    else:
        # Every configuration failed for this feature
        print(f"‚ùå GAGAL. Error: {last_error}")
        continue

    ax.set_title(f"#{i} ALE: {feature_name}\n(SHAP: {imp_score:.4f})", fontsize=12)
    ax.set_xlabel(f"{feature_name}", fontsize=10)
    ax.set_ylabel("Effect (eV)", fontsize=10)
    ax.grid(True, alpha=0.3)
    print(status_msg)
    plt.show()

print("-" * 60)
print("Selesai.")

##### ALE plot from explainer2 SHAP values

In [None]:
# ALE plots for the features ranked highest by explainer2's SHAP values.
#
# Suppress all warnings from here on so the progress log below stays readable.
# NOTE: this installs a global filter that is not restored after the cell.
warnings.filterwarnings('ignore')

print("Generating ALE Plots (Emergency Low-Res Mode)...")
print("-" * 60)

# 1. Rank the transformed features by mean absolute SHAP value and keep the
#    11 highest.
mean_shap_values = np.abs(shap_values2).mean(axis=0)
importance_df = pd.DataFrame({
    'feature': clean_feature_names,
    'importance': mean_shap_values
})
top_11_df = importance_df.sort_values(by='importance', ascending=False).head(11)
top_11_features = top_11_df['feature'].tolist()
n_ranked = len(top_11_features)  # real total for the progress counter

# ALE configurations, tried in order until one succeeds:
#   (status message, grid_size, include_CI, jitter the feature first?)
# Level 1 is the standard setting. Level 2 uses a much coarser grid without
# confidence intervals (original author's note: modulus-type features usually
# need this so the bins do not collide). Level 3 is the last resort: only
# 5 grid points, computed on a copy whose feature column has noise added.
ale_attempts = (
    ("‚úÖ Standard", 50, True, False),
    ("‚ö†Ô∏è Low-Res (Grid=10)", 10, False, False),
    ("üö® Ultra-Low (Grid=5)", 5, False, True),
)

# 2. One ALE plot per ranked feature
for i, (feature_name, imp_score) in enumerate(
        zip(top_11_features, top_11_df['importance']), 1):

    # The ranking uses post-preprocessing names; ALE runs on the raw frame,
    # so any name that is not a raw column is skipped.
    if feature_name not in X_train_outer.columns:
        continue

    print(f"[{i}/{n_ranked}] Processing: {feature_name}...", end=" ")

    last_error = None
    for status_msg, grid_size, include_ci, use_jitter in ale_attempts:
        # Create the figure ourselves and hand it to ale(); otherwise PyALE
        # opens its own figure and a pre-created one is left behind empty.
        fig, ax = plt.subplots(figsize=(8, 5))
        try:
            X_ale = X_train_outer
            if use_jitter:
                X_ale = X_train_outer.copy()
                std_val = X_ale[feature_name].std()
                if std_val == 0:
                    std_val = 1
                # Fairly coarse noise: 5% of the feature's standard
                # deviation, seeded so the plot is reproducible
                rng = np.random.RandomState(42)
                X_ale[feature_name] += rng.normal(0, std_val * 0.05, size=len(X_ale))

            ale(X=X_ale, model=best_pipeline, feature=[feature_name],
                grid_size=grid_size, include_CI=include_ci, fig=fig, ax=ax)
        except Exception as err:
            # Deliberate best-effort: discard this attempt's figure and fall
            # through to the next, coarser configuration.
            plt.close(fig)
            last_error = err
        else:
            break
    else:
        # Every configuration failed for this feature
        print(f"‚ùå GAGAL. Error: {last_error}")
        continue

    ax.set_title(f"#{i} ALE: {feature_name}\n(SHAP: {imp_score:.4f})", fontsize=12)
    ax.set_xlabel(f"{feature_name}", fontsize=10)
    ax.set_ylabel("Effect (eV)", fontsize=10)
    ax.grid(True, alpha=0.3)
    print(status_msg)
    plt.show()

print("-" * 60)
print("Selesai.")

##### ALE plot from explainer3 SHAP values

In [None]:
# ALE plots for the features ranked highest by explainer3's SHAP values.
#
# Suppress all warnings from here on so the progress log below stays readable.
# NOTE: this installs a global filter that is not restored after the cell.
warnings.filterwarnings('ignore')

print("Generating ALE Plots (Emergency Low-Res Mode)...")
print("-" * 60)

# 1. Rank the transformed features by mean absolute SHAP value and keep the
#    11 highest.
mean_shap_values = np.abs(shap_values3).mean(axis=0)
importance_df = pd.DataFrame({
    'feature': clean_feature_names,
    'importance': mean_shap_values
})
top_11_df = importance_df.sort_values(by='importance', ascending=False).head(11)
top_11_features = top_11_df['feature'].tolist()
n_ranked = len(top_11_features)  # real total for the progress counter

# ALE configurations, tried in order until one succeeds:
#   (status message, grid_size, include_CI, jitter the feature first?)
# Level 1 is the standard setting. Level 2 uses a much coarser grid without
# confidence intervals (original author's note: modulus-type features usually
# need this so the bins do not collide). Level 3 is the last resort: only
# 5 grid points, computed on a copy whose feature column has noise added.
ale_attempts = (
    ("‚úÖ Standard", 50, True, False),
    ("‚ö†Ô∏è Low-Res (Grid=10)", 10, False, False),
    ("üö® Ultra-Low (Grid=5)", 5, False, True),
)

# 2. One ALE plot per ranked feature
for i, (feature_name, imp_score) in enumerate(
        zip(top_11_features, top_11_df['importance']), 1):

    # The ranking uses post-preprocessing names; ALE runs on the raw frame,
    # so any name that is not a raw column is skipped.
    if feature_name not in X_train_outer.columns:
        continue

    print(f"[{i}/{n_ranked}] Processing: {feature_name}...", end=" ")

    last_error = None
    for status_msg, grid_size, include_ci, use_jitter in ale_attempts:
        # Create the figure ourselves and hand it to ale(); otherwise PyALE
        # opens its own figure and a pre-created one is left behind empty.
        fig, ax = plt.subplots(figsize=(8, 5))
        try:
            X_ale = X_train_outer
            if use_jitter:
                X_ale = X_train_outer.copy()
                std_val = X_ale[feature_name].std()
                if std_val == 0:
                    std_val = 1
                # Fairly coarse noise: 5% of the feature's standard
                # deviation, seeded so the plot is reproducible
                rng = np.random.RandomState(42)
                X_ale[feature_name] += rng.normal(0, std_val * 0.05, size=len(X_ale))

            ale(X=X_ale, model=best_pipeline, feature=[feature_name],
                grid_size=grid_size, include_CI=include_ci, fig=fig, ax=ax)
        except Exception as err:
            # Deliberate best-effort: discard this attempt's figure and fall
            # through to the next, coarser configuration.
            plt.close(fig)
            last_error = err
        else:
            break
    else:
        # Every configuration failed for this feature
        print(f"‚ùå GAGAL. Error: {last_error}")
        continue

    ax.set_title(f"#{i} ALE: {feature_name}\n(SHAP: {imp_score:.4f})", fontsize=12)
    ax.set_xlabel(f"{feature_name}", fontsize=10)
    ax.set_ylabel("Effect (eV)", fontsize=10)
    ax.grid(True, alpha=0.3)
    print(status_msg)
    plt.show()

print("-" * 60)
print("Selesai.")