### Load data

In [None]:
import numpy as np 
import random
import os
import pandas as pd 
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier, RandomForestRegressor, AdaBoostRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.naive_bayes import GaussianNB
from sklearn import svm 
from sklearn.svm import SVR
from sklearn.linear_model import LassoCV,Lasso
from sklearn.neural_network import MLPClassifier, MLPRegressor
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split, cross_val_score, KFold, StratifiedKFold, StratifiedShuffleSplit
from sklearn.neighbors import KNeighborsRegressor 
import optuna
from xgboost import XGBRegressor
import joblib
from scipy.stats import pearsonr
import pickle
import smogn
from smogn import smoter

#### Note
Add the predicted ethylene or ethane adsorption values to label.csv in a column named 'Predicted'.

In [44]:
# Directory that holds the input CSV files (placeholder value; set it before running).
path = "file_path"
# Descriptor tables come first and the label table last: select_data() reads
# the labels from the final entry of the loaded list.
filename = ['global.csv', 'metal_properties.csv', 'fr_descriptors.csv', 'label.csv']

# One DataFrame per CSV, in the same order as `filename`.
load_data = [pd.read_csv(os.path.join(path, csv_name)) for csv_name in filename]

# Descriptor name -> position of its table inside `load_data`.
load_data_content = {'Global': 0, 'metal': 1, 'Linker_fr': 2}

In [45]:
def select_data(data, select_data_content, select_descriptor):
    """
    Merge the selected descriptor tables onto the label table.

    The label table (the last element of ``data``) is the left side of every
    merge, so all of its rows and columns are kept. Each selected descriptor
    table is left-joined onto it using the 'mof' column as the key; rows with
    no match in a descriptor table get NaN in that table's columns.

    Args:
        data: Sequence of DataFrames whose last element is the label table.
        select_data_content: Dictionary mapping descriptor names to the index
            of the corresponding table in ``data``.
        select_descriptor: List of descriptor names to merge, in order.

    Returns:
        output_X: New DataFrame holding the label-table columns followed by
            the columns of every selected descriptor table, with the 'mof'
            key column removed and all values cast to float.

    Raises:
        KeyError: If a name in ``select_descriptor`` is not a key of
            ``select_data_content``.
    """
    # pd.merge, drop and astype each return a new object, so the caller's
    # frames are never modified and no explicit copies are needed.
    merged = data[-1]

    for descriptor in select_descriptor:
        if descriptor not in select_data_content:
            raise KeyError(
                f"Unknown descriptor {descriptor!r}; "
                f"available: {list(select_data_content)}"
            )
        table = data[select_data_content[descriptor]]
        # Left join keeps every labelled sample even without a descriptor row.
        merged = pd.merge(merged, table, on='mof', how='left')

    # 'mof' is only the join key, not a feature; every remaining column must
    # be convertible to float or astype raises.
    output_X = merged.drop(columns=['mof']).astype(float)
    return output_X

### select_descriptor

In [46]:

# Descriptor tables to join onto the label table; each name must be a key of
# load_data_content.
select_descriptor = ['Global', 'metal', 'Linker_fr']

# Full float feature table: label columns plus the selected descriptor columns.
data_X = select_data(
    data=load_data,
    select_data_content=load_data_content,
    select_descriptor=select_descriptor,
)

In [48]:
# Columns passed to the model. Keep this order unchanged: df_cleaned is handed
# to model_scaler.transform() later in the notebook.
# NOTE(review): the grouping comments below are inferred from the column names
# and CSV file names; verify against the actual CSV headers.
features = [
    # Conditions plus the user-supplied 'Predicted' column
    'temperature', 'gastype', 'Predicted',
    # presumably structural descriptors ('desity' spelling is the column key
    # as written; do not correct it here without renaming the data column)
    'LCD', 'PLD', 'desity(g/cm^3)', 'VSA(m^2/cm^3)', 'void_fraction',
    # presumably metal properties
    'atomic_radius', 'electronegativity', 'electron_affinity',
    'ionization_energy', 'atomic_weight', 'oxistates',
    # presumably linker fragment counts
    'fr_Al_COO', 'fr_Al_OH', 'fr_Ar_COO', 'fr_Ar_N', 'fr_Ar_NH',
    'fr_C_O_noCOO', 'fr_NH0', 'fr_alkyl_halide', 'fr_aniline',
    'fr_aryl_methyl', 'fr_azo', 'fr_benzene', 'fr_bicyclic', 'fr_furan',
    'fr_nitrile', 'fr_pyridine',
]

# Restrict the merged table to exactly these columns, in this order.
df_cleaned = data_X[features]

### Prediction

In [51]:
# Both artifacts are resolved from the same directory. The model path used to
# be "/Model/Qst.pkl" (absolute, from the filesystem root) while the scaler
# path was relative ("Model/Qst_scaler.pkl"); the leading slash is dropped so
# the two agree. Change model_dir if the artifacts live elsewhere.
model_dir = "Model"

# NOTE: joblib.load unpickles the file and can execute arbitrary code; only
# load artifacts from a trusted source.
model = joblib.load(os.path.join(model_dir, "Qst.pkl"))
model_scaler = joblib.load(os.path.join(model_dir, "Qst_scaler.pkl"))

In [None]:
# Apply the loaded scaler to the selected feature columns.
# NOTE(review): presumably the scaler was fitted on the same columns in the
# same order as `features`; confirm against the training code.
X = model_scaler.transform(df_cleaned)
# Predict with the loaded model on the scaled features and show the result.
y = model.predict(X)
print(y)