In [1]:
import numpy as np
import joblib
from rdkit import Chem
import pandas as pd
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [2]:
# Load the experimental dataset; later cells read its "SMILE" and "y_true" columns.
df = pd.read_csv('ExpriementalData.csv')

In [3]:
# Helper: SMILES string -> RDKit fingerprint
def compute_rdkit_fingerprint(smi):
    """Compute the RDKit fingerprint of a molecule given as a SMILES string.

    Parameters
    ----------
    smi : str
        SMILES representation of the molecule.

    Returns
    -------
    The value returned by ``Chem.RDKFingerprint`` for the parsed molecule,
    or ``None`` when ``Chem.MolFromSmiles`` cannot parse ``smi``.
    """
    molecule = Chem.MolFromSmiles(smi)
    # Guard clause: RDKit signals an unparseable SMILES by returning None.
    if molecule is None:
        return None
    return Chem.RDKFingerprint(molecule)

In [4]:
# Load the saved stacking regressor model
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code - only load model files that come from a trusted source.
model_path_rdkit = 'rdkit_stacking_regressor.joblib'
stacking_regressor_rdkit = joblib.load(model_path_rdkit)

In [5]:
# Fingerprint every SMILES string exactly once, preserving the row order of df.
rdkit_fps = []
invalid_smiles = []

smi_array = df["SMILE"]
for smi in smi_array:
    # compute_rdkit_fingerprint returns None when RDKit cannot parse the SMILES.
    fp = compute_rdkit_fingerprint(smi)
    if fp is None:
        invalid_smiles.append(smi)
    else:
        rdkit_fps.append(fp)

# Fail loudly instead of silently skipping rows: the predictions made from
# these fingerprints are later compared row-by-row with df["y_true"], so a
# dropped molecule would leave the two sequences with different lengths.
if invalid_smiles:
    raise ValueError(
        f"{len(invalid_smiles)} SMILES string(s) could not be parsed by RDKit: "
        f"{invalid_smiles[:5]}"
    )

# Convert fingerprints to numpy array
rdkit_fps = np.array(rdkit_fps)

In [6]:
# Run the stacking regressor on the fingerprint matrix
y_pred_rdkit = stacking_regressor_rdkit.predict(rdkit_fps)
# Keep a plain Python list of the predictions for the metric and plotting cells.
y_pred = list(y_pred_rdkit)

Potential solutions:
- Use a data structure that matches the device ordinal in the booster.
- Set the device for booster before call to inplace_predict.




In [7]:
# Reference values taken from the "y_true" column of the loaded dataset.
y_true = df["y_true"]

In [8]:
# Calculate RMSE, MAE, and R² of the predictions against the reference values
# Calculate RMSE
# Take the square root of the MSE explicitly: the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# whereas this form works on every version.
rmse = np.sqrt(mean_squared_error(y_true, y_pred))

# Calculate MAE
mae = mean_absolute_error(y_true, y_pred)

# Calculate R²
r2 = r2_score(y_true, y_pred)

# Output results
print("RMSE:", rmse)
print("MAE:", mae)
print("R²:", r2)

RMSE: 0.07295807394372406
MAE: 0.01993150781412951
R²: 0.7597929913245293


In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib.patches as mpatches

# Scatter plot of predicted (x) against reference (y) values with a fitted
# regression line. The explicit fig/ax interface keeps every call bound to
# this figure rather than to pyplot's implicit "current" figure.
fig, ax = plt.subplots(figsize=(8, 6), dpi=300)

# Plot
sns.regplot(x=y_pred, y=y_true,
            scatter_kws={"color": "blue", "alpha": 0.5},
            line_kws={"lw": 2, 'ls': '--', 'color': 'red', "alpha": 0.7},
            ci=None,
            ax=ax)

# Labels and title (axis label spelling now matches the title)
ax.set_xlabel('Predicted Band Gap', color='black', fontsize=12)
ax.set_ylabel('Experimental Band Gap', color='black', fontsize=12)
ax.set_title("Comparison of Experimental and Predicted Band Gap", color='black', fontsize=14)

# Grid
ax.grid(alpha=0.2)

# Metrics legend: patches are used only as carriers for the text labels.
R2 = mpatches.Patch(label=f"R² = {r2:.2f}")
MAE = mpatches.Patch(label=f"MAE = {mae:.2f}")
ax.legend(handles=[R2, MAE])

# Save high-resolution image
# NOTE(review): the output file name looks misspelled; it is left unchanged so
# anything that already refers to this file keeps working.
fig.savefig("expirementalolot.png", dpi=1200, bbox_inches='tight')

# Show plot
plt.show()