In [59]:
import pandas as pd
import numpy as np
from sklearn.multioutput import MultiOutputRegressor
from sklearn.neighbors import NearestNeighbors
from xgboost import XGBRegressor
import joblib

In [60]:
# Colab-only: mount Google Drive so the dataset under /content/drive is readable.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [61]:
import pandas as pd

# Excel workbook holding one row per sample: blend composition (PLA/PETG/UPVC/MA %)
# followed by the four measured mechanical properties.
file_path = (
    "/content/drive/MyDrive/3d material design/FINAL ML PROPERTIES PREDICTION.xlsx"
)

df = pd.read_excel(io=file_path)

# Preview the first rows to confirm the sheet parsed as expected.
df.head()


Unnamed: 0,Sample,PLA%,PETG%,UPVC%,MA%,Tensile (MPa),Modulus (GPa),Elongation (%),Impact (kJ/m²)
0,1,81,3,16,0.0,61.91,3.3,10.46,2.52
1,2,94,4,2,0.0,62.74,3.29,7.08,2.5
2,3,77,0,23,2.5,67.47,3.45,13.64,3.49
3,4,35,0,65,2.5,62.31,3.13,24.62,4.92
4,5,43,6,51,0.0,56.31,3.12,26.07,3.39


In [62]:
# Print dtypes and non-null counts (sanity check for missing values),
# then show the exact column labels used for selection in later cells.
df.info()
df.columns

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Sample          250 non-null    int64  
 1   PLA%            250 non-null    int64  
 2   PETG%           250 non-null    int64  
 3   UPVC%           250 non-null    int64  
 4   MA%             250 non-null    float64
 5   Tensile (MPa)   250 non-null    float64
 6   Modulus (GPa)   250 non-null    float64
 7   Elongation (%)  250 non-null    float64
 8   Impact (kJ/m²)  250 non-null    float64
dtypes: float64(5), int64(4)
memory usage: 17.7 KB


Index(['Sample', 'PLA%', 'PETG%', 'UPVC%', 'MA%', 'Tensile (MPa)',
       'Modulus (GPa)', 'Elongation (%)', 'Impact (kJ/m²)'],
      dtype='object')

In [63]:
# Model inputs: blend composition in percent.
composition_columns = ["PLA%", "PETG%", "UPVC%", "MA%"]
# Model outputs: measured mechanical properties (this order is reused for
# predictions and for the error weights downstream).
property_columns = ["Tensile (MPa)", "Modulus (GPa)", "Elongation (%)", "Impact (kJ/m²)"]

X = df[composition_columns]
Y = df[property_columns]

In [64]:
# Augment the raw composition with pairwise polymer interaction terms and a
# binary flag marking whether any MA is present. `assign` returns a new frame,
# so X itself is left untouched.
X_feat = X.assign(
    PLA_PETG=lambda frame: frame["PLA%"] * frame["PETG%"],
    PLA_UPVC=lambda frame: frame["PLA%"] * frame["UPVC%"],
    PETG_UPVC=lambda frame: frame["PETG%"] * frame["UPVC%"],
    MA_present=lambda frame: (frame["MA%"] > 0).astype(int),
)

In [65]:
# Hyper-parameters shared by the per-property boosters; MultiOutputRegressor
# fits one independent XGBRegressor clone for each column of Y.
xgb_params = {
    "n_estimators": 300,
    "max_depth": 5,
    "learning_rate": 0.05,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "random_state": 42,
}
base_regressor = XGBRegressor(**xgb_params)

xgb = MultiOutputRegressor(base_regressor)
xgb.fit(X_feat, Y)

# Persist the fitted forward model next to the notebook's working directory.
joblib.dump(xgb, "global_xgb.pkl")

print("Global XGBoost model trained")

Global XGBoost model trained


In [66]:
# Index the measured property vectors (Y) so a target property vector can be
# matched to its 15 closest measured samples by Euclidean distance.
# NOTE(review): Y is used unscaled here, so properties with larger numeric
# ranges will dominate the distance — confirm this is intended or standardize.
knn = NearestNeighbors(n_neighbors=15, metric="euclidean")
knn.fit(Y)

In [67]:
# Upper bound applied to MA% before it is fed to the forward model.
# NOTE(review): presumably the largest MA% present in the training data — confirm.
MAX_TRAINED_MAH = 3.5

# Per-property weights multiplied onto the squared prediction errors, in the
# same order as the columns of Y: [Tensile, Modulus, Elongation, Impact].
_PROPERTY_SCALES = np.array([70.0, 4.0, 150.0, 12.0])
ERROR_WEIGHTS = 1.0 / _PROPERTY_SCALES

# Offsets (in percentage points) added to a neighbour's PETG% during the
# local composition search.
LOCAL_PERTURB = [-5, -2.5, 0, 2.5, 5]

In [68]:
def hybrid_inverse_design(
    target_properties,
    mah_fixed=5.0,
    pla_min=60,
    pla_max=90,
    top_k=5,
    error_tolerance=0.30
):
    """Propose blend compositions whose predicted properties approach a target.

    The search combines the notebook-level objects ``knn`` (neighbour index in
    property space), ``df`` (measured samples) and ``xgb`` (forward model):

    1. find the measured samples whose properties are closest to the target;
    2. keep those whose PLA% lies in ``[pla_min, pla_max]``;
    3. around each survivor, shift PETG% by every offset in ``LOCAL_PERTURB``
       with PLA% held fixed and UPVC% set to ``100 - PLA% - PETG%``;
    4. predict the properties of every candidate and score it with the
       ``ERROR_WEIGHTS``-weighted sum of squared errors.

    Parameters
    ----------
    target_properties : sequence of float
        Desired properties in the column order of ``Y``
        (tensile, modulus, elongation, impact).
    mah_fixed : float
        MA% attached to every candidate. For prediction it is capped at
        ``MAX_TRAINED_MAH``; the returned compositions keep the uncapped value.
    pla_min, pla_max : float
        Inclusive bounds on PLA% for the seed samples.
    top_k : int
        Maximum number of candidates to return.
    error_tolerance : float
        If even the best candidate scores above this value, the target is
        reported as unachievable. Only the best score is checked, so the other
        returned candidates may exceed the tolerance.

    Returns
    -------
    tuple
        ``(compositions, predictions, errors)`` sorted by ascending error, where
        ``compositions`` has columns ``[PLA%, PETG%, UPVC%, MA%]``; or
        ``(None, None, message)`` with a string explaining why no result exists.

    Raises
    ------
    ValueError
        If ``target_properties`` does not hold one value per modelled property.
    """
    target = np.asarray(target_properties, dtype=float).ravel()
    if target.size != len(ERROR_WEIGHTS):
        raise ValueError(
            f"target_properties must contain {len(ERROR_WEIGHTS)} values, "
            f"got {target.size}"
        )

    # ---- Step 1: kNN search in property space
    # When the index was fitted on a DataFrame, query it with the same column
    # labels; a bare list triggers scikit-learn's feature-name warning.
    query = target.reshape(1, -1)
    fitted_names = getattr(knn, "feature_names_in_", None)
    if fitted_names is not None:
        query = pd.DataFrame(query, columns=fitted_names)
    _, indices = knn.kneighbors(query)
    local_df = df.iloc[indices[0]]

    # ---- Step 2: Enforce PLA constraint (bounds are inclusive)
    local_df = local_df[local_df["PLA%"].between(pla_min, pla_max)]

    if local_df.empty:
        return None, None, "No feasible compositions under PLA constraints"

    # ---- Step 3: Local composition optimization
    candidates = []

    for pla, petg_base in zip(local_df["PLA%"], local_df["PETG%"]):
        for delta in LOCAL_PERTURB:
            petg = petg_base + delta
            upvc = 100 - pla - petg

            # Discard perturbations that push a fraction below zero.
            if petg < 0 or upvc < 0:
                continue

            candidates.append([pla, petg, upvc, mah_fixed])

    # Check for emptiness before handing the list to np.unique.
    if not candidates:
        return None, None, "No valid candidates after optimization"

    # Neighbouring seeds can generate the same composition; keep one of each.
    # An explicit float dtype keeps the result independent of input int/float mix.
    candidates = np.unique(np.array(candidates, dtype=float), axis=0)

    # ---- Step 4: Feature engineering (INFERENCE — must match training)
    cand_df = pd.DataFrame(
        candidates,
        columns=["PLA%", "PETG%", "UPVC%", "MA%"]
    )

    # Safety: cap MAH but keep SAME feature name. Only the model input is
    # capped; `candidates` (what is returned) still carries `mah_fixed`.
    cand_df["MA%"] = np.minimum(cand_df["MA%"], MAX_TRAINED_MAH)

    cand_df["PLA_PETG"] = cand_df["PLA%"] * cand_df["PETG%"]
    cand_df["PLA_UPVC"] = cand_df["PLA%"] * cand_df["UPVC%"]
    cand_df["PETG_UPVC"] = cand_df["PETG%"] * cand_df["UPVC%"]
    cand_df["MA_present"] = (cand_df["MA%"] > 0).astype(int)

    model_features = [
        "PLA%", "PETG%", "UPVC%", "MA%",
        "PLA_PETG", "PLA_UPVC", "PETG_UPVC", "MA_present"
    ]

    # ---- Step 5: Predict properties
    preds = xgb.predict(cand_df[model_features])

    # ---- Step 6: Weighted squared-error scoring
    # NOTE(review): the weights multiply the *squared* differences, so the score
    # is not dimensionless — confirm this matches the intended normalisation.
    diffs = preds - target
    errors = np.sum(ERROR_WEIGHTS * (diffs ** 2), axis=1)

    if errors.min() > error_tolerance:
        return None, None, "Target not achievable within dataset support"

    # ---- Step 7: Return Top-K (ascending error)
    top_idx = np.argsort(errors)[:top_k]

    return candidates[top_idx], preds[top_idx], errors[top_idx]

In [69]:
# Desired properties in model output order:
# [Tensile (MPa), Modulus (GPa), Elongation (%), Impact (kJ/m²)]
target = [60.0, 3.2, 20.0, 4.0]

# On failure the first two entries are None and the third holds a message.
design_result = hybrid_inverse_design(target, mah_fixed=5.0)
comps, props, errs = design_result



In [70]:
# Report header is printed in both the success and the failure case.
print("\n========================================")
print("TOP-5 OPTIMAL COMPOSITIONS (Hybrid kNN + XGB)")
print("========================================")

if comps is not None:
    # Walk the ranked candidates together with their predictions and scores.
    for rank, (comp, pred, err) in enumerate(zip(comps, props, errs), start=1):
        pla, petg, upvc, mah = comp
        tensile, modulus, elongation, impact = pred

        print(f"\nCandidate #{rank}")
        print(f"PLA  : {pla:.2f} %")
        print(f"PETG : {petg:.2f} %")
        print(f"UPVC : {upvc:.2f} %")
        print(f"MAH  : {mah:.2f} % (Conceptual)")
        print("Predicted Properties:")
        print(f"  Tensile    : {tensile:.2f} MPa")
        print(f"  Modulus    : {modulus:.2f} GPa")
        print(f"  Elongation : {elongation:.2f} %")
        print(f"  Impact     : {impact:.2f} kJ/m²")
        print(f"  Error Score: {err:.4f}")
else:
    # No result: the third return value carries the explanatory message.
    print(errs)



TOP-5 OPTIMAL COMPOSITIONS (Hybrid kNN + XGB)

Candidate #1
PLA  : 70.00 %
PETG : 2.00 %
UPVC : 28.00 %
MAH  : 5.00 % (Conceptual)
Predicted Properties:
  Tensile    : 63.49 MPa
  Modulus    : 3.35 GPa
  Elongation : 19.49 %
  Impact     : 3.61 kJ/m²
  Error Score: 0.1938

Candidate #2
PLA  : 69.00 %
PETG : 3.00 %
UPVC : 28.00 %
MAH  : 5.00 % (Conceptual)
Predicted Properties:
  Tensile    : 63.72 MPa
  Modulus    : 3.34 GPa
  Elongation : 20.30 %
  Impact     : 4.23 kJ/m²
  Error Score: 0.2073

Candidate #3
PLA  : 71.00 %
PETG : 2.00 %
UPVC : 27.00 %
MAH  : 5.00 % (Conceptual)
Predicted Properties:
  Tensile    : 64.81 MPa
  Modulus    : 3.35 GPa
  Elongation : 19.51 %
  Impact     : 3.78 kJ/m²
  Error Score: 0.3423

Candidate #4
PLA  : 69.00 %
PETG : 0.50 %
UPVC : 30.50 %
MAH  : 5.00 % (Conceptual)
Predicted Properties:
  Tensile    : 64.80 MPa
  Modulus    : 3.41 GPa
  Elongation : 15.51 %
  Impact     : 3.68 kJ/m²
  Error Score: 0.4836

Candidate #5
PLA  : 69.00 %
PETG : 5.50 %
UP