# Machine Learning Training


In [None]:
import os
import pandas as pd
import numpy as np
import optuna
import warnings
import joblib
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, r2_score

from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from sklearn.ensemble import (
    RandomForestRegressor,
    GradientBoostingRegressor,
    ExtraTreesRegressor,
    AdaBoostRegressor
)
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import Ridge, LinearRegression

# --- Runtime configuration ---------------------------------------------------
# Suppress every Python warning and keep Optuna's logging at WARNING level.
warnings.filterwarnings("ignore")
optuna.logging.set_verbosity(optuna.logging.WARNING)
# Limit CUDA-aware libraries to GPU devices 0 and 1.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1"

# One seed shared by NumPy and by every estimator that takes `random_state`.
RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Folder that receives one Optuna trial log per (target, model) pair.
os.makedirs("OptimizationLogs", exist_ok=True)

# --- Data --------------------------------------------------------------------
# The workbook is expected in the current working directory.
file_path = os.path.join(os.getcwd(), "multiwafer_database_all_feature.xlsx")
data = pd.read_excel(file_path)

# Input columns, spelled exactly as they appear in the workbook header.
all_features = [
    'P01', 'P02', 'P03', 'P04', 'P05', 'P06', 'Tester', 'P03 Recipe', 'P06 Recipe',
    'Paste 1', 'Paste 2', 'Dark Area %', 'Defect Area %', 'Grain Defect Area %',
    'Average Life Time', 'Sigma Life Time', 'Resistivity', 'Bow', 'Sawmark',
    'Avg THK', 'TTV', 'WARP', 'Wafer Area', 'Vendor name'
]
# Output column(s) to model; one full tuning run is done per entry.
target_features = ["Efficiency"]

# Feature matrix with spaces in the column names replaced by underscores.
X = data[all_features].rename(columns=lambda column: column.replace(" ", "_"))

# Hyper-parameter search spaces.
#
# `model_dict` maps a short model tag to `(estimator class, search space)`,
# where the search space maps a constructor argument to its `(low, high)`
# bounds. Bounds written as ints describe integer parameters; bounds written
# as floats describe continuous ones.
_N_ESTIMATORS_RANGE = (50, 500)
_MAX_DEPTH_RANGE = (5, 30)
_BOOSTING_LR_RANGE = (0.001, 0.3)
_HALF_TO_ONE_RANGE = (0.5, 1.0)

# Depth / split / leaf limits shared by the plain tree-based estimators.
_TREE_LIMITS = {
    "max_depth": _MAX_DEPTH_RANGE,
    "min_samples_split": (2, 20),
    "min_samples_leaf": (1, 10),
}

model_dict = {
    "XGB": (XGBRegressor, {
        "n_estimators": _N_ESTIMATORS_RANGE,
        "max_depth": _MAX_DEPTH_RANGE,
        "learning_rate": _BOOSTING_LR_RANGE,
        "colsample_bytree": (0.3, 1.0),
        "subsample": _HALF_TO_ONE_RANGE,
    }),
    "RF": (RandomForestRegressor, {
        "n_estimators": _N_ESTIMATORS_RANGE,
        **_TREE_LIMITS,
    }),
    "GBR": (GradientBoostingRegressor, {
        "n_estimators": _N_ESTIMATORS_RANGE,
        "max_depth": _MAX_DEPTH_RANGE,
        "learning_rate": _BOOSTING_LR_RANGE,
        "subsample": _HALF_TO_ONE_RANGE,
    }),
    "LGB": (LGBMRegressor, {
        "n_estimators": _N_ESTIMATORS_RANGE,
        "max_depth": _MAX_DEPTH_RANGE,
        "learning_rate": _BOOSTING_LR_RANGE,
        "subsample": _HALF_TO_ONE_RANGE,
        "colsample_bytree": _HALF_TO_ONE_RANGE,
    }),
    "ADA": (AdaBoostRegressor, {
        "n_estimators": _N_ESTIMATORS_RANGE,
        "learning_rate": (0.01, 1.0),
    }),
    "DT": (DecisionTreeRegressor, dict(_TREE_LIMITS)),
    "ET": (ExtraTreesRegressor, {
        "n_estimators": _N_ESTIMATORS_RANGE,
        **_TREE_LIMITS,
    }),
    "Ridge": (Ridge, {
        "alpha": (1e-4, 10.0),
    }),
}

results = []


def _fixed_params(model_name, model_class):
    """Return the constructor arguments that are set by hand, not tuned.

    The same dictionary is used for every Optuna trial and for the final
    refit, so the two code paths cannot drift apart.
    """
    fixed = {"random_state": RANDOM_STATE}
    # Use every core whenever the estimator exposes an `n_jobs` parameter.
    if "n_jobs" in model_class().get_params():
        fixed["n_jobs"] = -1
    if model_name == "XGB":
        # Histogram tree builder running on the GPU.
        fixed["tree_method"] = "hist"
        fixed["device"] = "cuda"
    if model_name == "LGB":
        fixed["verbose"] = -1
    return fixed


def _suggest_params(trial, search_space):
    """Draw one hyper-parameter set from `search_space` for an Optuna trial.

    Integer bounds are sampled with `suggest_int`; everything else is sampled
    with `suggest_float` on a log scale.
    """
    suggested = {}
    for name, (low, high) in search_space.items():
        if isinstance(low, int):
            suggested[name] = trial.suggest_int(name, low, high)
        else:
            suggested[name] = trial.suggest_float(name, low, high, log=True)
    return suggested


def _regression_scores(y_true, y_pred):
    """Return R2, MAPE (in percent) and RMSE for one set of predictions."""
    return {
        "R2": r2_score(y_true, y_pred),
        "MAPE": mean_absolute_percentage_error(y_true, y_pred) * 100,
        "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
    }


# Loop over each output feature
for target in target_features:
    y = data[target]

    # 60 % train / 20 % validation / 20 % test.
    X_train, X_temp, y_train, y_temp = train_test_split(
        X, y, test_size=0.4, random_state=RANDOM_STATE
    )
    X_val, X_test, y_val, y_test = train_test_split(
        X_temp, y_temp, test_size=0.5, random_state=RANDOM_STATE
    )

    # Once tuning is over, the winning configuration is refit on train + val.
    X_final_train = pd.concat([X_train, X_val])
    y_final_train = pd.concat([y_train, y_val])

    for model_name, (model_class, param_dict) in model_dict.items():
        # Computed once per model instead of once per trial.
        fixed_params = _fixed_params(model_name, model_class)

        # Loop variables are bound as defaults so the closure cannot pick up
        # values from a later iteration.
        def objective(trial, model_class=model_class, param_dict=param_dict,
                      fixed_params=fixed_params):
            """Fit on the training split and return validation MAPE in percent."""
            params = {**_suggest_params(trial, param_dict), **fixed_params}
            model = model_class(**params)
            model.fit(X_train, y_train)
            y_pred = model.predict(X_val)
            return mean_absolute_percentage_error(y_val, y_pred) * 100

        # Seed the sampler: without it the search differs on every run even
        # though every estimator is seeded with RANDOM_STATE.
        study = optuna.create_study(
            direction="minimize",
            sampler=optuna.samplers.TPESampler(seed=RANDOM_STATE),
        )
        study.optimize(objective, n_trials=50)

        # Log trial history
        trial_data = [{**trial.params, "MAPE": trial.value} for trial in study.trials]
        df_trials = pd.DataFrame(trial_data)
        df_trials.to_excel(f"OptimizationLogs/OptimizationLog_{target}_{model_name}.xlsx", index=False)

        # Final training using train + val set, scored once on the test set.
        best_params = {**study.best_params, **fixed_params}
        final_model = model_class(**best_params)
        final_model.fit(X_final_train, y_final_train)
        y_pred = final_model.predict(X_test)

        joblib.dump(final_model, f"({target})best_model_{model_name}.pkl")

        results.append({
            "Output": target,
            "Model": model_name,
            **_regression_scores(y_test, y_pred),
        })

    # Linear regression baseline (no tuning), fit on the same train + val data.
    lr_model = LinearRegression()
    lr_model.fit(X_final_train, y_final_train)
    y_pred_lr = lr_model.predict(X_test)
    joblib.dump(lr_model, f"({target})best_model_LR.pkl")
    results.append({
        "Output": target,
        "Model": "LR",
        **_regression_scores(y_test, y_pred_lr),
    })

# Save final model performance results
results_df = pd.DataFrame(results)
results_df.to_excel("All_Models_result.xlsx", index=False)

# Deep Learning Training


In [None]:
import os
import random
import pandas as pd
import numpy as np
import optuna
import torch
import torch.nn as nn
import torch.optim as optim
import joblib
import warnings

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, r2_score

# --- Reproducibility and runtime configuration -------------------------------
# Seed Python, NumPy and PyTorch (CPU, plus every GPU when CUDA is present)
# with the same value.
_SEED = 42
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(_SEED)
_cuda_ready = torch.cuda.is_available()
if _cuda_ready:
    torch.cuda.manual_seed_all(_SEED)

# Only errors from Optuna; hide UserWarning messages.
optuna.logging.set_verbosity(optuna.logging.ERROR)
warnings.filterwarnings("ignore", category=UserWarning)

# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda") if _cuda_ready else torch.device("cpu")

# --- Data --------------------------------------------------------------------
# The workbook is expected in the current working directory.
file_path = os.path.join(os.getcwd(), "multiwafer_database_all_feature.xlsx")
data = pd.read_excel(file_path)

# Input columns, spelled exactly as they appear in the workbook header.
all_features = [
    'P01', 'P02', 'P03', 'P04', 'P05', 'P06', 'Tester', 'P03 Recipe', 'P06 Recipe',
    'Paste 1', 'Paste 2', 'Dark Area %', 'Defect Area %', 'Grain Defect Area %',
    'Average Life Time', 'Sigma Life Time', 'Resistivity', 'Bow', 'Sawmark',
    'Avg THK', 'TTV', 'WARP', 'Wafer Area', 'Vendor name'
]
# Output column(s); one tuning run is performed per entry.
output_features = ["Efficiency"]

# Feature matrix with spaces in the column names replaced by underscores.
X = data[all_features].rename(columns=lambda column: column.replace(" ", "_"))

# Standardised copy of the features.
# NOTE(review): the scaler is fitted on every row of X, i.e. before any
# train/validation/test split is made.
_feature_scaler = StandardScaler().fit(X)
X_scaled = _feature_scaler.transform(X)

# Model definition
class MLPModel(nn.Module):
    """Fully connected regressor with a single output unit.

    The network is one ``Linear -> ReLU -> Dropout`` group per entry of
    `hidden_dims`, followed by a final ``Linear`` layer that maps the last
    hidden width to one value. All layers live in ``self.model``
    (an ``nn.Sequential``).

    Args:
        input_dim: Number of input features.
        hidden_dims: Non-empty sequence of hidden-layer widths.
        dropout: Dropout probability applied after every hidden activation.

    Raises:
        IndexError: If `hidden_dims` is empty.
    """

    def __init__(self, input_dim, hidden_dims, dropout):
        super().__init__()
        # Indexing the first width up front keeps the IndexError on empty input.
        widths = [input_dim, hidden_dims[0], *hidden_dims[1:]]

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack += [nn.Linear(fan_in, fan_out), nn.ReLU(), nn.Dropout(dropout)]
        # Regression head: one output value per sample.
        stack.append(nn.Linear(widths[-1], 1))

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Run `x` through the layer stack and return the result."""
        return self.model(x)

# Final results and logs
final_all_results = []
trial_all_logs = []


def _to_device_tensor(array):
    """Convert a NumPy array to a float32 tensor on the active device."""
    return torch.tensor(array, dtype=torch.float32).to(device)


def _snapshot_weights(model):
    """Return a detached copy of the model's current state dict."""
    return {name: value.detach().clone() for name, value in model.state_dict().items()}


def _fit_with_early_stopping(model, train_X, train_y, monitor_X, monitor_y,
                             learning_rate, batch_size, max_epochs=500, patience=30):
    """Train `model` with Adam on MSE loss until the monitored loss stalls.

    After each epoch the MSE on `(monitor_X, monitor_y)` is evaluated.
    Training stops once it has failed to improve for `patience` consecutive
    epochs, or after `max_epochs`. The weights from the best monitored epoch
    are restored before returning, so the returned model is the best one seen
    and not the one from `patience` epochs later.

    Args:
        model: Network to train in place.
        train_X, train_y: Training tensors (targets shaped ``(n, 1)``).
        monitor_X, monitor_y: Tensors used for the stopping criterion.
        learning_rate: Adam learning rate.
        batch_size: Mini-batch size.
        max_epochs: Upper bound on the number of epochs.
        patience: Epochs without improvement tolerated before stopping.

    Returns:
        The best monitored loss. `model` is left in eval mode.
    """
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()
    loader = torch.utils.data.DataLoader(
        torch.utils.data.TensorDataset(train_X, train_y),
        batch_size=batch_size, shuffle=True,
    )

    best_loss = float("inf")
    best_weights = None
    stale_epochs = 0

    for _ in range(max_epochs):
        model.train()
        for batch_X, batch_y in loader:
            optimizer.zero_grad()
            loss = criterion(model(batch_X), batch_y)
            loss.backward()
            optimizer.step()

        model.eval()
        with torch.no_grad():
            monitored_loss = criterion(model(monitor_X), monitor_y).item()

        if monitored_loss < best_loss:
            best_loss = monitored_loss
            best_weights = _snapshot_weights(model)
            stale_epochs = 0
        else:
            stale_epochs += 1
            if stale_epochs >= patience:
                break

    # best_weights stays None only if the loss never became finite.
    if best_weights is not None:
        model.load_state_dict(best_weights)
    model.eval()
    return best_loss


def _predict_original_units(model, features, y_scaler):
    """Predict with `model` and map the output back to the target's own units.

    Returns an array of shape ``(n_samples, 1)``.
    """
    model.eval()
    with torch.no_grad():
        scaled = model(features).cpu().numpy()
    return y_scaler.inverse_transform(scaled)


def _regression_scores(y_true, y_pred):
    """Return R2, MAPE (in percent) and RMSE for one set of predictions."""
    return {
        "R2": r2_score(y_true, y_pred),
        "MAPE": mean_absolute_percentage_error(y_true, y_pred) * 100,
        "RMSE": np.sqrt(mean_squared_error(y_true, y_pred)),
    }


# Loop over each output variable
for output_feature in output_features:
    y = data[output_feature]

    # Split the raw values first (60 / 20 / 20) ...
    X_train_raw, X_temp_raw, y_train_raw, y_temp_raw = train_test_split(
        X, y.values.reshape(-1, 1), test_size=0.4, random_state=42
    )
    X_val_raw, X_test_raw, y_val_raw, y_test_raw = train_test_split(
        X_temp_raw, y_temp_raw, test_size=0.5, random_state=42
    )

    # ... then fit both scalers on the training rows only, so validation and
    # test statistics cannot leak into preprocessing.
    scaler_X = StandardScaler().fit(X_train_raw)
    scaler_y = StandardScaler().fit(y_train_raw)
    # The saved networks are unusable without the scalers they were trained with.
    joblib.dump({"X": scaler_X, "y": scaler_y}, f"scalers_MLP_{output_feature}.pkl")

    X_train, X_val, X_test = (
        scaler_X.transform(part) for part in (X_train_raw, X_val_raw, X_test_raw)
    )
    y_train, y_val, y_test = (
        scaler_y.transform(part).flatten() for part in (y_train_raw, y_val_raw, y_test_raw)
    )

    X_train_tensor = _to_device_tensor(X_train)
    y_train_tensor = _to_device_tensor(y_train).view(-1, 1)
    X_val_tensor = _to_device_tensor(X_val)
    y_val_tensor = _to_device_tensor(y_val).view(-1, 1)
    X_test_tensor = _to_device_tensor(X_test)

    def objective_mlp(trial):
        """Train one sampled architecture and return validation MAPE in percent."""
        num_hidden_layers = trial.suggest_int("num_hidden_layers", 1, 5)
        hidden_dims = [trial.suggest_int(f"hidden_dim_{i+1}", 16, 512) for i in range(num_hidden_layers)]
        dropout = trial.suggest_float("dropout", 0.1, 0.5)
        learning_rate = trial.suggest_float("learning_rate", 1e-4, 1e-2, log=True)
        batch_size = trial.suggest_categorical("batch_size", [32, 64, 128])

        model = MLPModel(input_dim=X_train.shape[1], hidden_dims=hidden_dims, dropout=dropout).to(device)
        _fit_with_early_stopping(
            model, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor,
            learning_rate, batch_size,
        )

        # MAPE divides by |y_true|. Standardised targets sit around zero, which
        # makes the ratio explode, so score in the target's original units.
        y_pred_val = _predict_original_units(model, X_val_tensor, scaler_y)
        return mean_absolute_percentage_error(y_val_raw, y_pred_val) * 100

    # Seeded sampler so the search itself is repeatable.
    study = optuna.create_study(
        direction="minimize",
        sampler=optuna.samplers.TPESampler(seed=42),
    )
    study.optimize(objective_mlp, n_trials=100)

    # Failed or pruned trials have no value and cannot be ranked.
    completed_trials = [
        t for t in study.trials if t.state == optuna.trial.TrialState.COMPLETE
    ]
    top_trials = sorted(completed_trials, key=lambda t: t.value)[:5]

    # Retrain each of the five best configurations and score it on the test set.
    for idx, trial in enumerate(top_trials):
        params = trial.params
        hidden_dims = [params[f"hidden_dim_{i+1}"] for i in range(params["num_hidden_layers"])]
        batch_size = params["batch_size"]

        model = MLPModel(X_train.shape[1], hidden_dims, params["dropout"]).to(device)
        _fit_with_early_stopping(
            model, X_train_tensor, y_train_tensor, X_val_tensor, y_val_tensor,
            params["learning_rate"], batch_size,
        )

        y_pred_test = _predict_original_units(model, X_test_tensor, scaler_y)
        y_true_test = y_test_raw
        scores = _regression_scores(y_true_test, y_pred_test)

        final_all_results.append({
            "Output": output_feature,
            "Trial": idx,
            **scores,
        })
        trial_all_logs.append({
            "Output": output_feature,
            "Trial": idx,
            "MAPE": scores["MAPE"],
            "Hyperparameters": params
        })

        if idx == 0:
            torch.save(model.state_dict(), f"best_model_MLP_{output_feature}.pth")

            # Final training with train + validation data
            X_final_train = np.vstack([X_train, X_val])
            y_final_train = np.concatenate([y_train, y_val])
            X_final_train_tensor = _to_device_tensor(X_final_train)
            y_final_train_tensor = _to_device_tensor(y_final_train).view(-1, 1)

            final_model = MLPModel(X_final_train.shape[1], hidden_dims, params["dropout"]).to(device)
            # No held-out rows remain besides the test set, so the stopping
            # criterion monitors the loss on the training data itself.
            _fit_with_early_stopping(
                final_model, X_final_train_tensor, y_final_train_tensor,
                X_final_train_tensor, y_final_train_tensor,
                params["learning_rate"], batch_size,
            )

            y_pred_test_final = _predict_original_units(final_model, X_test_tensor, scaler_y)
            final_all_results.append({
                "Output": output_feature,
                "Trial": "Best (Retrain)",
                **_regression_scores(y_test_raw, y_pred_test_final),
            })
            torch.save(final_model.state_dict(), f"best_model_MLP_{output_feature}_final.pth")

# Save overall performance results to Excel
results_df = pd.DataFrame(final_all_results)
results_df.to_excel("final_results_MLP_all.xlsx", index=False)

# Save individual trial hyperparameters and performance
logs_df = pd.DataFrame(trial_all_logs)
logs_df.to_excel("trial_logs_MLP_all.xlsx", index=False)

# Feature importance

In [None]:
import os
import joblib
import pandas as pd

# Output directory
output_dir = "SHAP"
os.makedirs(output_dir, exist_ok=True)

# Load dataset; spaces in the header are replaced by underscores.
data = pd.read_excel("multiwafer_database_all_feature.xlsx")
data.columns = data.columns.str.replace(" ", "_")

# Input features, spelled as in the workbook header (with spaces).
all_features = [
    'P01', 'P02', 'P03', 'P04', 'P05', 'P06', 'Tester', 'P03 Recipe', 'P06 Recipe',
    'Paste 1', 'Paste 2', 'Dark Area %', 'Defect Area %', 'Grain Defect Area %',
    'Average Life Time', 'Sigma Life Time', 'Resistivity', 'Bow', 'Sawmark',
    'Avg THK', 'TTV', 'WARP', 'Wafer Area', 'Vendor name'
]

# Select features. The DataFrame columns were renamed above, so the lookup
# must use the underscore spelling; selecting with the names that still
# contain spaces raises KeyError.
feature_columns = [name.replace(" ", "_") for name in all_features]
X = data[feature_columns]

# Load trained model.
# NOTE: joblib.load unpickles the file and can execute arbitrary code; only
# load model files produced by this project's own training step.
model_path = "./(Efficiency)best_model_ET.pkl"
model = joblib.load(model_path)

# Feature importance, one value per input column.
importance = model.feature_importances_
if len(importance) != len(feature_columns):
    raise ValueError(
        f"Model reports {len(importance)} feature importances but "
        f"{len(feature_columns)} feature columns were selected."
    )

df_importance = pd.DataFrame({
    "Feature": X.columns,
    "Importance": importance
}).sort_values(by="Importance", ascending=False)

# Save to Excel
df_importance.to_excel(os.path.join(output_dir, "Efficiency_Feature_Importance.xlsx"), index=False)

print("Efficiency feature importance saved.")

Efficiency feature importance saved.
