In [None]:
# (Trial 1) Sample = 100 (lowest 0.25%) / OPTUNA (TPE)

import os
import time
import joblib
import numpy as np
import pandas as pd
import optuna
from datetime import timedelta

optuna.logging.set_verbosity(optuna.logging.WARNING)

# Load data and model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
data = pd.read_excel("multiwafer_database.xlsx")
data.columns = data.columns.str.replace(" ", "_")
model = joblib.load("../1. Training/(Efficiency)best_model_ET.pkl")

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Columns the optimizer varies for each data point.
equipment_features = ['P02', 'P03', 'P04', 'P05', 'P06', 'Tester']
# Columns copied unchanged from each data point into the model input.
quality_features = [
    'P01', 'Dark_Area_%', 'Defect_Area_%', 'Grain_Defect_Area_%',
    'Average_Life_Time', 'Sigma_Life_Time',
    'Resistivity', 'Wafer_Area', 'Vendor_name'
]
# Feature names recorded on the loaded model; used to select the input columns.
trained_features = model.feature_names_in_

# (low, high) bounds handed to trial.suggest_int for each equipment feature.
param_ranges = {
    'P02': (1, 6), 'P03': (1, 15),
    'P04': (1, 6), 'P05': (1, 15), 'P06': (1, 27),
    'Tester': (1, 20)
}

# Define efficiency threshold by percentile
percentile = 0.25
eff_threshold = np.percentile(data["Efficiency"], percentile)

# Keep the rows whose efficiency is at or below the percentile threshold
low_eff_samples = data[data["Efficiency"] <= eff_threshold]
total_count = len(data)
sample_count = len(low_eff_samples)
sample_percent = (sample_count / total_count) * 100  # selected share of the data set, in percent
print(f"Selected {sample_count} samples out of {total_count} (lowest {percentile}%)")

# Randomly sample up to SAMPLE_SIZE data points. DataFrame.sample raises
# ValueError when n exceeds the number of available rows (replace=False), so
# the request is clamped to what the filter actually returned.
SAMPLE_SIZE = 100
n_samples = min(SAMPLE_SIZE, sample_count)
if n_samples < SAMPLE_SIZE:
    print(f"Warning: only {sample_count} rows available; sampling {n_samples} instead of {SAMPLE_SIZE}")
samples = low_eff_samples.sample(n=n_samples, random_state=RANDOM_STATE)

# (init_pt, n_iter) pairs: init_pt is the number of TPE startup trials and
# init_pt + n_iter is the total number of trials per data point.
bo_configs = [
    (20, 80), (20, 180), (20, 280), (20, 380), (20, 480)
]

output_path = "Golden_path_trial_1.xlsx"
with pd.ExcelWriter(output_path) as writer:
    # Each (init_pt, n_iter) configuration is written to its own worksheet.
    for init_pt, n_iter in bo_configs:
        results = []
        config_name = f"init{init_pt}_niter{n_iter}"
        total_trials = init_pt + n_iter

        for idx, sample in samples.iterrows():
            quality_values = sample[quality_features]
            initial_eff = sample["Efficiency"]
            initial_path = {key: sample[key] for key in equipment_features}
            # The quality inputs do not change between trials, so convert them
            # once per data point instead of once per trial.
            fixed_inputs = quality_values.to_dict()

            def objective(trial, fixed_inputs=fixed_inputs):
                """Return the model's predicted efficiency for one suggested path.

                The suggested integer equipment settings are merged with this
                data point's fixed quality inputs and passed to the model.
                ``fixed_inputs`` is bound as a default argument so the function
                does not depend on the loop variable being unchanged.
                """
                path = {
                    key: trial.suggest_int(key, *param_ranges[key])
                    for key in equipment_features
                }
                full_input = {**path, **fixed_inputs}
                input_df = pd.DataFrame([full_input])[trained_features]
                return model.predict(input_df)[0]

            # perf_counter is monotonic, so the measured duration is not
            # affected by system clock adjustments.
            start = time.perf_counter()
            # A new study per data point, with the TPE sampler seeded by RANDOM_STATE.
            study = optuna.create_study(
                direction="maximize",
                sampler=optuna.samplers.TPESampler(
                    n_startup_trials=init_pt,
                    seed=RANDOM_STATE
                )
            )
            study.optimize(objective, n_trials=total_trials)
            elapsed = time.perf_counter() - start

            best_path = study.best_params

            # NOTE: "Initial Efficiency" is the value stored in the data row,
            # while "Optimized Efficiency" is the model's prediction for the
            # best path found by the study.
            result_row = {
                "Data Point": idx,
                "Initial Efficiency": initial_eff,
                "Optimized Efficiency": study.best_value,
                "Elapsed Time": str(timedelta(seconds=int(elapsed)))
            }

            # Add original and optimized path as separate columns
            for key in equipment_features:
                result_row[f"Initial_{key}"] = initial_path[key]
                result_row[f"Best_{key}"] = best_path[key]

            results.append(result_row)

        df_results = pd.DataFrame(results)
        df_results.to_excel(writer, sheet_name=config_name, index=False)
        print(f"Saved: {config_name}")

print(f"\nAll results saved to: {output_path}")

Selected 257 samples out of 102488 (lowest 0.25%)
Saved: init20_niter80
Saved: init20_niter180
Saved: init20_niter280
Saved: init20_niter380
Saved: init20_niter480

All results saved to: Golden_path_trial_1.xlsx


In [None]:
# (Trial 2) Sample = 100 (lowest 0.5%) / OPTUNA (TPE)

import os
import time
import joblib
import numpy as np
import pandas as pd
import optuna
from datetime import timedelta

optuna.logging.set_verbosity(optuna.logging.WARNING)

# Load data and model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
data = pd.read_excel("multiwafer_database.xlsx")
data.columns = data.columns.str.replace(" ", "_")
model = joblib.load("../1. Training/(Efficiency)best_model_ET.pkl")

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Columns the optimizer varies for each data point.
equipment_features = ['P02', 'P03', 'P04', 'P05', 'P06', 'Tester']
# Columns copied unchanged from each data point into the model input.
quality_features = [
    'P01', 'Dark_Area_%', 'Defect_Area_%', 'Grain_Defect_Area_%',
    'Average_Life_Time', 'Sigma_Life_Time',
    'Resistivity', 'Wafer_Area', 'Vendor_name'
]
# Feature names recorded on the loaded model; used to select the input columns.
trained_features = model.feature_names_in_

# (low, high) bounds handed to trial.suggest_int for each equipment feature.
param_ranges = {
    'P02': (1, 6), 'P03': (1, 15),
    'P04': (1, 6), 'P05': (1, 15), 'P06': (1, 27),
    'Tester': (1, 20)
}

# Define efficiency threshold by percentile
percentile = 0.5
eff_threshold = np.percentile(data["Efficiency"], percentile)

# Keep the rows whose efficiency is at or below the percentile threshold
low_eff_samples = data[data["Efficiency"] <= eff_threshold]
total_count = len(data)
sample_count = len(low_eff_samples)
sample_percent = (sample_count / total_count) * 100  # selected share of the data set, in percent
print(f"Selected {sample_count} samples out of {total_count} (lowest {percentile}%)")

# Randomly sample up to SAMPLE_SIZE data points. DataFrame.sample raises
# ValueError when n exceeds the number of available rows (replace=False), so
# the request is clamped to what the filter actually returned.
SAMPLE_SIZE = 100
n_samples = min(SAMPLE_SIZE, sample_count)
if n_samples < SAMPLE_SIZE:
    print(f"Warning: only {sample_count} rows available; sampling {n_samples} instead of {SAMPLE_SIZE}")
samples = low_eff_samples.sample(n=n_samples, random_state=RANDOM_STATE)

# (init_pt, n_iter) pairs: init_pt is the number of TPE startup trials and
# init_pt + n_iter is the total number of trials per data point.
bo_configs = [
    (20, 80), (20, 180), (20, 280), (20, 380), (20, 480)
]

output_path = "Golden_path_trial_2.xlsx"
with pd.ExcelWriter(output_path) as writer:
    # Each (init_pt, n_iter) configuration is written to its own worksheet.
    for init_pt, n_iter in bo_configs:
        results = []
        config_name = f"init{init_pt}_niter{n_iter}"
        total_trials = init_pt + n_iter

        for idx, sample in samples.iterrows():
            quality_values = sample[quality_features]
            initial_eff = sample["Efficiency"]
            initial_path = {key: sample[key] for key in equipment_features}
            # The quality inputs do not change between trials, so convert them
            # once per data point instead of once per trial.
            fixed_inputs = quality_values.to_dict()

            def objective(trial, fixed_inputs=fixed_inputs):
                """Return the model's predicted efficiency for one suggested path.

                The suggested integer equipment settings are merged with this
                data point's fixed quality inputs and passed to the model.
                ``fixed_inputs`` is bound as a default argument so the function
                does not depend on the loop variable being unchanged.
                """
                path = {
                    key: trial.suggest_int(key, *param_ranges[key])
                    for key in equipment_features
                }
                full_input = {**path, **fixed_inputs}
                input_df = pd.DataFrame([full_input])[trained_features]
                return model.predict(input_df)[0]

            # perf_counter is monotonic, so the measured duration is not
            # affected by system clock adjustments.
            start = time.perf_counter()
            # A new study per data point, with the TPE sampler seeded by RANDOM_STATE.
            study = optuna.create_study(
                direction="maximize",
                sampler=optuna.samplers.TPESampler(
                    n_startup_trials=init_pt,
                    seed=RANDOM_STATE
                )
            )
            study.optimize(objective, n_trials=total_trials)
            elapsed = time.perf_counter() - start

            best_path = study.best_params

            # NOTE: "Initial Efficiency" is the value stored in the data row,
            # while "Optimized Efficiency" is the model's prediction for the
            # best path found by the study.
            result_row = {
                "Data Point": idx,
                "Initial Efficiency": initial_eff,
                "Optimized Efficiency": study.best_value,
                "Elapsed Time": str(timedelta(seconds=int(elapsed)))
            }

            # Add original and optimized path as separate columns
            for key in equipment_features:
                result_row[f"Initial_{key}"] = initial_path[key]
                result_row[f"Best_{key}"] = best_path[key]

            results.append(result_row)

        df_results = pd.DataFrame(results)
        df_results.to_excel(writer, sheet_name=config_name, index=False)
        print(f"Saved: {config_name}")

print(f"\nAll results saved to: {output_path}")

Selected 513 samples out of 102488 (lowest 0.5%)
Saved: init20_niter80
Saved: init20_niter180
Saved: init20_niter280
Saved: init20_niter380
Saved: init20_niter480

All results saved to: Golden_path_trial_2.xlsx


In [None]:
# (Trial 3) Sample = 100 (lowest 1%) / OPTUNA (TPE)

import os
import time
import joblib
import numpy as np
import pandas as pd
import optuna
from datetime import timedelta

optuna.logging.set_verbosity(optuna.logging.WARNING)

# Load data and model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
data = pd.read_excel("multiwafer_database.xlsx")
data.columns = data.columns.str.replace(" ", "_")
model = joblib.load("../1. Training/(Efficiency)best_model_ET.pkl")

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Columns the optimizer varies for each data point.
equipment_features = ['P02', 'P03', 'P04', 'P05', 'P06', 'Tester']
# Columns copied unchanged from each data point into the model input.
quality_features = [
    'P01', 'Dark_Area_%', 'Defect_Area_%', 'Grain_Defect_Area_%',
    'Average_Life_Time', 'Sigma_Life_Time',
    'Resistivity', 'Wafer_Area', 'Vendor_name'
]
# Feature names recorded on the loaded model; used to select the input columns.
trained_features = model.feature_names_in_

# (low, high) bounds handed to trial.suggest_int for each equipment feature.
param_ranges = {
    'P02': (1, 6), 'P03': (1, 15),
    'P04': (1, 6), 'P05': (1, 15), 'P06': (1, 27),
    'Tester': (1, 20)
}

# Define efficiency threshold by percentile
percentile = 1
eff_threshold = np.percentile(data["Efficiency"], percentile)

# Keep the rows whose efficiency is at or below the percentile threshold
low_eff_samples = data[data["Efficiency"] <= eff_threshold]
total_count = len(data)
sample_count = len(low_eff_samples)
sample_percent = (sample_count / total_count) * 100  # selected share of the data set, in percent
print(f"Selected {sample_count} samples out of {total_count} (lowest {percentile}%)")

# Randomly sample up to SAMPLE_SIZE data points. DataFrame.sample raises
# ValueError when n exceeds the number of available rows (replace=False), so
# the request is clamped to what the filter actually returned.
SAMPLE_SIZE = 100
n_samples = min(SAMPLE_SIZE, sample_count)
if n_samples < SAMPLE_SIZE:
    print(f"Warning: only {sample_count} rows available; sampling {n_samples} instead of {SAMPLE_SIZE}")
samples = low_eff_samples.sample(n=n_samples, random_state=RANDOM_STATE)

# (init_pt, n_iter) pairs: init_pt is the number of TPE startup trials and
# init_pt + n_iter is the total number of trials per data point.
bo_configs = [
    (20, 80), (20, 180), (20, 280), (20, 380), (20, 480)
]

output_path = "Golden_path_trial_3.xlsx"
with pd.ExcelWriter(output_path) as writer:
    # Each (init_pt, n_iter) configuration is written to its own worksheet.
    for init_pt, n_iter in bo_configs:
        results = []
        config_name = f"init{init_pt}_niter{n_iter}"
        total_trials = init_pt + n_iter

        for idx, sample in samples.iterrows():
            quality_values = sample[quality_features]
            initial_eff = sample["Efficiency"]
            initial_path = {key: sample[key] for key in equipment_features}
            # The quality inputs do not change between trials, so convert them
            # once per data point instead of once per trial.
            fixed_inputs = quality_values.to_dict()

            def objective(trial, fixed_inputs=fixed_inputs):
                """Return the model's predicted efficiency for one suggested path.

                The suggested integer equipment settings are merged with this
                data point's fixed quality inputs and passed to the model.
                ``fixed_inputs`` is bound as a default argument so the function
                does not depend on the loop variable being unchanged.
                """
                path = {
                    key: trial.suggest_int(key, *param_ranges[key])
                    for key in equipment_features
                }
                full_input = {**path, **fixed_inputs}
                input_df = pd.DataFrame([full_input])[trained_features]
                return model.predict(input_df)[0]

            # perf_counter is monotonic, so the measured duration is not
            # affected by system clock adjustments.
            start = time.perf_counter()
            # A new study per data point, with the TPE sampler seeded by RANDOM_STATE.
            study = optuna.create_study(
                direction="maximize",
                sampler=optuna.samplers.TPESampler(
                    n_startup_trials=init_pt,
                    seed=RANDOM_STATE
                )
            )
            study.optimize(objective, n_trials=total_trials)
            elapsed = time.perf_counter() - start

            best_path = study.best_params

            # NOTE: "Initial Efficiency" is the value stored in the data row,
            # while "Optimized Efficiency" is the model's prediction for the
            # best path found by the study.
            result_row = {
                "Data Point": idx,
                "Initial Efficiency": initial_eff,
                "Optimized Efficiency": study.best_value,
                "Elapsed Time": str(timedelta(seconds=int(elapsed)))
            }

            # Add original and optimized path as separate columns
            for key in equipment_features:
                result_row[f"Initial_{key}"] = initial_path[key]
                result_row[f"Best_{key}"] = best_path[key]

            results.append(result_row)

        df_results = pd.DataFrame(results)
        df_results.to_excel(writer, sheet_name=config_name, index=False)
        print(f"Saved: {config_name}")

print(f"\nAll results saved to: {output_path}")

Selected 1025 samples out of 102488 (lowest 1%)
Saved: init20_niter80
Saved: init20_niter180
Saved: init20_niter280
Saved: init20_niter380
Saved: init20_niter480

All results saved to: Golden_path_trial_3.xlsx


In [None]:
# (Trial 4) Sample = 100 (lowest 2.5%) / OPTUNA (TPE)

import os
import time
import joblib
import numpy as np
import pandas as pd
import optuna
from datetime import timedelta

optuna.logging.set_verbosity(optuna.logging.WARNING)

# Load data and model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
data = pd.read_excel("multiwafer_database.xlsx")
data.columns = data.columns.str.replace(" ", "_")
model = joblib.load("../1. Training/(Efficiency)best_model_ET.pkl")

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Columns the optimizer varies for each data point.
equipment_features = ['P02', 'P03', 'P04', 'P05', 'P06', 'Tester']
# Columns copied unchanged from each data point into the model input.
quality_features = [
    'P01', 'Dark_Area_%', 'Defect_Area_%', 'Grain_Defect_Area_%',
    'Average_Life_Time', 'Sigma_Life_Time',
    'Resistivity', 'Wafer_Area', 'Vendor_name'
]
# Feature names recorded on the loaded model; used to select the input columns.
trained_features = model.feature_names_in_

# (low, high) bounds handed to trial.suggest_int for each equipment feature.
param_ranges = {
    'P02': (1, 6), 'P03': (1, 15),
    'P04': (1, 6), 'P05': (1, 15), 'P06': (1, 27),
    'Tester': (1, 20)
}

# Define efficiency threshold by percentile
percentile = 2.5
eff_threshold = np.percentile(data["Efficiency"], percentile)

# Keep the rows whose efficiency is at or below the percentile threshold
low_eff_samples = data[data["Efficiency"] <= eff_threshold]
total_count = len(data)
sample_count = len(low_eff_samples)
sample_percent = (sample_count / total_count) * 100  # selected share of the data set, in percent
print(f"Selected {sample_count} samples out of {total_count} (lowest {percentile}%)")

# Randomly sample up to SAMPLE_SIZE data points. DataFrame.sample raises
# ValueError when n exceeds the number of available rows (replace=False), so
# the request is clamped to what the filter actually returned.
SAMPLE_SIZE = 100
n_samples = min(SAMPLE_SIZE, sample_count)
if n_samples < SAMPLE_SIZE:
    print(f"Warning: only {sample_count} rows available; sampling {n_samples} instead of {SAMPLE_SIZE}")
samples = low_eff_samples.sample(n=n_samples, random_state=RANDOM_STATE)

# (init_pt, n_iter) pairs: init_pt is the number of TPE startup trials and
# init_pt + n_iter is the total number of trials per data point.
bo_configs = [
    (20, 80), (20, 180), (20, 280), (20, 380), (20, 480)
]

output_path = "Golden_path_trial_4.xlsx"
with pd.ExcelWriter(output_path) as writer:
    # Each (init_pt, n_iter) configuration is written to its own worksheet.
    for init_pt, n_iter in bo_configs:
        results = []
        config_name = f"init{init_pt}_niter{n_iter}"
        total_trials = init_pt + n_iter

        for idx, sample in samples.iterrows():
            quality_values = sample[quality_features]
            initial_eff = sample["Efficiency"]
            initial_path = {key: sample[key] for key in equipment_features}
            # The quality inputs do not change between trials, so convert them
            # once per data point instead of once per trial.
            fixed_inputs = quality_values.to_dict()

            def objective(trial, fixed_inputs=fixed_inputs):
                """Return the model's predicted efficiency for one suggested path.

                The suggested integer equipment settings are merged with this
                data point's fixed quality inputs and passed to the model.
                ``fixed_inputs`` is bound as a default argument so the function
                does not depend on the loop variable being unchanged.
                """
                path = {
                    key: trial.suggest_int(key, *param_ranges[key])
                    for key in equipment_features
                }
                full_input = {**path, **fixed_inputs}
                input_df = pd.DataFrame([full_input])[trained_features]
                return model.predict(input_df)[0]

            # perf_counter is monotonic, so the measured duration is not
            # affected by system clock adjustments.
            start = time.perf_counter()
            # A new study per data point, with the TPE sampler seeded by RANDOM_STATE.
            study = optuna.create_study(
                direction="maximize",
                sampler=optuna.samplers.TPESampler(
                    n_startup_trials=init_pt,
                    seed=RANDOM_STATE
                )
            )
            study.optimize(objective, n_trials=total_trials)
            elapsed = time.perf_counter() - start

            best_path = study.best_params

            # NOTE: "Initial Efficiency" is the value stored in the data row,
            # while "Optimized Efficiency" is the model's prediction for the
            # best path found by the study.
            result_row = {
                "Data Point": idx,
                "Initial Efficiency": initial_eff,
                "Optimized Efficiency": study.best_value,
                "Elapsed Time": str(timedelta(seconds=int(elapsed)))
            }

            # Add original and optimized path as separate columns
            for key in equipment_features:
                result_row[f"Initial_{key}"] = initial_path[key]
                result_row[f"Best_{key}"] = best_path[key]

            results.append(result_row)

        df_results = pd.DataFrame(results)
        df_results.to_excel(writer, sheet_name=config_name, index=False)
        print(f"Saved: {config_name}")

print(f"\nAll results saved to: {output_path}")

Selected 2563 samples out of 102488 (lowest 2.5%)
Saved: init20_niter80
Saved: init20_niter180
Saved: init20_niter280
Saved: init20_niter380
Saved: init20_niter480

All results saved to: Golden_path_trial_4.xlsx


In [None]:
# (Trial 5) Sample = 100 (lowest 5%) / OPTUNA (TPE)

import os
import time
import joblib
import numpy as np
import pandas as pd
import optuna
from datetime import timedelta

optuna.logging.set_verbosity(optuna.logging.WARNING)

# Load data and model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
data = pd.read_excel("multiwafer_database.xlsx")
data.columns = data.columns.str.replace(" ", "_")
model = joblib.load("../1. Training/(Efficiency)best_model_ET.pkl")

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Columns the optimizer varies for each data point.
equipment_features = ['P02', 'P03', 'P04', 'P05', 'P06', 'Tester']
# Columns copied unchanged from each data point into the model input.
quality_features = [
    'P01', 'Dark_Area_%', 'Defect_Area_%', 'Grain_Defect_Area_%',
    'Average_Life_Time', 'Sigma_Life_Time',
    'Resistivity', 'Wafer_Area', 'Vendor_name'
]
# Feature names recorded on the loaded model; used to select the input columns.
trained_features = model.feature_names_in_

# (low, high) bounds handed to trial.suggest_int for each equipment feature.
param_ranges = {
    'P02': (1, 6), 'P03': (1, 15),
    'P04': (1, 6), 'P05': (1, 15), 'P06': (1, 27),
    'Tester': (1, 20)
}

# Define efficiency threshold by percentile
percentile = 5
eff_threshold = np.percentile(data["Efficiency"], percentile)

# Keep the rows whose efficiency is at or below the percentile threshold
low_eff_samples = data[data["Efficiency"] <= eff_threshold]
total_count = len(data)
sample_count = len(low_eff_samples)
sample_percent = (sample_count / total_count) * 100  # selected share of the data set, in percent
print(f"Selected {sample_count} samples out of {total_count} (lowest {percentile}%)")

# Randomly sample up to SAMPLE_SIZE data points. DataFrame.sample raises
# ValueError when n exceeds the number of available rows (replace=False), so
# the request is clamped to what the filter actually returned.
SAMPLE_SIZE = 100
n_samples = min(SAMPLE_SIZE, sample_count)
if n_samples < SAMPLE_SIZE:
    print(f"Warning: only {sample_count} rows available; sampling {n_samples} instead of {SAMPLE_SIZE}")
samples = low_eff_samples.sample(n=n_samples, random_state=RANDOM_STATE)

# (init_pt, n_iter) pairs: init_pt is the number of TPE startup trials and
# init_pt + n_iter is the total number of trials per data point.
bo_configs = [
    (20, 80), (20, 180), (20, 280), (20, 380), (20, 480)
]

output_path = "Golden_path_trial_5.xlsx"
with pd.ExcelWriter(output_path) as writer:
    # Each (init_pt, n_iter) configuration is written to its own worksheet.
    for init_pt, n_iter in bo_configs:
        results = []
        config_name = f"init{init_pt}_niter{n_iter}"
        total_trials = init_pt + n_iter

        for idx, sample in samples.iterrows():
            quality_values = sample[quality_features]
            initial_eff = sample["Efficiency"]
            initial_path = {key: sample[key] for key in equipment_features}
            # The quality inputs do not change between trials, so convert them
            # once per data point instead of once per trial.
            fixed_inputs = quality_values.to_dict()

            def objective(trial, fixed_inputs=fixed_inputs):
                """Return the model's predicted efficiency for one suggested path.

                The suggested integer equipment settings are merged with this
                data point's fixed quality inputs and passed to the model.
                ``fixed_inputs`` is bound as a default argument so the function
                does not depend on the loop variable being unchanged.
                """
                path = {
                    key: trial.suggest_int(key, *param_ranges[key])
                    for key in equipment_features
                }
                full_input = {**path, **fixed_inputs}
                input_df = pd.DataFrame([full_input])[trained_features]
                return model.predict(input_df)[0]

            # perf_counter is monotonic, so the measured duration is not
            # affected by system clock adjustments.
            start = time.perf_counter()
            # A new study per data point, with the TPE sampler seeded by RANDOM_STATE.
            study = optuna.create_study(
                direction="maximize",
                sampler=optuna.samplers.TPESampler(
                    n_startup_trials=init_pt,
                    seed=RANDOM_STATE
                )
            )
            study.optimize(objective, n_trials=total_trials)
            elapsed = time.perf_counter() - start

            best_path = study.best_params

            # NOTE: "Initial Efficiency" is the value stored in the data row,
            # while "Optimized Efficiency" is the model's prediction for the
            # best path found by the study.
            result_row = {
                "Data Point": idx,
                "Initial Efficiency": initial_eff,
                "Optimized Efficiency": study.best_value,
                "Elapsed Time": str(timedelta(seconds=int(elapsed)))
            }

            # Add original and optimized path as separate columns
            for key in equipment_features:
                result_row[f"Initial_{key}"] = initial_path[key]
                result_row[f"Best_{key}"] = best_path[key]

            results.append(result_row)

        df_results = pd.DataFrame(results)
        df_results.to_excel(writer, sheet_name=config_name, index=False)
        print(f"Saved: {config_name}")

print(f"\nAll results saved to: {output_path}")

Selected 5125 samples out of 102488 (lowest 5%)
Saved: init20_niter80
Saved: init20_niter180
Saved: init20_niter280
Saved: init20_niter380
Saved: init20_niter480

All results saved to: Golden_path_trial_5.xlsx


In [None]:
# (Trial 6) Sample = 100 (lowest 10%) / OPTUNA (TPE)

import os
import time
import joblib
import numpy as np
import pandas as pd
import optuna
from datetime import timedelta

optuna.logging.set_verbosity(optuna.logging.WARNING)

# Load data and model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
data = pd.read_excel("multiwafer_database.xlsx")
data.columns = data.columns.str.replace(" ", "_")
model = joblib.load("../1. Training/(Efficiency)best_model_ET.pkl")

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Columns the optimizer varies for each data point.
equipment_features = ['P02', 'P03', 'P04', 'P05', 'P06', 'Tester']
# Columns copied unchanged from each data point into the model input.
quality_features = [
    'P01', 'Dark_Area_%', 'Defect_Area_%', 'Grain_Defect_Area_%',
    'Average_Life_Time', 'Sigma_Life_Time',
    'Resistivity', 'Wafer_Area', 'Vendor_name'
]
# Feature names recorded on the loaded model; used to select the input columns.
trained_features = model.feature_names_in_

# (low, high) bounds handed to trial.suggest_int for each equipment feature.
param_ranges = {
    'P02': (1, 6), 'P03': (1, 15),
    'P04': (1, 6), 'P05': (1, 15), 'P06': (1, 27),
    'Tester': (1, 20)
}

# Define efficiency threshold by percentile
percentile = 10
eff_threshold = np.percentile(data["Efficiency"], percentile)

# Keep the rows whose efficiency is at or below the percentile threshold
low_eff_samples = data[data["Efficiency"] <= eff_threshold]
total_count = len(data)
sample_count = len(low_eff_samples)
sample_percent = (sample_count / total_count) * 100  # selected share of the data set, in percent
print(f"Selected {sample_count} samples out of {total_count} (lowest {percentile}%)")

# Randomly sample up to SAMPLE_SIZE data points. DataFrame.sample raises
# ValueError when n exceeds the number of available rows (replace=False), so
# the request is clamped to what the filter actually returned.
SAMPLE_SIZE = 100
n_samples = min(SAMPLE_SIZE, sample_count)
if n_samples < SAMPLE_SIZE:
    print(f"Warning: only {sample_count} rows available; sampling {n_samples} instead of {SAMPLE_SIZE}")
samples = low_eff_samples.sample(n=n_samples, random_state=RANDOM_STATE)

# (init_pt, n_iter) pairs: init_pt is the number of TPE startup trials and
# init_pt + n_iter is the total number of trials per data point.
bo_configs = [
    (20, 80), (20, 180), (20, 280), (20, 380), (20, 480)
]

output_path = "Golden_path_trial_6.xlsx"
with pd.ExcelWriter(output_path) as writer:
    # Each (init_pt, n_iter) configuration is written to its own worksheet.
    for init_pt, n_iter in bo_configs:
        results = []
        config_name = f"init{init_pt}_niter{n_iter}"
        total_trials = init_pt + n_iter

        for idx, sample in samples.iterrows():
            quality_values = sample[quality_features]
            initial_eff = sample["Efficiency"]
            initial_path = {key: sample[key] for key in equipment_features}
            # The quality inputs do not change between trials, so convert them
            # once per data point instead of once per trial.
            fixed_inputs = quality_values.to_dict()

            def objective(trial, fixed_inputs=fixed_inputs):
                """Return the model's predicted efficiency for one suggested path.

                The suggested integer equipment settings are merged with this
                data point's fixed quality inputs and passed to the model.
                ``fixed_inputs`` is bound as a default argument so the function
                does not depend on the loop variable being unchanged.
                """
                path = {
                    key: trial.suggest_int(key, *param_ranges[key])
                    for key in equipment_features
                }
                full_input = {**path, **fixed_inputs}
                input_df = pd.DataFrame([full_input])[trained_features]
                return model.predict(input_df)[0]

            # perf_counter is monotonic, so the measured duration is not
            # affected by system clock adjustments.
            start = time.perf_counter()
            # A new study per data point, with the TPE sampler seeded by RANDOM_STATE.
            study = optuna.create_study(
                direction="maximize",
                sampler=optuna.samplers.TPESampler(
                    n_startup_trials=init_pt,
                    seed=RANDOM_STATE
                )
            )
            study.optimize(objective, n_trials=total_trials)
            elapsed = time.perf_counter() - start

            best_path = study.best_params

            # NOTE: "Initial Efficiency" is the value stored in the data row,
            # while "Optimized Efficiency" is the model's prediction for the
            # best path found by the study.
            result_row = {
                "Data Point": idx,
                "Initial Efficiency": initial_eff,
                "Optimized Efficiency": study.best_value,
                "Elapsed Time": str(timedelta(seconds=int(elapsed)))
            }

            # Add original and optimized path as separate columns
            for key in equipment_features:
                result_row[f"Initial_{key}"] = initial_path[key]
                result_row[f"Best_{key}"] = best_path[key]

            results.append(result_row)

        df_results = pd.DataFrame(results)
        df_results.to_excel(writer, sheet_name=config_name, index=False)
        print(f"Saved: {config_name}")

print(f"\nAll results saved to: {output_path}")

Selected 10249 samples out of 102488 (lowest 10%)
Saved: init20_niter80
Saved: init20_niter180
Saved: init20_niter280
Saved: init20_niter380
Saved: init20_niter480

All results saved to: Golden_path_trial_6.xlsx


In [None]:
# (Trial 7) Sample = 100 (lowest 25%) / OPTUNA (TPE)

import os
import time
import joblib
import numpy as np
import pandas as pd
import optuna
from datetime import timedelta

optuna.logging.set_verbosity(optuna.logging.WARNING)

# Load data and model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
data = pd.read_excel("multiwafer_database.xlsx")
data.columns = data.columns.str.replace(" ", "_")
model = joblib.load("../1. Training/(Efficiency)best_model_ET.pkl")

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Columns the optimizer varies for each data point.
equipment_features = ['P02', 'P03', 'P04', 'P05', 'P06', 'Tester']
# Columns copied unchanged from each data point into the model input.
quality_features = [
    'P01', 'Dark_Area_%', 'Defect_Area_%', 'Grain_Defect_Area_%',
    'Average_Life_Time', 'Sigma_Life_Time',
    'Resistivity', 'Wafer_Area', 'Vendor_name'
]
# Feature names recorded on the loaded model; used to select the input columns.
trained_features = model.feature_names_in_

# (low, high) bounds handed to trial.suggest_int for each equipment feature.
param_ranges = {
    'P02': (1, 6), 'P03': (1, 15),
    'P04': (1, 6), 'P05': (1, 15), 'P06': (1, 27),
    'Tester': (1, 20)
}

# Define efficiency threshold by percentile
percentile = 25
eff_threshold = np.percentile(data["Efficiency"], percentile)

# Keep the rows whose efficiency is at or below the percentile threshold
low_eff_samples = data[data["Efficiency"] <= eff_threshold]
total_count = len(data)
sample_count = len(low_eff_samples)
sample_percent = (sample_count / total_count) * 100  # selected share of the data set, in percent
print(f"Selected {sample_count} samples out of {total_count} (lowest {percentile}%)")

# Randomly sample up to SAMPLE_SIZE data points. DataFrame.sample raises
# ValueError when n exceeds the number of available rows (replace=False), so
# the request is clamped to what the filter actually returned.
SAMPLE_SIZE = 100
n_samples = min(SAMPLE_SIZE, sample_count)
if n_samples < SAMPLE_SIZE:
    print(f"Warning: only {sample_count} rows available; sampling {n_samples} instead of {SAMPLE_SIZE}")
samples = low_eff_samples.sample(n=n_samples, random_state=RANDOM_STATE)

# (init_pt, n_iter) pairs: init_pt is the number of TPE startup trials and
# init_pt + n_iter is the total number of trials per data point.
bo_configs = [
    (20, 80), (20, 180), (20, 280), (20, 380), (20, 480)
]

output_path = "Golden_path_trial_7.xlsx"
with pd.ExcelWriter(output_path) as writer:
    # Each (init_pt, n_iter) configuration is written to its own worksheet.
    for init_pt, n_iter in bo_configs:
        results = []
        config_name = f"init{init_pt}_niter{n_iter}"
        total_trials = init_pt + n_iter

        for idx, sample in samples.iterrows():
            quality_values = sample[quality_features]
            initial_eff = sample["Efficiency"]
            initial_path = {key: sample[key] for key in equipment_features}
            # The quality inputs do not change between trials, so convert them
            # once per data point instead of once per trial.
            fixed_inputs = quality_values.to_dict()

            def objective(trial, fixed_inputs=fixed_inputs):
                """Return the model's predicted efficiency for one suggested path.

                The suggested integer equipment settings are merged with this
                data point's fixed quality inputs and passed to the model.
                ``fixed_inputs`` is bound as a default argument so the function
                does not depend on the loop variable being unchanged.
                """
                path = {
                    key: trial.suggest_int(key, *param_ranges[key])
                    for key in equipment_features
                }
                full_input = {**path, **fixed_inputs}
                input_df = pd.DataFrame([full_input])[trained_features]
                return model.predict(input_df)[0]

            # perf_counter is monotonic, so the measured duration is not
            # affected by system clock adjustments.
            start = time.perf_counter()
            # A new study per data point, with the TPE sampler seeded by RANDOM_STATE.
            study = optuna.create_study(
                direction="maximize",
                sampler=optuna.samplers.TPESampler(
                    n_startup_trials=init_pt,
                    seed=RANDOM_STATE
                )
            )
            study.optimize(objective, n_trials=total_trials)
            elapsed = time.perf_counter() - start

            best_path = study.best_params

            # NOTE: "Initial Efficiency" is the value stored in the data row,
            # while "Optimized Efficiency" is the model's prediction for the
            # best path found by the study.
            result_row = {
                "Data Point": idx,
                "Initial Efficiency": initial_eff,
                "Optimized Efficiency": study.best_value,
                "Elapsed Time": str(timedelta(seconds=int(elapsed)))
            }

            # Add original and optimized path as separate columns
            for key in equipment_features:
                result_row[f"Initial_{key}"] = initial_path[key]
                result_row[f"Best_{key}"] = best_path[key]

            results.append(result_row)

        df_results = pd.DataFrame(results)
        df_results.to_excel(writer, sheet_name=config_name, index=False)
        print(f"Saved: {config_name}")

print(f"\nAll results saved to: {output_path}")

Selected 25622 samples out of 102488 (lowest 25%)
Saved: init20_niter80
Saved: init20_niter180
Saved: init20_niter280
Saved: init20_niter380
Saved: init20_niter480

All results saved to: Golden_path_trial_7.xlsx


In [None]:
# (Trial 8) Sample = 100 (lowest 50%) / OPTUNA (TPE)

import os
import time
import joblib
import numpy as np
import pandas as pd
import optuna
from datetime import timedelta

optuna.logging.set_verbosity(optuna.logging.WARNING)

# Load data and model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
data = pd.read_excel("multiwafer_database.xlsx")
data.columns = data.columns.str.replace(" ", "_")
model = joblib.load("../1. Training/(Efficiency)best_model_ET.pkl")

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Columns the optimizer varies for each data point.
equipment_features = ['P02', 'P03', 'P04', 'P05', 'P06', 'Tester']
# Columns copied unchanged from each data point into the model input.
quality_features = [
    'P01', 'Dark_Area_%', 'Defect_Area_%', 'Grain_Defect_Area_%',
    'Average_Life_Time', 'Sigma_Life_Time',
    'Resistivity', 'Wafer_Area', 'Vendor_name'
]
# Feature names recorded on the loaded model; used to select the input columns.
trained_features = model.feature_names_in_

# (low, high) bounds handed to trial.suggest_int for each equipment feature.
param_ranges = {
    'P02': (1, 6), 'P03': (1, 15),
    'P04': (1, 6), 'P05': (1, 15), 'P06': (1, 27),
    'Tester': (1, 20)
}

# Define efficiency threshold by percentile
percentile = 50
eff_threshold = np.percentile(data["Efficiency"], percentile)

# Keep the rows whose efficiency is at or below the percentile threshold
low_eff_samples = data[data["Efficiency"] <= eff_threshold]
total_count = len(data)
sample_count = len(low_eff_samples)
sample_percent = (sample_count / total_count) * 100  # selected share of the data set, in percent
print(f"Selected {sample_count} samples out of {total_count} (lowest {percentile}%)")

# Randomly sample up to SAMPLE_SIZE data points. DataFrame.sample raises
# ValueError when n exceeds the number of available rows (replace=False), so
# the request is clamped to what the filter actually returned.
SAMPLE_SIZE = 100
n_samples = min(SAMPLE_SIZE, sample_count)
if n_samples < SAMPLE_SIZE:
    print(f"Warning: only {sample_count} rows available; sampling {n_samples} instead of {SAMPLE_SIZE}")
samples = low_eff_samples.sample(n=n_samples, random_state=RANDOM_STATE)

# (init_pt, n_iter) pairs: init_pt is the number of TPE startup trials and
# init_pt + n_iter is the total number of trials per data point.
bo_configs = [
    (20, 80), (20, 180), (20, 280), (20, 380), (20, 480)
]

output_path = "Golden_path_trial_8.xlsx"
with pd.ExcelWriter(output_path) as writer:
    # Each (init_pt, n_iter) configuration is written to its own worksheet.
    for init_pt, n_iter in bo_configs:
        results = []
        config_name = f"init{init_pt}_niter{n_iter}"
        total_trials = init_pt + n_iter

        for idx, sample in samples.iterrows():
            quality_values = sample[quality_features]
            initial_eff = sample["Efficiency"]
            initial_path = {key: sample[key] for key in equipment_features}
            # The quality inputs do not change between trials, so convert them
            # once per data point instead of once per trial.
            fixed_inputs = quality_values.to_dict()

            def objective(trial, fixed_inputs=fixed_inputs):
                """Return the model's predicted efficiency for one suggested path.

                The suggested integer equipment settings are merged with this
                data point's fixed quality inputs and passed to the model.
                ``fixed_inputs`` is bound as a default argument so the function
                does not depend on the loop variable being unchanged.
                """
                path = {
                    key: trial.suggest_int(key, *param_ranges[key])
                    for key in equipment_features
                }
                full_input = {**path, **fixed_inputs}
                input_df = pd.DataFrame([full_input])[trained_features]
                return model.predict(input_df)[0]

            # perf_counter is monotonic, so the measured duration is not
            # affected by system clock adjustments.
            start = time.perf_counter()
            # A new study per data point, with the TPE sampler seeded by RANDOM_STATE.
            study = optuna.create_study(
                direction="maximize",
                sampler=optuna.samplers.TPESampler(
                    n_startup_trials=init_pt,
                    seed=RANDOM_STATE
                )
            )
            study.optimize(objective, n_trials=total_trials)
            elapsed = time.perf_counter() - start

            best_path = study.best_params

            # NOTE: "Initial Efficiency" is the value stored in the data row,
            # while "Optimized Efficiency" is the model's prediction for the
            # best path found by the study.
            result_row = {
                "Data Point": idx,
                "Initial Efficiency": initial_eff,
                "Optimized Efficiency": study.best_value,
                "Elapsed Time": str(timedelta(seconds=int(elapsed)))
            }

            # Add original and optimized path as separate columns
            for key in equipment_features:
                result_row[f"Initial_{key}"] = initial_path[key]
                result_row[f"Best_{key}"] = best_path[key]

            results.append(result_row)

        df_results = pd.DataFrame(results)
        df_results.to_excel(writer, sheet_name=config_name, index=False)
        print(f"Saved: {config_name}")

print(f"\nAll results saved to: {output_path}")

Selected 51245 samples out of 102488 (lowest 50%)
Saved: init20_niter80
Saved: init20_niter180
Saved: init20_niter280
Saved: init20_niter380
Saved: init20_niter480

All results saved to: Golden_path_trial_8.xlsx


In [None]:
# (Trial 9) Sample = 100 (lowest 100%) / OPTUNA (TPE)

import os
import time
import joblib
import numpy as np
import pandas as pd
import optuna
from datetime import timedelta

optuna.logging.set_verbosity(optuna.logging.WARNING)

# Load data and model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
data = pd.read_excel("multiwafer_database.xlsx")
data.columns = data.columns.str.replace(" ", "_")
model = joblib.load("../1. Training/(Efficiency)best_model_ET.pkl")

RANDOM_STATE = 42
np.random.seed(RANDOM_STATE)

# Columns the optimizer varies for each data point.
equipment_features = ['P02', 'P03', 'P04', 'P05', 'P06', 'Tester']
# Columns copied unchanged from each data point into the model input.
quality_features = [
    'P01', 'Dark_Area_%', 'Defect_Area_%', 'Grain_Defect_Area_%',
    'Average_Life_Time', 'Sigma_Life_Time',
    'Resistivity', 'Wafer_Area', 'Vendor_name'
]
# Feature names recorded on the loaded model; used to select the input columns.
trained_features = model.feature_names_in_

# (low, high) bounds handed to trial.suggest_int for each equipment feature.
param_ranges = {
    'P02': (1, 6), 'P03': (1, 15),
    'P04': (1, 6), 'P05': (1, 15), 'P06': (1, 27),
    'Tester': (1, 20)
}

# Define efficiency threshold by percentile
percentile = 100
eff_threshold = np.percentile(data["Efficiency"], percentile)

# Keep the rows whose efficiency is at or below the percentile threshold
low_eff_samples = data[data["Efficiency"] <= eff_threshold]
total_count = len(data)
sample_count = len(low_eff_samples)
sample_percent = (sample_count / total_count) * 100  # selected share of the data set, in percent
print(f"Selected {sample_count} samples out of {total_count} (lowest {percentile}%)")

# Randomly sample up to SAMPLE_SIZE data points. DataFrame.sample raises
# ValueError when n exceeds the number of available rows (replace=False), so
# the request is clamped to what the filter actually returned.
SAMPLE_SIZE = 100
n_samples = min(SAMPLE_SIZE, sample_count)
if n_samples < SAMPLE_SIZE:
    print(f"Warning: only {sample_count} rows available; sampling {n_samples} instead of {SAMPLE_SIZE}")
samples = low_eff_samples.sample(n=n_samples, random_state=RANDOM_STATE)

# (init_pt, n_iter) pairs: init_pt is the number of TPE startup trials and
# init_pt + n_iter is the total number of trials per data point.
bo_configs = [
    (20, 80), (20, 180), (20, 280), (20, 380), (20, 480)
]

output_path = "Golden_path_trial_9.xlsx"
with pd.ExcelWriter(output_path) as writer:
    # Each (init_pt, n_iter) configuration is written to its own worksheet.
    for init_pt, n_iter in bo_configs:
        results = []
        config_name = f"init{init_pt}_niter{n_iter}"
        total_trials = init_pt + n_iter

        for idx, sample in samples.iterrows():
            quality_values = sample[quality_features]
            initial_eff = sample["Efficiency"]
            initial_path = {key: sample[key] for key in equipment_features}
            # The quality inputs do not change between trials, so convert them
            # once per data point instead of once per trial.
            fixed_inputs = quality_values.to_dict()

            def objective(trial, fixed_inputs=fixed_inputs):
                """Return the model's predicted efficiency for one suggested path.

                The suggested integer equipment settings are merged with this
                data point's fixed quality inputs and passed to the model.
                ``fixed_inputs`` is bound as a default argument so the function
                does not depend on the loop variable being unchanged.
                """
                path = {
                    key: trial.suggest_int(key, *param_ranges[key])
                    for key in equipment_features
                }
                full_input = {**path, **fixed_inputs}
                input_df = pd.DataFrame([full_input])[trained_features]
                return model.predict(input_df)[0]

            # perf_counter is monotonic, so the measured duration is not
            # affected by system clock adjustments.
            start = time.perf_counter()
            # A new study per data point, with the TPE sampler seeded by RANDOM_STATE.
            study = optuna.create_study(
                direction="maximize",
                sampler=optuna.samplers.TPESampler(
                    n_startup_trials=init_pt,
                    seed=RANDOM_STATE
                )
            )
            study.optimize(objective, n_trials=total_trials)
            elapsed = time.perf_counter() - start

            best_path = study.best_params

            # NOTE: "Initial Efficiency" is the value stored in the data row,
            # while "Optimized Efficiency" is the model's prediction for the
            # best path found by the study.
            result_row = {
                "Data Point": idx,
                "Initial Efficiency": initial_eff,
                "Optimized Efficiency": study.best_value,
                "Elapsed Time": str(timedelta(seconds=int(elapsed)))
            }

            # Add original and optimized path as separate columns
            for key in equipment_features:
                result_row[f"Initial_{key}"] = initial_path[key]
                result_row[f"Best_{key}"] = best_path[key]

            results.append(result_row)

        df_results = pd.DataFrame(results)
        df_results.to_excel(writer, sheet_name=config_name, index=False)
        print(f"Saved: {config_name}")

print(f"\nAll results saved to: {output_path}")

Selected 102488 samples out of 102488 (lowest 100%)
Saved: init20_niter80
Saved: init20_niter180
Saved: init20_niter280
Saved: init20_niter380
Saved: init20_niter480

All results saved to: Golden_path_trial_9.xlsx
