In [1]:
#Importing Necessary Libraries
import pandas as pd
from sklearn.ensemble import IsolationForest

In [2]:
# ---------------------------------------------------------
# Read the simulation logs from the Excel workbook into a DataFrame.
# Every later cell works on this `df`.
# ---------------------------------------------------------
df = pd.read_excel(io="simulation_logs.xlsx")

In [3]:
# ---------------------------------------------------------
# Validation thresholds, mirroring the rules file.
# Kept together here so each check reads against a named limit.
# ---------------------------------------------------------

# Stress ceiling, in MPa.
YIELD_LIMIT = 450

# Displacement ceiling, in mm.
DISPLACEMENT_LIMIT = 2.5

# Solver iterations: the soft-warning band starts here ...
SOFT_WARN_START = 20

# ... and anything above this strict upper limit is a hard fail.
MAX_ITER_LIMIT = 40

In [4]:
def rule_check(row, *, yield_limit=None, displacement_limit=None,
               max_iter_limit=None, soft_warn_start=None):
    """Evaluate one simulation-log row against the validation rules.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series from ``df.apply(..., axis=1)``)
        Must provide "max_stress_MPa", "displacement_mm",
        "convergence_iters" and "status_text".
    yield_limit, displacement_limit, max_iter_limit, soft_warn_start : number, optional
        Keyword-only overrides for the thresholds. When omitted (None) the
        module-level YIELD_LIMIT, DISPLACEMENT_LIMIT, MAX_ITER_LIMIT and
        SOFT_WARN_START constants are used.

    Returns
    -------
    str
        "OK" when no rule fires, otherwise every triggered note joined
        with ", " (hard fails first, then soft warnings).
    """
    # Resolve thresholds lazily so the module constants are only read when
    # the caller did not supply an override.
    if yield_limit is None:
        yield_limit = YIELD_LIMIT
    if displacement_limit is None:
        displacement_limit = DISPLACEMENT_LIMIT
    if max_iter_limit is None:
        max_iter_limit = MAX_ITER_LIMIT
    if soft_warn_start is None:
        soft_warn_start = SOFT_WARN_START

    stress = row["max_stress_MPa"]
    displacement = row["displacement_mm"]
    iterations = row["convergence_iters"]
    status = row["status_text"]

    # Non-string statuses (e.g. NaN for an empty cell) skip the text checks.
    status_lower = status.lower() if isinstance(status, str) else None
    reports_non_convergence = status_lower is not None and "non" in status_lower

    notes = []

    # -------- Hard Fail Checks --------
    # Comparisons against NaN are False, so missing numbers never trip these;
    # they are reported by the missing-data warning below instead.
    if stress > yield_limit:
        notes.append("Stress above limit")

    if displacement > displacement_limit:
        notes.append("Displacement above limit")

    if iterations > max_iter_limit:
        notes.append("Convergence iterations too high")

    if reports_non_convergence:
        notes.append("Status indicates non-convergence")

    # -------- Soft Warning: iteration range --------
    if soft_warn_start <= iterations <= max_iter_limit:
        notes.append("Iterations in warning range")

    # -------- Soft Warning: missing or incomplete data --------
    if pd.isna(stress) or pd.isna(displacement) or pd.isna(iterations):
        notes.append("Missing or incomplete data")

    # -------- Soft Warning: conflicting signal --------
    # Solver says it converged but outputs sit within 10% of a limit or beyond.
    # "non-converged" also contains "converged", so statuses already flagged
    # as non-convergence are excluded; otherwise a failed run would be
    # reported as both non-converged and converged.
    # NOTE(review): other negative phrasings (e.g. "not converged") are not
    # recognised by either text check; confirm the status vocabulary.
    if status_lower is not None:
        claims_converged = "converged" in status_lower and not reports_non_convergence
        extreme_values = (
            (stress > yield_limit * 0.9) or
            (displacement > displacement_limit * 0.9)
        )
        if claims_converged and extreme_values:
            notes.append("Converged but values look extreme")

    # If nothing was triggered above
    return ", ".join(notes) if notes else "OK"

In [5]:
# ---------------------------------------------------------
# Run rule_check once per row and keep its text verdict
# in a new "rule_result" column.
# ---------------------------------------------------------
df["rule_result"] = df.apply(rule_check, axis="columns")

In [6]:
# ---------------------------------------------------------
# Prepare numeric data for ML model.
# Some rows may have NaN, so filling them with column means.
# This keeps it simple and avoids dropping rows.
#
# "ml_flag" is written back into df by the model cell and is numeric, so
# re-running this cell after the model has run would otherwise feed the
# model's own previous output back in as a feature. It is excluded here;
# errors="ignore" keeps a fresh top-to-bottom run working when the column
# does not exist yet.
#
# A column with no observed values at all has a NaN mean, so fillna cannot
# repair it; such columns carry no information and are dropped so no NaN
# reaches the model.
# ---------------------------------------------------------
numeric_data = (
    df.drop(columns=["ml_flag"], errors="ignore")
      .select_dtypes(include=["number"])
      .dropna(axis="columns", how="all")
)
numeric_data = numeric_data.fillna(numeric_data.mean())

In [7]:
# ---------------------------------------------------------
# Unsupervised anomaly check with a basic IsolationForest.
# The seed is fixed so repeated runs give the same flags.
# ---------------------------------------------------------

# Plain array view of the numeric features for consistent input
numeric_array = numeric_data.to_numpy()

model = IsolationForest(contamination=0.1, random_state=5)

# Fit on the array and score that same array in one step;
# the predictions are stored in the "ml_flag" column.
df["ml_flag"] = model.fit_predict(numeric_array)



In [8]:
# ---------------------------------------------------------
# Final verdict per row: rule findings take priority; the ML
# flag is only consulted when the rules report "OK".
# ---------------------------------------------------------
final_output = [
    rule_text if rule_text != "OK"
    else ("ML flagged as unusual" if flag == -1 else "Valid")
    for rule_text, flag in zip(df["rule_result"], df["ml_flag"])
]

df["final_verdict"] = final_output

In [9]:
# ---------------------------------------------------------
# Preview: first 8 rows of the inputs next to each verdict column
# ---------------------------------------------------------
print(
    df.loc[
        :,
        [
            "max_stress_MPa",
            "displacement_mm",
            "convergence_iters",
            "rule_result",
            "ml_flag",
            "final_verdict",
        ],
    ].head(8)
)

   max_stress_MPa  displacement_mm  convergence_iters  \
0           320.0              1.2               18.0   
1           890.0              5.6               22.0   
2          1100.0              0.9               45.0   
3             NaN              2.1               15.0   
4           210.0             12.5                8.0   
5           450.0              1.8               90.0   
6          3000.0              0.4               12.0   
7           410.0              1.6                NaN   

                                         rule_result  ml_flag  \
0                                                 OK        1   
1  Stress above limit, Displacement above limit, ...        1   
2  Stress above limit, Convergence iterations too...        1   
3                         Missing or incomplete data        1   
4  Displacement above limit, Converged but values...       -1   
5  Convergence iterations too high, Converged but...       -1   
6  Stress above limit, Converge

In [11]:
# ---------------------------------------------------------
# Write the processed frame (without the index) to a new workbook
# ---------------------------------------------------------
df.to_excel(excel_writer="final_simulation_output.xlsx", index=False)
print("Saved final_simulation_output.xlsx")

Saved final_simulation_output.xlsx


In [None]:
# Note: IsolationForest was chosen because the dataset has no labels, so an unsupervised anomaly-detection method is needed.