In [1]:
#Importing Necessary Libraries
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

In [2]:
# ---------------------------------------------------------
# Load the simulation log data from the Excel file
# ---------------------------------------------------------
df = pd.read_excel("simulation_logs.xlsx")

# Fail fast if the workbook lacks a column that the rule checks read by
# name; otherwise the problem only surfaces later as a per-row KeyError.
required_columns = [
    "max_stress_MPa",
    "displacement_mm",
    "convergence_iters",
    "status_text",
]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
    raise KeyError(
        f"simulation_logs.xlsx is missing required columns: {missing_columns}"
    )

In [3]:
# ---------------------------------------------------------
# Validation limits taken from the rules file.
# They are spelled out as named constants so every check
# below can be read against a single, visible threshold.
# ---------------------------------------------------------

# Maximum allowed stress, in MPa
YIELD_LIMIT = 450

# Maximum allowed displacement, in mm
DISPLACEMENT_LIMIT = 2.5

# Strict upper limit on convergence iterations
MAX_ITER_LIMIT = 40

# Iteration counts above this value enter the soft warning range
SOFT_WARN_START = 20

In [4]:
def rule_check(row):
    """Check one simulation-log row against the validation rules.

    Parameters
    ----------
    row : mapping-like (for example a DataFrame row)
        Must provide the keys "max_stress_MPa", "displacement_mm",
        "convergence_iters" and "status_text".

    Returns
    -------
    str
        "OK" when no rule fires; otherwise every triggered note, joined
        with ", " in the order the rules are evaluated.

    Notes
    -----
    Thresholds come from the module-level constants YIELD_LIMIT,
    DISPLACEMENT_LIMIT, MAX_ITER_LIMIT and SOFT_WARN_START.
    """
    stress = row["max_stress_MPa"]
    disp = row["displacement_mm"]
    iters = row["convergence_iters"]
    # A missing status is treated as an empty string; matching is case-insensitive.
    status = str(row["status_text"]).lower() if pd.notna(row["status_text"]) else ""

    has_stress = pd.notna(stress)
    has_disp = pd.notna(disp)
    has_iters = pd.notna(iters)

    # Single definition of "the solver reported a failure". It is reused by
    # the conflicting-signal rule so that a status such as "Non-converged"
    # can never count as both a failure and a clean "converged" run.
    # NOTE(review): these are plain substring matches, so unrelated words
    # containing "not"/"non"/"fail" would also match -- confirm against the
    # real status vocabulary.
    reports_failure = any(marker in status for marker in ("not", "non", "fail"))

    # Solver claims success only when "converged" appears with no failure marker.
    text_ok = "converged" in status and not reports_failure

    # "At or beyond the limit" (>=) is deliberate here: a value sitting
    # exactly on a limit is not a hard fail but is still suspicious.
    extreme_values = (has_stress and stress >= YIELD_LIMIT) or \
                     (has_disp and disp >= DISPLACEMENT_LIMIT)

    # (condition, note) pairs, listed in the order the notes must appear.
    checks = [
        # -------- Hard fail checks --------
        (has_stress and stress > YIELD_LIMIT, "Stress above limit"),
        (has_disp and disp > DISPLACEMENT_LIMIT, "Displacement above limit"),
        (has_iters and iters > MAX_ITER_LIMIT, "Convergence iterations too high"),
        (reports_failure, "Status indicates non-convergence"),
        # -------- Soft warnings --------
        (has_iters and SOFT_WARN_START < iters <= MAX_ITER_LIMIT,
         "Iterations in warning range"),
        (not (has_stress and has_disp and has_iters),
         "Missing or incomplete data"),
        (text_ok and extreme_values, "Converged but values look extreme"),
    ]

    notes = [message for fired, message in checks if fired]
    return ", ".join(notes) if notes else "OK"

In [5]:
# ---------------------------------------------------------
# Run rule_check once per row and keep the resulting note
# string in a new "rule_result" column
# ---------------------------------------------------------
df["rule_result"] = df.apply(rule_check, axis="columns")

In [6]:
# ---------------------------------------------------------
# Prepare numeric data for ML model.
# Some rows may have NaN, so filling them with column means.
# This keeps it simple and avoids dropping rows.
# ---------------------------------------------------------
# "ml_flag" is written back into df by the anomaly-detection cell and is
# itself numeric. Dropping it here (when present) keeps this cell
# idempotent: re-running it after the model cell must not feed the
# model's own predictions back in as an input feature.
numeric_data = (
    df.select_dtypes(include=["number"])
      .drop(columns=["ml_flag"], errors="ignore")
)
numeric_data = numeric_data.fillna(numeric_data.mean())

In [7]:
# Standardise the numeric features; fit_transform learns the scaling
# from numeric_data and returns the scaled values as an array.
scaler = StandardScaler()
numeric_array = scaler.fit_transform(numeric_data)

In [8]:
# ---------------------------------------------------------
# Anomaly check: fit an IsolationForest on the scaled
# features and label every row of that same array
# ---------------------------------------------------------
model = IsolationForest(random_state=5, contamination=0.3)

# fit_predict trains on numeric_array and predicts on it in one call;
# the later verdict step treats a label of -1 as "unusual".
df["ml_flag"] = model.fit_predict(numeric_array)



In [9]:
# ---------------------------------------------------------
# Merge the rule findings and the ML flag into one verdict.
# Rule findings win: the ML flag is only consulted for rows
# that the rules passed as "OK".
# ---------------------------------------------------------
final_output = [
    rule_note
    if rule_note != "OK"
    else ("ML flagged as unusual" if flag == -1 else "Valid")
    for rule_note, flag in zip(df["rule_result"], df["ml_flag"])
]

df["final_verdict"] = final_output

In [10]:
# ---------------------------------------------------------
# Preview: first eight rows of the inputs next to the
# rule result, the ML flag and the combined verdict
# ---------------------------------------------------------
preview_columns = [
    "max_stress_MPa",
    "displacement_mm",
    "convergence_iters",
    "rule_result",
    "ml_flag",
    "final_verdict",
]
print(df[preview_columns].head(8))

   max_stress_MPa  displacement_mm  convergence_iters  \
0           320.0              1.2               18.0   
1           890.0              5.6               22.0   
2          1100.0              0.9               45.0   
3             NaN              2.1               15.0   
4           210.0             12.5                8.0   
5           450.0              1.8               90.0   
6          3000.0              0.4               12.0   
7           410.0              1.6                NaN   

                                         rule_result  ml_flag  \
0                                                 OK        1   
1  Stress above limit, Displacement above limit, ...        1   
2  Stress above limit, Convergence iterations too...       -1   
3                         Missing or incomplete data        1   
4  Displacement above limit, Converged but values...       -1   
5  Convergence iterations too high, Converged but...       -1   
6  Stress above limit, Converge

In [12]:
# Write the processed frame (without the index column) to Excel.
# The path is held in one variable so the confirmation message can
# never drift away from the file that was actually written.
output_file = "final_simulation_output.xlsx"
df.to_excel(output_file, index=False)
print(f"Saved {output_file}")

Saved final_simulation_output.xlsx


In [44]:
# Note: IsolationForest was chosen because the dataset has no labels, so an unsupervised anomaly-detection method is required.