In [1]:
#Importing Necessary Libraries
import pandas as pd
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler

In [2]:
# ---------------------------------------------------------
# Read the raw simulation log workbook into a DataFrame.
# Every later cell works from this `df`.
# ---------------------------------------------------------
df = pd.read_excel(io="simulation_logs.xlsx")

In [3]:
# ---------------------------------------------------------
# Validation thresholds (taken from the rules file, per the
# original author). Kept together here so the rule checks
# below can be read against a single set of numbers.
# ---------------------------------------------------------

# Physical limits
YIELD_LIMIT: int = 450             # MPa
DISPLACEMENT_LIMIT: float = 2.5    # mm

# Solver iteration limits
SOFT_WARN_START: int = 20          # iterations above this enter the soft-warning range
MAX_ITER_LIMIT: int = 40           # strict upper limit

In [4]:
def rule_check(row):
    """Apply the rule-based validation checks to one simulation record.

    Parameters
    ----------
    row : mapping-like (e.g. a pandas Series or dict)
        Must provide the keys "max_stress_MPa", "displacement_mm",
        "convergence_iters" and "status_text".

    Returns
    -------
    str
        "OK" when no rule fires, otherwise the triggered rule messages
        joined with ", " in a fixed order.

    Raises
    ------
    KeyError
        If one of the four expected keys is missing from ``row``.
    """
    stress = row["max_stress_MPa"]
    disp = row["displacement_mm"]
    iters = row["convergence_iters"]
    raw_status = row["status_text"]
    # A missing status is treated as empty text so the substring checks are safe.
    status = str(raw_status).lower() if pd.notna(raw_status) else ""

    has_stress = pd.notna(stress)
    has_disp = pd.notna(disp)
    has_iters = pd.notna(iters)

    # Single definition of "the status text reports a failed run". It is used
    # both for the non-convergence note and for deciding whether the text
    # claims convergence, so e.g. "non-converged" can never be read as a
    # successful "converged" status.
    non_converged = any(marker in status for marker in ("not", "non", "fail"))

    notes = []
    if has_stress and stress > YIELD_LIMIT:
        notes.append("Stress above limit")
    if has_disp and disp > DISPLACEMENT_LIMIT:
        notes.append("Displacement above limit")
    if has_iters and iters > MAX_ITER_LIMIT:
        notes.append("Convergence iterations too high")
    if non_converged:
        notes.append("Status indicates non-convergence")
    if has_iters and SOFT_WARN_START < iters <= MAX_ITER_LIMIT:
        notes.append("Iterations in warning range")
    if not (has_stress and has_disp and has_iters):
        notes.append("Missing or incomplete data")

    # Cross-check: the text claims convergence but a value sits at or beyond
    # a limit. Note the >= here: a value exactly on the limit counts as
    # extreme even though it does not trigger the "above limit" notes.
    text_ok = "converged" in status and not non_converged
    extreme_values = (has_stress and stress >= YIELD_LIMIT) or (
        has_disp and disp >= DISPLACEMENT_LIMIT
    )
    if text_ok and extreme_values:
        notes.append("Converged but values look extreme")

    return ", ".join(notes) if notes else "OK"

# Run the rule checks row by row and store the resulting message per record.
df["rule_result"] = df.apply(rule_check, axis="columns")

In [6]:
# ---------------------------------------------------------
# Build the feature table for the ML model.
# Only numeric columns are kept; missing values are replaced
# by the mean of their own column so that no row is dropped.
# ---------------------------------------------------------
numeric_data = (
    df.select_dtypes(include=["number"])
    .pipe(lambda frame: frame.fillna(frame.mean()))
)

In [7]:
# Scale the numeric features before fitting the anomaly model.
# StandardScaler is already imported in the notebook's import cell,
# so it is not re-imported here.
scaler = StandardScaler()
numeric_array = scaler.fit_transform(numeric_data)

In [8]:
# ---------------------------------------------------------
# Fit an IsolationForest on the scaled features.
# random_state is fixed so repeated runs give the same model.
# ---------------------------------------------------------
model = IsolationForest(random_state=5, contamination='auto')
model.fit(X=numeric_array)



In [9]:
# Anomaly score for every record (lower score = more anomalous).
scores = model.decision_function(numeric_array)

# Flag a record as an outlier (-1) when its score is negative, otherwise
# mark it as an inlier (1). No percentile cut-off is applied here; the sign
# of the score is the only criterion.
outlier_mask = scores < 0
df["ml_flag"] = [-1 if is_outlier else 1 for is_outlier in outlier_mask]

In [10]:
# ---------------------------------------------------------
# Merge the rule results and the ML flag into one verdict.
# Rule findings take priority; the ML flag is only consulted
# for records that passed every rule.
# ---------------------------------------------------------
final_output = [
    rule_text
    if rule_text != "OK"
    else ("ML flagged as unusual" if flag == -1 else "Valid")
    for rule_text, flag in zip(df["rule_result"], df["ml_flag"])
]

In [12]:
# Attach the combined verdicts as a new column; final_output holds one
# entry per row of df, in row order.
df["final_verdict"] = final_output

In [13]:
# Write the annotated table to a new workbook, leaving out the DataFrame index.
df.to_excel(excel_writer="final_simulation_output.xlsx", index=False)

In [44]:
# Note: IsolationForest was chosen because the dataset contains no labels, so an unsupervised anomaly-detection method is required.